Skip to content

Commit a8823a1

Browse files
authored
slovdahl's submission (gunnarmorling#691)
* slovdahl: First submission * More JAVA_OPTS flags, 0.1s better locally
1 parent 47046f3 commit a8823a1

File tree

3 files changed

+322
-0
lines changed

3 files changed

+322
-0
lines changed

calculate_average_slovdahl.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/sh
2+
#
3+
# Copyright 2023 The original authors
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Flags that unlock preview language/API features and the experimental and
# diagnostic JVM options used further down.
UNLOCK_FLAGS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions"
# Fixed-size heap: initial and maximum heap are both 8 GiB.
HEAP_FLAGS="-Xmx8g -Xms8g"
# JIT/heap-layout tuning flags chosen by the submission author.
TUNING_FLAGS="-XX:+TrustFinalNonStaticFields -XX:-UseCompressedOops"

# Any JAVA_OPTS supplied by the caller stay in front of the submission's own flags.
JAVA_OPTS="${JAVA_OPTS} ${UNLOCK_FLAGS} ${HEAP_FLAGS} ${TUNING_FLAGS}"

# $JAVA_OPTS is intentionally left unquoted so that it is split into separate arguments.
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_slovdahl

prepare_slovdahl.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
2+
#
3+
# Copyright 2023 The original authors
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Load SDKMAN! into this shell and select the JDK used by this submission.
source "$HOME/.sdkman/bin/sdkman-init.sh"

# Standard output of `sdk use` is discarded; its standard error is left untouched.
# (The former `1>&2 > /dev/null` had the same effect: the second redirection
# replaced the first one.)
sdk use java 21.0.2-tem > /dev/null

# Build the project so that the jar used by the calculate script exists.
./mvnw verify
Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
/*
2+
* Copyright 2023 The original authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package dev.morling.onebrc;
17+
18+
import java.io.IOException;
19+
import java.lang.foreign.Arena;
20+
import java.lang.foreign.MemorySegment;
21+
import java.lang.foreign.ValueLayout;
22+
import java.nio.channels.FileChannel;
23+
import java.nio.file.Paths;
24+
import java.nio.file.StandardOpenOption;
25+
import java.util.ArrayList;
26+
import java.util.Arrays;
27+
import java.util.HashMap;
28+
import java.util.Map;
29+
import java.util.StringJoiner;
30+
import java.util.TreeMap;
31+
import java.util.concurrent.ExecutionException;
32+
import java.util.concurrent.ExecutorService;
33+
import java.util.concurrent.Executors;
34+
import java.util.concurrent.Future;
35+
36+
import static java.util.stream.Collectors.collectingAndThen;
37+
import static java.util.stream.Collectors.groupingBy;
38+
import static java.util.stream.Collectors.reducing;
39+
40+
public class CalculateAverage_slovdahl {
41+
42+
private static final String FILE = "./measurements.txt";
43+
44+
private static final int SLICE_SIZE = 1_048_576;
45+
46+
public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
47+
int segments = Runtime.getRuntime().availableProcessors() - 1;
48+
49+
try (Arena arena = Arena.ofShared();
50+
FileChannel channel = FileChannel.open(Paths.get(FILE), StandardOpenOption.READ);
51+
ExecutorService executor = Executors.newThreadPerTaskExecutor(Executors.defaultThreadFactory())) {
52+
53+
long size = channel.size();
54+
if (size < SLICE_SIZE) {
55+
segments = 1;
56+
}
57+
58+
long idealSegmentSize = size / segments;
59+
60+
MemorySegment mappedFile = channel.map(FileChannel.MapMode.READ_ONLY, 0, size, arena);
61+
var futures = new ArrayList<Future<Map<Station, MeasurementAggregator>>>(segments);
62+
63+
long segmentStart = 0;
64+
for (int i = 1; i <= segments; i++) {
65+
long actualSegmentOffset = idealSegmentSize * i;
66+
67+
while (actualSegmentOffset < size && mappedFile.get(ValueLayout.JAVA_BYTE, actualSegmentOffset) != (byte) '\n') {
68+
actualSegmentOffset++;
69+
}
70+
71+
long end = actualSegmentOffset - segmentStart;
72+
if (segmentStart + actualSegmentOffset - segmentStart + 1 < size) {
73+
end += 1;
74+
}
75+
76+
MemorySegment segment = mappedFile.asSlice(segmentStart, end);
77+
segmentStart = actualSegmentOffset + 1;
78+
79+
futures.add(executor.submit(() -> {
80+
byte[] array = new byte[SLICE_SIZE];
81+
MemorySegment bufferSegment = MemorySegment.ofArray(array);
82+
83+
long position = 0;
84+
long segmentSize = segment.byteSize();
85+
Map<Station, MeasurementAggregator> map = HashMap.newHashMap(10_000);
86+
87+
while (position < segmentSize) {
88+
long thisSliceSize = Math.min(SLICE_SIZE, segmentSize - position);
89+
90+
MemorySegment.copy(
91+
segment,
92+
ValueLayout.JAVA_BYTE,
93+
position,
94+
bufferSegment,
95+
ValueLayout.JAVA_BYTE,
96+
0,
97+
thisSliceSize);
98+
99+
if (thisSliceSize % 8 != 0) {
100+
bufferSegment
101+
.asSlice(thisSliceSize)
102+
.fill((byte) 0);
103+
}
104+
105+
int newlinePosition = 0;
106+
int startOffset = 0;
107+
while (true) {
108+
int semicolonPosition = nextOccurrence(array, (byte) ';', startOffset);
109+
if (semicolonPosition < 0) {
110+
break;
111+
}
112+
113+
int eolPosition = nextOccurrence(array, (byte) '\n', startOffset);
114+
if (eolPosition < 0) {
115+
if (semicolonPosition < segmentSize - 4) {
116+
break;
117+
}
118+
else {
119+
newlinePosition = (int) segmentSize;
120+
}
121+
}
122+
else {
123+
newlinePosition = eolPosition;
124+
}
125+
126+
byte[] nameArray = new byte[semicolonPosition - startOffset];
127+
System.arraycopy(array, startOffset, nameArray, 0, semicolonPosition - startOffset);
128+
Station station = new Station(nameArray);
129+
130+
int temperatureStart = semicolonPosition + 1;
131+
int temperatureLength = newlinePosition - semicolonPosition - 1;
132+
133+
int temperatureIntValue;
134+
if (array[temperatureStart] == '-') {
135+
if (temperatureLength == 4) {
136+
temperatureIntValue = -1 * ((array[temperatureStart + 1] - 48) * 10 +
137+
(array[temperatureStart + 3] - 48));
138+
}
139+
else {
140+
temperatureIntValue = -1 * ((array[temperatureStart + 1] - 48) * 100 +
141+
(array[temperatureStart + 2] - 48) * 10 +
142+
(array[temperatureStart + 4] - 48));
143+
}
144+
}
145+
else {
146+
if (temperatureLength == 3) {
147+
temperatureIntValue = (array[temperatureStart] - 48) * 10 +
148+
(array[temperatureStart + 2] - 48);
149+
}
150+
else {
151+
temperatureIntValue = (array[temperatureStart] - 48) * 100 +
152+
(array[temperatureStart + 1] - 48) * 10 +
153+
(array[temperatureStart + 3] - 48);
154+
}
155+
}
156+
157+
MeasurementAggregator agg = map.get(station);
158+
if (agg == null) {
159+
agg = new MeasurementAggregator();
160+
map.put(station, agg);
161+
}
162+
163+
agg.min = Math.min(agg.min, temperatureIntValue);
164+
agg.max = Math.max(agg.max, temperatureIntValue);
165+
agg.sum += temperatureIntValue;
166+
agg.count++;
167+
168+
// Make sure the next iteration won't find the same delimiters.
169+
array[semicolonPosition] = (byte) 0;
170+
array[newlinePosition] = (byte) 0;
171+
172+
startOffset = newlinePosition + 1;
173+
}
174+
175+
position += newlinePosition + 1;
176+
}
177+
178+
return map;
179+
}));
180+
}
181+
182+
TreeMap<String, ResultRow> result = futures.stream()
183+
.map(f -> {
184+
try {
185+
return f.get();
186+
}
187+
catch (InterruptedException | ExecutionException e) {
188+
throw new RuntimeException(e);
189+
}
190+
})
191+
.flatMap(m -> m.entrySet().stream())
192+
.collect(groupingBy(
193+
e -> new String(e.getKey().name()),
194+
TreeMap::new,
195+
collectingAndThen(
196+
reducing(
197+
new MeasurementAggregator(),
198+
Map.Entry::getValue,
199+
(agg1, agg2) -> {
200+
MeasurementAggregator res = new MeasurementAggregator();
201+
res.min = Math.min(agg1.min, agg2.min);
202+
res.max = Math.max(agg1.max, agg2.max);
203+
res.sum = agg1.sum + agg2.sum;
204+
res.count = agg1.count + agg2.count;
205+
206+
return res;
207+
}),
208+
agg -> new ResultRow(
209+
agg.min / 10.0,
210+
(Math.round((agg.sum / 10.0) * 10.0) / 10.0) / agg.count,
211+
agg.max / 10.0))));
212+
213+
System.out.println(result);
214+
215+
executor.shutdownNow();
216+
}
217+
}
218+
219+
private static int nextOccurrence(byte[] data, byte needle, int offset) {
220+
while (offset < data.length) {
221+
if (data[offset] == needle) {
222+
return offset;
223+
}
224+
offset++;
225+
}
226+
return -1;
227+
}
228+
229+
private record Station(byte[] name, int hash) {
230+
private Station(byte[] name) {
231+
this(name, Arrays.hashCode(name));
232+
}
233+
234+
@Override
235+
public boolean equals(Object o) {
236+
if (this == o) {
237+
return true;
238+
}
239+
if (o == null || getClass() != o.getClass()) {
240+
return false;
241+
}
242+
Station station = (Station) o;
243+
return Arrays.equals(name, station.name);
244+
}
245+
246+
@Override
247+
public int hashCode() {
248+
return hash;
249+
}
250+
251+
@Override
252+
public String toString() {
253+
return new StringJoiner(", ", Station.class.getSimpleName() + "[", "]")
254+
.add("name=" + new String(name))
255+
.add("hash=" + hash)
256+
.toString();
257+
}
258+
}
259+
260+
private static class MeasurementAggregator {
261+
private int min = Integer.MAX_VALUE;
262+
private int max = Integer.MIN_VALUE;
263+
private long sum;
264+
private long count;
265+
}
266+
267+
private record ResultRow(double min, double mean, double max) {
268+
269+
@Override
270+
public String toString() {
271+
return round(min) + "/" + round(mean) + "/" + round(max);
272+
}
273+
274+
private double round(double value) {
275+
return Math.round(value * 10.0) / 10.0;
276+
}
277+
}
278+
}

0 commit comments

Comments
 (0)