Skip to content

Commit 79f19ee

Browse files
authored
first attempt (gunnarmorling#226)
* first attempt * adapted to new organization
1 parent 4cfb544 commit 79f19ee

File tree

3 files changed

+234
-0
lines changed

3 files changed

+234
-0
lines changed

calculate_average_maximz101.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/sh
2+
#
3+
# Copyright 2023 The original authors
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
JAVA_OPTS="--enable-preview -Xmx6g"
19+
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_maximz101

prepare_maximz101.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
2+
#
3+
# Copyright 2023 The original authors
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Uncomment below to use sdk
19+
source "$HOME/.sdkman/bin/sdkman-init.sh"
20+
sdk use java 21.0.1-graal 1>&2
21+
#sdk use java 21.0.1-amzn 1>&2
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
/*
2+
* Copyright 2023 The original authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package dev.morling.onebrc;
17+
18+
import java.io.File;
19+
import java.io.IOException;
20+
import java.io.RandomAccessFile;
21+
import java.nio.MappedByteBuffer;
22+
import java.nio.channels.FileChannel;
23+
import java.nio.channels.FileChannel.MapMode;
24+
import java.nio.charset.StandardCharsets;
25+
import java.nio.file.Paths;
26+
import java.nio.file.StandardOpenOption;
27+
import java.util.*;
28+
import java.util.concurrent.CompletableFuture;
29+
import java.util.concurrent.ConcurrentHashMap;
30+
import java.util.concurrent.ExecutorService;
31+
import java.util.concurrent.Executors;
32+
33+
public class CalculateAverage_maximz101 {
34+
35+
private static final String FILE = "./measurements.txt";
36+
37+
private record Measurement(String station, double value) {
38+
}
39+
40+
private record ResultRow(double min, double mean, double max) {
41+
public String toString() {
42+
return STR."\{round(min)}/\{round(mean)}/\{round(max)}";
43+
}
44+
45+
private double round(double value) {
46+
return Math.round(value * 10.0) / 10.0;
47+
}
48+
}
49+
50+
private static class MeasurementAggregator {
51+
private double min;
52+
private double max;
53+
private double sum;
54+
private long count;
55+
56+
public MeasurementAggregator(double min, double max, double sum, long count) {
57+
this.min = min;
58+
this.max = max;
59+
this.sum = sum;
60+
this.count = count;
61+
}
62+
}
63+
64+
record FileChunkRange(long start, long end) {
65+
}
66+
67+
public static void main(String[] args) throws IOException {
68+
int parallelism = args.length == 1 ? Integer.parseInt(args[0]) : Runtime.getRuntime().availableProcessors();
69+
70+
Map<String, ResultRow> measurements = new ConcurrentHashMap<>();
71+
try (ExecutorService executor = Executors.newWorkStealingPool(parallelism)) {
72+
List<FileChunkRange> chunks = getChunks(new File(FILE), parallelism, 1_048_576);
73+
List<CompletableFuture<Void>> completableFutureList = new ArrayList<>();
74+
for (FileChunkRange chunk : chunks) {
75+
completableFutureList
76+
.add(CompletableFuture
77+
.supplyAsync(() -> computePartialAggregations(chunk), executor)
78+
.thenAccept(map -> updateResultMap(map, measurements)));
79+
}
80+
CompletableFuture.allOf(completableFutureList.toArray(new CompletableFuture[0]))
81+
.thenAccept(_ -> System.out.println(new TreeMap<>(measurements)))
82+
.join();
83+
}
84+
}
85+
86+
private static void updateResultMap(Map<String, MeasurementAggregator> map, Map<String, ResultRow> measurements) {
87+
map.forEach((station, agg) -> measurements.merge(station,
88+
new ResultRow(agg.min, agg.sum / agg.count, agg.max),
89+
(r1, r2) -> new ResultRow(Math.min(r1.min, r2.min), (r1.mean + r2.mean) / 2, Math.max(r1.max, r2.max))));
90+
}
91+
92+
private static Map<String, MeasurementAggregator> computePartialAggregations(FileChunkRange chunk) {
93+
try (FileChannel channel = FileChannel.open(Paths.get(FILE), StandardOpenOption.READ)) {
94+
MappedByteBuffer buffer = channel.map(MapMode.READ_ONLY, chunk.start(), chunk.end() - chunk.start());
95+
return process(buffer);
96+
} catch (IOException e) {
97+
throw new RuntimeException(e);
98+
}
99+
}
100+
101+
private static List<FileChunkRange> getChunks(File file, int chunksCount, int minChunkSize) {
102+
long fileSize = file.length();
103+
long chunkSize = fileSize / chunksCount;
104+
105+
if (chunkSize < minChunkSize || chunksCount == 1) {
106+
return List.of(new FileChunkRange(0, fileSize));
107+
}
108+
109+
int currentChunk = 1;
110+
long currentChunkStart = 0;
111+
long currentChunkEnd = chunkSize;
112+
var list = new ArrayList<FileChunkRange>(chunksCount);
113+
try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
114+
while (currentChunk <= chunksCount) {
115+
currentChunkEnd = findNextEOLFrom(raf, currentChunkEnd);
116+
list.add(new FileChunkRange(currentChunkStart, currentChunkEnd));
117+
// next
118+
currentChunkStart = currentChunkEnd + 1;
119+
currentChunkEnd = currentChunkStart + chunkSize;
120+
if (currentChunkEnd >= fileSize) {
121+
list.add(new FileChunkRange(currentChunkStart, fileSize));
122+
break;
123+
}
124+
currentChunk++;
125+
}
126+
} catch (IOException e) {
127+
throw new RuntimeException(e);
128+
}
129+
return list;
130+
}
131+
132+
private static long findNextEOLFrom(RandomAccessFile raf, long currentChunkEnd) throws IOException {
133+
raf.seek(currentChunkEnd);
134+
while (currentChunkEnd < raf.length() && raf.read() != '\n') {
135+
currentChunkEnd++;
136+
}
137+
return currentChunkEnd;
138+
}
139+
140+
private static Map<String, MeasurementAggregator> process(MappedByteBuffer buffer) {
141+
var map = new HashMap<String, MeasurementAggregator>();
142+
byte[] lineBytes = new byte[107];
143+
while (buffer.hasRemaining()) {
144+
int i = 0;
145+
lineBytes[i] = buffer.get();
146+
int separatorIdx = -1;
147+
while (lineBytes[i] != '\n' && buffer.hasRemaining()) {
148+
lineBytes[++i] = buffer.get();
149+
if (lineBytes[i] == ';') {
150+
separatorIdx = i;
151+
}
152+
}
153+
Measurement measurement = parseLine(lineBytes, separatorIdx, i);
154+
map.merge(measurement.station,
155+
new MeasurementAggregator(measurement.value, measurement.value, measurement.value, 1),
156+
(agg, m) -> {
157+
agg.min = Math.min(agg.min, m.min);
158+
agg.max = Math.max(agg.max, m.max);
159+
agg.sum += m.sum;
160+
agg.count++;
161+
return agg;
162+
});
163+
}
164+
return map;
165+
}
166+
167+
private static Measurement parseLine(byte[] lineBytes, int separatorIdx, int eolIdx) {
168+
return new Measurement(
169+
new String(lineBytes, 0, separatorIdx, StandardCharsets.UTF_8),
170+
bytesToDouble(lineBytes, separatorIdx + 1, eolIdx)
171+
);
172+
}
173+
174+
private static double bytesToDouble(byte[] bytes, int startIdx, int endIdx) {
175+
double d = 0d;
176+
boolean negative = bytes[startIdx] == '-';
177+
int numberStartIdx = negative ? 1 + startIdx : startIdx;
178+
boolean afterDot = false;
179+
int dots = 1;
180+
for (int i = numberStartIdx; i < endIdx; i++) {
181+
if (bytes[i] == '.') {
182+
afterDot = true;
183+
continue;
184+
}
185+
double n = bytes[i] - '0';
186+
if (afterDot) {
187+
d = d + n / Math.pow(10, dots++);
188+
} else {
189+
d = d * Math.pow(10, i - numberStartIdx) + n;
190+
}
191+
}
192+
return negative ? -d : d;
193+
}
194+
}

0 commit comments

Comments
 (0)