Skip to content

Commit 836f080

Browse files
authored
GitHub Copilot Chat with the help of agoncal (#460)
* v1 - Initial prompt * Introduce Records * v1 - Initial prompt * v2 - Introduce Records * v3 - Improves code * v4 - Improves JVM parameter * GitHub Copilot Chat with the help of agoncal * Format * Pass measurements-rounding * Added prepare script
1 parent f5c9750 commit 836f080

File tree

3 files changed

+193
-0
lines changed

3 files changed

+193
-0
lines changed

calculate_average_agoncal.sh

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/sh
2+
#
3+
# Copyright 2023 The original authors
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# sdk use java 21.0.1-tem
19+
20+
# JVM flags for the run: preview features enabled, Shenandoah GC selected,
# string deduplication switched on, and assertions disabled (-da).
JAVA_OPTS="--enable-preview -XX:+UseShenandoahGC -XX:+UseStringDeduplication -da"
21+
# Launch the solution class from the built jar. $JAVA_OPTS is expanded unquoted,
# so the shell word-splits it into one argument per flag.
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_agoncal

prepare_agoncal.sh

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash
2+
#
3+
# Copyright 2023 The original authors
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Source the SDKMAN init script from the user's home directory
# (presumably this is what makes the `sdk` command available below — confirm).
source "$HOME/.sdkman/bin/sdkman-init.sh"
19+
# Select the 21.0.1-tem Java SDK for this shell; the command's stdout is
# redirected to stderr (1>&2).
sdk use java 21.0.1-tem 1>&2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/*
2+
* Copyright 2023 The original authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package dev.morling.onebrc;
17+
18+
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
26+
27+
/**
28+
* This is the solution from GitHub Copilot Chat with the help of Antonio Goncalves (prompting and guiding, but trying not to change code directly on my own, always using Copilot).
29+
* <p>
30+
* List of prompts that have been used:
31+
* <p>
32+
* =============
33+
* =============
34+
* =============
35+
* v1 - 73603 ms
36+
* You are entering The One Billion Row Challenge (1BRC) which is an exploration of how far modern Java can be pushed for aggregating one billion rows from a text file. Grab all the (virtual) threads, reach out to SIMD, optimize the GC, or pull any other trick, and create the fastest implementation for solving this task!
37+
* The text file contains temperature values for a range of weather stations. Each row is one measurement in the format <string: station name>;<double: measurement>, with the measurement value having exactly one fractional digit. The following delimited with --- shows ten rows as an example:
38+
* ---
39+
* Hamburg;12.0
40+
* Bulawayo;8.9
41+
* Palembang;38.8
42+
* St. John's;15.2
43+
* Cracow;12.6
44+
* Bridgetown;26.9
45+
* Istanbul;6.2
46+
* Roseau;34.4
47+
* Conakry;31.2
48+
* Istanbul;23.0
49+
* ---
50+
* You have to write a Java program which reads the file, calculates the min, mean, and max temperature value per weather station, and emits the results on stdout like the result below delimited by --- (i.e. sorted alphabetically by station name, and the result values per station in the format <min>/<mean>/<max>, rounded to one fractional digit). Notice the curly braces:
51+
* ---
52+
* {Abha=-23.0/18.0/59.2, Abidjan=-16.2/26.0/67.3, Abéché=-10.0/29.4/69.0, Accra=-10.1/26.4/66.4, Addis Ababa=-23.7/16.0/67.0, Adelaide=-27.8/17.3/58.5, ...}
53+
* ---
54+
* You must use Java 21.
55+
* Create an algorithm in any way you see fit including parallelizing the computation, using the (incubating) Vector API, memory-mapping different sections of the file concurrently, using AppCDS, GraalVM, CRaC, etc. for speeding up the application start-up, choosing and tuning the garbage collector, and much more.
56+
* No external library dependencies may be used.
57+
* =============
58+
* =============
59+
* =============
60+
* (Here I had to chat with Copilot about formatting the output: commas were missing, and so were the curly brackets.)
61+
* =============
62+
* =============
63+
* =============
64+
* v2 - 71831 ms
65+
* Being written in Java 21, please use records instead of classes for Measurement.
66+
* =============
67+
* =============
68+
* =============
69+
* v3 - 69333 ms
70+
* If the temperatures are small numbers, why use double? Can't you use another datatype ?
71+
* <p>
72+
* The profiler mentions that this line of code has very bad performance. Can you refactor it so it has better performance:
73+
* ---
74+
* String[] parts = line.split(";")
75+
* ---
76+
* <p>
77+
* There is a maximum of 10000 unique station names. Can you optimize the code taking this into account?
78+
* =============
79+
* =============
80+
* =============
81+
* v4 - 56417 ms
82+
* Which parameters can I pass to the JVM to make it run faster ?
83+
* Which GC can I use and what is the most optimized to run CalculateAverage ?
84+
*/
85+
public class CalculateAverage_agoncal {

    /** Input file, resolved relative to the working directory. */
    private static final String FILE = "./measurements.txt";

    /** One parsed input row: the station name and its temperature reading. */
    record Measurement(String station, double temperature) {
    }

    /**
     * Running minimum, maximum, sum and count of the readings of one station.
     * <p>
     * {@link #update(double)} is synchronized so several worker threads may feed the
     * same instance; the read-side methods are not synchronized and are only called
     * after the parallel aggregation in {@code main} has finished.
     */
    static class StationStats {
        double min;
        double max;
        double sum;
        int count;

        /**
         * Seeds min and max with the first reading. Sum and count start at zero because
         * the caller passes that same reading to {@link #update(double)} right after
         * creating the instance.
         *
         * @param temperature the first reading seen for the station
         */
        public StationStats(double temperature) {
            this.min = temperature;
            this.max = temperature;
            this.sum = 0;
            this.count = 0;
        }

        /**
         * Folds one reading into the running statistics.
         *
         * @param temperature the reading to add
         */
        synchronized void update(double temperature) {
            min = Math.min(min, temperature);
            max = Math.max(max, temperature);
            sum += temperature;
            count++;
        }

        /**
         * @return the sum, rounded to one fractional digit, divided by the number of readings
         */
        double getAverage() {
            return round(sum) / count;
        }

        /**
         * @return {@code min/mean/max}, each with exactly one fractional digit
         */
        @Override
        public String toString() {
            // Locale.ROOT pins the decimal separator to '.'; the default locale may use ','.
            return String.format(Locale.ROOT, "%.1f/%.1f/%.1f", round(min), round(getAverage()), round(max));
        }
    }

    /**
     * Reads {@link #FILE}, aggregates min/mean/max per station in parallel and prints
     * the result as {@code {station=min/mean/max, ...}} sorted by station name.
     *
     * @param args ignored
     * @throws IOException if the input file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        // Presized for the 10_000 station names mentioned in the class-level notes.
        Map<String, StationStats> stats = new ConcurrentHashMap<>(10_000);
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(FILE))) {
            reader.lines()
                    .parallel()
                    // An empty line (e.g. a trailing one) carries no measurement.
                    .filter(line -> !line.isEmpty())
                    .map(CalculateAverage_agoncal::parse)
                    .forEach(m -> stats
                            .computeIfAbsent(m.station(), k -> new StationStats(m.temperature()))
                            .update(m.temperature()));
        }

        // The TreeMap orders the stations by name for the output.
        Map<String, StationStats> sortedStats = new TreeMap<>(stats);
        String report = sortedStats.entrySet().stream()
                .map(entry -> entry.getKey() + "=" + entry.getValue())
                .collect(Collectors.joining(", ", "{", "}"));
        System.out.println(report);
    }

    /**
     * Splits one {@code <station>;<temperature>} row at its first {@code ';'}.
     *
     * @param line a non-empty input row
     * @return the parsed measurement
     * @throws IllegalArgumentException if the row has no {@code ';'} separator
     * @throws NumberFormatException if the text after the separator is not a number
     */
    private static Measurement parse(String line) {
        int separatorIndex = line.indexOf(';');
        if (separatorIndex < 0) {
            throw new IllegalArgumentException("Malformed row, missing ';' separator: " + line);
        }
        String station = line.substring(0, separatorIndex);
        String temperature = line.substring(separatorIndex + 1);
        return new Measurement(station, Double.parseDouble(temperature));
    }

    /** Rounds to one fractional digit; ties go towards positive infinity ({@link Math#round(double)}). */
    private static double round(double value) {
        return Math.round(value * 10.0) / 10.0;
    }
}

0 commit comments

Comments
 (0)