|
| 1 | +/* |
| 2 | + * Copyright 2023 The original authors |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | +package dev.morling.onebrc; |
| 17 | + |
| 18 | +import java.io.BufferedReader; |
| 19 | +import java.io.IOException; |
| 20 | +import java.nio.file.Files; |
| 21 | +import java.nio.file.Paths; |
| 22 | +import java.util.Iterator; |
| 23 | +import java.util.Map; |
| 24 | +import java.util.TreeMap; |
| 25 | +import java.util.concurrent.ConcurrentHashMap; |
| 26 | + |
| 27 | +/** |
| 28 | + * This is the solution from GitHub Copilot Chat with the help of Antonio Goncalves (prompting and guiding, but trying not to change code directly on my own, always using Copilot). |
| 29 | + * <p> |
| 30 | + * List of prompts that have been used: |
| 31 | + * <p> |
| 32 | + * ============= |
| 33 | + * ============= |
| 34 | + * ============= |
| 35 | + * v1 - 73603 ms |
| 36 | + * You are entering The One Billion Row Challenge (1BRC) which is an exploration of how far modern Java can be pushed for aggregating one billion rows from a text file. Grab all the (virtual) threads, reach out to SIMD, optimize the GC, or pull any other trick, and create the fastest implementation for solving this task! |
| 37 | + * The text file contains temperature values for a range of weather stations. Each row is one measurement in the format <string: station name>;<double: measurement>, with the measurement value having exactly one fractional digit. The following delimited with --- shows ten rows as an example: |
| 38 | + * --- |
| 39 | + * Hamburg;12.0 |
| 40 | + * Bulawayo;8.9 |
| 41 | + * Palembang;38.8 |
| 42 | + * St. John's;15.2 |
| 43 | + * Cracow;12.6 |
| 44 | + * Bridgetown;26.9 |
| 45 | + * Istanbul;6.2 |
| 46 | + * Roseau;34.4 |
| 47 | + * Conakry;31.2 |
| 48 | + * Istanbul;23.0 |
| 49 | + * --- |
| 50 | + * You have to write a Java program which reads the file, calculates the min, mean, and max temperature value per weather station, and emits the results on stdout like the result below delimited by --- (i.e. sorted alphabetically by station name, and the result values per station in the format <min>/<mean>/<max>, rounded to one fractional digit). Notice the curly braces: |
| 51 | + * --- |
| 52 | + * {Abha=-23.0/18.0/59.2, Abidjan=-16.2/26.0/67.3, Abéché=-10.0/29.4/69.0, Accra=-10.1/26.4/66.4, Addis Ababa=-23.7/16.0/67.0, Adelaide=-27.8/17.3/58.5, ...} |
| 53 | + * --- |
| 54 | + * You must use Java 21. |
| 55 | + * Create an algorithm in any way you see fit including parallelizing the computation, using the (incubating) Vector API, memory-mapping different sections of the file concurrently, using AppCDS, GraalVM, CRaC, etc. for speeding up the application start-up, choosing and tuning the garbage collector, and much more. |
| 56 | + * No external library dependencies may be used. |
| 57 | + * ============= |
| 58 | + * ============= |
| 59 | + * ============= |
| 60 | + * (Here I had to chat with Copilot about formatting the output, there were commas missing, the curly brackets were also missed) |
| 61 | + * ============= |
| 62 | + * ============= |
| 63 | + * ============= |
| 64 | + * v2 - 71831 ms |
| 65 | + * Being written in Java 21, please use records instead of classes for Measurement. |
| 66 | + * ============= |
| 67 | + * ============= |
| 68 | + * ============= |
| 69 | + * v3 - 69333 ms |
| 70 | + * If the temperatures are small numbers, why use double? Can't you use another datatype ? |
| 71 | + * <p> |
| 72 | + * The profiler mentions that this line of code has very bad performance. Can you refactor it so it has better performance: |
| 73 | + * --- |
| 74 | + * String[] parts = line.split(";") |
| 75 | + * --- |
| 76 | + * <p> |
| 77 | + * There is a maximum of 10000 unique station names. Can you optimize the code taking this into account? |
| 78 | + * ============= |
| 79 | + * ============= |
| 80 | + * ============= |
| 81 | + * v4 - 56417 ms |
| 82 | + * Which parameters can I pass to the JVM to make it run faster ? |
| 83 | + * Which GC can I use and what is the most optimized to run CalculateAverage ? |
| 84 | + */ |
public class CalculateAverage_agoncal {

    // Reads FILE (one "<station>;<temperature>" row per line), aggregates the
    // minimum, mean and maximum temperature per station using a parallel stream,
    // and prints the stations sorted by name as "{name=min/mean/max, ...}" on stdout.

    /** Path of the measurements file, resolved against the working directory. */
    private static final String FILE = "./measurements.txt";

    /**
     * One parsed input row.
     *
     * @param station     the station name (text before the first ';')
     * @param temperature the measured value (text after the first ';')
     */
    record Measurement(String station, double temperature) {
    }

    /**
     * Running aggregate (min, max, sum, count) for a single station.
     * <p>
     * {@link #update(double)} is {@code synchronized} because instances are shared
     * between the worker threads of the parallel stream in {@code main}.
     */
    static class StationStats {
        double min;
        double max;
        double sum;
        // long rather than int so that more than Integer.MAX_VALUE rows for one
        // station cannot silently wrap around.
        long count;

        /**
         * Creates an aggregate seeded with a first observed temperature.
         * <p>
         * Only {@code min} and {@code max} are seeded; {@code sum} and {@code count}
         * start at zero, so the caller is expected to pass the same value to
         * {@link #update(double)} afterwards (as {@code main} does).
         *
         * @param temperature the initial value for both min and max
         */
        public StationStats(double temperature) {
            this.min = temperature;
            this.max = temperature;
            this.sum = 0;
            this.count = 0;
        }

        /**
         * Folds one more measurement into this aggregate.
         *
         * @param temperature the measured value to add
         */
        synchronized void update(double temperature) {
            min = Math.min(min, temperature);
            max = Math.max(max, temperature);
            sum += temperature;
            count++;
        }

        /**
         * Returns the mean of all values passed to {@link #update(double)}.
         * The sum is rounded to one fractional digit before dividing.
         *
         * @return the mean, or {@code NaN} when no value has been added yet
         */
        double getAverage() {
            return round(sum) / count;
        }

        /**
         * Renders this aggregate as {@code min/mean/max} with one fractional digit each.
         * <p>
         * {@code Locale.ROOT} is passed explicitly so the decimal separator is always
         * '.', independent of the JVM's default locale.
         */
        @Override
        public String toString() {
            return String.format(java.util.Locale.ROOT, "%.1f/%.1f/%.1f", round(min), round(getAverage()), round(max));
        }
    }

    /**
     * Program entry point: aggregates every row of {@code FILE} and prints the result.
     *
     * @param args ignored
     * @throws IOException              if the measurements file cannot be opened
     * @throws IllegalArgumentException if a non-blank line has no ';' separator or
     *                                  its temperature is not a valid number
     */
    public static void main(String[] args) throws IOException {
        Map<String, StationStats> stats = new ConcurrentHashMap<>(10_000);
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(FILE))) {
            reader.lines().parallel().forEach(line -> accumulate(line, stats));
        }

        // TreeMap orders the stations by name, and its inherited toString() already
        // renders "{key=value, key=value}", so the whole result is written at once.
        System.out.println(new TreeMap<>(stats));
    }

    /**
     * Parses one input line and folds it into the per-station aggregates.
     * Blank lines (for example a trailing empty line) are skipped.
     */
    private static void accumulate(String line, Map<String, StationStats> stats) {
        int separatorIndex = line.indexOf(';');
        if (separatorIndex < 0) {
            if (line.isBlank()) {
                return;
            }
            throw new IllegalArgumentException("Malformed measurement line (missing ';'): " + line);
        }
        String station = line.substring(0, separatorIndex);
        String temperature = line.substring(separatorIndex + 1);
        Measurement m = new Measurement(station, Double.parseDouble(temperature));
        // computeIfAbsent creates the aggregate atomically; update() then adds the value.
        stats.computeIfAbsent(m.station(), k -> new StationStats(m.temperature())).update(m.temperature());
    }

    /** Rounds {@code value} to one fractional digit using {@link Math#round(double)}. */
    private static double round(double value) {
        return Math.round(value * 10.0) / 10.0;
    }
}
0 commit comments