diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml new file mode 100644 index 000000000..27c5165db --- /dev/null +++ b/.github/workflows/maven.yml @@ -0,0 +1,49 @@ +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: Build + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: 'Check out repository' + uses: actions/checkout@v2 + with: + submodules: 'true' + + - name: 'Set up Java' + uses: actions/setup-java@v2 + with: + java-version: 21 + distribution: 'temurin' + + - name: 'Cache Maven packages' + uses: actions/cache@v3 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 + + - name: 'Build project' + run: mvn -B clean verify diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..24b8221fa --- /dev/null +++ b/.gitignore @@ -0,0 +1,44 @@ +#Maven +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +release.properties + +# Eclipse +.project +.classpath +.settings/ +bin/ + +# IntelliJ +.idea +*.ipr +*.iml +*.iws + +# NetBeans +nb-configuration.xml + +# Visual Studio Code +.vscode +.factorypath + +# OSX +.DS_Store + +# Vim +*.swp +*.swo + +# patch +*.orig +*.rej + +# Local environment +.env + +#JReleaser +out/ + +measurements.txt diff --git a/.mvn/wrapper/maven-wrapper.jar b/.mvn/wrapper/maven-wrapper.jar new file mode 100644 index 000000000..cb28b0e37 Binary files /dev/null and b/.mvn/wrapper/maven-wrapper.jar differ diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 000000000..e347d2d24 --- /dev/null +++ b/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.4/apache-maven-3.8.4-bin.zip +wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 000000000..7a4a3ea24 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 000000000..8b4ad6968 --- /dev/null +++ b/README.md @@ -0,0 +1,132 @@ +# 1️⃣🐝🏎️ The One Billion Row Challenge + +The One Billion Row Challenge (1BRC) is a fun exploration of how far modern Java can be pushed for aggregating one billion rows from a text file. + +The text file contains temperature values for a range of weather stations. +Each row is one measurement in the format `;`. +The following shows ten rows as an example: + +``` +hamburg;11.0 +hammerfest;-12.7 +auckland;5.3 +lima;6.6 +concordia;-42.5 +hammerfest;-6.7 +hamburg;9.0 +hammerfest;-11.9 +hamburg;-0.7 +concordia;-48.5 +``` + +The task is to write a Java program which reads the file, calculates the average temperature value per weather station, and emits the result on stdout like this: + +``` +{auckland=5.3, concordia=-45.5, hamburg=6.4, hammerfest=-10.4, lima=6.6} +``` + +## Results + +| # | Result (sec) | Implementation | Submitter | +|---|--------------|--------------------|---------------| +| 1.| tbd.| [CalculateAverage.java](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| Gunnar Morling| + +See [below](#entering-the-challenge) for instructions how to enter the challenge with your own implementation. + +## Prerequisites + +[Java 21](https://openjdk.org/projects/jdk/21/) must be installed on your system. + +## Running the Challenge + +This repository contains two programs: + +* `dev.morling.onebrc.CreateMeasurements` (invoked via _create\_measurements.sh_): Creates the file _measurements.txt_ in the root directory of this project with a configurable number of random measurement values +* `dev.morling.onebrc.CalculateAverage` (invoked via _calculate\_average.sh_): Calculates the average values for the file _measurements.txt_ + +Execute the following steps to run the challenge: + +1. Build the project using Apache Maven: + + ``` + ./mvnw clean verify + ``` + +2. Create the measurements file with 1B rows (just once): + + ``` + ./create_measurements.sh 1000000000 + ``` + + This will take a few minutes. + **Attention:** the generated file has a size of approx. **12 GB**, so make sure to have enough diskspace. + +3. Calculate the average measurement values: + + ``` + ./calculate_average.sh + ``` + + The provided naive example implementation uses the Java streams API for processing the file and completes the task in ~3 min on an Apple Mac Mini M1. + It serves as the base line for comparing your own implementation. + +4. Optimize the heck out of it: + + Adjust the `CalculateAverage` program to speed it up, in any way you see fit (just sticking to a few rules described below). + Options include parallelizing the computation, using the (incubating) Vector API, memory-mapping different sections of the file concurrently, using AppCDS, GraalVM, CRaC, etc. for speeding up the application start-up, choosing and tuning the garbage collector, and much more. + +The following rules and limits apply: + +* Any Java distribution provided by [SDKMan](https://sdkman.io/jdks) as well as early access builds available on openjdk.net may be used (including EA builds for OpenJDK projects like Valhalla). +If you want to use a build not available via these channels, reach out to discuss whether it can be considered. +* No external library dependencies may be used +* Implementations must be provided as a single source file + +## Entering the Challenge + +To submit your own implementation to 1BRC, follow these steps: + +* Create a fork of the [onebrc](https://github.com/gunnarmorling/onebrc/) GitHub repository. +* Create a copy of _CalculateAverage.java_, named _CalculateAverage\_.java_, e.g. _CalculateAverage\_doloreswilson.java_. +* Make that implementation fast. Really fast. +* Create a copy of _calculate_average.sh_, named _calculate\_average\_.sh_, e.g. _calculate\_average\_doloreswilson.sh_. +* Adjust that script so that it references your implementation class name. If needed, provide any JVM arguments via the `JAVA_OPTS` variable in that script. +* (Optional) If you'd like to use native binaries (GraalVM), adjust the _pom.xml_ file so that it builds that binary. +* Create a pull request against the upstream repository, clearly stating + * The name of your implementation class. + * The JDK build to use (of not specified, the latest OpenJDK 21 upstream build will be used). +* I will run the program and determine its performance as described in the next section, and enter the result to the scoreboard. + +There is no pre-defined end date or maximum number of results for this challenge at this point, +but I may do a cut-off at some point. +To keep overhead low, please refrain from submitting entries if they wouldn't make it to the top half of the score board. + +## Evaluating Results + +Results are determined by running the program on a [Hetzner Cloud CCX33 instance](https://www.hetzner.com/cloud) (8 dedicated vCPU, 32 GB RAM). +The `time` program is used for measuring execution times, i.e. end-to-end times are measured. +Each contender will be run five times in a row. +The slowest and the fastest runs are discarded. +The mean value of the remaining three runs is the result for that contender and will be added to the results table above. + +If you'd like to spin up your own box for testing on Hetzner Cloud, you may find these [set-up scripts](https://github.com/gunnarmorling/cloud-boxes/) (based on Terraform and Ansible) useful. +Note this will incur cost you are responsible for, I am not going to pay your cloud bill :) + +## Price + +If you enter this challenge, you may learn something new, get to inspire others, and see your name listed in the scoreboard above. + +## FAQ + +_Q: Can I use Kotlin or other JVM languages other than Java?_\ +A: No, this challenge is focussed on Java only. Feel free to inofficially share implementations significantly outperforming any listed results, though. + +_Q: Can I use non-JVM languages and/or tools?_\ +A: No, this challenge is focussed on Java only. Feel free to inofficially share interesting implementations and results though. For instance it would be interesting to see how DuckDB fares with this task. + +_Q: Why_ 1️⃣🐝🏎️ _?_\ +A: It's the abbreviation of the project name: **One** **B**illion **R**ow **C**hallenge. + +## License + +This code base is available under the Apache License, version 2. diff --git a/calculate_average.sh b/calculate_average.sh new file mode 100755 index 000000000..b095e7268 --- /dev/null +++ b/calculate_average.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage diff --git a/create_measurements.sh b/create_measurements.sh new file mode 100755 index 000000000..422963557 --- /dev/null +++ b/create_measurements.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +java --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CreateMeasurements $1 diff --git a/etc/eclipse-formatter-config.xml b/etc/eclipse-formatter-config.xml new file mode 100644 index 000000000..2a3306ce0 --- /dev/null +++ b/etc/eclipse-formatter-config.xml @@ -0,0 +1,366 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/etc/license.txt b/etc/license.txt new file mode 100644 index 000000000..69b867170 --- /dev/null +++ b/etc/license.txt @@ -0,0 +1,13 @@ + Copyright 2023 The original authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/mvnw b/mvnw new file mode 100755 index 000000000..8d937f4c1 --- /dev/null +++ b/mvnw @@ -0,0 +1,308 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Apache Maven Wrapper startup batch script, version 3.2.0 +# +# Required ENV vars: +# ------------------ +# JAVA_HOME - location of a JDK home dir +# +# Optional ENV vars +# ----------------- +# MAVEN_OPTS - parameters passed to the Java VM when running Maven +# e.g. to debug Maven itself, use +# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +# MAVEN_SKIP_RC - flag to disable loading of mavenrc files +# ---------------------------------------------------------------------------- + +if [ -z "$MAVEN_SKIP_RC" ] ; then + + if [ -f /usr/local/etc/mavenrc ] ; then + . /usr/local/etc/mavenrc + fi + + if [ -f /etc/mavenrc ] ; then + . /etc/mavenrc + fi + + if [ -f "$HOME/.mavenrc" ] ; then + . "$HOME/.mavenrc" + fi + +fi + +# OS specific support. $var _must_ be set to either true or false. +cygwin=false; +darwin=false; +mingw=false +case "$(uname)" in + CYGWIN*) cygwin=true ;; + MINGW*) mingw=true;; + Darwin*) darwin=true + # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home + # See https://developer.apple.com/library/mac/qa/qa1170/_index.html + if [ -z "$JAVA_HOME" ]; then + if [ -x "/usr/libexec/java_home" ]; then + JAVA_HOME="$(/usr/libexec/java_home)"; export JAVA_HOME + else + JAVA_HOME="/Library/Java/Home"; export JAVA_HOME + fi + fi + ;; +esac + +if [ -z "$JAVA_HOME" ] ; then + if [ -r /etc/gentoo-release ] ; then + JAVA_HOME=$(java-config --jre-home) + fi +fi + +# For Cygwin, ensure paths are in UNIX format before anything is touched +if $cygwin ; then + [ -n "$JAVA_HOME" ] && + JAVA_HOME=$(cygpath --unix "$JAVA_HOME") + [ -n "$CLASSPATH" ] && + CLASSPATH=$(cygpath --path --unix "$CLASSPATH") +fi + +# For Mingw, ensure paths are in UNIX format before anything is touched +if $mingw ; then + [ -n "$JAVA_HOME" ] && [ -d "$JAVA_HOME" ] && + JAVA_HOME="$(cd "$JAVA_HOME" || (echo "cannot cd into $JAVA_HOME."; exit 1); pwd)" +fi + +if [ -z "$JAVA_HOME" ]; then + javaExecutable="$(which javac)" + if [ -n "$javaExecutable" ] && ! [ "$(expr "\"$javaExecutable\"" : '\([^ ]*\)')" = "no" ]; then + # readlink(1) is not available as standard on Solaris 10. + readLink=$(which readlink) + if [ ! "$(expr "$readLink" : '\([^ ]*\)')" = "no" ]; then + if $darwin ; then + javaHome="$(dirname "\"$javaExecutable\"")" + javaExecutable="$(cd "\"$javaHome\"" && pwd -P)/javac" + else + javaExecutable="$(readlink -f "\"$javaExecutable\"")" + fi + javaHome="$(dirname "\"$javaExecutable\"")" + javaHome=$(expr "$javaHome" : '\(.*\)/bin') + JAVA_HOME="$javaHome" + export JAVA_HOME + fi + fi +fi + +if [ -z "$JAVACMD" ] ; then + if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + else + JAVACMD="$(\unset -f command 2>/dev/null; \command -v java)" + fi +fi + +if [ ! -x "$JAVACMD" ] ; then + echo "Error: JAVA_HOME is not defined correctly." >&2 + echo " We cannot execute $JAVACMD" >&2 + exit 1 +fi + +if [ -z "$JAVA_HOME" ] ; then + echo "Warning: JAVA_HOME environment variable is not set." +fi + +# traverses directory structure from process work directory to filesystem root +# first directory with .mvn subdirectory is considered project base directory +find_maven_basedir() { + if [ -z "$1" ] + then + echo "Path not specified to find_maven_basedir" + return 1 + fi + + basedir="$1" + wdir="$1" + while [ "$wdir" != '/' ] ; do + if [ -d "$wdir"/.mvn ] ; then + basedir=$wdir + break + fi + # workaround for JBEAP-8937 (on Solaris 10/Sparc) + if [ -d "${wdir}" ]; then + wdir=$(cd "$wdir/.." || exit 1; pwd) + fi + # end of workaround + done + printf '%s' "$(cd "$basedir" || exit 1; pwd)" +} + +# concatenates all lines of a file +concat_lines() { + if [ -f "$1" ]; then + # Remove \r in case we run on Windows within Git Bash + # and check out the repository with auto CRLF management + # enabled. Otherwise, we may read lines that are delimited with + # \r\n and produce $'-Xarg\r' rather than -Xarg due to word + # splitting rules. + tr -s '\r\n' ' ' < "$1" + fi +} + +log() { + if [ "$MVNW_VERBOSE" = true ]; then + printf '%s\n' "$1" + fi +} + +BASE_DIR=$(find_maven_basedir "$(dirname "$0")") +if [ -z "$BASE_DIR" ]; then + exit 1; +fi + +MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}; export MAVEN_PROJECTBASEDIR +log "$MAVEN_PROJECTBASEDIR" + +########################################################################################## +# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central +# This allows using the maven wrapper in projects that prohibit checking in binary data. +########################################################################################## +wrapperJarPath="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" +if [ -r "$wrapperJarPath" ]; then + log "Found $wrapperJarPath" +else + log "Couldn't find $wrapperJarPath, downloading it ..." + + if [ -n "$MVNW_REPOURL" ]; then + wrapperUrl="$MVNW_REPOURL/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" + else + wrapperUrl="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" + fi + while IFS="=" read -r key value; do + # Remove '\r' from value to allow usage on windows as IFS does not consider '\r' as a separator ( considers space, tab, new line ('\n'), and custom '=' ) + safeValue=$(echo "$value" | tr -d '\r') + case "$key" in (wrapperUrl) wrapperUrl="$safeValue"; break ;; + esac + done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" + log "Downloading from: $wrapperUrl" + + if $cygwin; then + wrapperJarPath=$(cygpath --path --windows "$wrapperJarPath") + fi + + if command -v wget > /dev/null; then + log "Found wget ... using wget" + [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--quiet" + if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then + wget $QUIET "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" + else + wget $QUIET --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" + fi + elif command -v curl > /dev/null; then + log "Found curl ... using curl" + [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--silent" + if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then + curl $QUIET -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" + else + curl $QUIET --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" + fi + else + log "Falling back to using Java to download" + javaSource="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.java" + javaClass="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.class" + # For Cygwin, switch paths to Windows format before running javac + if $cygwin; then + javaSource=$(cygpath --path --windows "$javaSource") + javaClass=$(cygpath --path --windows "$javaClass") + fi + if [ -e "$javaSource" ]; then + if [ ! -e "$javaClass" ]; then + log " - Compiling MavenWrapperDownloader.java ..." + ("$JAVA_HOME/bin/javac" "$javaSource") + fi + if [ -e "$javaClass" ]; then + log " - Running MavenWrapperDownloader.java ..." + ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$wrapperUrl" "$wrapperJarPath") || rm -f "$wrapperJarPath" + fi + fi + fi +fi +########################################################################################## +# End of extension +########################################################################################## + +# If specified, validate the SHA-256 sum of the Maven wrapper jar file +wrapperSha256Sum="" +while IFS="=" read -r key value; do + case "$key" in (wrapperSha256Sum) wrapperSha256Sum=$value; break ;; + esac +done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" +if [ -n "$wrapperSha256Sum" ]; then + wrapperSha256Result=false + if command -v sha256sum > /dev/null; then + if echo "$wrapperSha256Sum $wrapperJarPath" | sha256sum -c > /dev/null 2>&1; then + wrapperSha256Result=true + fi + elif command -v shasum > /dev/null; then + if echo "$wrapperSha256Sum $wrapperJarPath" | shasum -a 256 -c > /dev/null 2>&1; then + wrapperSha256Result=true + fi + else + echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." + echo "Please install either command, or disable validation by removing 'wrapperSha256Sum' from your maven-wrapper.properties." + exit 1 + fi + if [ $wrapperSha256Result = false ]; then + echo "Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised." >&2 + echo "Investigate or delete $wrapperJarPath to attempt a clean download." >&2 + echo "If you updated your Maven version, you need to update the specified wrapperSha256Sum property." >&2 + exit 1 + fi +fi + +MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" + +# For Cygwin, switch paths to Windows format before running java +if $cygwin; then + [ -n "$JAVA_HOME" ] && + JAVA_HOME=$(cygpath --path --windows "$JAVA_HOME") + [ -n "$CLASSPATH" ] && + CLASSPATH=$(cygpath --path --windows "$CLASSPATH") + [ -n "$MAVEN_PROJECTBASEDIR" ] && + MAVEN_PROJECTBASEDIR=$(cygpath --path --windows "$MAVEN_PROJECTBASEDIR") +fi + +# Provide a "standardized" way to retrieve the CLI args that will +# work with both Windows and non-Windows executions. +MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $*" +export MAVEN_CMD_LINE_ARGS + +WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +# shellcheck disable=SC2086 # safe args +exec "$JAVACMD" \ + $MAVEN_OPTS \ + $MAVEN_DEBUG_OPTS \ + -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ + "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ + ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" diff --git a/mvnw.cmd b/mvnw.cmd new file mode 100644 index 000000000..f80fbad3e --- /dev/null +++ b/mvnw.cmd @@ -0,0 +1,205 @@ +@REM ---------------------------------------------------------------------------- +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. You may obtain a copy of the License at +@REM +@REM http://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. +@REM ---------------------------------------------------------------------------- + +@REM ---------------------------------------------------------------------------- +@REM Apache Maven Wrapper startup batch script, version 3.2.0 +@REM +@REM Required ENV vars: +@REM JAVA_HOME - location of a JDK home dir +@REM +@REM Optional ENV vars +@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands +@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending +@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven +@REM e.g. to debug Maven itself, use +@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files +@REM ---------------------------------------------------------------------------- + +@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' +@echo off +@REM set title of command window +title %0 +@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on' +@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% + +@REM set %HOME% to equivalent of $HOME +if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") + +@REM Execute a user defined script before this one +if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre +@REM check for pre script, once with legacy .bat ending and once with .cmd ending +if exist "%USERPROFILE%\mavenrc_pre.bat" call "%USERPROFILE%\mavenrc_pre.bat" %* +if exist "%USERPROFILE%\mavenrc_pre.cmd" call "%USERPROFILE%\mavenrc_pre.cmd" %* +:skipRcPre + +@setlocal + +set ERROR_CODE=0 + +@REM To isolate internal variables from possible post scripts, we use another setlocal +@setlocal + +@REM ==== START VALIDATION ==== +if not "%JAVA_HOME%" == "" goto OkJHome + +echo. +echo Error: JAVA_HOME not found in your environment. >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. +goto error + +:OkJHome +if exist "%JAVA_HOME%\bin\java.exe" goto init + +echo. +echo Error: JAVA_HOME is set to an invalid directory. >&2 +echo JAVA_HOME = "%JAVA_HOME%" >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. +goto error + +@REM ==== END VALIDATION ==== + +:init + +@REM Find the project base dir, i.e. the directory that contains the folder ".mvn". +@REM Fallback to current working directory if not found. + +set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% +IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir + +set EXEC_DIR=%CD% +set WDIR=%EXEC_DIR% +:findBaseDir +IF EXIST "%WDIR%"\.mvn goto baseDirFound +cd .. +IF "%WDIR%"=="%CD%" goto baseDirNotFound +set WDIR=%CD% +goto findBaseDir + +:baseDirFound +set MAVEN_PROJECTBASEDIR=%WDIR% +cd "%EXEC_DIR%" +goto endDetectBaseDir + +:baseDirNotFound +set MAVEN_PROJECTBASEDIR=%EXEC_DIR% +cd "%EXEC_DIR%" + +:endDetectBaseDir + +IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig + +@setlocal EnableExtensions EnableDelayedExpansion +for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a +@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% + +:endReadAdditionalConfig + +SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" +set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" +set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +set WRAPPER_URL="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" + +FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( + IF "%%A"=="wrapperUrl" SET WRAPPER_URL=%%B +) + +@REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central +@REM This allows using the maven wrapper in projects that prohibit checking in binary data. +if exist %WRAPPER_JAR% ( + if "%MVNW_VERBOSE%" == "true" ( + echo Found %WRAPPER_JAR% + ) +) else ( + if not "%MVNW_REPOURL%" == "" ( + SET WRAPPER_URL="%MVNW_REPOURL%/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" + ) + if "%MVNW_VERBOSE%" == "true" ( + echo Couldn't find %WRAPPER_JAR%, downloading it ... + echo Downloading from: %WRAPPER_URL% + ) + + powershell -Command "&{"^ + "$webclient = new-object System.Net.WebClient;"^ + "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^ + "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^ + "}"^ + "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%WRAPPER_URL%', '%WRAPPER_JAR%')"^ + "}" + if "%MVNW_VERBOSE%" == "true" ( + echo Finished downloading %WRAPPER_JAR% + ) +) +@REM End of extension + +@REM If specified, validate the SHA-256 sum of the Maven wrapper jar file +SET WRAPPER_SHA_256_SUM="" +FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( + IF "%%A"=="wrapperSha256Sum" SET WRAPPER_SHA_256_SUM=%%B +) +IF NOT %WRAPPER_SHA_256_SUM%=="" ( + powershell -Command "&{"^ + "$hash = (Get-FileHash \"%WRAPPER_JAR%\" -Algorithm SHA256).Hash.ToLower();"^ + "If('%WRAPPER_SHA_256_SUM%' -ne $hash){"^ + " Write-Output 'Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised.';"^ + " Write-Output 'Investigate or delete %WRAPPER_JAR% to attempt a clean download.';"^ + " Write-Output 'If you updated your Maven version, you need to update the specified wrapperSha256Sum property.';"^ + " exit 1;"^ + "}"^ + "}" + if ERRORLEVEL 1 goto error +) + +@REM Provide a "standardized" way to retrieve the CLI args that will +@REM work with both Windows and non-Windows executions. +set MAVEN_CMD_LINE_ARGS=%* + +%MAVEN_JAVA_EXE% ^ + %JVM_CONFIG_MAVEN_PROPS% ^ + %MAVEN_OPTS% ^ + %MAVEN_DEBUG_OPTS% ^ + -classpath %WRAPPER_JAR% ^ + "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" ^ + %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* +if ERRORLEVEL 1 goto error +goto end + +:error +set ERROR_CODE=1 + +:end +@endlocal & set ERROR_CODE=%ERROR_CODE% + +if not "%MAVEN_SKIP_RC%"=="" goto skipRcPost +@REM check for post script, once with legacy .bat ending and once with .cmd ending +if exist "%USERPROFILE%\mavenrc_post.bat" call "%USERPROFILE%\mavenrc_post.bat" +if exist "%USERPROFILE%\mavenrc_post.cmd" call "%USERPROFILE%\mavenrc_post.cmd" +:skipRcPost + +@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' +if "%MAVEN_BATCH_PAUSE%"=="on" pause + +if "%MAVEN_TERMINATE_CMD%"=="on" exit %ERROR_CODE% + +cmd /C exit /B %ERROR_CODE% diff --git a/pom.xml b/pom.xml new file mode 100644 index 000000000..93d754b8a --- /dev/null +++ b/pom.xml @@ -0,0 +1,248 @@ + + + + 4.0.0 + + dev.morling.demos + average + 1.0.0-SNAPSHOT + + + 21 + true + ${java.version} + UTF-8 + UTF-8 + + + My OSS Project + My Latest OSS Project + 2021 + tbd. + + + + Apache-2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + + + org.junit + junit-bom + 5.8.1 + pom + import + + + + + + + org.junit.jupiter + junit-jupiter + test + + + org.assertj + assertj-core + 3.21.0 + test + + + + + + + + + com.mycila + license-maven-plugin + 4.1 + + + net.revelc.code.formatter + formatter-maven-plugin + 2.16.0 + + etc/eclipse-formatter-config.xml + + + + net.revelc.code + impsort-maven-plugin + 1.9.0 + + java.,javax.,org.,com. + true + true + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + true + + + + org.apache.maven.plugins + maven-clean-plugin + 3.1.0 + + + org.apache.maven.plugins + maven-deploy-plugin + 3.0.0-M1 + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.0.0-M3 + + + org.apache.maven.plugins + maven-install-plugin + 3.0.0-M1 + + + org.apache.maven.plugins + maven-jar-plugin + 3.2.0 + + + org.apache.maven.plugins + maven-resources-plugin + 3.2.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.9.1 + + + org.apache.maven.plugins + maven-surefire-plugin + 3.0.0-M5 + + + + + + + + qa + + + !quick + + + + + + com.mycila + license-maven-plugin + +
etc/license.txt
+ true + true + + LICENSE.txt + **/.dontdelete + +
+ + + + check + + + +
+ + net.revelc.code.formatter + formatter-maven-plugin + + + format + + format + + process-sources + + + + + + + + + + + + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + enforce-plugin-versions + + enforce + + + + + [${java.version},) + + + true + true + true + clean,deploy,site + + + + + + +
+
+
+ + quick + + + quick + + + + true + + +
+
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage.java b/src/main/java/dev/morling/onebrc/CalculateAverage.java new file mode 100644 index 000000000..7672d7508 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage.java @@ -0,0 +1,44 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import static java.util.stream.Collectors.averagingDouble; +import static java.util.stream.Collectors.groupingBy; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Map; +import java.util.TreeMap; +import java.util.stream.Collectors; + +public class CalculateAverage { + + private static final String FILE = "./measurements.txt"; + + public static void main(String[] args) throws IOException { + Map measurements = Files.lines(Paths.get(FILE)) + .limit(10_000_000) + .map(l -> l.split(";")) + .collect(groupingBy(m -> m[0], averagingDouble(m -> Double.parseDouble(m[1])))); + + measurements = new TreeMap<>(measurements.entrySet() + .stream() + .collect(Collectors.toMap(e -> e.getKey(), e -> Math.round(e.getValue() * 10.0) / 10.0))); + + System.out.println(measurements); + } +} diff --git a/src/main/java/dev/morling/onebrc/CreateMeasurements.java b/src/main/java/dev/morling/onebrc/CreateMeasurements.java new file mode 100644 index 000000000..583c9b156 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CreateMeasurements.java @@ -0,0 +1,83 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ThreadLocalRandom; + +public class CreateMeasurements { + + private static final String FILE = "./measurements.txt"; + + private record WeatherStation(String id, double meanTemperature) { + double measurement() { + double m = ThreadLocalRandom.current().nextGaussian(meanTemperature, 10); + return Math.round(m * 10.0) / 10.0; + } + } + + public static void main(String[] args) throws Exception { + long start = System.currentTimeMillis(); + + if (args.length != 1) { + System.out.println("Usage: create_measurements.sh "); + System.exit(1); + } + + int size = 0; + try { + size = Integer.parseInt(args[0]); + } + catch (NumberFormatException e) { + System.out.println("Invalid value for "); + System.out.println("Usage: CreateMeasurements "); + System.exit(1); + } + + List stations = Arrays.asList( + new WeatherStation("auckland", 15.2), + new WeatherStation("concordia", -51.7), + new WeatherStation("lima", 19.2), + new WeatherStation("hamburg", 9.4), + new WeatherStation("hammerfest", 2.7), + new WeatherStation("maui", 15.2), + new WeatherStation("miami", 24.5), + new WeatherStation("nairobi", 17.8), + new WeatherStation("newdelhi", 25.3), + new WeatherStation("tokio", 15.8)); + + File measurements = new File(FILE); + try (FileOutputStream fos = new FileOutputStream(measurements); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos));) { + for (int i = 0; i < size; i++) { + if (i > 0 && i % 50_000_000 == 0) { + System.out.println("Wrote %,d measurements in %s ms".formatted(i, System.currentTimeMillis() - start)); + } + WeatherStation station = stations.get(ThreadLocalRandom.current().nextInt(stations.size())); + bw.write(station.id()); + bw.write(";" + station.measurement()); + bw.newLine(); + } + bw.flush(); + + System.out.println("Created file with %,d measurements in %s ms".formatted(size, System.currentTimeMillis() - start)); + } + } +} diff --git a/src/main/resources/.dontdelete b/src/main/resources/.dontdelete new file mode 100644 index 000000000..e69de29bb diff --git a/src/test/resources/.dontdelete b/src/test/resources/.dontdelete new file mode 100644 index 000000000..e69de29bb