-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
bc88d27
commit ebcb106
Showing
33 changed files
with
57,696 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
.idea | ||
.gradle | ||
build | ||
profilers |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,67 @@ | ||
# simdjson-java | ||
# simdjson-java | ||
|
||
A Java version of [simdjson](https://github.com/simdjson/simdjson) - a JSON parser using SIMD instructions, | ||
based on the paper [Parsing Gigabytes of JSON per Second](https://arxiv.org/abs/1902.08318) | ||
by Geoff Langdale and Daniel Lemire. | ||
|
||
This implementation is still missing several features available in simdjson. For example: | ||
|
||
* Support for Unicode characters | ||
* UTF-8 validation | ||
* Full support for parsing floats | ||
* Support for 512-bit vectors | ||
|
||
## Code Sample | ||
|
||
```java | ||
byte[] json = loadTwitterJson(); | ||
|
||
SimdJsonParser parser = new SimdJsonParser(); | ||
JsonValue jsonValue = parser.parse(json, json.length); | ||
Iterator<JsonValue> tweets = jsonValue.get("statuses").arrayIterator(); | ||
while (tweets.hasNext()) { | ||
JsonValue tweet = tweets.next(); | ||
JsonValue user = tweet.get("user"); | ||
if (user.get("default_profile").asBoolean()) { | ||
System.out.println(user.get("screen_name").asString()); | ||
} | ||
} | ||
``` | ||
|
||
## Benchmarks | ||
|
||
To run the JMH benchmarks, execute the following command: | ||
|
||
```./gradlew jmh``` | ||
|
||
## Tests | ||
|
||
To run the tests, execute the following command: | ||
|
||
```./gradlew test``` | ||
|
||
## Performance | ||
|
||
This section presents a performance comparison of different JSON parsers available as Java libraries. The benchmark used | ||
the [twitter.json](src/jmh/resources/twitter.json) dataset, and its goal was to measure the throughput (ops/s) of parsing | ||
and finding all unique users with a default profile. | ||
|
||
**Note that simdjson-java is still missing several features (mentioned in the introduction), so the following results | ||
may not reflect its real performance.** | ||
|
||
Environment: | ||
* CPU: Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz | ||
* OS: Ubuntu 23.04, kernel 6.2.0-23-generic | ||
* Java: OpenJDK 64-Bit Server VM Temurin-20.0.1+9 | ||
|
||
Library | Version | Throughput (ops/s) | ||
---------------------------------------------------|---------|-------------------- | ||
simdjson-java | - | 1450.951 | ||
simdjson-java (padded) | - | 1505.227 | ||
[jackson](https://github.com/FasterXML/jackson) | 2.15.2 | 504.562 | ||
[fastjson2](https://github.com/alibaba/fastjson2) | 2.0.35 | 590.743 | ||
[jsoniter](https://github.com/json-iterator/java) | 0.9.23 | 384.664 | ||
|
||
To reproduce the benchmark results, execute the following command: | ||
|
||
```./gradlew jmh -Pjmh.includes='.*ParseAndSelectBenchmark.*'``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
118 changes: 118 additions & 0 deletions
118
src/jmh/java/com/github/piotrrzysko/simdjson/ParseAndSelectBenchmark.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
package com.github.piotrrzysko.simdjson; | ||
|
||
import com.alibaba.fastjson2.JSON; | ||
import com.alibaba.fastjson2.JSONObject; | ||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import com.jsoniter.JsonIterator; | ||
import com.jsoniter.any.Any; | ||
import org.openjdk.jmh.annotations.Benchmark; | ||
import org.openjdk.jmh.annotations.BenchmarkMode; | ||
import org.openjdk.jmh.annotations.Level; | ||
import org.openjdk.jmh.annotations.Mode; | ||
import org.openjdk.jmh.annotations.OutputTimeUnit; | ||
import org.openjdk.jmh.annotations.Scope; | ||
import org.openjdk.jmh.annotations.Setup; | ||
import org.openjdk.jmh.annotations.State; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.HashSet; | ||
import java.util.Iterator; | ||
import java.util.Set; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import static com.github.piotrrzysko.simdjson.SimdJsonPaddingUtil.padded; | ||
|
||
@State(Scope.Benchmark) | ||
@BenchmarkMode(Mode.Throughput) | ||
@OutputTimeUnit(TimeUnit.SECONDS) | ||
public class ParseAndSelectBenchmark { | ||
|
||
private final SimdJsonParser simdJsonParser = new SimdJsonParser(); | ||
private final ObjectMapper objectMapper = new ObjectMapper(); | ||
|
||
private byte[] buffer; | ||
private byte[] bufferPadded; | ||
|
||
@Setup(Level.Trial) | ||
public void setup() throws IOException { | ||
try (InputStream is = ParseBenchmark.class.getResourceAsStream("/twitter.json")) { | ||
buffer = is.readAllBytes(); | ||
bufferPadded = padded(buffer); | ||
} | ||
} | ||
|
||
@Benchmark | ||
public int countUniqueUsersWithDefaultProfile_jackson() throws IOException { | ||
JsonNode jacksonJsonNode = objectMapper.readTree(buffer); | ||
Set<String> defaultUsers = new HashSet<>(); | ||
Iterator<JsonNode> tweets = jacksonJsonNode.get("statuses").elements(); | ||
while (tweets.hasNext()) { | ||
JsonNode tweet = tweets.next(); | ||
JsonNode user = tweet.get("user"); | ||
if (user.get("default_profile").asBoolean()) { | ||
defaultUsers.add(user.get("screen_name").textValue()); | ||
} | ||
} | ||
return defaultUsers.size(); | ||
} | ||
|
||
@Benchmark | ||
public int countUniqueUsersWithDefaultProfile_fastjson() { | ||
JSONObject jsonObject = (JSONObject) JSON.parse(buffer); | ||
Set<String> defaultUsers = new HashSet<>(); | ||
Iterator<Object> tweets = jsonObject.getJSONArray("statuses").iterator(); | ||
while (tweets.hasNext()) { | ||
JSONObject tweet = (JSONObject) tweets.next(); | ||
JSONObject user = (JSONObject) tweet.get("user"); | ||
if (user.getBoolean("default_profile")) { | ||
defaultUsers.add(user.getString("screen_name")); | ||
} | ||
} | ||
return defaultUsers.size(); | ||
} | ||
|
||
@Benchmark | ||
public int countUniqueUsersWithDefaultProfile_jsoniter() { | ||
Any json = JsonIterator.deserialize(buffer); | ||
Set<String> defaultUsers = new HashSet<>(); | ||
for (Any tweet : json.get("statuses")) { | ||
Any user = tweet.get("user"); | ||
if (user.get("default_profile").toBoolean()) { | ||
defaultUsers.add(user.get("screen_name").toString()); | ||
} | ||
} | ||
return defaultUsers.size(); | ||
} | ||
|
||
@Benchmark | ||
public int countUniqueUsersWithDefaultProfile_simdjson() { | ||
JsonValue simdJsonValue = simdJsonParser.parse(buffer, buffer.length); | ||
Set<String> defaultUsers = new HashSet<>(); | ||
Iterator<JsonValue> tweets = simdJsonValue.get("statuses").arrayIterator(); | ||
while (tweets.hasNext()) { | ||
JsonValue tweet = tweets.next(); | ||
JsonValue user = tweet.get("user"); | ||
if (user.get("default_profile").asBoolean()) { | ||
defaultUsers.add(user.get("screen_name").asString()); | ||
} | ||
} | ||
return defaultUsers.size(); | ||
} | ||
|
||
@Benchmark | ||
public int countUniqueUsersWithDefaultProfile_simdjsonPadded() { | ||
JsonValue simdJsonValue = simdJsonParser.parse(bufferPadded, buffer.length); | ||
Set<String> defaultUsers = new HashSet<>(); | ||
Iterator<JsonValue> tweets = simdJsonValue.get("statuses").arrayIterator(); | ||
while (tweets.hasNext()) { | ||
JsonValue tweet = tweets.next(); | ||
JsonValue user = tweet.get("user"); | ||
if (user.get("default_profile").asBoolean()) { | ||
defaultUsers.add(user.get("screen_name").asString()); | ||
} | ||
} | ||
return defaultUsers.size(); | ||
} | ||
} |
49 changes: 49 additions & 0 deletions
49
src/jmh/java/com/github/piotrrzysko/simdjson/ParseBenchmark.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package com.github.piotrrzysko.simdjson; | ||
|
||
import org.openjdk.jmh.annotations.Benchmark; | ||
import org.openjdk.jmh.annotations.BenchmarkMode; | ||
import org.openjdk.jmh.annotations.Level; | ||
import org.openjdk.jmh.annotations.Mode; | ||
import org.openjdk.jmh.annotations.OutputTimeUnit; | ||
import org.openjdk.jmh.annotations.Param; | ||
import org.openjdk.jmh.annotations.Scope; | ||
import org.openjdk.jmh.annotations.Setup; | ||
import org.openjdk.jmh.annotations.State; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import static com.github.piotrrzysko.simdjson.SimdJsonPaddingUtil.padded; | ||
|
||
@State(Scope.Benchmark) | ||
@BenchmarkMode(Mode.Throughput) | ||
@OutputTimeUnit(TimeUnit.SECONDS) | ||
public class ParseBenchmark { | ||
|
||
@Param({"/twitter.json" /*, "/gsoc-2018.json - unicode is not supported yet"*/, "/github_events.json"}) | ||
String fileName; | ||
|
||
private final SimdJsonParser simdJsonParser = new SimdJsonParser(); | ||
|
||
private byte[] buffer; | ||
private byte[] bufferPadded; | ||
|
||
@Setup(Level.Trial) | ||
public void setup() throws IOException { | ||
try (InputStream is = ParseBenchmark.class.getResourceAsStream(fileName)) { | ||
buffer = is.readAllBytes(); | ||
bufferPadded = padded(buffer); | ||
} | ||
} | ||
|
||
@Benchmark | ||
public JsonValue simdjson() { | ||
return simdJsonParser.parse(buffer, buffer.length); | ||
} | ||
|
||
@Benchmark | ||
public JsonValue simdjsonPadded() { | ||
return simdJsonParser.parse(bufferPadded, buffer.length); | ||
} | ||
} |
10 changes: 10 additions & 0 deletions
10
src/jmh/java/com/github/piotrrzysko/simdjson/SimdJsonPaddingUtil.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package com.github.piotrrzysko.simdjson; | ||
|
||
/**
 * Benchmark helper that creates padded copies of input buffers.
 */
final class SimdJsonPaddingUtil {

    /** Number of extra zero bytes appended after the copied content. */
    private static final int PADDING = 64;

    private SimdJsonPaddingUtil() {
        // Static utility class; not meant to be instantiated.
    }

    /**
     * Returns a new array containing the bytes of {@code src} followed by 64 zero bytes.
     *
     * @param src the bytes to copy; the array itself is not modified
     * @return a fresh array of length {@code src.length + 64}
     * @throws NullPointerException if {@code src} is {@code null}
     */
    static byte[] padded(byte[] src) {
        // A newly allocated byte[] is zero-filled, so only the content needs copying.
        byte[] bufferPadded = new byte[src.length + PADDING];
        System.arraycopy(src, 0, bufferPadded, 0, src.length);
        return bufferPadded;
    }
}
Oops, something went wrong.