-
-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
43a493b
commit cda94cd
Showing
17 changed files
with
2,573 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
[package] | ||
name = "batson" | ||
description = "Binary Alternative To (J)SON. Designed to be very fast to query." | ||
readme = "../../README.md" | ||
version = {workspace = true} | ||
edition = {workspace = true} | ||
authors = {workspace = true} | ||
license = {workspace = true} | ||
keywords = {workspace = true} | ||
categories = {workspace = true} | ||
homepage = {workspace = true} | ||
repository = {workspace = true} | ||
|
||
[dependencies] | ||
bytemuck = { version = "1.17.1", features = ["aarch64_simd", "derive", "align_offset"] } | ||
jiter = { workspace = true } | ||
serde = "1.0.210" | ||
serde_json = "1.0.128" | ||
simdutf8 = { version = "0.1.4", features = ["aarch64_neon"] } | ||
smallvec = "2.0.0-alpha.7" | ||
|
||
[dev-dependencies] | ||
bencher = { workspace = true } | ||
paste = { workspace = true } | ||
codspeed-bencher-compat = { workspace = true } | ||
|
||
[[bench]] | ||
name = "main" | ||
harness = false | ||
|
||
[lints.clippy] | ||
dbg_macro = "deny" | ||
print_stdout = "deny" | ||
print_stderr = "deny" | ||
# in general we lint against the pedantic group, but we will whitelist | ||
# certain lints which we don't want to enforce (for now) | ||
pedantic = { level = "deny", priority = -1 } | ||
missing_errors_doc = "allow" | ||
cast_possible_truncation = "allow" # TODO remove | ||
cast_sign_loss = "allow" # TODO remove | ||
cast_possible_wrap = "allow" # TODO remove | ||
checked_conversions = "allow" # TODO remove |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# batson | ||
|
||
Binary Alternative To (J)SON. Designed to be very fast to query. | ||
|
||
Inspired by Postgres' [JSONB type](https://github.com/postgres/postgres/commit/d9134d0a355cfa447adc80db4505d5931084278a?diff=unified&w=0) and Snowflake's [VARIANT type](https://www.youtube.com/watch?v=jtjOfggD4YY). | ||
|
||
For a relatively small JSON document (3KB), looking up a string by path with batson is 14 to 126x faster than Jiter, and 106 to 588x faster than serde_json. | ||
|
||
``` | ||
test medium_get_str_found_batson ... bench: 51 ns/iter (+/- 1) | ||
test medium_get_str_found_jiter ... bench: 755 ns/iter (+/- 66) | ||
test medium_get_str_found_serde ... bench: 5,420 ns/iter (+/- 93) | ||
test medium_get_str_missing_batson ... bench: 9 ns/iter (+/- 0) | ||
test medium_get_str_missing_jiter ... bench: 1,135 ns/iter (+/- 46) | ||
test medium_get_str_missing_serde ... bench: 5,292 ns/iter (+/- 324) | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
use codspeed_bencher_compat::{benchmark_group, benchmark_main, Bencher}; | ||
use std::hint::black_box; | ||
|
||
use std::fs::File; | ||
use std::io::Read; | ||
|
||
use batson::get::{get_str, BatsonPath}; | ||
use batson::{batson_to_json_string, encode_from_json}; | ||
use jiter::JsonValue; | ||
|
||
/// Read the entire file at `path` into a `String`.
///
/// # Panics
///
/// Panics if the file cannot be opened or its contents cannot be read as
/// UTF-8. The panic message names `path`, because the benchmarks pass paths
/// relative to the working directory and a bare I/O error would not say which
/// file was being looked for.
fn read_file(path: &str) -> String {
    let mut file = File::open(path).unwrap_or_else(|err| panic!("failed to open {path}: {err}"));
    let mut contents = String::new();
    file.read_to_string(&mut contents)
        .unwrap_or_else(|err| panic!("failed to read {path}: {err}"));
    contents
}
|
||
/// taken from <https://github.com/datafusion-contrib/datafusion-functions-json/blob/v0.41.0/src/common.rs#L184-L216> | ||
mod jiter_find { | ||
use jiter::{Jiter, Peek}; | ||
|
||
#[derive(Debug)] | ||
pub enum JsonPath<'s> { | ||
Key(&'s str), | ||
Index(usize), | ||
None, | ||
} | ||
|
||
impl From<u64> for JsonPath<'_> { | ||
fn from(index: u64) -> Self { | ||
JsonPath::Index(usize::try_from(index).unwrap()) | ||
} | ||
} | ||
|
||
impl From<i32> for JsonPath<'_> { | ||
fn from(index: i32) -> Self { | ||
match usize::try_from(index) { | ||
Ok(i) => Self::Index(i), | ||
Err(_) => Self::None, | ||
} | ||
} | ||
} | ||
|
||
impl<'s> From<&'s str> for JsonPath<'s> { | ||
fn from(key: &'s str) -> Self { | ||
JsonPath::Key(key) | ||
} | ||
} | ||
|
||
pub fn jiter_json_find<'j>(opt_json: Option<&'j str>, path: &[JsonPath]) -> Option<(Jiter<'j>, Peek)> { | ||
let json_str = opt_json?; | ||
let mut jiter = Jiter::new(json_str.as_bytes()); | ||
let mut peek = jiter.peek().ok()?; | ||
for element in path { | ||
match element { | ||
JsonPath::Key(key) if peek == Peek::Object => { | ||
let mut next_key = jiter.known_object().ok()??; | ||
|
||
while next_key != *key { | ||
jiter.next_skip().ok()?; | ||
next_key = jiter.next_key().ok()??; | ||
} | ||
|
||
peek = jiter.peek().ok()?; | ||
} | ||
JsonPath::Index(index) if peek == Peek::Array => { | ||
let mut array_item = jiter.known_array().ok()??; | ||
|
||
for _ in 0..*index { | ||
jiter.known_skip(array_item).ok()?; | ||
array_item = jiter.array_step().ok()??; | ||
} | ||
|
||
peek = array_item; | ||
} | ||
_ => { | ||
return None; | ||
} | ||
} | ||
} | ||
Some((jiter, peek)) | ||
} | ||
|
||
pub fn get_str(json_data: Option<&str>, path: &[JsonPath]) -> Option<String> { | ||
if let Some((mut jiter, peek)) = jiter_json_find(json_data, path) { | ||
match peek { | ||
Peek::String => Some(jiter.known_str().ok()?.to_owned()), | ||
_ => None, | ||
} | ||
} else { | ||
None | ||
} | ||
} | ||
} | ||
|
||
mod serde_find { | ||
use batson::get::BatsonPath; | ||
use serde_json::Value; | ||
|
||
pub fn get_str(json_data: &[u8], path: &[BatsonPath]) -> Option<String> { | ||
let json_value: Value = serde_json::from_slice(json_data).ok()?; | ||
let mut current = &json_value; | ||
for key in path { | ||
current = match (key, current) { | ||
(BatsonPath::Key(k), Value::Object(map)) => map.get(*k)?, | ||
(BatsonPath::Index(i), Value::Array(vec)) => vec.get(*i)?, | ||
_ => return None, | ||
} | ||
} | ||
match current { | ||
Value::String(s) => Some(s.clone()), | ||
_ => None, | ||
} | ||
} | ||
} | ||
|
||
fn json_to_batson(json: &[u8]) -> Vec<u8> { | ||
let json_value = JsonValue::parse(json, false).unwrap(); | ||
encode_from_json(&json_value).unwrap() | ||
} | ||
|
||
fn medium_get_str_found_batson(bench: &mut Bencher) { | ||
let json = read_file("../jiter/benches/medium_response.json"); | ||
let json_data = json.as_bytes(); | ||
let batson_data = json_to_batson(json_data); | ||
let path: Vec<BatsonPath> = vec!["person".into(), "linkedin".into(), "handle".into()]; | ||
bench.iter(|| { | ||
let v = get_str(black_box(&batson_data), &path); | ||
black_box(v) | ||
}); | ||
} | ||
|
||
fn medium_get_str_found_jiter(bench: &mut Bencher) { | ||
let json = read_file("../jiter/benches/medium_response.json"); | ||
let path: Vec<jiter_find::JsonPath> = vec!["person".into(), "linkedin".into(), "handle".into()]; | ||
bench.iter(|| { | ||
let v = jiter_find::get_str(black_box(Some(&json)), &path); | ||
black_box(v) | ||
}); | ||
} | ||
|
||
fn medium_get_str_found_serde(bench: &mut Bencher) { | ||
let json = read_file("../jiter/benches/medium_response.json"); | ||
let json_data = json.as_bytes(); | ||
let path: Vec<BatsonPath> = vec!["person".into(), "linkedin".into(), "handle".into()]; | ||
bench.iter(|| { | ||
let v = serde_find::get_str(black_box(json_data), &path).unwrap(); | ||
black_box(v) | ||
}); | ||
} | ||
|
||
fn medium_get_str_missing_batson(bench: &mut Bencher) { | ||
let json = read_file("../jiter/benches/medium_response.json"); | ||
let json_data = json.as_bytes(); | ||
let batson_data = json_to_batson(json_data); | ||
let path: Vec<BatsonPath> = vec!["squid".into(), "linkedin".into(), "handle".into()]; | ||
bench.iter(|| { | ||
let v = get_str(black_box(&batson_data), &path); | ||
black_box(v) | ||
}); | ||
} | ||
|
||
fn medium_get_str_missing_jiter(bench: &mut Bencher) { | ||
let json = read_file("../jiter/benches/medium_response.json"); | ||
let path: Vec<jiter_find::JsonPath> = vec!["squid".into(), "linkedin".into(), "handle".into()]; | ||
bench.iter(|| { | ||
let v = jiter_find::get_str(black_box(Some(&json)), &path); | ||
black_box(v) | ||
}); | ||
} | ||
|
||
fn medium_get_str_missing_serde(bench: &mut Bencher) { | ||
let json = read_file("../jiter/benches/medium_response.json"); | ||
let json_data = json.as_bytes(); | ||
let path: Vec<BatsonPath> = vec!["squid".into(), "linkedin".into(), "handle".into()]; | ||
bench.iter(|| { | ||
let v = serde_find::get_str(black_box(json_data), &path); | ||
black_box(v) | ||
}); | ||
} | ||
|
||
fn medium_convert_batson_to_json(bench: &mut Bencher) { | ||
let json = read_file("../jiter/benches/medium_response.json"); | ||
let json_data = json.as_bytes(); | ||
let batson_data = json_to_batson(json_data); | ||
bench.iter(|| { | ||
let v = batson_to_json_string(black_box(&batson_data)).unwrap(); | ||
black_box(v) | ||
}); | ||
} | ||
|
||
fn medium_convert_json_to_batson(bench: &mut Bencher) { | ||
let json = read_file("../jiter/benches/medium_response.json"); | ||
let json = json.as_bytes(); | ||
bench.iter(|| { | ||
let json_value = JsonValue::parse(json, false).unwrap(); | ||
let b = encode_from_json(&json_value).unwrap(); | ||
black_box(b) | ||
}); | ||
} | ||
|
||
// Collect every benchmark function above into the `benches` group, then hand
// that group to `benchmark_main!`. The bench target is declared with
// `harness = false`, so `benchmark_main!` is presumably what supplies the
// binary's `main` - confirm against the bencher / codspeed documentation.
benchmark_group!( | ||
benches, | ||
medium_get_str_found_batson, | ||
medium_get_str_found_jiter, | ||
medium_get_str_found_serde, | ||
medium_get_str_missing_batson, | ||
medium_get_str_missing_jiter, | ||
medium_get_str_missing_serde, | ||
medium_convert_batson_to_json, | ||
medium_convert_json_to_batson | ||
); | ||
benchmark_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
use batson::get::BatsonPath; | ||
use batson::{batson_to_json_string, encode_from_json}; | ||
use jiter::JsonValue; | ||
use std::fs::File; | ||
use std::io::Read; | ||
|
||
fn main() { | ||
let filename = std::env::args().nth(1).expect( | ||
r#" | ||
No arguments provided! | ||
Usage: | ||
cargo run --example read_file file.json [path] | ||
"#, | ||
); | ||
|
||
let mut file = File::open(&filename).expect("failed to open file"); | ||
let mut json = Vec::new(); | ||
file.read_to_end(&mut json).expect("failed to read file"); | ||
|
||
let json_value = JsonValue::parse(&json, false).expect("invalid JSON"); | ||
let batson = encode_from_json(&json_value).expect("failed to construct batson data"); | ||
println!("json length: {}", json.len()); | ||
println!("batson length: {}", batson.len()); | ||
|
||
let output_json = batson_to_json_string(&batson).expect("failed to convert batson to JSON"); | ||
println!("output json length: {}", output_json.len()); | ||
|
||
if let Some(path) = std::env::args().nth(2) { | ||
let path: Vec<BatsonPath> = path.split('.').map(to_batson_path).collect(); | ||
let start = std::time::Instant::now(); | ||
let value = batson::get::get_str(&batson, &path).expect("failed to get value"); | ||
let elapsed = start.elapsed(); | ||
println!("Found value: {value:?} (time taken: {elapsed:?})"); | ||
} | ||
|
||
println!("reloading to check round-trip"); | ||
let json_value = JsonValue::parse(output_json.as_bytes(), false).expect("invalid JSON"); | ||
let batson = encode_from_json(&json_value).expect("failed to construct batson data"); | ||
let output_json2 = batson_to_json_string(&batson).expect("failed to convert batson to JSON"); | ||
println!("JSON unchanged after re-encoding: {:?}", output_json == output_json2); | ||
|
||
println!("\n\noutput json:\n{}", output_json); | ||
} | ||
|
||
fn to_batson_path(s: &str) -> BatsonPath { | ||
if s.chars().all(char::is_numeric) { | ||
let index: usize = s.parse().unwrap(); | ||
index.into() | ||
} else { | ||
s.into() | ||
} | ||
} |
Oops, something went wrong.