Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simd int and string parsing on aarch64 #65

Merged
merged 36 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5c2d9cb
experiment with int parsing speedups
samuelcolvin Jan 20, 2024
a605ea0
fix tess
samuelcolvin Jan 20, 2024
486097e
tweak big int parsing
samuelcolvin Jan 20, 2024
d5679fa
simd int parsing on aarch64
samuelcolvin Jan 21, 2024
b09d73a
linting
samuelcolvin Jan 21, 2024
bf5cefa
test on macos-latest-xlarge
samuelcolvin Jan 22, 2024
e02f4fd
tweaks
samuelcolvin Jan 22, 2024
f4c00aa
fix ci
samuelcolvin Jan 22, 2024
ebbbc3d
separate simd_aarch64
samuelcolvin Jan 23, 2024
72b421a
simd string parsing for aarch64
samuelcolvin Jan 23, 2024
7e6d5b3
linting
samuelcolvin Jan 24, 2024
2272c41
fix ci
samuelcolvin Jan 26, 2024
58180c0
inlining
samuelcolvin Jan 26, 2024
79de565
simplify somewhat
samuelcolvin Jan 26, 2024
320bb78
more tests
samuelcolvin Jan 26, 2024
cc9a6b3
simplify logic after end of string
samuelcolvin Feb 4, 2024
8379b9d
tweaks
samuelcolvin Feb 4, 2024
dc09f12
bump
samuelcolvin Feb 4, 2024
fb5952c
improve non-ascii checks
samuelcolvin Feb 6, 2024
764c857
fuzz on aarch64
samuelcolvin Feb 6, 2024
a896baf
simplify short int parsing
samuelcolvin Feb 6, 2024
9f976d6
fix tests, address one comment
samuelcolvin Mar 28, 2024
c09ef8a
static ONGOING_CHUNK_SIZE
samuelcolvin Mar 28, 2024
6f386fc
fix benchmarks
samuelcolvin Mar 28, 2024
3aeb18a
fix comments
samuelcolvin Mar 28, 2024
47aba14
remove on_backslash macro
samuelcolvin Mar 28, 2024
37eb734
add cargo-careful
samuelcolvin Mar 28, 2024
5bda07f
without cargo cache
samuelcolvin Mar 28, 2024
c442f2f
fixes required by careful
samuelcolvin Mar 28, 2024
74ef054
bump rust cache
samuelcolvin Mar 28, 2024
4c6e06c
clarify ONGOING_CHUNK_MULTIPLIER
samuelcolvin Apr 1, 2024
195f97d
move comment
samuelcolvin Apr 1, 2024
0f8e3c0
NumberInt::try_from(&[u8])
samuelcolvin Apr 1, 2024
2ba4502
one more test
samuelcolvin Apr 1, 2024
328f357
update README
samuelcolvin Apr 1, 2024
fc7570a
Merge branch 'main' into int-simd
samuelcolvin Apr 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
clarify ONGOING_CHUNK_MULTIPLIER
  • Loading branch information
samuelcolvin committed Apr 1, 2024
commit 4c6e06cc385d83661721de1851170bb6acc44193
12 changes: 7 additions & 5 deletions src/number_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
json_err!(InvalidNumber, index)
}
} else {
json_err!(EofWhileParsingValue, index)

Check warning on line 88 in src/number_decoder.rs

View check run for this annotation

Codecov / codecov/patch

src/number_decoder.rs#L88

Added line #L88 was not covered by tests
}
}
}
Expand Down Expand Up @@ -210,7 +210,7 @@
let (chunk, new_index) = IntChunk::parse_big(data, index);
match chunk {
IntChunk::Ongoing(value) => {
big_value *= ONGOING_CHUNK_SIZE;
big_value *= ONGOING_CHUNK_MULTIPLIER;
big_value += value;
index = new_index;
}
Expand Down Expand Up @@ -253,9 +253,11 @@
];

#[cfg(target_arch = "aarch64")]
static ONGOING_CHUNK_SIZE: u64 = POW_10[16];
// on aarch64 we use 128-bit registers - 16 bytes
static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16);
#[cfg(not(target_arch = "aarch64"))]
static ONGOING_CHUNK_SIZE: u64 = POW_10[17];
// decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing
static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18);

pub(crate) enum IntChunk {
Ongoing(u64),
Expand Down Expand Up @@ -286,8 +288,8 @@

#[inline(always)]
pub(crate) fn decode_int_chunk_fallback(data: &[u8], mut index: usize, mut value: u64) -> (IntChunk, usize) {
// i64::MAX = 9223372036854775807 - 18 chars is always enough
for _ in 1..18 {
// i64::MAX = 9223372036854775807 (19 chars) - so 18 chars is always valid as an i64
for _ in 0..18 {
if let Some(digit) = data.get(index) {
if INT_CHAR_MAP[*digit as usize] {
// we use wrapping add to avoid branching - we know the value cannot wrap
Expand Down
25 changes: 25 additions & 0 deletions tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1295,3 +1295,28 @@ fn jiter_next_value_owned() {
assert_eq!(s, "v");
assert!(matches!(s, Cow::Owned(_)));
}

/// Parses the decimal text of `i64::MAX` and checks the value survives intact.
///
/// Either integer variant is accepted; any other variant fails the test.
#[test]
fn i64_max() {
    let expected = i64::MAX;
    let text = "9223372036854775807";
    // guard against a typo in the literal above
    assert_eq!(expected.to_string(), text);

    match JsonValue::parse(text.as_bytes(), false).unwrap() {
        JsonValue::Int(parsed) => assert_eq!(parsed, expected),
        JsonValue::BigInt(parsed) => assert_eq!(parsed, expected.into()),
        _ => panic!("expected int"),
    }
}

/// Parses a run of `9`s for every length from 1 to 99 digits and checks that
/// the parsed integer prints back as exactly the input text.
///
/// Either integer variant is accepted; any other variant fails the test.
#[test]
fn test_all_int_lengths() {
    for digits in 1..100 {
        let text = "9".repeat(digits);
        let rendered = match JsonValue::parse(text.as_bytes(), false).unwrap() {
            JsonValue::Int(n) => n.to_string(),
            JsonValue::BigInt(n) => n.to_string(),
            _ => panic!("expected int"),
        };
        assert_eq!(rendered, text);
    }
}
Loading