Skip to content

Commit 6b398c3

Browse files
authored
Handle non-ASCII characters in field names (simdjson#24)
1 parent dd6d5b5 commit 6b398c3

File tree

4 files changed: 124 additions & 121 deletions

src/main/java/org/simdjson/JsonValue.java

Lines changed: 27 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.simdjson;
22

3+
import java.util.Arrays;
34
import java.util.Iterator;
45
import java.util.Map;
56

@@ -60,7 +61,7 @@ public Iterator<JsonValue> arrayIterator() {
6061
return new ArrayIterator(tapeIdx);
6162
}
6263

63-
public Iterator<Map.Entry<CharSequence, JsonValue>> objectIterator() {
64+
public Iterator<Map.Entry<String, JsonValue>> objectIterator() {
6465
return new ObjectIterator(tapeIdx);
6566
}
6667

@@ -76,32 +77,34 @@ public boolean asBoolean() {
7677
return tape.getType(tapeIdx) == TRUE_VALUE;
7778
}
7879

79-
public CharSequence asCharSequence() {
80-
return asCharSequence(tapeIdx);
80+
public String asString() {
81+
return getString(tapeIdx);
8182
}
8283

83-
private CharSequence asCharSequence(int idx) {
84-
int stringBufferIdx = (int) tape.getValue(idx);
84+
private String getString(int tapeIdx) {
85+
int stringBufferIdx = (int) tape.getValue(tapeIdx);
8586
int len = IntegerUtils.toInt(stringBuffer, stringBufferIdx);
86-
return new StringView(stringBufferIdx + Integer.BYTES, len);
87+
return new String(stringBuffer, stringBufferIdx + Integer.BYTES, len, UTF_8);
8788
}
8889

8990
public JsonValue get(String name) {
90-
Iterator<Map.Entry<CharSequence, JsonValue>> it = objectIterator();
91-
while (it.hasNext()) {
92-
Map.Entry<CharSequence, JsonValue> entry = it.next();
93-
CharSequence key = entry.getKey();
94-
if (CharSequence.compare(key, name) == 0) {
95-
return entry.getValue();
91+
byte[] bytes = name.getBytes(UTF_8);
92+
int idx = tapeIdx + 1;
93+
int endIdx = tape.getMatchingBraceIndex(tapeIdx) - 1;
94+
while (idx < endIdx) {
95+
int stringBufferIdx = (int) tape.getValue(idx);
96+
int len = IntegerUtils.toInt(stringBuffer, stringBufferIdx);
97+
int valIdx = tape.computeNextIndex(idx);
98+
idx = tape.computeNextIndex(valIdx);
99+
int stringBufferFromIdx = stringBufferIdx + Integer.BYTES;
100+
int stringBufferToIdx = stringBufferFromIdx + len;
101+
if (Arrays.compare(bytes, 0, bytes.length, stringBuffer, stringBufferFromIdx, stringBufferToIdx) == 0) {
102+
return new JsonValue(tape, valIdx, stringBuffer, buffer);
96103
}
97104
}
98105
return null;
99106
}
100107

101-
public String asString() {
102-
return asCharSequence().toString();
103-
}
104-
105108
public int getSize() {
106109
return tape.getScopeCount(tapeIdx);
107110
}
@@ -119,7 +122,7 @@ public String toString() {
119122
return String.valueOf(asBoolean());
120123
}
121124
case STRING -> {
122-
return asCharSequence().toString();
125+
return asString();
123126
}
124127
case NULL_VALUE -> {
125128
return "null";
@@ -160,7 +163,7 @@ public JsonValue next() {
160163
}
161164
}
162165

163-
private class ObjectIterator implements Iterator<Map.Entry<CharSequence, JsonValue>> {
166+
private class ObjectIterator implements Iterator<Map.Entry<String, JsonValue>> {
164167

165168
private final int endIdx;
166169

@@ -177,27 +180,27 @@ public boolean hasNext() {
177180
}
178181

179182
@Override
180-
public Map.Entry<CharSequence, JsonValue> next() {
181-
CharSequence key = asCharSequence(idx);
183+
public Map.Entry<String, JsonValue> next() {
184+
String key = getString(idx);
182185
idx = tape.computeNextIndex(idx);
183186
JsonValue value = new JsonValue(tape, idx, stringBuffer, buffer);
184187
idx = tape.computeNextIndex(idx);
185188
return new ObjectField(key, value);
186189
}
187190
}
188191

189-
private static class ObjectField implements Map.Entry<CharSequence, JsonValue> {
192+
private static class ObjectField implements Map.Entry<String, JsonValue> {
190193

191-
private final CharSequence key;
194+
private final String key;
192195
private final JsonValue value;
193196

194-
ObjectField(CharSequence key, JsonValue value) {
197+
ObjectField(String key, JsonValue value) {
195198
this.key = key;
196199
this.value = value;
197200
}
198201

199202
@Override
200-
public CharSequence getKey() {
203+
public String getKey() {
201204
return key;
202205
}
203206

@@ -211,35 +214,4 @@ public JsonValue setValue(JsonValue value) {
211214
throw new UnsupportedOperationException("Object fields are immutable");
212215
}
213216
}
214-
215-
private class StringView implements CharSequence {
216-
217-
private final int startIdx;
218-
private final int len;
219-
220-
StringView(int startIdx, int len) {
221-
this.startIdx = startIdx;
222-
this.len = len;
223-
}
224-
225-
@Override
226-
public int length() {
227-
return len;
228-
}
229-
230-
@Override
231-
public char charAt(int index) {
232-
return (char) stringBuffer[startIdx + index];
233-
}
234-
235-
@Override
236-
public CharSequence subSequence(int start, int end) {
237-
return new StringView(startIdx + start, startIdx + end);
238-
}
239-
240-
@Override
241-
public String toString() {
242-
return new String(stringBuffer, startIdx, len, UTF_8);
243-
}
244-
}
245217
}

src/test/java/org/simdjson/JsonValueAssert.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import org.assertj.core.api.AbstractAssert;
44
import org.assertj.core.api.Assertions;
55

6-
import static java.nio.charset.StandardCharsets.UTF_8;
7-
86
class JsonValueAssert extends AbstractAssert<JsonValueAssert, JsonValue> {
97

108
JsonValueAssert(JsonValue actual) {
@@ -36,12 +34,6 @@ JsonValueAssert isEqualTo(String expected) {
3634
.withFailMessage("Expecting value to be string but was " + getActualType())
3735
.isTrue();
3836
Assertions.assertThat(actual.asString()).isEqualTo(expected);
39-
CharSequence cs = actual.asCharSequence();
40-
byte[] bytesExpected = expected.getBytes(UTF_8);
41-
Assertions.assertThat(cs.length()).isEqualTo(bytesExpected.length);
42-
for (int i = 0; i < cs.length(); i++) {
43-
Assertions.assertThat((byte) cs.charAt(i)).isEqualTo(bytesExpected[i]);
44-
}
4537
return this;
4638
}
4739

src/test/java/org/simdjson/ObjectParsingTest.java

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
package org.simdjson;
2+
3+
import org.junit.jupiter.api.Test;
4+
5+
import java.util.Iterator;
6+
import java.util.Map;
7+
8+
import static org.assertj.core.api.Assertions.assertThat;
9+
import static org.simdjson.JsonValueAssert.assertThat;
10+
import static org.simdjson.StringUtils.toUtf8;
11+
12+
public class ObjectParsingTest {
13+
14+
@Test
15+
public void emptyObject() {
16+
// given
17+
SimdJsonParser parser = new SimdJsonParser();
18+
byte[] json = toUtf8("{}");
19+
20+
// when
21+
JsonValue jsonValue = parser.parse(json, json.length);
22+
23+
// then
24+
assertThat(jsonValue.isObject()).isTrue();
25+
Iterator<JsonValue> it = jsonValue.arrayIterator();
26+
assertThat(it.hasNext()).isFalse();
27+
}
28+
29+
@Test
30+
public void objectIterator() {
31+
// given
32+
SimdJsonParser parser = new SimdJsonParser();
33+
byte[] json = toUtf8("{\"a\": 1, \"b\": 2, \"c\": 3}");
34+
35+
// when
36+
JsonValue jsonValue = parser.parse(json, json.length);
37+
38+
// then
39+
assertThat(jsonValue.isObject()).isTrue();
40+
String[] expectedKeys = new String[]{"a", "b", "c"};
41+
int[] expectedValue = new int[]{1, 2, 3};
42+
int counter = 0;
43+
Iterator<Map.Entry<String, JsonValue>> it = jsonValue.objectIterator();
44+
while (it.hasNext()) {
45+
Map.Entry<String, JsonValue> field = it.next();
46+
assertThat(field.getKey()).isEqualTo(expectedKeys[counter]);
47+
assertThat(field.getValue()).isEqualTo(expectedValue[counter]);
48+
counter++;
49+
}
50+
assertThat(counter).isEqualTo(expectedKeys.length);
51+
}
52+
53+
@Test
54+
public void objectSize() {
55+
// given
56+
SimdJsonParser parser = new SimdJsonParser();
57+
byte[] json = toUtf8("{\"1\": 1, \"2\": 1, \"3\": 1}");
58+
59+
// when
60+
JsonValue jsonValue = parser.parse(json, json.length);
61+
62+
// then
63+
assertThat(jsonValue.isObject()).isTrue();
64+
assertThat(jsonValue.getSize()).isEqualTo(3);
65+
}
66+
67+
@Test
68+
public void fieldNamesWithNonAsciiCharacters() {
69+
// given
70+
SimdJsonParser parser = new SimdJsonParser();
71+
byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}");
72+
73+
// when
74+
JsonValue jsonValue = parser.parse(json, json.length);
75+
76+
// then
77+
assertThat(jsonValue.get("ąćśńźż")).isEqualTo(1);
78+
assertThat(jsonValue.get("\u20A9\u0E3F")).isEqualTo(2);
79+
assertThat(jsonValue.get("αβγ")).isEqualTo(3);
80+
assertThat(jsonValue.get("😀abc😀")).isEqualTo(4);
81+
}
82+
83+
@Test
84+
public void nonexistentField() {
85+
// given
86+
SimdJsonParser parser = new SimdJsonParser();
87+
byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3}");
88+
89+
// when
90+
JsonValue jsonValue = parser.parse(json, json.length);
91+
92+
// then
93+
assertThat(jsonValue.get("acsnz")).isNull();
94+
assertThat(jsonValue.get("\\u20A9\\u0E3F")).isNull();
95+
assertThat(jsonValue.get("αβ")).isNull();
96+
}
97+
}

src/test/java/org/simdjson/SimdJsonParserTest.java

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import org.junit.jupiter.params.provider.ValueSource;
66

77
import java.util.Iterator;
8-
import java.util.Map;
98

109
import static org.assertj.core.api.Assertions.assertThat;
1110
import static org.assertj.core.api.Assertions.fail;
@@ -33,24 +32,6 @@ public void testEmptyArray() {
3332
}
3433
}
3534

36-
@Test
37-
public void testEmptyObject() {
38-
// given
39-
SimdJsonParser parser = new SimdJsonParser();
40-
byte[] json = toUtf8("{}");
41-
42-
// when
43-
JsonValue jsonValue = parser.parse(json, json.length);
44-
45-
// then
46-
assertThat(jsonValue.isObject()).isTrue();
47-
Iterator<JsonValue> it = jsonValue.arrayIterator();
48-
while (it.hasNext()) {
49-
fail("Unexpected field");
50-
it.next();
51-
}
52-
}
53-
5435
@Test
5536
public void testArrayIterator() {
5637
// given
@@ -74,31 +55,6 @@ public void testArrayIterator() {
7455
assertThat(counter).isEqualTo(expectedValues.length);
7556
}
7657

77-
@Test
78-
public void testObjectIterator() {
79-
// given
80-
SimdJsonParser parser = new SimdJsonParser();
81-
byte[] json = toUtf8("{\"a\": 1, \"b\": 2, \"c\": 3}");
82-
83-
// when
84-
JsonValue jsonValue = parser.parse(json, json.length);
85-
86-
// then
87-
assertThat(jsonValue.isObject()).isTrue();
88-
String[] expectedKeys = new String[]{"a", "b", "c"};
89-
int[] expectedValue = new int[]{1, 2, 3};
90-
int counter = 0;
91-
Iterator<Map.Entry<CharSequence, JsonValue>> it = jsonValue.objectIterator();
92-
while (it.hasNext()) {
93-
Map.Entry<CharSequence, JsonValue> field = it.next();
94-
CharSequence key = field.getKey();
95-
assertThat(key).usingComparator(CharSequence::compare).isEqualTo(expectedKeys[counter]);
96-
assertThat(field.getValue()).isEqualTo(expectedValue[counter]);
97-
counter++;
98-
}
99-
assertThat(counter).isEqualTo(expectedKeys.length);
100-
}
101-
10258
@Test
10359
public void testBooleanValues() {
10460
// given
@@ -313,20 +269,6 @@ public void testArraySize() {
313269
assertThat(jsonValue.getSize()).isEqualTo(3);
314270
}
315271

316-
@Test
317-
public void testObjectSize() {
318-
// given
319-
SimdJsonParser parser = new SimdJsonParser();
320-
byte[] json = toUtf8("{\"1\":1,\"2\":1,\"3\":1}");
321-
322-
// when
323-
JsonValue jsonValue = parser.parse(json, json.length);
324-
325-
// then
326-
assertThat(jsonValue.isObject()).isTrue();
327-
assertThat(jsonValue.getSize()).isEqualTo(3);
328-
}
329-
330272
@Test
331273
public void testLargeArraySize() {
332274
// given

0 commit comments

Comments
 (0)