Skip to content

Commit 5c5abff

Browse files
committed
Fix handling of leading and trailing whitespace in NDJSON input.
1 parent 5ee37f3 commit 5c5abff

File tree

3 files changed

+35
-28
lines changed

3 files changed

+35
-28
lines changed

ndjson_test.go

+16-12
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package simdjson
22

33
import (
4-
"bytes"
54
"encoding/binary"
65
"fmt"
76
"io/ioutil"
@@ -186,7 +185,7 @@ func TestDemoNdjson(t *testing.T) {
186185
}
187186

188187
func TestNdjsonCountWhere(t *testing.T) {
189-
ndjson := getPatchedNdjson("testdata/parking-citations-1M.json.zst")
188+
ndjson := loadFile("testdata/parking-citations-1M.json.zst")
190189

191190
pj := internalParsedJson{}
192191
pj.initialize(len(ndjson) * 3 / 2)
@@ -199,7 +198,12 @@ func TestNdjsonCountWhere(t *testing.T) {
199198
}
200199

201200
func TestNdjsonCountWhere2(t *testing.T) {
202-
ndjson := getPatchedNdjson("testdata/RC_2009-01.json.zst")
201+
ndjson := loadFile("testdata/RC_2009-01.json.zst")
202+
// Test trimming
203+
b := make([]byte, 0, len(ndjson)+4)
204+
b = append(b, '\n', '\n')
205+
b = append(b, ndjson...)
206+
b = append(b, '\n', '\n')
203207
pj, err := ParseND(ndjson, nil)
204208
if err != nil {
205209
t.Fatal(err)
@@ -210,13 +214,13 @@ func TestNdjsonCountWhere2(t *testing.T) {
210214
}
211215
}
212216

213-
func getPatchedNdjson(filename string) []byte {
217+
func loadFile(filename string) []byte {
214218
if !strings.HasSuffix(filename, ".zst") {
215219
ndjson, err := ioutil.ReadFile(filename)
216220
if err != nil {
217221
panic("Failed to load file")
218222
}
219-
return bytes.ReplaceAll(ndjson, []byte("\n"), []byte("{"))
223+
return ndjson
220224
}
221225
var f *os.File
222226
var err error
@@ -250,12 +254,12 @@ func getPatchedNdjson(filename string) []byte {
250254
if err != nil {
251255
panic("Failed to load file")
252256
}
253-
return bytes.ReplaceAll(ndjson, []byte("\n"), []byte("{"))
257+
return ndjson
254258
}
255259

256260
func BenchmarkNdjsonStage1(b *testing.B) {
257261

258-
ndjson := getPatchedNdjson("testdata/parking-citations-1M.json.zst")
262+
ndjson := loadFile("testdata/parking-citations-1M.json.zst")
259263

260264
pj := internalParsedJson{}
261265

@@ -271,7 +275,7 @@ func BenchmarkNdjsonStage1(b *testing.B) {
271275
}
272276

273277
func BenchmarkNdjsonStage2(b *testing.B) {
274-
ndjson := getPatchedNdjson("testdata/parking-citations-1M.json.zst")
278+
ndjson := loadFile("testdata/parking-citations-1M.json.zst")
275279
pj := internalParsedJson{}
276280
pj.initialize(len(ndjson) * 3 / 2)
277281

@@ -296,7 +300,7 @@ func count_raw_tape(tape []uint64) (count int) {
296300

297301
func BenchmarkNdjsonColdCountStar(b *testing.B) {
298302

299-
ndjson := getPatchedNdjson("testdata/parking-citations-1M.json.zst")
303+
ndjson := loadFile("testdata/parking-citations-1M.json.zst")
300304

301305
b.SetBytes(int64(len(ndjson)))
302306
b.ReportAllocs()
@@ -443,7 +447,7 @@ func countObjects(data ParsedJson) (count int) {
443447
}
444448

445449
func BenchmarkNdjsonColdCountStarWithWhere(b *testing.B) {
446-
ndjson := getPatchedNdjson("testdata/parking-citations-1M.json.zst")
450+
ndjson := loadFile("testdata/parking-citations-1M.json.zst")
447451
const want = 110349
448452
runtime.GC()
449453
pj := internalParsedJson{}
@@ -482,7 +486,7 @@ func BenchmarkNdjsonColdCountStarWithWhere(b *testing.B) {
482486
}
483487

484488
func BenchmarkNdjsonWarmCountStar(b *testing.B) {
485-
ndjson := getPatchedNdjson("testdata/parking-citations-1M.json.zst")
489+
ndjson := loadFile("testdata/parking-citations-1M.json.zst")
486490

487491
pj := internalParsedJson{}
488492
pj.initialize(len(ndjson) * 3 / 2)
@@ -498,7 +502,7 @@ func BenchmarkNdjsonWarmCountStar(b *testing.B) {
498502
}
499503

500504
func BenchmarkNdjsonWarmCountStarWithWhere(b *testing.B) {
501-
ndjson := getPatchedNdjson("testdata/parking-citations-1M.json.zst")
505+
ndjson := loadFile("testdata/parking-citations-1M.json.zst")
502506

503507
pj := internalParsedJson{}
504508
pj.initialize(len(ndjson) * 3 / 2)

parsed_serialize_test.go

+13-13
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,14 @@ func BenchmarkDeSerialize(b *testing.B) {
105105
}
106106

107107
func BenchmarkSerializeNDJSON(b *testing.B) {
108-
ndjson := getPatchedNdjson("testdata/parking-citations-1M.json.zst")
109-
110-
pj := internalParsedJson{}
111-
pj.initialize(len(ndjson) * 3 / 2)
112-
pj.parseMessage(ndjson)
108+
ndjson := loadFile("testdata/parking-citations-1M.json.zst")
113109

110+
pj, err := ParseND(ndjson, nil)
111+
if err != nil {
112+
b.Fatal(err)
113+
}
114114
bench := func(b *testing.B, s *Serializer) {
115-
output := s.Serialize(nil, pj.ParsedJson)
115+
output := s.Serialize(nil, *pj)
116116
if true {
117117
b.Log(len(ndjson), "(JSON) ->", len(output), "(Serialized)", 100*float64(len(output))/float64(len(ndjson)), "%")
118118
}
@@ -121,7 +121,7 @@ func BenchmarkSerializeNDJSON(b *testing.B) {
121121
b.ReportAllocs()
122122
b.ResetTimer()
123123
for i := 0; i < b.N; i++ {
124-
output = s.Serialize(output[:0], pj.ParsedJson)
124+
output = s.Serialize(output[:0], *pj)
125125
}
126126
}
127127
b.Run("default", func(b *testing.B) {
@@ -146,14 +146,14 @@ func BenchmarkSerializeNDJSON(b *testing.B) {
146146
}
147147

148148
func BenchmarkDeSerializeNDJSON(b *testing.B) {
149-
ndjson := getPatchedNdjson("testdata/parking-citations-1M.json.zst")
150-
151-
pj := internalParsedJson{}
152-
pj.initialize(len(ndjson) * 3 / 2)
153-
pj.parseMessage(ndjson)
149+
ndjson := loadFile("testdata/parking-citations-1M.json.zst")
154150

151+
pj, err := ParseND(ndjson, nil)
152+
if err != nil {
153+
b.Fatal(err)
154+
}
155155
bench := func(b *testing.B, s *Serializer) {
156-
output := s.Serialize(nil, pj.ParsedJson)
156+
output := s.Serialize(nil, *pj)
157157
if false {
158158
b.Log(len(ndjson), "(JSON) ->", len(output), "(Serialized)", 100*float64(len(output))/float64(len(ndjson)), "%")
159159
}

simdjson.go

+6-3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package simdjson
22

33
import (
44
"bufio"
5+
"bytes"
56
"errors"
67
"fmt"
78
"io"
@@ -52,6 +53,7 @@ func ParseND(b []byte, reuse *ParsedJson) (*ParsedJson, error) {
5253
if reuse != nil {
5354
pj.ParsedJson = *reuse
5455
}
56+
b = bytes.TrimSpace(b)
5557
pj.initialize(len(b) * 3 / 2)
5658

5759
// FIXME(fwessels): We should not modify input.
@@ -123,11 +125,12 @@ func ParseNDStream(r io.Reader, res chan<- Stream, reuse <-chan *ParsedJson) {
123125
tmp = append(tmp, b...)
124126
}
125127
// TODO: Do the parsing in several goroutines, but keep output in order.
126-
if len(tmp) > 0 {
128+
trimmed := bytes.TrimSpace(tmp)
129+
if len(trimmed) > 0 {
127130
// We cannot reuse the result since we share it
128131
pj.ParsedJson = ParsedJson{}
129-
pj.initialize(len(tmp) * 3 / 2)
130-
parseErr := pj.parseMessageNdjson(tmp)
132+
pj.initialize(len(trimmed) * 3 / 2)
133+
parseErr := pj.parseMessageNdjson(trimmed)
131134
if parseErr != nil {
132135
res <- Stream{
133136
Value: nil,

0 commit comments

Comments
 (0)