Skip to content

Commit

Permalink
encoding/json: speed up tokenization of literals
Browse files Browse the repository at this point in the history
Decoder.Decode and Unmarshal actually scan the input bytes twice - the
first time to check for syntax errors and the length of the value, and
the second to perform the decoding.

It's in the second scan that we actually tokenize the bytes. Since
syntax errors aren't a possibility, we can take shortcuts.

In particular, literals such as quoted strings are very common in JSON,
so we can avoid a lot of work by special casing them.

name                  old time/op    new time/op    delta
CodeDecoder-8           10.3ms ± 1%     9.1ms ± 0%  -11.89%  (p=0.002 n=6+6)
UnicodeDecoder-8         342ns ± 0%     283ns ± 0%  -17.25%  (p=0.000 n=6+5)
DecoderStream-8          239ns ± 0%     230ns ± 0%   -3.90%  (p=0.000 n=6+5)
CodeUnmarshal-8         11.0ms ± 0%     9.8ms ± 0%  -11.45%  (p=0.002 n=6+6)
CodeUnmarshalReuse-8    10.3ms ± 0%     9.0ms ± 0%  -12.72%  (p=0.004 n=5+6)
UnmarshalString-8        104ns ± 0%      92ns ± 0%  -11.35%  (p=0.002 n=6+6)
UnmarshalFloat64-8      93.2ns ± 0%    87.6ns ± 0%   -6.01%  (p=0.010 n=6+4)
UnmarshalInt64-8        74.5ns ± 0%    71.5ns ± 0%   -3.91%  (p=0.000 n=5+6)

name                  old speed      new speed      delta
CodeDecoder-8          189MB/s ± 1%   214MB/s ± 0%  +13.50%  (p=0.002 n=6+6)
UnicodeDecoder-8      40.9MB/s ± 0%  49.5MB/s ± 0%  +20.96%  (p=0.002 n=6+6)
CodeUnmarshal-8        176MB/s ± 0%   199MB/s ± 0%  +12.93%  (p=0.002 n=6+6)

Updates #28923.

Change-Id: I7a5e2aef51bd4ddf2004aad24210f6f50e01eaeb
Reviewed-on: https://go-review.googlesource.com/c/go/+/151042
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
  • Loading branch information
mvdan committed Apr 3, 2019
1 parent 4d04418 commit 8e5172f
Showing 1 changed file with 50 additions and 4 deletions.
54 changes: 50 additions & 4 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,52 @@ func (d *decodeState) scanWhile(op int) {
d.opcode = d.scan.eof()
}

// rescanLiteral is similar to scanWhile(scanContinue), but it specialises the
// common case where we're decoding a literal. The decoder scans the input
// twice, once for syntax errors and to check the length of the value, and the
// second to perform the decoding.
//
// Only in the second step do we use decodeState to tokenize literals, so we
// know there aren't any syntax errors. We can take advantage of that knowledge,
// and scan a literal's bytes much more quickly.
func (d *decodeState) rescanLiteral() {
data, i := d.data, d.off
Switch:
switch data[i-1] {
case '"': // string
for ; i < len(data); i++ {
switch data[i] {
case '\\':
i++ // escaped char
case '"':
i++ // tokenize the closing quote too
break Switch
}
}
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number
for ; i < len(data); i++ {
switch data[i] {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'.', 'e', 'E', '+', '-':
default:
break Switch
}
}
case 't': // true
i += len("rue")
case 'f': // false
i += len("alse")
case 'n': // null
i += len("ull")
}
if i < len(data) {
d.opcode = stateEndValue(&d.scan, data[i])
} else {
d.opcode = scanEnd
}
d.off = i + 1
}

// value consumes a JSON value from d.data[d.off-1:], decoding into v, and
// reads the following byte ahead. If v is invalid, the value is discarded.
// The first byte of the value has been read already.
Expand Down Expand Up @@ -392,7 +438,7 @@ func (d *decodeState) value(v reflect.Value) error {
case scanBeginLiteral:
// All bytes inside literal return scanContinue op code.
start := d.readIndex()
d.scanWhile(scanContinue)
d.rescanLiteral()

if v.IsValid() {
if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil {
Expand Down Expand Up @@ -677,7 +723,7 @@ func (d *decodeState) object(v reflect.Value) error {

// Read key.
start := d.readIndex()
d.scanWhile(scanContinue)
d.rescanLiteral()
item := d.data[start:d.readIndex()]
key, ok := unquoteBytes(item)
if !ok {
Expand Down Expand Up @@ -1083,7 +1129,7 @@ func (d *decodeState) objectInterface() map[string]interface{} {

// Read string key.
start := d.readIndex()
d.scanWhile(scanContinue)
d.rescanLiteral()
item := d.data[start:d.readIndex()]
key, ok := unquote(item)
if !ok {
Expand Down Expand Up @@ -1122,7 +1168,7 @@ func (d *decodeState) objectInterface() map[string]interface{} {
func (d *decodeState) literalInterface() interface{} {
// All bytes inside literal return scanContinue op code.
start := d.readIndex()
d.scanWhile(scanContinue)
d.rescanLiteral()

item := d.data[start:d.readIndex()]

Expand Down

0 comments on commit 8e5172f

Please sign in to comment.