encoding/json: don't reuse slice elements when decoding
The previous behavior directly contradicted the docs that have been in
place for years:

	To unmarshal a JSON array into a slice, Unmarshal resets the
	slice length to zero and then appends each element to the slice.
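
As a standalone illustration of those semantics, mirroring one of the new
test cases added below (not code from this change):

	package main

	import (
		"encoding/json"
		"fmt"
	)

	type T struct{ A, B int }

	func main() {
		s := []T{{A: -1, B: -2}, {A: -3, B: -4}}
		if err := json.Unmarshal([]byte(`[{"A": 3}]`), &s); err != nil {
			panic(err)
		}
		// With the documented semantics: [{A:3 B:0}]
		// With the old element reuse:    [{A:3 B:-2}]
		fmt.Printf("%+v\n", s)
	}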

We could use reflect.New to create a new element and reflect.Append to
then append it to the destination slice, but benchmarks have shown that
reflect.Append is very slow compared to the code that manually grows a
slice in this file.
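
For reference, a self-contained sketch of that rejected reflect.New plus
reflect.Append pattern, shown on a throwaway slice rather than the
decoder's internals:

	package main

	import (
		"fmt"
		"reflect"
	)

	func main() {
		s := []int{1, 2, 3}
		v := reflect.ValueOf(&s).Elem()
		v.SetLen(0) // reset the length to zero, as documented
		for _, x := range []int{10, 20} {
			elem := reflect.New(v.Type().Elem()).Elem() // freshly zeroed element
			elem.SetInt(int64(x))                       // stand-in for decoding into elem
			v.Set(reflect.Append(v, elem))              // reflect.Append is the slow part
		}
		fmt.Println(s) // [10 20]
	}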

Instead, if we're decoding into an element that came from the original
backing array, zero it before decoding into it. We're going to be using
the CodeDecoder benchmark, as it has a slice of struct pointers that's
decoded very often.
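
In isolation, that zeroing step is just reflect.Zero plus Set on the
element; a minimal standalone sketch (the real change is in the diff
below):

	package main

	import (
		"fmt"
		"reflect"
	)

	type T struct{ A, B int }

	func main() {
		s := []T{{A: -1, B: -2}}
		v := reflect.ValueOf(s)
		// Zero the element that came from the original backing array
		// before decoding into it.
		v.Index(0).Set(reflect.Zero(v.Type().Elem()))
		fmt.Printf("%+v\n", s) // [{A:0 B:0}]
	}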

Note that we still reuse existing values from arrays being decoded into,
as the documentation agrees with the existing implementation in that
case:

	To unmarshal a JSON array into a Go array, Unmarshal decodes
	JSON array elements into corresponding Go array elements.
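
For example, mirroring the new array test case added below: the first
element is decoded in place (so B keeps its old value), and the trailing
element is set to its zero value because the JSON array is shorter than
the Go array.

	package main

	import (
		"encoding/json"
		"fmt"
	)

	type T struct{ A, B int }

	func main() {
		a := [...]T{{A: -1, B: -2}, {A: -3, B: -4}}
		if err := json.Unmarshal([]byte(`[{"A": 3}]`), &a); err != nil {
			panic(err)
		}
		fmt.Printf("%+v\n", a) // [{A:3 B:-2} {A:0 B:0}]
	}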

The numbers with the benchmark as-is might seem catastrophic, but that's
only because the benchmark is decoding into the same variable over and
over again. Since the old decoder was happy to reuse slice elements, it
would save a lot of allocations by not having to zero and re-allocate
said elements:

	name           old time/op    new time/op    delta
	CodeDecoder-8    10.4ms ± 1%    10.9ms ± 1%   +4.41%  (p=0.000 n=10+10)

	name           old speed      new speed      delta
	CodeDecoder-8   186MB/s ± 1%   178MB/s ± 1%   -4.23%  (p=0.000 n=10+10)

	name           old alloc/op   new alloc/op   delta
	CodeDecoder-8    2.19MB ± 0%    3.59MB ± 0%  +64.09%  (p=0.000 n=10+10)

	name           old allocs/op  new allocs/op  delta
	CodeDecoder-8     76.8k ± 0%     92.7k ± 0%  +20.71%  (p=0.000 n=10+10)

We can prove this by moving 'var r codeResponse' into the loop, so that
the benchmark no longer reuses the destination pointer (a sketch of the
modified loop follows the numbers below). And sure enough, we no longer
see the slow-down caused by the extra allocations:

	name           old time/op    new time/op    delta
	CodeDecoder-8    10.9ms ± 0%    10.9ms ± 1%  -0.37%  (p=0.043 n=10+10)

	name           old speed      new speed      delta
	CodeDecoder-8   177MB/s ± 0%   178MB/s ± 1%  +0.37%  (p=0.041 n=10+10)

	name           old alloc/op   new alloc/op   delta
	CodeDecoder-8    3.59MB ± 0%    3.59MB ± 0%    ~     (p=0.780 n=10+10)

	name           old allocs/op  new allocs/op  delta
	CodeDecoder-8     92.7k ± 0%     92.7k ± 0%    ~     (all equal)
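
Roughly what that benchmark change looks like; a simplified sketch that
assumes the benchmark's usual Decoder-over-buffer setup (codeJSON and
codeResponse come from bench_test.go; buf and dec stand in for the
benchmark's buffer and Decoder):

	for i := 0; i < b.N; i++ {
		var r codeResponse // fresh destination each iteration: nothing to reuse
		buf.Write(codeJSON)
		if err := dec.Decode(&r); err != nil {
			b.Fatal("Decode:", err)
		}
	}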

I believe that it's useful to leave the benchmarks as they are now,
because the decoder does reuse memory in some cases. For example,
existing map elements are reused. However, subtle changes like this one
need to be benchmarked carefully.

Finally, add a couple of tests involving both a slice and an array of
structs.

Fixes #21092.

Change-Id: I8b1194f25e723a31abd146fbfe9428ac10c1389d
Reviewed-on: https://go-review.googlesource.com/c/go/+/191783
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
mvdan authored and bradfitz committed May 7, 2020
1 parent 5c7da0d commit 1f1b639
Showing 2 changed files with 36 additions and 19 deletions.
decode.go: 40 changes (22 additions & 18 deletions)
@@ -177,8 +177,7 @@ func (d *decodeState) unmarshal(v interface{}) error {
 	d.scanWhile(scanSkipSpace)
 	// We decode rv not rv.Elem because the Unmarshaler interface
 	// test must be applied at the top level of the value.
-	err := d.value(rv)
-	if err != nil {
+	if err := d.value(rv); err != nil {
 		return d.addErrorContext(err)
 	}
 	return d.savedError
@@ -525,6 +524,7 @@ func (d *decodeState) array(v reflect.Value) error {
 		return nil
 	}
 	v = pv
+	initialSliceCap := 0

 	// Check type of target.
 	switch v.Kind() {
@@ -541,8 +541,9 @@ func (d *decodeState) array(v reflect.Value) error {
 		d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)})
 		d.skip()
 		return nil
-	case reflect.Array, reflect.Slice:
-		break
+	case reflect.Slice:
+		initialSliceCap = v.Cap()
+	case reflect.Array:
 	}

 	i := 0
@@ -553,7 +554,6 @@ func (d *decodeState) array(v reflect.Value) error {
 			break
 		}

-		// Get element of array, growing if necessary.
 		if v.Kind() == reflect.Slice {
 			// Grow slice if necessary
 			if i >= v.Cap() {
@@ -569,19 +569,22 @@ func (d *decodeState) array(v reflect.Value) error {
 				v.SetLen(i + 1)
 			}
 		}

+		var into reflect.Value
 		if i < v.Len() {
-			// Decode into element.
-			if err := d.value(v.Index(i)); err != nil {
-				return err
-			}
-		} else {
-			// Ran out of fixed array: skip.
-			if err := d.value(reflect.Value{}); err != nil {
-				return err
+			into = v.Index(i)
+			if i < initialSliceCap {
+				// Reusing an element from the slice's original
+				// backing array; zero it before decoding.
+				into.Set(reflect.Zero(v.Type().Elem()))
 			}
 		}
 		i++
+		// Note that we decode the value even if we ran past the end of
+		// the fixed array. In that case, we decode into an empty value
+		// and do nothing with it.
+		if err := d.value(into); err != nil {
+			return err
+		}

 		// Next token must be , or ].
 		if d.opcode == scanSkipSpace {
@@ -597,16 +600,17 @@ func (d *decodeState) array(v reflect.Value) error {

 	if i < v.Len() {
 		if v.Kind() == reflect.Array {
-			// Array. Zero the rest.
-			z := reflect.Zero(v.Type().Elem())
+			// Zero the remaining elements.
+			zero := reflect.Zero(v.Type().Elem())
 			for ; i < v.Len(); i++ {
-				v.Index(i).Set(z)
+				v.Index(i).Set(zero)
 			}
 		} else {
 			v.SetLen(i)
 		}
 	}
-	if i == 0 && v.Kind() == reflect.Slice {
+	if v.Kind() == reflect.Slice && v.IsNil() {
+		// Don't allow the resulting slice to be nil.
 		v.Set(reflect.MakeSlice(v.Type(), 0, 0))
 	}
 	return nil
decode_test.go: 15 changes (14 additions & 1 deletion)
@@ -2099,7 +2099,10 @@ func TestSkipArrayObjects(t *testing.T) {
 // slices, and arrays.
 // Issues 4900 and 8837, among others.
 func TestPrefilled(t *testing.T) {
-	// Values here change, cannot reuse table across runs.
+	type T struct {
+		A, B int
+	}
+	// Values here change, cannot reuse the table across runs.
 	var prefillTests = []struct {
 		in  string
 		ptr interface{}
@@ -2135,6 +2138,16 @@ func TestPrefilled(t *testing.T) {
 			ptr: &[...]int{1, 2},
 			out: &[...]int{3, 0},
 		},
+		{
+			in:  `[{"A": 3}]`,
+			ptr: &[]T{{A: -1, B: -2}, {A: -3, B: -4}},
+			out: &[]T{{A: 3}},
+		},
+		{
+			in:  `[{"A": 3}]`,
+			ptr: &[...]T{{A: -1, B: -2}, {A: -3, B: -4}},
+			out: &[...]T{{A: 3, B: -2}, {}},
+		},
 	}

 	for _, tt := range prefillTests {
