Skip to content

Commit 0e2afcc

Browse files
committed
zstd: Inline blockDec.checkCRC
checkCRC is now a uint32 with a boolean indicating whether it needs checking. This makes the comparison shorter and saves some allocations. Benchmark results on older i7: name old speed new speed delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-8 458MB/s ± 0% 458MB/s ± 0% ~ (p=0.139 n=9+8) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-8 568MB/s ± 2% 566MB/s ± 4% ~ (p=0.971 n=10+10) Decoder_DecoderSmall/geo.protodata.zst/buffered-8 1.19GB/s ± 0% 1.20GB/s ± 1% ~ (p=0.051 n=9+10) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-8 832MB/s ± 2% 839MB/s ± 2% ~ (p=0.156 n=10+9) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-8 352MB/s ± 0% 352MB/s ± 0% ~ (p=0.148 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-8 534MB/s ± 7% 542MB/s ± 3% ~ (p=0.315 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-8 422MB/s ± 0% 422MB/s ± 0% ~ (p=0.408 n=8+10) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-8 606MB/s ± 9% 618MB/s ± 7% ~ (p=0.393 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-8 362MB/s ± 5% 367MB/s ± 0% +1.44% (p=0.026 n=9+10) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-8 449MB/s ± 3% 463MB/s ± 4% +3.28% (p=0.003 n=10+9) Decoder_DecoderSmall/alice29.txt.zst/buffered-8 361MB/s ± 0% 361MB/s ± 0% ~ (p=0.888 n=8+9) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-8 569MB/s ± 8% 577MB/s ± 2% ~ (p=0.541 n=9+8) Decoder_DecoderSmall/html_x_4.zst/buffered-8 2.46GB/s ± 1% 2.46GB/s ± 0% ~ (p=0.720 n=10+9) Decoder_DecoderSmall/html_x_4.zst/unbuffered-8 1.67GB/s ± 6% 1.65GB/s ± 4% ~ (p=0.315 n=10+9) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-8 3.87GB/s ± 0% 3.87GB/s ± 0% ~ (p=0.497 n=10+9) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-8 1.84GB/s ± 7% 1.83GB/s ± 2% ~ (p=0.912 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-8 8.63GB/s ± 1% 8.61GB/s ± 0% ~ (p=0.063 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-8 3.34GB/s ± 3% 3.32GB/s ± 5% ~ (p=0.579 n=10+10) Decoder_DecoderSmall/urls.10K.zst/buffered-8 586MB/s ± 1% 584MB/s ± 0% -0.46% (p=0.005 n=9+7) Decoder_DecoderSmall/urls.10K.zst/unbuffered-8 877MB/s ± 2% 879MB/s ± 2% ~ (p=0.796 n=10+10) Decoder_DecoderSmall/html.zst/buffered-8 961MB/s ± 0% 963MB/s ± 0% +0.19% (p=0.004 n=9+9) Decoder_DecoderSmall/html.zst/unbuffered-8 709MB/s ± 1% 715MB/s ± 3% ~ (p=0.203 n=8+10) Decoder_DecoderSmall/comp-data.bin.zst/buffered-8 407MB/s ± 0% 405MB/s ± 0% -0.47% (p=0.000 n=10+8) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-8 152MB/s ± 3% 151MB/s ± 4% ~ (p=0.853 n=10+10) name old alloc/op new alloc/op delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-8 54.0B ± 0% 54.0B ± 0% ~ (all equal) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-8 4.57kB ± 3% 4.57kB ± 3% ~ (p=0.985 n=10+10) Decoder_DecoderSmall/geo.protodata.zst/buffered-8 49.0B ± 0% 49.0B ± 0% ~ (all equal) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-8 4.47kB ± 0% 4.47kB ± 1% ~ (p=0.481 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-8 68.6B ± 2% 68.0B ± 0% ~ (p=0.173 n=10+9) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-8 4.64kB ± 5% 4.62kB ± 4% ~ (p=1.000 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-8 63.0B ± 0% 63.0B ± 0% ~ (all equal) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-8 4.65kB ± 3% 4.56kB ± 3% -1.94% (p=0.014 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-8 53.0B ± 0% 53.0B ± 0% ~ (all equal) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-8 4.49kB ± 1% 4.45kB ± 2% -0.93% (p=0.006 n=9+9) Decoder_DecoderSmall/alice29.txt.zst/buffered-8 77.9B ± 1% 77.5B ± 1% ~ (p=0.176 n=9+8) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-8 4.52kB ± 2% 4.48kB ± 1% -0.94% (p=0.003 n=10+9) Decoder_DecoderSmall/html_x_4.zst/buffered-8 53.0B ± 6% 56.0B ± 0% ~ (p=0.059 n=10+8) Decoder_DecoderSmall/html_x_4.zst/unbuffered-8 4.46kB ± 0% 4.48kB ± 2% ~ (p=1.000 n=8+10) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-8 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-8 4.46kB ± 0% 4.43kB ± 0% -0.72% (p=0.000 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-8 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-8 4.07kB ± 0% 4.04kB ± 0% -0.79% (p=0.000 n=9+10) Decoder_DecoderSmall/urls.10K.zst/buffered-8 66.0B ± 0% 66.0B ± 0% ~ (all equal) Decoder_DecoderSmall/urls.10K.zst/unbuffered-8 4.60kB ± 3% 4.53kB ± 5% ~ (p=0.052 n=10+10) Decoder_DecoderSmall/html.zst/buffered-8 49.0B ± 0% 49.0B ± 0% ~ (all equal) Decoder_DecoderSmall/html.zst/unbuffered-8 4.47kB ± 0% 4.46kB ± 1% ~ (p=0.165 n=8+10) Decoder_DecoderSmall/comp-data.bin.zst/buffered-8 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-8 4.44kB ± 0% 4.44kB ± 0% ~ (p=0.171 n=10+10) name old allocs/op new allocs/op delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/geo.protodata.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/alice29.txt.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/html_x_4.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/html_x_4.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-8 32.0 ± 0% 24.0 ± 0% -25.00% (p=0.000 n=10+10) Decoder_DecoderSmall/urls.10K.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/urls.10K.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/html.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/html.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/comp-data.bin.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-8 32.0 ± 0% 32.0 ± 0% ~ (all equal) On a newer i5: name old speed new speed delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-16 730MB/s ± 2% 729MB/s ± 2% ~ (p=0.971 n=10+10) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-16 1.16GB/s ± 5% 1.17GB/s ± 5% ~ (p=0.720 n=10+9) Decoder_DecoderSmall/geo.protodata.zst/buffered-16 2.45GB/s ± 2% 2.45GB/s ± 4% ~ (p=0.853 n=10+10) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-16 2.47GB/s ± 1% 2.47GB/s ± 1% ~ (p=0.631 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-16 520MB/s ± 1% 528MB/s ± 2% +1.58% (p=0.015 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-16 1.15GB/s ±12% 1.18GB/s ± 5% ~ (p=0.393 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-16 615MB/s ± 2% 627MB/s ± 2% +1.80% (p=0.001 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-16 1.19GB/s ± 4% 1.20GB/s ± 5% ~ (p=0.684 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-16 565MB/s ± 2% 572MB/s ± 2% ~ (p=0.075 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-16 1.10GB/s ±10% 1.11GB/s ± 9% ~ (p=0.481 n=10+10) Decoder_DecoderSmall/alice29.txt.zst/buffered-16 560MB/s ± 1% 567MB/s ± 2% +1.22% (p=0.008 n=10+9) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-16 928MB/s ± 3% 919MB/s ± 3% ~ (p=0.218 n=10+10) Decoder_DecoderSmall/html_x_4.zst/buffered-16 4.12GB/s ± 2% 4.19GB/s ± 2% +1.75% (p=0.008 n=9+10) Decoder_DecoderSmall/html_x_4.zst/unbuffered-16 4.79GB/s ± 2% 4.77GB/s ± 2% ~ (p=0.542 n=10+10) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-16 6.02GB/s ± 2% 5.96GB/s ± 2% ~ (p=0.123 n=10+10) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-16 5.78GB/s ± 1% 5.81GB/s ± 1% ~ (p=0.123 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-16 11.5GB/s ± 2% 11.3GB/s ± 2% -1.77% (p=0.005 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-16 9.83GB/s ± 1% 9.80GB/s ± 1% ~ (p=0.353 n=10+10) Decoder_DecoderSmall/urls.10K.zst/buffered-16 845MB/s ± 1% 845MB/s ± 2% ~ (p=0.796 n=10+10) Decoder_DecoderSmall/urls.10K.zst/unbuffered-16 1.46GB/s ± 2% 1.46GB/s ± 3% ~ (p=0.971 n=10+10) Decoder_DecoderSmall/html.zst/buffered-16 1.80GB/s ± 2% 1.84GB/s ± 2% +2.11% (p=0.006 n=9+8) Decoder_DecoderSmall/html.zst/unbuffered-16 2.16GB/s ± 1% 2.18GB/s ± 1% +0.97% (p=0.019 n=10+10) Decoder_DecoderSmall/comp-data.bin.zst/buffered-16 702MB/s ± 3% 708MB/s ± 1% ~ (p=0.165 n=10+10) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-16 609MB/s ± 1% 608MB/s ± 1% ~ (p=0.150 n=9+10) name old alloc/op new alloc/op delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-16 55.0B ± 0% 55.0B ± 0% ~ (all equal) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-16 4.51kB ± 1% 4.47kB ± 1% -0.93% (p=0.001 n=10+9) Decoder_DecoderSmall/geo.protodata.zst/buffered-16 49.0B ± 0% 49.0B ± 0% ~ (all equal) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-16 4.48kB ± 1% 4.44kB ± 0% -0.87% (p=0.000 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-16 74.5B ± 1% 74.2B ± 2% ~ (p=0.450 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-16 4.56kB ± 2% 4.53kB ± 2% ~ (p=0.306 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-16 67.5B ± 1% 67.0B ± 0% -0.74% (p=0.033 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-16 4.55kB ± 1% 4.50kB ± 1% -1.14% (p=0.000 n=9+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-16 54.0B ± 0% 54.0B ± 0% ~ (all equal) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-16 4.47kB ± 0% 4.44kB ± 0% -0.66% (p=0.000 n=9+10) Decoder_DecoderSmall/alice29.txt.zst/buffered-16 74.6B ± 6% 75.3B ± 7% ~ (p=0.883 n=10+10) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-16 4.48kB ± 0% 4.45kB ± 1% -0.57% (p=0.005 n=7+9) Decoder_DecoderSmall/html_x_4.zst/buffered-16 54.0B ± 0% 52.4B ± 5% -2.96% (p=0.046 n=8+10) Decoder_DecoderSmall/html_x_4.zst/unbuffered-16 4.48kB ± 0% 4.46kB ± 0% -0.43% (p=0.016 n=10+9) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-16 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-16 4.46kB ± 0% 4.43kB ± 0% -0.69% (p=0.000 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-16 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-16 4.07kB ± 0% 4.04kB ± 0% -0.80% (p=0.000 n=7+9) Decoder_DecoderSmall/urls.10K.zst/buffered-16 71.4B ± 1% 71.6B ± 1% ~ (p=0.656 n=10+10) Decoder_DecoderSmall/urls.10K.zst/unbuffered-16 4.55kB ± 2% 4.53kB ± 2% ~ (p=0.529 n=10+10) Decoder_DecoderSmall/html.zst/buffered-16 49.0B ± 0% 49.0B ± 0% ~ (all equal) Decoder_DecoderSmall/html.zst/unbuffered-16 4.48kB ± 0% 4.44kB ± 0% -0.91% (p=0.000 n=10+9) Decoder_DecoderSmall/comp-data.bin.zst/buffered-16 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-16 4.44kB ± 0% 4.44kB ± 0% ~ (p=0.897 n=10+10) allocs/op is the same as on the previous machine.
1 parent eb034a2 commit 0e2afcc

File tree

2 files changed

+20
-18
lines changed

2 files changed

+20
-18
lines changed

zstd/blockdec.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,9 @@ type blockDec struct {
8282

8383
err error
8484

85-
// Check against this crc
86-
checkCRC []byte
85+
// Check against this crc, if hasCRC is true.
86+
checkCRC uint32
87+
hasCRC bool
8788

8889
// Frame to use for singlethreaded decoding.
8990
// Should not be used by the decoder itself since parent may be another frame.

zstd/decoder.go

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
package zstd
66

77
import (
8-
"bytes"
98
"context"
109
"encoding/binary"
1110
"io"
@@ -471,18 +470,16 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
471470
}
472471
}
473472
}
474-
if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
475-
got := d.current.crc.Sum64()
476-
var tmp [4]byte
477-
binary.LittleEndian.PutUint32(tmp[:], uint32(got))
478-
if !bytes.Equal(tmp[:], next.d.checkCRC) {
473+
if next.err == nil && next.d != nil && next.d.hasCRC {
474+
got := uint32(d.current.crc.Sum64())
475+
if got != next.d.checkCRC {
479476
if debugDecoder {
480-
println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
477+
printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC)
481478
}
482479
d.current.err = ErrCRCMismatch
483480
} else {
484481
if debugDecoder {
485-
println("CRC ok", tmp[:])
482+
printf("CRC ok %08x\n", got)
486483
}
487484
}
488485
}
@@ -922,18 +919,22 @@ decodeStream:
922919
println("next block returned error:", err)
923920
}
924921
dec.err = err
925-
dec.checkCRC = nil
922+
dec.hasCRC = false
926923
if dec.Last && frame.HasCheckSum && err == nil {
927924
crc, err := frame.rawInput.readSmall(4)
928-
if err != nil {
925+
if len(crc) < 4 {
926+
if err != nil {
927+
err = io.ErrUnexpectedEOF
928+
929+
}
929930
println("CRC missing?", err)
930931
dec.err = err
931-
}
932-
var tmp [4]byte
933-
copy(tmp[:], crc)
934-
dec.checkCRC = tmp[:]
935-
if debugDecoder {
936-
println("found crc to check:", dec.checkCRC)
932+
} else {
933+
dec.checkCRC = binary.LittleEndian.Uint32(crc)
934+
dec.hasCRC = true
935+
if debugDecoder {
936+
printf("found crc to check: %08x\n", dec.checkCRC)
937+
}
937938
}
938939
}
939940
err = dec.err

0 commit comments

Comments
 (0)