From b41026b8e9a278686103bfb4e5b637e966c7d16e Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Mon, 10 Jan 2022 03:01:26 -0800 Subject: [PATCH] zstd: Minor decoder improvements (#467) Decoding is around 1% faster. The large gains in some DecodeAllParallel results are outliers. ``` benchmark old ns/op new ns/op delta BenchmarkDecoder_DecoderSmall/kppkn.gtb.zst-32 4569931 4482917 -1.90% BenchmarkDecoder_DecoderSmall/geo.protodata.zst-32 1041234 1035972 -0.51% BenchmarkDecoder_DecoderSmall/plrabn12.txt.zst-32 14863278 14590060 -1.84% BenchmarkDecoder_DecoderSmall/lcet10.txt.zst-32 10867406 10873836 +0.06% BenchmarkDecoder_DecoderSmall/asyoulik.txt.zst-32 3639723 3623579 -0.44% BenchmarkDecoder_DecoderSmall/alice29.txt.zst-32 4795746 4712242 -1.74% BenchmarkDecoder_DecoderSmall/html_x_4.zst-32 1998022 2011177 +0.66% BenchmarkDecoder_DecoderSmall/paper-100k.pdf.zst-32 195653 192640 -1.54% BenchmarkDecoder_DecoderSmall/fireworks.jpeg.zst-32 79823 79958 +0.17% BenchmarkDecoder_DecoderSmall/urls.10K.zst-32 12293289 12420972 +1.04% BenchmarkDecoder_DecoderSmall/html.zst-32 1115542 1120073 +0.41% BenchmarkDecoder_DecoderSmall/comp-data.bin.zst-32 87392 86033 -1.56% BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-32 565993 561070 -0.87% BenchmarkDecoder_DecodeAll/geo.protodata.zst-32 129586 128363 -0.94% BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-32 1793372 1780863 -0.70% BenchmarkDecoder_DecodeAll/lcet10.txt.zst-32 1332514 1317041 -1.16% BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-32 453067 450499 -0.57% BenchmarkDecoder_DecodeAll/alice29.txt.zst-32 589049 587888 -0.20% BenchmarkDecoder_DecodeAll/html_x_4.zst-32 246588 245277 -0.53% BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-32 24020 23410 -2.54% BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-32 9355 9349 -0.06% BenchmarkDecoder_DecodeAll/urls.10K.zst-32 1505506 1488845 -1.11% BenchmarkDecoder_DecodeAll/html.zst-32 139603 137762 -1.32% BenchmarkDecoder_DecodeAll/comp-data.bin.zst-32 10905 10724 -1.66% BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 29132 28871 -0.90% 
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 6839 6824 -0.22% BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 94506 93872 -0.67% BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 70281 69755 -0.75% BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 24104 23808 -1.23% BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 31807 31375 -1.36% BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 13238 13140 -0.74% BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1347 1327 -1.48% BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 2011 1709 -15.02% BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 76267 76475 +0.27% BenchmarkDecoder_DecodeAllParallel/html.zst-32 7523 7567 +0.58% BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 936 784 -16.21% ``` --- zstd/bitreader.go | 15 +++++++++++---- zstd/fse_decoder.go | 2 +- zstd/seqdec.go | 4 ++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/zstd/bitreader.go b/zstd/bitreader.go index 8544585371..753d17df63 100644 --- a/zstd/bitreader.go +++ b/zstd/bitreader.go @@ -50,16 +50,23 @@ func (b *bitReader) getBits(n uint8) int { if n == 0 /*|| b.bitsRead >= 64 */ { return 0 } - return b.getBitsFast(n) + return int(b.get32BitsFast(n)) } -// getBitsFast requires that at least one bit is requested every time. +// get32BitsFast requires that at least one bit is requested every time. // There are no checks if the buffer is filled. -func (b *bitReader) getBitsFast(n uint8) int { +func (b *bitReader) get32BitsFast(n uint8) uint32 { const regMask = 64 - 1 v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) b.bitsRead += n - return int(v) + return v +} + +func (b *bitReader) get16BitsFast(n uint8) uint16 { + const regMask = 64 - 1 + v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) + b.bitsRead += n + return v } // fillFast() will make sure at least 32 bits are available. 
diff --git a/zstd/fse_decoder.go b/zstd/fse_decoder.go index e6d3d49b39..bb3d4fd6c3 100644 --- a/zstd/fse_decoder.go +++ b/zstd/fse_decoder.go @@ -379,7 +379,7 @@ func (s decSymbol) final() (int, uint8) { // This can only be used if no symbols are 0 bits. // At least tablelog bits must be available in the bit reader. func (s *fseState) nextFast(br *bitReader) (uint32, uint8) { - lowBits := uint16(br.getBitsFast(s.state.nbBits())) + lowBits := br.get16BitsFast(s.state.nbBits()) s.state = s.dt[s.state.newState()+lowBits] return s.state.baseline(), s.state.addBits() } diff --git a/zstd/seqdec.go b/zstd/seqdec.go index 1dd39e63b7..bc731e4cb6 100644 --- a/zstd/seqdec.go +++ b/zstd/seqdec.go @@ -278,7 +278,7 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { mlState = mlTable[mlState.newState()&maxTableMask] ofState = ofTable[ofState.newState()&maxTableMask] } else { - bits := br.getBitsFast(nBits) + bits := br.get32BitsFast(nBits) lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) llState = llTable[(llState.newState()+lowBits)&maxTableMask] @@ -326,7 +326,7 @@ func (s *sequenceDecs) updateAlt(br *bitReader) { s.offsets.state.state = s.offsets.state.dt[c.newState()] return } - bits := br.getBitsFast(nBits) + bits := br.get32BitsFast(nBits) lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31)) s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]