Skip to content

Commit

Permalink
zstd: Minor decoder improvements (#467)
Browse files Browse the repository at this point in the history
Around 1% faster overall. The large improvements in the parallel results (-15% and -16%) are outliers.
```
benchmark                                                    old ns/op     new ns/op     delta
BenchmarkDecoder_DecoderSmall/kppkn.gtb.zst-32               4569931       4482917       -1.90%
BenchmarkDecoder_DecoderSmall/geo.protodata.zst-32           1041234       1035972       -0.51%
BenchmarkDecoder_DecoderSmall/plrabn12.txt.zst-32            14863278      14590060      -1.84%
BenchmarkDecoder_DecoderSmall/lcet10.txt.zst-32              10867406      10873836      +0.06%
BenchmarkDecoder_DecoderSmall/asyoulik.txt.zst-32            3639723       3623579       -0.44%
BenchmarkDecoder_DecoderSmall/alice29.txt.zst-32             4795746       4712242       -1.74%
BenchmarkDecoder_DecoderSmall/html_x_4.zst-32                1998022       2011177       +0.66%
BenchmarkDecoder_DecoderSmall/paper-100k.pdf.zst-32          195653        192640        -1.54%
BenchmarkDecoder_DecoderSmall/fireworks.jpeg.zst-32          79823         79958         +0.17%
BenchmarkDecoder_DecoderSmall/urls.10K.zst-32                12293289      12420972      +1.04%
BenchmarkDecoder_DecoderSmall/html.zst-32                    1115542       1120073       +0.41%
BenchmarkDecoder_DecoderSmall/comp-data.bin.zst-32           87392         86033         -1.56%
BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-32                  565993        561070        -0.87%
BenchmarkDecoder_DecodeAll/geo.protodata.zst-32              129586        128363        -0.94%
BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-32               1793372       1780863       -0.70%
BenchmarkDecoder_DecodeAll/lcet10.txt.zst-32                 1332514       1317041       -1.16%
BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-32               453067        450499        -0.57%
BenchmarkDecoder_DecodeAll/alice29.txt.zst-32                589049        587888        -0.20%
BenchmarkDecoder_DecodeAll/html_x_4.zst-32                   246588        245277        -0.53%
BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-32             24020         23410         -2.54%
BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-32             9355          9349          -0.06%
BenchmarkDecoder_DecodeAll/urls.10K.zst-32                   1505506       1488845       -1.11%
BenchmarkDecoder_DecodeAll/html.zst-32                       139603        137762        -1.32%
BenchmarkDecoder_DecodeAll/comp-data.bin.zst-32              10905         10724         -1.66%
BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32          29132         28871         -0.90%
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32      6839          6824          -0.22%
BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32       94506         93872         -0.67%
BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32         70281         69755         -0.75%
BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32       24104         23808         -1.23%
BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32        31807         31375         -1.36%
BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32           13238         13140         -0.74%
BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32     1347          1327          -1.48%
BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32     2011          1709          -15.02%
BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32           76267         76475         +0.27%
BenchmarkDecoder_DecodeAllParallel/html.zst-32               7523          7567          +0.58%
BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32      936           784           -16.21%
```
  • Loading branch information
klauspost authored Jan 10, 2022
1 parent 35a5ed5 commit b41026b
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 7 deletions.
15 changes: 11 additions & 4 deletions zstd/bitreader.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,23 @@ func (b *bitReader) getBits(n uint8) int {
if n == 0 /*|| b.bitsRead >= 64 */ {
return 0
}
return b.getBitsFast(n)
return int(b.get32BitsFast(n))
}

// getBitsFast requires that at least one bit is requested every time.
// get32BitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReader) getBitsFast(n uint8) int {
func (b *bitReader) get32BitsFast(n uint8) uint32 {
const regMask = 64 - 1
v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
b.bitsRead += n
return int(v)
return v
}

// get16BitsFast returns the next n bits from the reader as a uint16 and
// advances the read position by n.
// At least one bit must be requested: with n == 0 the right-shift amount
// wraps around to 0 and the result is not meaningful.
// Anything above the low 16 bits of the result is dropped by the conversion.
// There are no checks if the buffer is filled.
func (b *bitReader) get16BitsFast(n uint8) uint16 {
	// NOTE(review): the masks assume b.value is a 64-bit word - confirm against the bitReader definition.
	const regBits = 64
	const shiftMask = regBits - 1

	// Shift out the bits that were already consumed.
	aligned := b.value << (b.bitsRead & shiftMask)
	// Bring the top n bits down to the bottom of the word.
	out := uint16(aligned >> ((regBits - n) & shiftMask))

	b.bitsRead += n
	return out
}

// fillFast() will make sure at least 32 bits are available.
Expand Down
2 changes: 1 addition & 1 deletion zstd/fse_decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ func (s decSymbol) final() (int, uint8) {
// This can only be used if no symbols are 0 bits.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
lowBits := uint16(br.getBitsFast(s.state.nbBits()))
lowBits := br.get16BitsFast(s.state.nbBits())
s.state = s.dt[s.state.newState()+lowBits]
return s.state.baseline(), s.state.addBits()
}
4 changes: 2 additions & 2 deletions zstd/seqdec.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
mlState = mlTable[mlState.newState()&maxTableMask]
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.getBitsFast(nBits)
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]

Expand Down Expand Up @@ -326,7 +326,7 @@ func (s *sequenceDecs) updateAlt(br *bitReader) {
s.offsets.state.state = s.offsets.state.dt[c.newState()]
return
}
bits := br.getBitsFast(nBits)
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]

Expand Down

0 comments on commit b41026b

Please sign in to comment.