Skip to content

Commit bf8e070

Browse files
authored
Optimize single block encodes (#199)
* Optimize single block encodes * Blocks without history cannot have invalid repeat codes. * Remove bounds check. * Don't rank empty part. * Big speedup on huff0 encoding.
1 parent 5c5a6c1 commit bf8e070

File tree

7 files changed

+709
-54
lines changed

7 files changed

+709
-54
lines changed

huff0/bitwriter.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,25 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
3838
b.nBits += bits
3939
}
4040

41-
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
41+
// encSymbol will add the code for symbol from ct, up to 16 bits. The table entry's val may not contain more set bits than its nBits indicates.
4242
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
4343
func (b *bitWriter) encSymbol(ct cTable, symbol byte) {
4444
enc := ct[symbol]
4545
b.bitContainer |= uint64(enc.val) << (b.nBits & 63)
4646
b.nBits += enc.nBits
4747
}
4848

49+
// encTwoSymbols will add the codes for av and then bv from ct, up to 32 bits in total. Neither table entry's val may contain more set bits than its nBits indicates.
50+
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
51+
func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
52+
encA := ct[av]
53+
encB := ct[bv]
54+
sh := b.nBits & 63
55+
combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63))
56+
b.bitContainer |= combined << sh
57+
b.nBits += encA.nBits + encB.nBits
58+
}
59+
4960
// addBits16ZeroNC will add up to 16 bits.
5061
// It will not check if there is space for them,
5162
// so the caller must ensure that it has flushed recently.

huff0/compress.go

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -163,28 +163,23 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
163163
for i := len(src) & 3; i > 0; i-- {
164164
bw.encSymbol(cTable, src[n+i-1])
165165
}
166+
n -= 4
166167
if s.actualTableLog <= 8 {
167-
n -= 4
168168
for ; n >= 0; n -= 4 {
169169
tmp := src[n : n+4]
170170
// tmp should be len 4
171171
bw.flush32()
172-
bw.encSymbol(cTable, tmp[3])
173-
bw.encSymbol(cTable, tmp[2])
174-
bw.encSymbol(cTable, tmp[1])
175-
bw.encSymbol(cTable, tmp[0])
172+
bw.encTwoSymbols(cTable, tmp[3], tmp[2])
173+
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
176174
}
177175
} else {
178-
n -= 4
179176
for ; n >= 0; n -= 4 {
180177
tmp := src[n : n+4]
181178
// tmp should be len 4
182179
bw.flush32()
183-
bw.encSymbol(cTable, tmp[3])
184-
bw.encSymbol(cTable, tmp[2])
180+
bw.encTwoSymbols(cTable, tmp[3], tmp[2])
185181
bw.flush32()
186-
bw.encSymbol(cTable, tmp[1])
187-
bw.encSymbol(cTable, tmp[0])
182+
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
188183
}
189184
}
190185
err := bw.close()
@@ -439,7 +434,7 @@ func (s *Scratch) buildCTable() error {
439434
return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax)
440435
}
441436
var nbPerRank [tableLogMax + 1]uint16
442-
var valPerRank [tableLogMax + 1]uint16
437+
var valPerRank [16]uint16
443438
for _, v := range huffNode[:nonNullRank+1] {
444439
nbPerRank[v.nbBits]++
445440
}
@@ -455,16 +450,17 @@ func (s *Scratch) buildCTable() error {
455450
}
456451

457452
// push nbBits per symbol, symbol order
458-
// TODO: changed `s.symbolLen` -> `nonNullRank+1` (micro-opt)
459453
for _, v := range huffNode[:nonNullRank+1] {
460454
s.cTable[v.symbol].nBits = v.nbBits
461455
}
462456

463457
// assign value within rank, symbol order
464-
for n, val := range s.cTable[:s.symbolLen] {
465-
v := valPerRank[val.nBits]
466-
s.cTable[n].val = v
467-
valPerRank[val.nBits] = v + 1
458+
t := s.cTable[:s.symbolLen]
459+
for n, val := range t {
460+
nbits := val.nBits & 15
461+
v := valPerRank[nbits]
462+
t[n].val = v
463+
valPerRank[nbits] = v + 1
468464
}
469465

470466
return nil
@@ -488,10 +484,12 @@ func (s *Scratch) huffSort() {
488484
r := highBit32(v+1) & 31
489485
rank[r].base++
490486
}
491-
for n := 30; n > 0; n-- {
487+
// maxBitLength is log2(BlockSizeMax) + 1
488+
const maxBitLength = 18 + 1
489+
for n := maxBitLength; n > 0; n-- {
492490
rank[n-1].base += rank[n].base
493491
}
494-
for n := range rank[:] {
492+
for n := range rank[:maxBitLength] {
495493
rank[n].current = rank[n].base
496494
}
497495
for n, c := range s.count[:s.symbolLen] {
@@ -510,7 +508,7 @@ func (s *Scratch) huffSort() {
510508
}
511509

512510
func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
513-
maxNbBits := s.TableLog
511+
maxNbBits := s.actualTableLog
514512
huffNode := s.nodes[1 : huffNodesLen+1]
515513
//huffNode = huffNode[: huffNodesLen]
516514

zstd/blockenc.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,20 @@ func (b *blockEnc) encodeRaw(a []byte) {
299299
}
300300
}
301301

302+
// encodeRawTo appends a raw block (block header followed by the bytes of src) to dst and returns the extended slice.
303+
func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
304+
var bh blockHeader
305+
bh.setLast(b.last)
306+
bh.setSize(uint32(len(src)))
307+
bh.setType(blockTypeRaw)
308+
dst = bh.appendTo(dst)
309+
dst = append(dst, src...)
310+
if debug {
311+
println("Adding RAW block, length", len(src))
312+
}
313+
return dst
314+
}
315+
302316
// encodeLits can be used if the block is only litLen.
303317
func (b *blockEnc) encodeLits(raw bool) error {
304318
var bh blockHeader
@@ -437,7 +451,7 @@ func fuzzFseEncoder(data []byte) int {
437451
return 1
438452
}
439453

440-
// encode will encode the block and put the output in b.output.
454+
// encode will encode the block and append the output to b.output.
441455
func (b *blockEnc) encode(raw bool) error {
442456
if len(b.sequences) == 0 {
443457
return b.encodeLits(raw)
@@ -451,6 +465,8 @@ func (b *blockEnc) encode(raw bool) error {
451465
var lh literalsHeader
452466
bh.setLast(b.last)
453467
bh.setType(blockTypeCompressed)
468+
// Store the offset of the block header so it can be rewritten in place once the final block size is known.
469+
bhOffset := len(b.output)
454470
b.output = bh.appendTo(b.output)
455471

456472
var (
@@ -468,6 +484,7 @@ func (b *blockEnc) encode(raw bool) error {
468484
} else {
469485
err = huff0.ErrIncompressible
470486
}
487+
471488
switch err {
472489
case huff0.ErrIncompressible:
473490
lh.setType(literalsBlockRaw)
@@ -735,18 +752,18 @@ func (b *blockEnc) encode(raw bool) error {
735752
}
736753
b.output = wr.out
737754

738-
if len(b.output)-3 >= b.size {
755+
if len(b.output)-3-bhOffset >= b.size {
739756
// Maybe even add a bigger margin.
740757
b.litEnc.Reuse = huff0.ReusePolicyNone
741758
return errIncompressible
742759
}
743760

744761
// Size is output minus block header.
745-
bh.setSize(uint32(len(b.output)) - 3)
762+
bh.setSize(uint32(len(b.output)-bhOffset) - 3)
746763
if debug {
747764
println("Rewriting block header", bh)
748765
}
749-
_ = bh.appendTo(b.output[:0])
766+
_ = bh.appendTo(b.output[bhOffset:bhOffset])
750767
b.coders.setPrev(llEnc, mlEnc, ofEnc)
751768
return nil
752769
}

0 commit comments

Comments
 (0)