Skip to content

Commit 24a2710

Browse files
authored
flate: Faster token writer, 1-2% on fastest (#489)
* flate: Faster token writer
* Move masking.
* Update docs
1 parent a1a9cfc commit 24a2710

File tree

3 files changed

+36
-30
lines changed

3 files changed

+36
-30
lines changed

flate/huffman_bit_writer.go

Lines changed: 24 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,11 @@ const (
3636
bufferSize = bufferFlushSize + 8
3737
)
3838

39+
// Minimum length code that emits extra bits.
40+
const lengthExtraBitsMinCode = 8
41+
3942
// The number of extra bits needed by length code X - LENGTH_CODES_START.
40-
var lengthExtraBits = [32]int8{
43+
var lengthExtraBits = [32]uint8{
4144
/* 257 */ 0, 0, 0,
4245
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
4346
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
@@ -51,6 +54,9 @@ var lengthBase = [32]uint8{
5154
64, 80, 96, 112, 128, 160, 192, 224, 255,
5255
}
5356

57+
// Minimum offset code that emits extra bits.
58+
const offsetExtraBitsMinCode = 4
59+
5460
// offset code word extra bits.
5561
var offsetExtraBits = [32]int8{
5662
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
@@ -78,10 +84,10 @@ func init() {
7884

7985
for i := range offsetCombined[:] {
8086
// Don't use extended window values...
81-
if offsetBase[i] > 0x006000 {
87+
if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 {
8288
continue
8389
}
84-
offsetCombined[i] = uint32(offsetExtraBits[i])<<16 | (offsetBase[i])
90+
offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8)
8591
}
8692
}
8793

@@ -97,7 +103,7 @@ type huffmanBitWriter struct {
97103
// Data waiting to be written is bytes[0:nbytes]
98104
// and then the low nbits of bits.
99105
bits uint64
100-
nbits uint16
106+
nbits uint8
101107
nbytes uint8
102108
lastHuffMan bool
103109
literalEncoding *huffmanEncoder
@@ -215,7 +221,7 @@ func (w *huffmanBitWriter) write(b []byte) {
215221
_, w.err = w.writer.Write(b)
216222
}
217223

218-
func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
224+
func (w *huffmanBitWriter) writeBits(b int32, nb uint8) {
219225
w.bits |= uint64(b) << (w.nbits & 63)
220226
w.nbits += nb
221227
if w.nbits >= 48 {
@@ -858,12 +864,12 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
858864

859865
// Write the length
860866
length := t.length()
861-
lengthCode := lengthCode(length)
867+
lengthCode := lengthCode(length) & 31
862868
if false {
863-
w.writeCode(lengths[lengthCode&31])
869+
w.writeCode(lengths[lengthCode])
864870
} else {
865871
// inlined
866-
c := lengths[lengthCode&31]
872+
c := lengths[lengthCode]
867873
bits |= uint64(c.code) << (nbits & 63)
868874
nbits += c.len
869875
if nbits >= 48 {
@@ -883,10 +889,10 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
883889
}
884890
}
885891

886-
extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
887-
if extraLengthBits > 0 {
892+
if lengthCode >= lengthExtraBitsMinCode {
893+
extraLengthBits := lengthExtraBits[lengthCode]
888894
//w.writeBits(extraLength, extraLengthBits)
889-
extraLength := int32(length - lengthBase[lengthCode&31])
895+
extraLength := int32(length - lengthBase[lengthCode])
890896
bits |= uint64(extraLength) << (nbits & 63)
891897
nbits += extraLengthBits
892898
if nbits >= 48 {
@@ -907,10 +913,9 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
907913
}
908914
// Write the offset
909915
offset := t.offset()
910-
offsetCode := offset >> 16
911-
offset &= matchOffsetOnlyMask
916+
offsetCode := (offset >> 16) & 31
912917
if false {
913-
w.writeCode(offs[offsetCode&31])
918+
w.writeCode(offs[offsetCode])
914919
} else {
915920
// inlined
916921
c := offs[offsetCode]
@@ -932,11 +937,12 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
932937
}
933938
}
934939
}
935-
offsetComb := offsetCombined[offsetCode]
936-
if offsetComb > 1<<16 {
940+
941+
if offsetCode >= offsetExtraBitsMinCode {
942+
offsetComb := offsetCombined[offsetCode]
937943
//w.writeBits(extraOffset, extraOffsetBits)
938-
bits |= uint64(offset-(offsetComb&0xffff)) << (nbits & 63)
939-
nbits += uint16(offsetComb >> 16)
944+
bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
945+
nbits += uint8(offsetComb)
940946
if nbits >= 48 {
941947
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
942948
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits

flate/huffman_code.go

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,8 @@ const (
1717

1818
// hcode is a huffman code with a bit code and bit length.
1919
type hcode struct {
20-
code, len uint16
20+
code uint16
21+
len uint8
2122
}
2223

2324
type huffmanEncoder struct {
@@ -56,7 +57,7 @@ type levelInfo struct {
5657
}
5758

5859
// set sets the code and length of an hcode.
59-
func (h *hcode) set(code uint16, length uint16) {
60+
func (h *hcode) set(code uint16, length uint8) {
6061
h.len = length
6162
h.code = code
6263
}
@@ -80,7 +81,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
8081
var ch uint16
8182
for ch = 0; ch < literalCount; ch++ {
8283
var bits uint16
83-
var size uint16
84+
var size uint8
8485
switch {
8586
case ch < 144:
8687
// size 8, 00110000 .. 10111111
@@ -99,7 +100,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
99100
bits = ch + 192 - 280
100101
size = 8
101102
}
102-
codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
103+
codes[ch] = hcode{code: reverseBits(bits, size), len: size}
103104
}
104105
return h
105106
}
@@ -296,7 +297,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
296297

297298
sortByLiteral(chunk)
298299
for _, node := range chunk {
299-
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
300+
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint8(n)}
300301
code++
301302
}
302303
list = list[0 : len(list)-int(bits)]

flate/token.go

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,10 @@ import (
1313
)
1414

1515
const (
16-
// From top
17-
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
18-
// 8 bits: xlength = length - MIN_MATCH_LENGTH
19-
// 5 bits offsetcode
20-
// 16 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
16+
// bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
17+
// bits 16-21 offsetcode - 5 bits (bit 21 unused)
18+
// bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits
19+
// bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits
2120
lengthShift = 22
2221
offsetMask = 1<<lengthShift - 1
2322
typeMask = 3 << 30
@@ -356,8 +355,8 @@ func (t token) offset() uint32 { return uint32(t) & offsetMask }
356355

357356
func (t token) length() uint8 { return uint8(t >> lengthShift) }
358357

359-
// The code is never more than 8 bits, but is returned as uint32 for convenience.
360-
func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
358+
// Convert length to code.
359+
func lengthCode(len uint8) uint8 { return lengthCodes[len] }
361360

362361
// Returns the offset code corresponding to a specific offset
363362
func offsetCode(off uint32) uint32 {

0 commit comments

Comments
 (0)