Skip to content

Commit 3c0d308

Browse files
authored
flate: Faster load+store (#1104)
1 parent 6e2f5d5 commit 3c0d308

File tree

4 files changed

+18
-26
lines changed

4 files changed

+18
-26
lines changed

flate/deflate.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
package flate
77

88
import (
9-
"encoding/binary"
109
"errors"
1110
"fmt"
1211
"io"
1312
"math"
13+
14+
"github.com/klauspost/compress/internal/le"
1415
)
1516

1617
const (
@@ -362,7 +363,7 @@ func (d *compressor) writeStoredBlock(buf []byte) error {
362363
// of the supplied slice.
363364
// The caller must ensure that len(b) >= 4.
364365
func hash4(b []byte) uint32 {
365-
return hash4u(binary.LittleEndian.Uint32(b), hashBits)
366+
return hash4u(le.Load32(b, 0), hashBits)
366367
}
367368

368369
// hash4u returns the hash of u to fit in a hash table with h bits.
@@ -377,7 +378,7 @@ func bulkHash4(b []byte, dst []uint32) {
377378
if len(b) < 4 {
378379
return
379380
}
380-
hb := binary.LittleEndian.Uint32(b)
381+
hb := le.Load32(b, 0)
381382

382383
dst[0] = hash4u(hb, hashBits)
383384
end := len(b) - 4 + 1

flate/huffman_bit_writer.go

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -434,8 +434,8 @@ func (w *huffmanBitWriter) writeOutBits() {
434434
w.nbits -= 48
435435
n := w.nbytes
436436

437-
// We over-write, but faster...
438-
le.Store64(w.bytes[n:], bits)
437+
// We overwrite, but faster...
438+
le.Store64(w.bytes[:], n, bits)
439439
n += 6
440440

441441
if n >= bufferFlushSize {
@@ -851,8 +851,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
851851
bits |= c.code64() << (nbits & 63)
852852
nbits += c.len()
853853
if nbits >= 48 {
854-
le.Store64(w.bytes[nbytes:], bits)
855-
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
854+
le.Store64(w.bytes[:], nbytes, bits)
856855
bits >>= 48
857856
nbits -= 48
858857
nbytes += 6
@@ -879,8 +878,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
879878
bits |= c.code64() << (nbits & 63)
880879
nbits += c.len()
881880
if nbits >= 48 {
882-
le.Store64(w.bytes[nbytes:], bits)
883-
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
881+
le.Store64(w.bytes[:], nbytes, bits)
884882
bits >>= 48
885883
nbits -= 48
886884
nbytes += 6
@@ -902,8 +900,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
902900
bits |= uint64(extraLength) << (nbits & 63)
903901
nbits += extraLengthBits
904902
if nbits >= 48 {
905-
le.Store64(w.bytes[nbytes:], bits)
906-
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
903+
le.Store64(w.bytes[:], nbytes, bits)
907904
bits >>= 48
908905
nbits -= 48
909906
nbytes += 6
@@ -928,8 +925,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
928925
bits |= c.code64() << (nbits & 63)
929926
nbits += c.len()
930927
if nbits >= 48 {
931-
le.Store64(w.bytes[nbytes:], bits)
932-
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
928+
le.Store64(w.bytes[:], nbytes, bits)
933929
bits >>= 48
934930
nbits -= 48
935931
nbytes += 6
@@ -950,8 +946,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
950946
bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
951947
nbits += uint8(offsetComb)
952948
if nbits >= 48 {
953-
le.Store64(w.bytes[nbytes:], bits)
954-
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
949+
le.Store64(w.bytes[:], nbytes, bits)
955950
bits >>= 48
956951
nbits -= 48
957952
nbytes += 6
@@ -1104,7 +1099,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
11041099
// We must have at least 48 bits free.
11051100
if nbits >= 8 {
11061101
n := nbits >> 3
1107-
le.Store64(w.bytes[nbytes:], bits)
1102+
le.Store64(w.bytes[:], nbytes, bits)
11081103
bits >>= (n * 8) & 63
11091104
nbits -= n * 8
11101105
nbytes += n
@@ -1133,8 +1128,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
11331128
// Remaining...
11341129
for _, t := range input {
11351130
if nbits >= 48 {
1136-
le.Store64(w.bytes[nbytes:], bits)
1137-
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
1131+
le.Store64(w.bytes[:], nbytes, bits)
11381132
bits >>= 48
11391133
nbits -= 48
11401134
nbytes += 6

internal/le/unsafe_disabled.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,6 @@ func Store32(b []byte, v uint32) {
3737
}
3838

3939
// Store64 will store v at b[i:].
40-
func Store64(b []byte, v uint64) {
41-
binary.LittleEndian.PutUint64(b, v)
40+
func Store64[I Indexer](b []byte, i I, v uint64) {
41+
binary.LittleEndian.PutUint64(b[i:], v)
4242
}

internal/le/unsafe_enabled.go

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,15 @@ func Load64[I Indexer](b []byte, i I) uint64 {
3838

3939
// Store16 will store v at b.
4040
func Store16(b []byte, v uint16) {
41-
//binary.LittleEndian.PutUint16(b, v)
4241
*(*uint16)(unsafe.Pointer(unsafe.SliceData(b))) = v
4342
}
4443

4544
// Store32 will store v at b.
4645
func Store32(b []byte, v uint32) {
47-
//binary.LittleEndian.PutUint32(b, v)
4846
*(*uint32)(unsafe.Pointer(unsafe.SliceData(b))) = v
4947
}
5048

51-
// Store64 will store v at b.
52-
func Store64(b []byte, v uint64) {
53-
//binary.LittleEndian.PutUint64(b, v)
54-
*(*uint64)(unsafe.Pointer(unsafe.SliceData(b))) = v
49+
// Store64 will store v at b[i:].
50+
func Store64[I Indexer](b []byte, i I, v uint64) {
51+
*(*uint64)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) = v
5552
}

0 commit comments

Comments
 (0)