Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
- name: Test Race
env:
CGO_ENABLED: 1
run: go test -cpu="1,4" -short -race ./...
run: go test -cpu="1,4" -short -race -v ./...

build-special:
env:
Expand Down
32 changes: 21 additions & 11 deletions zstd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ file out level insize outsize millis mb/s
silesia.tar zskp 1 211947520 73101992 643 313.87
silesia.tar zskp 2 211947520 67504318 969 208.38
silesia.tar zskp 3 211947520 64595893 2007 100.68
silesia.tar zskp 4 211947520 60995370 7691 26.28
silesia.tar zskp 4 211947520 60995370 8825 22.90

cgo zstd:
silesia.tar zstd 1 211947520 73605392 543 371.56
Expand All @@ -162,7 +162,7 @@ silesia.tar zstd 9 211947520 60212393 5063 39.92

gzip, stdlib/this package:
silesia.tar gzstd 1 211947520 80007735 1654 122.21
silesia.tar gzkp 1 211947520 80369488 1168 173.06
silesia.tar gzkp 1 211947520 80136201 1152 175.45

GOB stream of binary data. Highly compressible.
https://files.klauspost.com/compress/gob-stream.7z
Expand All @@ -171,13 +171,15 @@ file out level insize outsize millis mb/s
gob-stream zskp 1 1911399616 235022249 3088 590.30
gob-stream zskp 2 1911399616 205669791 3786 481.34
gob-stream zskp 3 1911399616 175034659 9636 189.17
gob-stream zskp 4 1911399616 167273881 29337 62.13
gob-stream zskp 4 1911399616 165609838 50369 36.19

gob-stream zstd 1 1911399616 249810424 2637 691.26
gob-stream zstd 3 1911399616 208192146 3490 522.31
gob-stream zstd 6 1911399616 193632038 6687 272.56
gob-stream zstd 9 1911399616 177620386 16175 112.70

gob-stream gzstd 1 1911399616 357382641 10251 177.82
gob-stream gzkp 1 1911399616 362156523 5695 320.08
gob-stream gzkp 1 1911399616 359753026 5438 335.20

The test data for the Large Text Compression Benchmark is the first
10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
Expand All @@ -187,11 +189,13 @@ file out level insize outsize millis mb/s
enwik9 zskp 1 1000000000 343848582 3609 264.18
enwik9 zskp 2 1000000000 317276632 5746 165.97
enwik9 zskp 3 1000000000 292243069 12162 78.41
enwik9 zskp 4 1000000000 275241169 36430 26.18
enwik9 zskp 4 1000000000 262183768 82837 11.51

enwik9 zstd 1 1000000000 358072021 3110 306.65
enwik9 zstd 3 1000000000 313734672 4784 199.35
enwik9 zstd 6 1000000000 295138875 10290 92.68
enwik9 zstd 9 1000000000 278348700 28549 33.40

enwik9 gzstd 1 1000000000 382578136 9604 99.30
enwik9 gzkp 1 1000000000 383825945 6544 145.73

Expand All @@ -202,13 +206,15 @@ file out level insize outsize millis mb/s
github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75
github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78
github-june-2days-2019.json zskp 4 6273951764 470320075 170190 35.16

github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16

github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03
github-june-2days-2019.json gzkp 1 6273951764 1125417694 21788 274.61

VM Image, Linux mint with a few installed applications:
https://files.klauspost.com/compress/rawstudio-mint14.7z
Expand All @@ -217,13 +223,15 @@ file out level insize outsize millis mb/s
rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08
rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16
rawstudio-mint14.tar zskp 4 8558382592 2965110639 857750 9.52

rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91

rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49
rawstudio-mint14.tar gzkp 1 8558382592 3962605659 45113 180.92

CSV data:
https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
Expand All @@ -232,13 +240,15 @@ file out level insize outsize millis mb/s
nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66
nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10
nyc-taxi-data-10M.csv zskp 4 3325605752 476268884 135958 23.33

nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12

nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53
nyc-taxi-data-10M.csv gzkp 1 3325605752 922257165 16780 189.00
```

## Decompressor
Expand Down
3 changes: 3 additions & 0 deletions zstd/dict_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ func TestEncoder_SmallDict(t *testing.T) {
}
dicts = append(dicts, in)
for level := SpeedFastest; level < speedLast; level++ {
if isRaceTest && level >= SpeedBestCompression {
break
}
enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderDict(in), WithEncoderLevel(level), WithWindowSize(1<<17))
if err != nil {
t.Fatal(err)
Expand Down
108 changes: 75 additions & 33 deletions zstd/enc_best.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,59 @@ package zstd

import (
"fmt"
"math/bits"

"github.com/klauspost/compress"
)

const (
bestLongTableBits = 20 // Bits used in the long match table
bestLongTableBits = 22 // Bits used in the long match table
bestLongTableSize = 1 << bestLongTableBits // Size of the table
bestLongLen = 8 // Bytes used for table hash

// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
// long match table and match offsets are quite big.
// This greatly depends on the type of input.
bestShortTableBits = 16 // Bits used in the short match table
bestShortTableBits = 18 // Bits used in the short match table
bestShortTableSize = 1 << bestShortTableBits // Size of the table
bestShortLen = 4 // Bytes used for table hash

)

// match describes a candidate match considered by the best-compression encoder.
type match struct {
	offset int32 // Absolute position in src where the matched bytes start (distance is s-offset).
	s      int32 // Position in src where this match would begin.
	length int32 // Match length in bytes; set to 0 when the match is rejected as not worthwhile.
	rep    int32 // Repeat-offset index (1-3) for repeat matches; negative for regular offset matches.
	est    int32 // Estimated encoding cost in bits (lower is better); computed by estBits.
}

// highScore is a sentinel cost assigned to invalid or unprofitable matches
// so they always lose when estimates are compared.
const highScore = 25000

// estBits estimates the cost in output bits of encoding this match using the
// predefined (default) FSE tables, storing the result in m.est.
//
// bitsPerByte is the estimated cost of emitting one literal byte, scaled by
// 2^10 (so 1024 == 1 bit/byte), derived from the Shannon entropy of the input.
// The estimate adds the offset- and match-length-code cost and subtracts the
// literal bits saved by covering m.length bytes. If the match is a net loss
// (est > 0) it is disabled: length is zeroed and est set to highScore.
func (m *match) estBits(bitsPerByte int32) {
	mlc := mlCode(uint32(m.length - zstdMinMatch))
	var ofc uint8
	if m.rep < 0 {
		// Regular match: offset code from the match distance.
		// NOTE(review): +3 presumably skips the 3 repeat-offset codes — confirm
		// against the offset-code definition.
		ofc = ofCode(uint32(m.s-m.offset) + 3)
	} else {
		// Repeat match: offset code from the repeat index (1-3).
		ofc = ofCode(uint32(m.rep))
	}
	// Cost, excluding literals, from the predefined offset and match-length tables.
	ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]

	// Add cost of match encoding...
	m.est = int32(ofTT.outBits + mlTT.outBits)
	m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
	// Subtract savings compared to literal encoding...
	// (>> 10 undoes the 2^10 scaling of bitsPerByte.)
	m.est -= (m.length * bitsPerByte) >> 10
	if m.est > 0 {
		// Unlikely gain..
		m.length = 0
		m.est = highScore
	}
}

// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
Expand Down Expand Up @@ -112,6 +147,14 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
return
}

// Use this to estimate literal cost.
// Scaled by 10 bits.
bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
// Huffman can never go < 1 bit/byte
if bitsPerByte < 1024 {
bitsPerByte = 1024
}

// Override src
src = e.hist
sLimit := int32(len(src)) - inputMargin
Expand Down Expand Up @@ -148,29 +191,8 @@ encodeLoop:
panic("offset0 was 0")
}

type match struct {
offset int32
s int32
length int32
rep int32
}
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{offset: offset, s: s}
}
return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
}

bestOf := func(a, b match) match {
aScore := b.s - a.s + a.length
bScore := a.s - b.s + b.length
if a.rep < 0 {
aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
}
if b.rep < 0 {
bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
}
if aScore >= bScore {
if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
return a
}
return b
Expand All @@ -182,17 +204,31 @@ encodeLoop:
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]

matchAt := func(offset int32, s int32, first uint32, rep int32) match {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{s: s, est: highScore}
}
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
m.estBits(bitsPerByte)
return m
}

best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))

if canRepeat && best.length < goodEnough {
best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
cv := uint32(cv >> 8)
spp := s + 1
best = bestOf(best, matchAt(spp-offset1, spp, cv, 1))
best = bestOf(best, matchAt(spp-offset2, spp, cv, 2))
best = bestOf(best, matchAt(spp-offset3, spp, cv, 3))
if best.length > 0 {
best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
cv >>= 16
spp += 2
best = bestOf(best, matchAt(spp-offset1, spp, cv, 1))
best = bestOf(best, matchAt(spp-offset2, spp, cv, 2))
best = bestOf(best, matchAt(spp-offset3, spp, cv, 3))
}
}
// Load next and check...
Expand All @@ -218,12 +254,18 @@ encodeLoop:
candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]

// Short at s+1
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
// Long at s+1, s+2
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))

if false {
// Short at s+3.
// Too often worse...
best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
}
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
Expand Down Expand Up @@ -428,7 +470,7 @@ func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
e.Encode(blk, src)
}

// ResetDict will reset and set a dictionary if not nil
// Reset will reset and set a dictionary if not nil
func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
if d == nil {
Expand Down
13 changes: 13 additions & 0 deletions zstd/encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ type testEncOpt struct {
func getEncOpts(cMax int) []testEncOpt {
var o []testEncOpt
for level := speedNotSet + 1; level < speedLast; level++ {
if isRaceTest && level >= SpeedBestCompression {
break
}
for conc := 1; conc <= 4; conc *= 2 {
for _, wind := range testWindowSizes {
addOpt := func(name string, options ...EOption) {
Expand Down Expand Up @@ -75,6 +78,7 @@ func TestEncoder_EncodeAllSimple(t *testing.T) {
in = append(in, in...)
for _, opts := range getEncOpts(4) {
t.Run(opts.name, func(t *testing.T) {
runtime.GC()
e, err := NewWriter(nil, opts.o...)
if err != nil {
t.Fatal(err)
Expand Down Expand Up @@ -172,6 +176,9 @@ func TestEncoder_EncodeAllEncodeXML(t *testing.T) {

for level := speedNotSet + 1; level < speedLast; level++ {
t.Run(level.String(), func(t *testing.T) {
if isRaceTest && level >= SpeedBestCompression {
t.SkipNow()
}
e, err := NewWriter(nil, WithEncoderLevel(level))
if err != nil {
t.Fatal(err)
Expand Down Expand Up @@ -291,6 +298,9 @@ func TestEncoder_EncodeAllTwain(t *testing.T) {

for level := speedNotSet + 1; level < speedLast; level++ {
t.Run(level.String(), func(t *testing.T) {
if isRaceTest && level >= SpeedBestCompression {
t.SkipNow()
}
for _, windowSize := range testWindowSizes {
t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
Expand Down Expand Up @@ -337,6 +347,9 @@ func TestEncoder_EncodeAllPi(t *testing.T) {

for level := speedNotSet + 1; level < speedLast; level++ {
t.Run(level.String(), func(t *testing.T) {
if isRaceTest && level >= SpeedBestCompression {
t.SkipNow()
}
for _, windowSize := range testWindowSizes {
t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
Expand Down
10 changes: 10 additions & 0 deletions zstd/race_enabled_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.

// +build race

package zstd

// init flags the run as race-enabled. This file is compiled only under the
// "race" build tag, so isRaceTest stays false in normal test runs; tests use
// it to skip the slowest compression levels when the race detector is on.
func init() {
	isRaceTest = true
}
5 changes: 5 additions & 0 deletions zstd/zstd_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.

package zstd

import (
Expand All @@ -9,6 +12,8 @@ import (
"time"
)

var isRaceTest bool

func TestMain(m *testing.M) {
ec := m.Run()
if ec == 0 && runtime.NumGoroutine() > 1 {
Expand Down