Refactor zstd decoder #498

Merged · 35 commits · merged Feb 24, 2022

Changes from 1 commit
1a14c30
Refactor zstd decoder
klauspost Jan 18, 2022
b5ab519
Merge branch 'master' into refactor-zstd-decoder
klauspost Feb 9, 2022
984fc8f
Make it compile
klauspost Feb 11, 2022
dca58e6
Fix up single decodes.
klauspost Feb 11, 2022
f01cd33
Merge branch 'master' into refactor-zstd-decoder
klauspost Feb 14, 2022
3c54049
Almost working now...
klauspost Feb 16, 2022
8bf62b7
Tests pass.
klauspost Feb 18, 2022
ac20ec2
Avoid a few allocs
klauspost Feb 18, 2022
9a9ac6b
Add stream decompression with no goroutines.
klauspost Feb 18, 2022
b2951ef
Check FrameContentSize and max decoded size.
klauspost Feb 18, 2022
9d525e5
Remove unused var+func.
klauspost Feb 18, 2022
b74ecce
Tweaks and cleanup
klauspost Feb 18, 2022
e217e78
Use maxsize as documented.
klauspost Feb 19, 2022
6598a07
Merge branch 'master' into refactor-zstd-decoder
klauspost Feb 21, 2022
561b94c
Ensure history from frames cannot overlap.
klauspost Feb 21, 2022
3db2dcb
Fix deadlock on error.
klauspost Feb 21, 2022
55e4c4b
Stricter framecontent size checks and consistency.
klauspost Feb 21, 2022
d6e790a
Fix short test.
klauspost Feb 21, 2022
8b43a92
Add bench
klauspost Feb 22, 2022
d0ce155
Merge branch 'master' into refactor-zstd-decoder
klauspost Feb 22, 2022
50a135c
Reject big RLE/RAW blocks as per https://github.com/facebook/zstd/iss…
klauspost Feb 22, 2022
d731396
Use os.FileInfo for Go 1.15.
klauspost Feb 22, 2022
03b301a
Break all on errors
klauspost Feb 22, 2022
4c5a306
Move sync code to separate method.
klauspost Feb 22, 2022
d3974d3
Don't read sent error.
klauspost Feb 22, 2022
133d52c
Fix consistent error reporting and dict inits.
klauspost Feb 22, 2022
2b89e67
Fix error message
klauspost Feb 22, 2022
606373a
Check if huff0 X4 blocks match size exactly.
klauspost Feb 23, 2022
a197f86
Fix decoder leakage.
klauspost Feb 23, 2022
d1f91e2
Forward blocks, so we don't risk run-away decoding.
klauspost Feb 23, 2022
e7906eb
Fix test race
klauspost Feb 23, 2022
7a09f3d
Save last before sending.
klauspost Feb 23, 2022
f5e0961
Reuse history between async calls.
klauspost Feb 23, 2022
6cabc28
Protect local frame.
klauspost Feb 23, 2022
f20d563
Clarify error msg
klauspost Feb 24, 2022
Tweaks and cleanup
klauspost committed Feb 18, 2022
commit b74ecce15ccd6014eedb21a0d87feab182b59ff6
3 changes: 3 additions & 0 deletions zstd/decoder.go
@@ -313,6 +313,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
frame.rawInput = nil
frame.bBuf = nil
if frame.history.decoders.br != nil {
frame.history.decoders.br.in = nil
}
d.decoders <- block
}()
frame.bBuf = input
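The three added lines drop the bit reader's reference to the input once DecodeAll is done, so returning the block decoder to the pool cannot keep the caller's buffer alive. Below is a minimal, hedged usage sketch of the DecodeAll path this protects; the input file name is an assumption, not taken from the PR.

package main

import (
	"io/ioutil"
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	compressed, err := ioutil.ReadFile("input.zst") // assumed input file
	if err != nil {
		log.Fatal(err)
	}

	// NewReader(nil) creates a Decoder intended for DecodeAll use only.
	dec, err := zstd.NewReader(nil)
	if err != nil {
		log.Fatal(err)
	}
	defer dec.Close()

	// DecodeAll only borrows `compressed` for the duration of the call; the
	// cleanup added in the diff above ensures the pooled block decoder keeps
	// no reference to it after the call returns.
	out, err := dec.DecodeAll(compressed, nil)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("decompressed %d bytes", len(out))
}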
148 changes: 2 additions & 146 deletions zstd/decoder_test.go
@@ -477,7 +477,7 @@ func TestNewDecoderBigFile(t *testing.T) {
}
defer f.Close()
start := time.Now()
dec, err := NewReader(f, WithDecoderConcurrency(4), WithDecoderLowmem(true))
dec, err := NewReader(f)
if err != nil {
t.Fatal(err)
}
@@ -1159,7 +1159,7 @@ func BenchmarkDecoder_DecodeAllParallel(b *testing.B) {
if err != nil {
b.Fatal(err)
}
dec, err := NewReader(nil)
dec, err := NewReader(nil, WithDecoderConcurrency(runtime.GOMAXPROCS(0)))
if err != nil {
b.Fatal(err)
return
@@ -1199,150 +1199,6 @@ func BenchmarkDecoder_DecodeAllParallel(b *testing.B) {
}
}

/*
func BenchmarkDecoder_DecodeAllCgo(b *testing.B) {
fn := "testdata/benchdecoder.zip"
data, err := ioutil.ReadFile(fn)
if err != nil {
b.Fatal(err)
}
zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
if err != nil {
b.Fatal(err)
}
for _, tt := range zr.File {
if !strings.HasSuffix(tt.Name, ".zst") {
continue
}
b.Run(tt.Name, func(b *testing.B) {
tt := tt
r, err := tt.Open()
if err != nil {
b.Fatal(err)
}
defer r.Close()
in, err := ioutil.ReadAll(r)
if err != nil {
b.Fatal(err)
}
got, err := zstd.Decompress(nil, in)
if err != nil {
b.Fatal(err)
}
b.SetBytes(int64(len(got)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
got, err = zstd.Decompress(got, in)
if err != nil {
b.Fatal(err)
}
}
})
}
}

func BenchmarkDecoder_DecodeAllParallelCgo(b *testing.B) {
fn := "testdata/benchdecoder.zip"
data, err := ioutil.ReadFile(fn)
if err != nil {
b.Fatal(err)
}
zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
if err != nil {
b.Fatal(err)
}
for _, tt := range zr.File {
if !strings.HasSuffix(tt.Name, ".zst") {
continue
}
b.Run(tt.Name, func(b *testing.B) {
r, err := tt.Open()
if err != nil {
b.Fatal(err)
}
defer r.Close()
in, err := ioutil.ReadAll(r)
if err != nil {
b.Fatal(err)
}
got, err := zstd.Decompress(nil, in)
if err != nil {
b.Fatal(err)
}
b.SetBytes(int64(len(got)))
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
got := make([]byte, len(got))
for pb.Next() {
got, err = zstd.Decompress(got, in)
if err != nil {
b.Fatal(err)
}
}
})
})
}
}

func BenchmarkDecoderSilesiaCgo(b *testing.B) {
fn := "testdata/silesia.tar.zst"
data, err := ioutil.ReadFile(fn)
if err != nil {
if os.IsNotExist(err) {
b.Skip("Missing testdata/silesia.tar.zst")
return
}
b.Fatal(err)
}
dec := zstd.NewReader(bytes.NewBuffer(data))
n, err := io.Copy(ioutil.Discard, dec)
if err != nil {
b.Fatal(err)
}

b.SetBytes(n)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
dec := zstd.NewReader(bytes.NewBuffer(data))
_, err := io.CopyN(ioutil.Discard, dec, n)
if err != nil {
b.Fatal(err)
}
}
}
func BenchmarkDecoderEnwik9Cgo(b *testing.B) {
fn := "testdata/enwik9-1.zst"
data, err := ioutil.ReadFile(fn)
if err != nil {
if os.IsNotExist(err) {
b.Skip("Missing " + fn)
return
}
b.Fatal(err)
}
dec := zstd.NewReader(bytes.NewBuffer(data))
n, err := io.Copy(ioutil.Discard, dec)
if err != nil {
b.Fatal(err)
}

b.SetBytes(n)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
dec := zstd.NewReader(bytes.NewBuffer(data))
_, err := io.CopyN(ioutil.Discard, dec, n)
if err != nil {
b.Fatal(err)
}
}
}

*/

func BenchmarkDecoderSilesia(b *testing.B) {
fn := "testdata/silesia.tar.zst"
data, err := ioutil.ReadFile(fn)
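With the refactor, the parallel benchmark requests explicit concurrency via WithDecoderConcurrency(runtime.GOMAXPROCS(0)) instead of relying on the default, and the commented-out Cgo benchmarks are removed. A hedged sketch of that benchmark pattern follows; the package name and test data path are assumptions, not part of the PR.

package zstd_bench_sketch // hypothetical external package

import (
	"io/ioutil"
	"runtime"
	"testing"

	"github.com/klauspost/compress/zstd"
)

// BenchmarkDecodeAllParallelSketch mirrors the pattern used in the updated
// benchmark: one shared Decoder, many concurrent DecodeAll calls.
func BenchmarkDecodeAllParallelSketch(b *testing.B) {
	in, err := ioutil.ReadFile("testdata/example.zst") // assumed test file
	if err != nil {
		b.Skip("missing test data:", err)
	}
	dec, err := zstd.NewReader(nil, zstd.WithDecoderConcurrency(runtime.GOMAXPROCS(0)))
	if err != nil {
		b.Fatal(err)
	}
	defer dec.Close()
	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		var dst []byte
		for pb.Next() {
			var err error
			dst, err = dec.DecodeAll(in, dst[:0]) // reuse the output buffer across iterations
			if err != nil {
				b.Fatal(err)
			}
		}
	})
}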
2 changes: 1 addition & 1 deletion zstd/history.go
@@ -41,7 +41,7 @@ func (h *history) reset() {
if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined {
fseDecoderPool.Put(f)
}
h.decoders = sequenceDecs{}
h.decoders = sequenceDecs{br: h.decoders.br}
if h.huffTree != nil {
if h.dict == nil || h.dict.litEnc != h.huffTree {
huffDecoderPool.Put(h.huffTree)
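Keeping `br` across the reset means the already-allocated bitReader survives into the next frame instead of being reallocated. A spelled-out sketch of the same carry-over, purely for illustration (the method name is hypothetical, not from the PR):

// Equivalent, more explicit form of the one-line change above.
func (h *history) resetDecodersSketch() {
	br := h.decoders.br         // keep the already-allocated bit reader
	h.decoders = sequenceDecs{} // zero the sequence decoders
	h.decoders.br = br          // restore the reader so the next frame reuses it
}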
55 changes: 26 additions & 29 deletions zstd/seqdec.go
@@ -68,12 +68,11 @@ type sequenceDecs struct {
dict []byte
literals []byte
out []byte
//seq []seqVals
nSeqs int
br *bitReader
seqSize int
windowSize int
maxBits uint8
nSeqs int
br *bitReader
seqSize int
windowSize int
maxBits uint8
}

// initialize all 3 decoders from the stream input.
@@ -256,64 +255,62 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
}

for _, seq := range seqs {
ll, ml, mo := seq.ll, seq.ml, seq.mo

// Add literals
s.out = append(s.out, s.literals[:ll]...)
s.literals = s.literals[ll:]
s.out = append(s.out, s.literals[:seq.ll]...)
s.literals = s.literals[seq.ll:]
out := s.out

// Copy form dictionary...
if mo > len(s.out)+len(hist) || mo > s.windowSize {
if seq.mo > len(s.out)+len(hist) || seq.mo > s.windowSize {
if len(s.dict) == 0 {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, len(s.out)+len(hist))
}

// we may be in dictionary.
dictO := len(s.dict) - (mo - (len(s.out) + len(hist)))
dictO := len(s.dict) - (seq.mo - (len(s.out) + len(hist)))
if dictO < 0 || dictO >= len(s.dict) {
return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", mo, len(s.out)+len(hist)+len(s.dict))
return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, len(s.out)+len(hist)+len(s.dict))
}
end := dictO + ml
end := dictO + seq.ml
if end > len(s.dict) {
out = append(out, s.dict[dictO:]...)
mo -= len(s.dict) - dictO
ml -= len(s.dict) - dictO
seq.mo -= len(s.dict) - dictO
seq.ml -= len(s.dict) - dictO
} else {
s.out = append(out, s.dict[dictO:end]...)
continue
}
}

// Copy from history.
if v := mo - len(s.out); v > 0 {
if v := seq.mo - len(s.out); v > 0 {
// v is the start position in history from end.
start := len(hist) - v
if ml > v {
if seq.ml > v {
// Some goes into current block.
// Copy remainder of history
out = append(out, hist[start:]...)
mo -= v
ml -= v
seq.mo -= v
seq.ml -= v
} else {
s.out = append(out, hist[start:start+ml]...)
s.out = append(out, hist[start:start+seq.ml]...)
continue
}
}
// We must be in current buffer now
if ml > 0 {
start := len(s.out) - mo
if ml <= len(s.out)-start {
if seq.ml > 0 {
start := len(s.out) - seq.mo
if seq.ml <= len(s.out)-start {
// No overlap
s.out = append(out, s.out[start:start+ml]...)
s.out = append(out, s.out[start:start+seq.ml]...)
continue
} else {
// Overlapping copy
// Extend destination slice and copy one byte at the time.
out = out[:len(out)+ml]
src := out[start : start+ml]
out = out[:len(out)+seq.ml]
src := out[start : start+seq.ml]
// Destination is the space we just added.
dst := out[len(out)-ml:]
dst := out[len(out)-seq.ml:]
dst = dst[:len(src)]
for i := range src {
dst[i] = src[i]
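The rewrite pulls seq.ll, seq.ml and seq.mo into locals once per sequence and keeps the three copy sources (dictionary, history, current output) explicit; matches that overlap the output still have to be copied one byte at a time. A self-contained sketch of that overlapping copy follows; the function name and example values are illustrative, not taken from the PR.

package main

import "fmt"

// overlappingCopy appends `length` bytes that start `offset` bytes back in
// out, copying one byte at a time so a match may read bytes it has just
// produced (the LZ77 overlap case handled at the end of execute above).
func overlappingCopy(out []byte, offset, length int) []byte {
	start := len(out) - offset
	out = append(out, make([]byte, length)...) // extend the destination
	src := out[start : start+length]
	dst := out[len(out)-length:]
	for i := range src {
		dst[i] = src[i] // earlier writes feed later reads when the regions overlap
	}
	return out
}

func main() {
	out := []byte("ab")
	// offset 2, length 6 repeats "ab" three more times: "abababab".
	fmt.Println(string(overlappingCopy(out, 2, 6)))
}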