Skip to content

Commit

Permalink
flate: Improve speed in big stateless blocks. (#718)
Browse files Browse the repository at this point in the history
* flate: Improve speed in big stateless blocks.

Avoid re-allocating and copying the dictionary for every block when compressing more than 32KB.

```
benchmark                         old ns/op     new ns/op     delta
BenchmarkEncodeDigitsSL1e4-32     52954         52850         -0.20%
BenchmarkEncodeDigitsSL1e5-32     781061        745420        -4.56%
BenchmarkEncodeDigitsSL1e6-32     8143640       7715674       -5.26%
BenchmarkEncodeTwainSL1e4-32      68150         68415         +0.39%
BenchmarkEncodeTwainSL1e5-32      715140        687326        -3.89%
BenchmarkEncodeTwainSL1e6-32      7718175       7339694       -4.90%

benchmark                         old MB/s     new MB/s     speedup
BenchmarkEncodeDigitsSL1e4-32     188.84       189.21       1.00x
BenchmarkEncodeDigitsSL1e5-32     128.03       134.15       1.05x
BenchmarkEncodeDigitsSL1e6-32     122.80       129.61       1.06x
BenchmarkEncodeTwainSL1e4-32      146.74       146.17       1.00x
BenchmarkEncodeTwainSL1e5-32      139.83       145.49       1.04x
BenchmarkEncodeTwainSL1e6-32      129.56       136.25       1.05x

benchmark                         old allocs     new allocs     delta
BenchmarkEncodeDigitsSL1e4-32     0              0              +0.00%
BenchmarkEncodeDigitsSL1e5-32     3              0              -100.00%
BenchmarkEncodeDigitsSL1e6-32     41             0              -100.00%
BenchmarkEncodeTwainSL1e4-32      0              0              +0.00%
BenchmarkEncodeTwainSL1e5-32      3              0              -100.00%
BenchmarkEncodeTwainSL1e6-32      41             0              -100.00%

benchmark                         old bytes     new bytes     delta
BenchmarkEncodeDigitsSL1e4-32     0             0             +0.00%
BenchmarkEncodeDigitsSL1e5-32     92929         9             -99.99%
BenchmarkEncodeDigitsSL1e6-32     1298964       97            -99.99%
BenchmarkEncodeTwainSL1e4-32      0             0             +0.00%
BenchmarkEncodeTwainSL1e5-32      92928         8             -99.99%
BenchmarkEncodeTwainSL1e6-32      1298871       92            -99.99%
```

* Pin garble to v0.7.2
  • Loading branch information
klauspost authored Dec 19, 2022
1 parent 1f355e8 commit b3140ce
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ jobs:
run: go build github.com/klauspost/compress/s2/cmd/s2c && go build github.com/klauspost/compress/s2/cmd/s2d&&./s2c -verify s2c &&./s2d s2c.s2&&rm ./s2c&&rm s2d&&rm s2c.s2

- name: install garble
-run: go install mvdan.cc/garble@v0.7.0
+run: go install mvdan.cc/garble@v0.7.2

- name: goreleaser deprecation
run: curl -sfL https://git.io/goreleaser | VERSION=v1.9.2 sh -s -- check
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
go-version: 1.19.x
-
name: install garble
-run: go install mvdan.cc/garble@v0.7.1
+run: go install mvdan.cc/garble@v0.7.2
-
name: Run GoReleaser
uses: goreleaser/goreleaser-action@v2
Expand Down
2 changes: 1 addition & 1 deletion .goreleaser.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
before:
hooks:
- ./gen.sh
-- go install mvdan.cc/garble@latest
+- go install mvdan.cc/garble@v0.7.2

builds:
-
Expand Down
19 changes: 16 additions & 3 deletions flate/stateless.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,19 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
dict = dict[len(dict)-maxStatelessDict:]
}

+// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
+var inDict []byte

for len(in) > 0 {
todo := in
-if len(todo) > maxStatelessBlock-len(dict) {
+if len(inDict) > 0 {
+if len(todo) > maxStatelessBlock-maxStatelessDict {
+todo = todo[:maxStatelessBlock-maxStatelessDict]
+}
+} else if len(todo) > maxStatelessBlock-len(dict) {
todo = todo[:maxStatelessBlock-len(dict)]
}
+inOrg := in
in = in[len(todo):]
uncompressed := todo
if len(dict) > 0 {
Expand All @@ -102,7 +110,11 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
todo = combined
}
// Compress
-statelessEnc(&dst, todo, int16(len(dict)))
+if len(inDict) == 0 {
+statelessEnc(&dst, todo, int16(len(dict)))
+} else {
+statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
+}
isEof := eof && len(in) == 0

if dst.n == 0 {
Expand All @@ -119,7 +131,8 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
}
if len(in) > 0 {
// Retain a dict if we have more
-dict = todo[len(todo)-maxStatelessDict:]
+inDict = inOrg[len(uncompressed)-maxStatelessDict:]
+dict = nil
dst.Reset()
}
if bw.err != nil {
Expand Down

0 comments on commit b3140ce

Please sign in to comment.