Skip to content

Commit

Permalink
s2: Clean up decoder (#312)
Browse files Browse the repository at this point in the history
Removes an impossible check from the Go decoder; otherwise no changes.
  • Loading branch information
klauspost authored Jan 13, 2021
1 parent 617727c commit 85d8ebf
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 42 deletions.
51 changes: 48 additions & 3 deletions s2/cmd/s2d/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ package main

import (
"bufio"
"bytes"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"runtime/debug"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -76,9 +78,52 @@ Options:`)
}

*quiet = *quiet || *stdout
allFiles := files
for i := 0; i < *bench; i++ {
files = append(files, allFiles...)

if *bench > 0 {
debug.SetGCPercent(10)
for _, filename := range files {
switch {
case strings.HasSuffix(filename, ".s2"):
case strings.HasSuffix(filename, ".snappy"):
default:
fmt.Println("Skipping", filename)
continue
}

func() {
if !*quiet {
fmt.Print("Reading ", filename, "...")
}
// Input file.
file, err := os.Open(filename)
exitErr(err)
finfo, err := file.Stat()
exitErr(err)
b := make([]byte, finfo.Size())
_, err = io.ReadFull(file, b)
exitErr(err)
file.Close()

for i := 0; i < *bench; i++ {
if !*quiet {
fmt.Print("\nDecompressing...")
}
r.Reset(bytes.NewBuffer(b))
start := time.Now()
output, err := io.Copy(ioutil.Discard, r)
exitErr(err)
if !*quiet {
elapsed := time.Since(start)
ms := elapsed.Round(time.Millisecond)
mbPerSec := (float64(output) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
pct := float64(output) * 100 / float64(len(b))
fmt.Printf(" %d -> %d [%.02f%%]; %v, %.01fMB/s", len(b), output, pct, ms, mbPerSec)
}
}
fmt.Println("")
}()
}
os.Exit(0)
}

for _, filename := range files {
Expand Down
14 changes: 4 additions & 10 deletions s2/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ var (
ErrTooLarge = errors.New("s2: decoded block is too large")
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("s2: unsupported input")

errUnsupportedLiteralLength = errors.New("s2: unsupported literal length")
)

// DecodedLen returns the length of the decoded block.
Expand All @@ -46,8 +44,7 @@ func decodedLen(src []byte) (blockLen, headerLen int, err error) {
}

const (
decodeErrCodeCorrupt = 1
decodeErrCodeUnsupportedLiteralLength = 2
decodeErrCodeCorrupt = 1
)

// Decode returns the decoded form of src. The returned slice may be a sub-
Expand All @@ -65,13 +62,10 @@ func Decode(dst, src []byte) ([]byte, error) {
} else {
dst = make([]byte, dLen)
}
switch s2Decode(dst, src[s:]) {
case 0:
return dst, nil
case decodeErrCodeUnsupportedLiteralLength:
return nil, errUnsupportedLiteralLength
if s2Decode(dst, src[s:]) != 0 {
return nil, ErrCorrupt
}
return nil, ErrCorrupt
return dst, nil
}

// NewReader returns a new Reader that decompresses from r, using the framing
Expand Down
3 changes: 0 additions & 3 deletions s2/decode_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@ func s2Decode(dst, src []byte) int {
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length <= 0 {
return decodeErrCodeUnsupportedLiteralLength
}
if length > len(dst)-d || length > len(src)-s {
return decodeErrCodeCorrupt
}
Expand Down
40 changes: 14 additions & 26 deletions s2/s2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1328,16 +1328,6 @@ func benchDecode(b *testing.B, src []byte) {
}
}

// benchDecodeBetter benchmarks Decode on a block produced by EncodeBetter.
// src is compressed once up front; b.ResetTimer then excludes that setup so
// only the decode loop is measured.
func benchDecodeBetter(b *testing.B, src []byte) {
encoded := EncodeBetter(nil, src)
// Bandwidth is in amount of uncompressed data.
b.SetBytes(int64(len(src)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
// src is passed as the destination and both return values are discarded.
// NOTE(review): presumably this lets Decode reuse src's storage instead of
// allocating on every iteration - confirm against Decode.
Decode(src, encoded)
}
}

func benchEncode(b *testing.B, src []byte) {
// Bandwidth is in amount of uncompressed data.
b.SetBytes(int64(len(src)))
Expand Down Expand Up @@ -1386,29 +1376,27 @@ func expand(src []byte, n int) []byte {
return dst
}

func benchWords(b *testing.B, n int, decode bool) {
// Note: the file is OS-language dependent so the resulting values are not
// directly comparable for non-US-English OS installations.
data := expand(readFile(b, "/usr/share/dict/words"), n)
func benchTwain(b *testing.B, n int, decode bool) {
data := expand(readFile(b, "../testdata/Mark.Twain-Tom.Sawyer.txt"), n)
if decode {
benchDecode(b, data)
} else {
benchEncode(b, data)
}
}

// Words benchmarks: each wrapper forwards to benchWords with n ranging from
// 1e1 to 1e6. The final argument selects the decode path (true) or the encode
// path (false).
func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
// Twain benchmarks: each wrapper forwards to benchTwain with n ranging from
// 1e1 to 1e6. The final argument selects the decode path (true) or the encode
// path (false).
func BenchmarkTwainDecode1e1(b *testing.B) { benchTwain(b, 1e1, true) }
func BenchmarkTwainDecode1e2(b *testing.B) { benchTwain(b, 1e2, true) }
func BenchmarkTwainDecode1e3(b *testing.B) { benchTwain(b, 1e3, true) }
func BenchmarkTwainDecode1e4(b *testing.B) { benchTwain(b, 1e4, true) }
func BenchmarkTwainDecode1e5(b *testing.B) { benchTwain(b, 1e5, true) }
func BenchmarkTwainDecode1e6(b *testing.B) { benchTwain(b, 1e6, true) }
func BenchmarkTwainEncode1e1(b *testing.B) { benchTwain(b, 1e1, false) }
func BenchmarkTwainEncode1e2(b *testing.B) { benchTwain(b, 1e2, false) }
func BenchmarkTwainEncode1e3(b *testing.B) { benchTwain(b, 1e3, false) }
func BenchmarkTwainEncode1e4(b *testing.B) { benchTwain(b, 1e4, false) }
func BenchmarkTwainEncode1e5(b *testing.B) { benchTwain(b, 1e5, false) }
func BenchmarkTwainEncode1e6(b *testing.B) { benchTwain(b, 1e6, false) }

func BenchmarkRandomEncodeBlock1MB(b *testing.B) {
rng := rand.New(rand.NewSource(1))
Expand Down

0 comments on commit 85d8ebf

Please sign in to comment.