diff --git a/s2/cmd/s2d/main.go b/s2/cmd/s2d/main.go index cdebe2514c..cd36799664 100644 --- a/s2/cmd/s2d/main.go +++ b/s2/cmd/s2d/main.go @@ -2,6 +2,7 @@ package main import ( "bufio" + "bytes" "errors" "flag" "fmt" @@ -9,6 +10,7 @@ import ( "io/ioutil" "os" "path/filepath" + "runtime/debug" "strings" "sync" "time" @@ -76,9 +78,52 @@ Options:`) } *quiet = *quiet || *stdout - allFiles := files - for i := 0; i < *bench; i++ { - files = append(files, allFiles...) + + if *bench > 0 { + debug.SetGCPercent(10) + for _, filename := range files { + switch { + case strings.HasSuffix(filename, ".s2"): + case strings.HasSuffix(filename, ".snappy"): + default: + fmt.Println("Skipping", filename) + continue + } + + func() { + if !*quiet { + fmt.Print("Reading ", filename, "...") + } + // Input file. + file, err := os.Open(filename) + exitErr(err) + finfo, err := file.Stat() + exitErr(err) + b := make([]byte, finfo.Size()) + _, err = io.ReadFull(file, b) + exitErr(err) + file.Close() + + for i := 0; i < *bench; i++ { + if !*quiet { + fmt.Print("\nDecompressing...") + } + r.Reset(bytes.NewBuffer(b)) + start := time.Now() + output, err := io.Copy(ioutil.Discard, r) + exitErr(err) + if !*quiet { + elapsed := time.Since(start) + ms := elapsed.Round(time.Millisecond) + mbPerSec := (float64(output) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second))) + pct := float64(output) * 100 / float64(len(b)) + fmt.Printf(" %d -> %d [%.02f%%]; %v, %.01fMB/s", len(b), output, pct, ms, mbPerSec) + } + } + fmt.Println("") + }() + } + os.Exit(0) } for _, filename := range files { diff --git a/s2/decode.go b/s2/decode.go index 605b85dd91..0b99b3b038 100644 --- a/s2/decode.go +++ b/s2/decode.go @@ -20,8 +20,6 @@ var ( ErrTooLarge = errors.New("s2: decoded block is too large") // ErrUnsupported reports that the input isn't supported. ErrUnsupported = errors.New("s2: unsupported input") - - errUnsupportedLiteralLength = errors.New("s2: unsupported literal length") ) // DecodedLen returns the length of the decoded block. @@ -46,8 +44,7 @@ func decodedLen(src []byte) (blockLen, headerLen int, err error) { } const ( - decodeErrCodeCorrupt = 1 - decodeErrCodeUnsupportedLiteralLength = 2 + decodeErrCodeCorrupt = 1 ) // Decode returns the decoded form of src. The returned slice may be a sub- @@ -65,13 +62,10 @@ func Decode(dst, src []byte) ([]byte, error) { } else { dst = make([]byte, dLen) } - switch s2Decode(dst, src[s:]) { - case 0: - return dst, nil - case decodeErrCodeUnsupportedLiteralLength: - return nil, errUnsupportedLiteralLength + if s2Decode(dst, src[s:]) != 0 { + return nil, ErrCorrupt } - return nil, ErrCorrupt + return dst, nil } // NewReader returns a new Reader that decompresses from r, using the framing diff --git a/s2/decode_other.go b/s2/decode_other.go index 4cb61001cc..7ee037b9d1 100644 --- a/s2/decode_other.go +++ b/s2/decode_other.go @@ -54,9 +54,6 @@ func s2Decode(dst, src []byte) int { x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 } length = int(x) + 1 - if length <= 0 { - return decodeErrCodeUnsupportedLiteralLength - } if length > len(dst)-d || length > len(src)-s { return decodeErrCodeCorrupt } diff --git a/s2/s2_test.go b/s2/s2_test.go index 2e8d747d29..2fc5b1da36 100644 --- a/s2/s2_test.go +++ b/s2/s2_test.go @@ -1328,16 +1328,6 @@ func benchDecode(b *testing.B, src []byte) { } } -func benchDecodeBetter(b *testing.B, src []byte) { - encoded := EncodeBetter(nil, src) - // Bandwidth is in amount of uncompressed data. - b.SetBytes(int64(len(src))) - b.ResetTimer() - for i := 0; i < b.N; i++ { - Decode(src, encoded) - } -} - func benchEncode(b *testing.B, src []byte) { // Bandwidth is in amount of uncompressed data. b.SetBytes(int64(len(src))) @@ -1386,10 +1376,8 @@ func expand(src []byte, n int) []byte { return dst } -func benchWords(b *testing.B, n int, decode bool) { - // Note: the file is OS-language dependent so the resulting values are not - // directly comparable for non-US-English OS installations. - data := expand(readFile(b, "/usr/share/dict/words"), n) +func benchTwain(b *testing.B, n int, decode bool) { + data := expand(readFile(b, "../testdata/Mark.Twain-Tom.Sawyer.txt"), n) if decode { benchDecode(b, data) } else { @@ -1397,18 +1385,18 @@ func benchWords(b *testing.B, n int, decode bool) { } } -func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) } -func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) } -func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) } -func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) } -func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) } -func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) } -func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) } -func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) } -func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) } -func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) } -func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) } -func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) } +func BenchmarkTwainDecode1e1(b *testing.B) { benchTwain(b, 1e1, true) } +func BenchmarkTwainDecode1e2(b *testing.B) { benchTwain(b, 1e2, true) } +func BenchmarkTwainDecode1e3(b *testing.B) { benchTwain(b, 1e3, true) } +func BenchmarkTwainDecode1e4(b *testing.B) { benchTwain(b, 1e4, true) } +func BenchmarkTwainDecode1e5(b *testing.B) { benchTwain(b, 1e5, true) } +func BenchmarkTwainDecode1e6(b *testing.B) { benchTwain(b, 1e6, true) } +func BenchmarkTwainEncode1e1(b *testing.B) { benchTwain(b, 1e1, false) } +func BenchmarkTwainEncode1e2(b *testing.B) { benchTwain(b, 1e2, false) } +func BenchmarkTwainEncode1e3(b *testing.B) { benchTwain(b, 1e3, false) } +func BenchmarkTwainEncode1e4(b *testing.B) { benchTwain(b, 1e4, false) } +func BenchmarkTwainEncode1e5(b *testing.B) { benchTwain(b, 1e5, false) } +func BenchmarkTwainEncode1e6(b *testing.B) { benchTwain(b, 1e6, false) } func BenchmarkRandomEncodeBlock1MB(b *testing.B) { rng := rand.New(rand.NewSource(1))