From 3436db4ebb37b6faf1d01eed53f516046ab4ae73 Mon Sep 17 00:00:00 2001 From: Ayan George Date: Thu, 13 Aug 2020 14:54:18 -0400 Subject: [PATCH] refactor: Use binary.Read() instead of io.ReadFull() (#19323) The original version of verifyVersion() reads into a byte slice, manually ensures its byte order, then converts it to a type comparable with Version and MagicNumber. This patch hides those details by calling binary.Read() and reading values into properly typed variables. This adds a bit of overhead but this code isn't in the hot-path and this patch greatly simplifies the code. verifyVersion() originally accepted an io.ReadSeeker. It is only called in once place and that function immediately calls seek after verifyVersion(), therefore it is probably safe to call Seek() BEFORE verifyVersion(). The benefit is that verifyVersion() is easier to test since we can pass it a bytes.Buffer. This patch adds a test for verifyVersion() as well as a benchmark. benchmark old ns/op new ns/op delta BenchmarkVerifyVersion-8 73.5 123 +67.35% Finally, this commit moves verifyVersion() from writer.go to reader.go which is where it is actually used. --- tsdb/engine/tsm1/reader.go | 32 ++++++++++++++ tsdb/engine/tsm1/reader_test.go | 78 +++++++++++++++++++++++++++++++++ tsdb/engine/tsm1/writer.go | 26 ----------- 3 files changed, 110 insertions(+), 26 deletions(-) diff --git a/tsdb/engine/tsm1/reader.go b/tsdb/engine/tsm1/reader.go index 67b28d9b26c..846ccbb088d 100644 --- a/tsdb/engine/tsm1/reader.go +++ b/tsdb/engine/tsm1/reader.go @@ -1313,10 +1313,42 @@ type mmapAccessor struct { index *indirectIndex } +// verifyVersion verifies that the reader's bytes are a TSM byte +// stream of the correct version (1) +func verifyVersion(r io.Reader) error { + // Attempt to read magic number. + var magic uint32 + if err := binary.Read(r, binary.BigEndian, &magic); err != nil { + return fmt.Errorf("init: error reading magic number of file: %v", err) + } + + // Attempt to read version. + var version byte + if err := binary.Read(r, binary.BigEndian, &version); err != nil { + return fmt.Errorf("init: error reading version: %v", err) + } + + // Ensure magic matches expectations. + if magic != MagicNumber { + return fmt.Errorf("can only read from tsm file") + } + + // Ensure version matches expectations. + if version != Version { + return fmt.Errorf("init: file is version %b. expected %b", version, Version) + } + + return nil +} + func (m *mmapAccessor) init() (*indirectIndex, error) { m.mu.Lock() defer m.mu.Unlock() + if _, err := m.f.Seek(0, 0); err != nil { + return nil, fmt.Errorf("init: failed to seek: %v", err) + } + if err := verifyVersion(m.f); err != nil { return nil, err } diff --git a/tsdb/engine/tsm1/reader_test.go b/tsdb/engine/tsm1/reader_test.go index 8e5a636ed13..647ba95cbba 100644 --- a/tsdb/engine/tsm1/reader_test.go +++ b/tsdb/engine/tsm1/reader_test.go @@ -1,6 +1,8 @@ package tsm1 import ( + "bytes" + "encoding/binary" "fmt" "io/ioutil" "math" @@ -1940,3 +1942,79 @@ func BenchmarkBlockIterator_Next(b *testing.B) { } } } + +func TestVerifyVersion(t *testing.T) { + // write a valid MagicNumber and Version to a bytes buffer. + tests := map[string]struct { + shouldErr bool + buf func() *bytes.Buffer + }{ + "Well Formed Header": { + buf: func() *bytes.Buffer { + buf := &bytes.Buffer{} + binary.Write(buf, binary.BigEndian, MagicNumber) + binary.Write(buf, binary.BigEndian, Version) + return buf + }, + }, + "Header With Bad Version": { + shouldErr: true, + buf: func() *bytes.Buffer { + buf := &bytes.Buffer{} + binary.Write(buf, binary.BigEndian, MagicNumber) + binary.Write(buf, binary.BigEndian, byte(128)) + return buf + }, + }, + "Header With Bad Magic": { + shouldErr: true, + buf: func() *bytes.Buffer { + buf := &bytes.Buffer{} + binary.Write(buf, binary.BigEndian, uint32(0xffffffff)) + binary.Write(buf, binary.BigEndian, Version) + return buf + }, + }, + "Emtpy Header": { + shouldErr: true, + buf: func() *bytes.Buffer { + buf := &bytes.Buffer{} + return buf + }, + }, + } + + t.Parallel() + for name, test := range tests { + t.Run(name, func(t *testing.T) { + buf := test.buf() + // Attempt to read MagicNumber and Version + err := verifyVersion(buf) + + if testing.Verbose() { + t.Logf("note: err is %v", err) + } + + if !test.shouldErr && err != nil { + t.Fatal(err) + } + + if test.shouldErr && err == nil { + t.Fatal("expected a non-nil error from verifyVersion() but got nil") + } + }) + } +} + +func BenchmarkVerifyVersion(b *testing.B) { + header := &bytes.Buffer{} + binary.Write(header, binary.BigEndian, MagicNumber) + binary.Write(header, binary.BigEndian, Version) + + for i := 0; i < b.N; i++ { + buf := bytes.NewBuffer(header.Bytes()) + if err := verifyVersion(buf); err != nil { + b.Fatal(err) + } + } +} diff --git a/tsdb/engine/tsm1/writer.go b/tsdb/engine/tsm1/writer.go index 2ba453a5465..4784dc6d558 100644 --- a/tsdb/engine/tsm1/writer.go +++ b/tsdb/engine/tsm1/writer.go @@ -796,29 +796,3 @@ func (t *tsmWriter) Remove() error { func (t *tsmWriter) Size() uint32 { return uint32(t.n) + t.index.Size() } - -// verifyVersion verifies that the reader's bytes are a TSM byte -// stream of the correct version (1) -func verifyVersion(r io.ReadSeeker) error { - _, err := r.Seek(0, 0) - if err != nil { - return fmt.Errorf("init: failed to seek: %v", err) - } - var b [4]byte - _, err = io.ReadFull(r, b[:]) - if err != nil { - return fmt.Errorf("init: error reading magic number of file: %v", err) - } - if binary.BigEndian.Uint32(b[:]) != MagicNumber { - return fmt.Errorf("can only read from tsm file") - } - _, err = io.ReadFull(r, b[:1]) - if err != nil { - return fmt.Errorf("init: error reading version: %v", err) - } - if b[0] != Version { - return fmt.Errorf("init: file is version %b. expected %b", b[0], Version) - } - - return nil -}