-
Notifications
You must be signed in to change notification settings - Fork 3.6k
/
Copy pathstring.go
129 lines (106 loc) · 3.2 KB
/
string.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
package tsm1
// String encoding uses snappy compression. Each string is appended to a byte
// slice, prefixed with its byte length encoded as a variable-length integer.
// The accumulated bytes are then compressed with the snappy compressor, and a
// 1 byte header is prepended to indicate the type of encoding.
import (
"encoding/binary"
"fmt"
"github.com/golang/snappy"
)
// stringCompressedSnappy is the encoding type for string blocks compressed
// with Snappy. StringEncoder.Bytes writes it, shifted into the upper four
// bits, as the one-byte block header.
//
// Note: an uncompressed format is not yet implemented.
const stringCompressedSnappy = 1
// StringEncoder encodes multiple strings into a byte slice.
//
// Write accumulates values uncompressed in an internal buffer; compression
// is applied only when Bytes is called.
type StringEncoder struct {
// bytes holds the uncompressed, length-prefixed strings written so far.
bytes []byte
}
// NewStringEncoder builds a StringEncoder whose internal buffer starts out
// empty and has capacity for sz bytes before it needs to grow.
func NewStringEncoder(sz int) StringEncoder {
	var enc StringEncoder
	enc.bytes = make([]byte, 0, sz)
	return enc
}
// Flush is a no-op: it neither reads nor modifies the encoder.
// NOTE(review): presumably kept so StringEncoder matches the method set of
// other encoders in the package — confirm against callers.
func (e *StringEncoder) Flush() {}
// Reset sets the encoder back to its initial state.
// The buffer is truncated to zero length but keeps its capacity, so later
// writes reuse the existing backing array.
func (e *StringEncoder) Reset() {
// Re-slice instead of reallocating to keep the backing array.
e.bytes = e.bytes[:0]
}
// Write encodes s to the underlying buffer.
//
// The value is stored as the byte length of s in unsigned variable-length
// (uvarint) form, immediately followed by the raw bytes of s. Nothing is
// compressed at this point; the buffer is only appended to.
func (e *StringEncoder) Write(s string) {
	// Scratch space sized for the largest possible uvarint. Using the
	// named constant and a fixed-size array avoids the magic number 10 and a
	// per-call slice allocation.
	var scratch [binary.MaxVarintLen64]byte

	// Append the length of the string using variable byte encoding.
	n := binary.PutUvarint(scratch[:], uint64(len(s)))
	e.bytes = append(e.bytes, scratch[:n]...)

	// Append the string bytes.
	e.bytes = append(e.bytes, s...)
}
// Bytes returns the encoded block: a one-byte header followed by the
// snappy-compressed contents of the underlying buffer. The returned slice is
// freshly allocated and does not alias the encoder's buffer. The error
// result is always nil.
func (e *StringEncoder) Bytes() ([]byte, error) {
	// Compress everything written so far.
	compressed := snappy.Encode(nil, e.bytes)

	// Prefix with a 1 byte header (encoding type in the upper four bits),
	// reserved for future extension.
	block := make([]byte, 0, 1+len(compressed))
	block = append(block, stringCompressedSnappy<<4)
	block = append(block, compressed...)
	return block, nil
}
// StringDecoder decodes a byte slice into strings.
type StringDecoder struct {
// b is the decompressed block: a sequence of uvarint length-prefixed strings.
b []byte
// l is the encoded size (length prefix plus string bytes) recorded by the
// most recent Read; Next advances i by this amount.
l int
// i is the offset in b of the current value.
i int
// err is the error recorded by Read, if any; once set, Next returns false.
err error
}
// SetBytes initializes the decoder with bytes to read from.
// This must be called before calling any other method.
//
// b is expected to be a block as produced by StringEncoder.Bytes: one header
// byte followed by snappy-compressed data. An empty b yields a decoder with
// no values. On success the read position and any previous error are reset.
// If decompression fails, the decoder is left unchanged and the returned
// error wraps the underlying snappy error.
func (e *StringDecoder) SetBytes(b []byte) error {
	// First byte stores the encoding type, only have snappy format
	// currently so ignore for now.
	var data []byte
	if len(b) > 0 {
		var err error
		data, err = snappy.Decode(nil, b[1:])
		if err != nil {
			// Wrap with %w so callers can inspect the cause via
			// errors.Is / errors.As; the message text is unchanged.
			return fmt.Errorf("failed to decode string block: %w", err)
		}
	}

	e.b = data
	e.l = 0
	e.i = 0
	e.err = nil

	return nil
}
// Next moves past the value most recently decoded by Read and reports
// whether another value remains. It returns false, without moving, once an
// error has been recorded. The distance moved is the size stored by the last
// Read, so each successful Next is expected to be followed by a Read before
// Next is called again.
func (e *StringDecoder) Next() bool {
	if e.err != nil {
		return false
	}

	pos := e.i + e.l
	e.i = pos
	return pos < len(e.b)
}
// Read returns the next value from the decoder.
//
// It decodes the uvarint length prefix at the current offset and returns the
// string bytes that follow. On success it records the encoded size of the
// value so that Next can step past it. On malformed input (invalid length
// prefix, a length that cannot be represented, or a length running past the
// end of the data) it returns "" and records an error retrievable via Error;
// the recorded size is left untouched in that case.
func (e *StringDecoder) Read() string {
	// Read the length of the string.
	length, n := binary.Uvarint(e.b[e.i:])
	if n <= 0 {
		e.err = fmt.Errorf("stringDecoder: invalid encoded string length")
		return ""
	}

	lower := e.i + n

	// Validate the length while it is still a uint64. Converting to int
	// first would silently truncate values >= 2^32 on 32-bit platforms and
	// let a corrupt length decode the wrong span.
	const maxInt = int(^uint(0) >> 1)
	if length > uint64(maxInt-lower) {
		e.err = fmt.Errorf("stringDecoder: length overflow")
		return ""
	}

	upper := lower + int(length)
	if upper > len(e.b) {
		e.err = fmt.Errorf("stringDecoder: not enough data to represent encoded string")
		return ""
	}

	// The length of this string plus the length of the variable byte encoded
	// length; recorded only once the value is known to be valid.
	e.l = upper - e.i

	return string(e.b[lower:upper])
}
// Error returns the last error encountered by the decoder, or nil if no
// error has been recorded since the last successful SetBytes.
func (e *StringDecoder) Error() error {
return e.err
}