Skip to content

Commit b5611a5

Browse files
maxnoeldomodwyer
authored and committed
bson: Added Encoder and Decoder types for stream encoding/decoding. (#127)
* bson: Added Encoder and Decoder types for stream encoding/decoding. Those types are analogous to those found in json and yaml. They allow us to operate on io.Readers/io.Writers instead of raw byte slices. Streams are expected to be sequences of concatenated BSON documents: *.bson files from MongoDB dumps, for example.
* Stream: NewEncoder and NewDecoder now return pointers. JSON and YAML do that too, so let's be consistent.
* Stream decoder: added checks on document size limits, and a panic handler. Strangely, the BSON spec defines the document size header as a signed int32, but:
  - No document can be smaller than 5 bytes (size header + null terminator).
  - MongoDB constrains BSON documents to 16 MiB at most.
  Therefore, documents whose header doesn't obey those limits are discarded and Decode returns ErrInvalidDocumentSize. In addition, we're reusing the handleErr panic handler in Decode to protect from unwanted panics in Unmarshal.
* Exported MinDocumentSize and MaxDocumentSize consts.
1 parent 69bef6a commit b5611a5

File tree

2 files changed

+167
-0
lines changed

2 files changed

+167
-0
lines changed

bson/stream.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
package bson
2+
3+
import (
4+
"bytes"
5+
"encoding/binary"
6+
"fmt"
7+
"io"
8+
)
9+
10+
// Size limits, in bytes, applied to the int32 size header of a BSON document.
const (
	// MinDocumentSize is the byte length of the shortest valid BSON document:
	// the 4-byte int32 size header followed by the 0x00 end-of-document byte.
	MinDocumentSize = 5

	// MaxDocumentSize is the largest BSON document size MongoDB accepts:
	// 16 MiB (see https://docs.mongodb.com/manual/reference/limits/).
	MaxDocumentSize = 16 * 1024 * 1024
)
19+
20+
// ErrInvalidDocumentSize reports a BSON document whose size header is outside
// the accepted range, i.e. below MinDocumentSize or above MaxDocumentSize.
type ErrInvalidDocumentSize struct {
	// DocumentSize is the offending value, exactly as read from the header.
	DocumentSize int32
}

// Error implements the error interface; the message carries the rejected size.
func (sizeErr ErrInvalidDocumentSize) Error() string {
	const format = "invalid document size %d"
	return fmt.Sprintf(format, sizeErr.DocumentSize)
}
29+
30+
// Decoder decodes a stream of BSON values read from an underlying io.Reader.
type Decoder struct {
	// source is the reader the BSON bytes are pulled from.
	source io.Reader
}

// NewDecoder creates a Decoder that pulls its input from source.
// The reader is stored as-is: the Decoder does not wrap it in any buffering
// of its own.
func NewDecoder(source io.Reader) *Decoder {
	dec := new(Decoder)
	dec.source = source
	return dec
}
40+
41+
// Decode reads the next BSON-encoded value from its input and stores it in the value pointed to by v.
42+
// See the documentation for Unmarshal for details about the conversion of BSON into a Go value.
43+
func (dec *Decoder) Decode(v interface{}) (err error) {
44+
// BSON documents start with their size as a *signed* int32.
45+
var docSize int32
46+
if err = binary.Read(dec.source, binary.LittleEndian, &docSize); err != nil {
47+
return
48+
}
49+
50+
if docSize < MinDocumentSize || docSize > MaxDocumentSize {
51+
return ErrInvalidDocumentSize{DocumentSize: docSize}
52+
}
53+
54+
docBuffer := bytes.NewBuffer(make([]byte, 0, docSize))
55+
if err = binary.Write(docBuffer, binary.LittleEndian, docSize); err != nil {
56+
return
57+
}
58+
59+
// docSize is the *full* document's size (including the 4-byte size header,
60+
// which has already been read).
61+
if _, err = io.CopyN(docBuffer, dec.source, int64(docSize-4)); err != nil {
62+
return
63+
}
64+
65+
// Let Unmarshal handle the rest.
66+
defer handleErr(&err)
67+
return Unmarshal(docBuffer.Bytes(), v)
68+
}
69+
70+
// Encoder writes BSON-encoded values to an underlying io.Writer.
type Encoder struct {
	// target is the writer the encoded bytes are sent to.
	target io.Writer
}

// NewEncoder creates an Encoder whose output goes to target.
func NewEncoder(target io.Writer) *Encoder {
	enc := new(Encoder)
	enc.target = target
	return enc
}
79+
80+
// Encode encodes v to BSON, and if successful writes it to the Encoder's output stream.
81+
// See the documentation for Marshal for details about the conversion of Go values to BSON.
82+
func (enc *Encoder) Encode(v interface{}) error {
83+
data, err := Marshal(v)
84+
if err != nil {
85+
return err
86+
}
87+
88+
_, err = enc.target.Write(data)
89+
return err
90+
}

bson/stream_test.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package bson_test
2+
3+
import (
4+
"bytes"
5+
6+
"github.com/globalsign/mgo/bson"
7+
. "gopkg.in/check.v1"
8+
)
9+
10+
// invalidSizeDocuments lists byte sequences that Decode is expected to reject:
// each one has a missing, truncated or out-of-range size header, or a body
// shorter than its header announces.
var invalidSizeDocuments = [][]byte{
	{},                             // empty input: no header at all
	{0x04},                         // header cut short after a single byte
	{0xff, 0xff, 0xff, 0xff},       // size -1 (negative)
	{0x04, 0x00, 0x00, 0x00},       // complete header, size 4: below the 5-byte minimum
	{0xff, 0x00, 0x00, 0x00, 0x00}, // size 255 is acceptable, but only 1 body byte follows
	{0xff, 0xff, 0xff, 0x7f},       // size 0x7fffffff: too big
}
24+
25+
// Reusing sampleItems from bson_test
26+
27+
func (s *S) TestEncodeSampleItems(c *C) {
28+
for i, item := range sampleItems {
29+
buf := bytes.NewBuffer(nil)
30+
enc := bson.NewEncoder(buf)
31+
32+
err := enc.Encode(item.obj)
33+
c.Assert(err, IsNil)
34+
c.Assert(string(buf.Bytes()), Equals, item.data, Commentf("Failed on item %d", i))
35+
}
36+
}
37+
38+
func (s *S) TestDecodeSampleItems(c *C) {
39+
for i, item := range sampleItems {
40+
buf := bytes.NewBuffer([]byte(item.data))
41+
dec := bson.NewDecoder(buf)
42+
43+
value := bson.M{}
44+
err := dec.Decode(&value)
45+
c.Assert(err, IsNil)
46+
c.Assert(value, DeepEquals, item.obj, Commentf("Failed on item %d", i))
47+
}
48+
}
49+
50+
func (s *S) TestStreamRoundTrip(c *C) {
51+
buf := bytes.NewBuffer(nil)
52+
enc := bson.NewEncoder(buf)
53+
54+
for _, item := range sampleItems {
55+
err := enc.Encode(item.obj)
56+
c.Assert(err, IsNil)
57+
}
58+
59+
// Ensure that everything that was encoded is decodable in the same order.
60+
dec := bson.NewDecoder(buf)
61+
for i, item := range sampleItems {
62+
value := bson.M{}
63+
err := dec.Decode(&value)
64+
c.Assert(err, IsNil)
65+
c.Assert(value, DeepEquals, item.obj, Commentf("Failed on item %d", i))
66+
}
67+
}
68+
69+
func (s *S) TestDecodeDocumentTooSmall(c *C) {
70+
for i, item := range invalidSizeDocuments {
71+
buf := bytes.NewBuffer(item)
72+
dec := bson.NewDecoder(buf)
73+
value := bson.M{}
74+
err := dec.Decode(&value)
75+
c.Assert(err, NotNil, Commentf("Failed on invalid size item %d", i))
76+
}
77+
}

0 commit comments

Comments
 (0)