Skip to content

Commit

Permalink
Chunking (#10)
Browse files Browse the repository at this point in the history
* Add chunkenc without serialisation for now.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* Simplify interface and add serialisation

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* Move away from \xFF magic to something simple

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* Add serialisation and Deserialisation

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* Modify interface to be closer to logish interface.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* chunkenc: Fix race b/w append and iteration.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* chunkenc: Make iterators honour bounds

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* chunkenc: Remove locks as safety is assured externally

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* chunkenc: Add checksums

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* Add code quotes around block design.

* Split headBlock into its own type.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* Simplify encoding and decoding.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

* Expose flags.

Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>

* Use the already existing EntryIterator interface

Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>

* Use existing Chunk interface.

Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>

* Review feedback

Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>

* Integrate the compressed chunk and add metrics

Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>
  • Loading branch information
gouthamve authored Jul 9, 2018
1 parent c0b153e commit 8f4e12a
Show file tree
Hide file tree
Showing 12 changed files with 1,240 additions and 107 deletions.
28 changes: 28 additions & 0 deletions pkg/chunkenc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Chunk format

```
| | |
| MagicNumber(4b) | version(1b) |
| | |
--------------------------------------------------
| block-1 bytes | checksum (4b) |
--------------------------------------------------
| block-2 bytes | checksum (4b) |
--------------------------------------------------
| block-n bytes | checksum (4b) |
--------------------------------------------------
| #blocks (uvarint) |
--------------------------------------------------
| #entries(uvarint) | mint, maxt (varint) | offset, len (uvarint) |
-------------------------------------------------------------------
| #entries(uvarint) | mint, maxt (varint) | offset, len (uvarint) |
-------------------------------------------------------------------
| #entries(uvarint) | mint, maxt (varint) | offset, len (uvarint) |
-------------------------------------------------------------------
| #entries(uvarint) | mint, maxt (varint) | offset, len (uvarint) |
-------------------------------------------------------------------
| checksum(from #blocks) |
-------------------------------------------------------------------
| metasOffset - offset to the point with #blocks |
--------------------------------------------------
```
177 changes: 177 additions & 0 deletions pkg/chunkenc/encoding_helpers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
package chunkenc

import (
"encoding/binary"
"hash"
"hash/crc32"
)

// encbuf is a helper type to populate a byte slice with various types.
type encbuf struct {
	// b accumulates the encoded output; every put* method appends to it.
	b []byte
	// c is scratch space in which fixed-width and varint values are encoded
	// before being appended to b.
	c [binary.MaxVarintLen64]byte
}

// reset truncates the buffer to zero length while keeping its backing array.
func (e *encbuf) reset() {
	e.b = e.b[:0]
}

// get returns the bytes written so far; the slice is returned without copying.
func (e *encbuf) get() []byte {
	return e.b
}

// len reports the number of bytes written so far.
func (e *encbuf) len() int {
	written := len(e.b)
	return written
}

// putString appends the raw bytes of s, without any length prefix.
func (e *encbuf) putString(s string) {
	e.b = append(e.b, s...)
}

// putBytes appends b verbatim, without any length prefix.
func (e *encbuf) putBytes(b []byte) {
	e.b = append(e.b, b...)
}

// putByte appends the single byte c.
func (e *encbuf) putByte(c byte) {
	e.b = append(e.b, c)
}

// putBE32int converts x to uint32 and appends it as 4 big-endian bytes.
func (e *encbuf) putBE32int(x int) {
	v := uint32(x)
	e.putBE32(v)
}

// putBE64int converts x to uint64 and appends it as 8 big-endian bytes.
func (e *encbuf) putBE64int(x int) {
	v := uint64(x)
	e.putBE64(v)
}

// putBE64int64 converts x to uint64 and appends it as 8 big-endian bytes.
func (e *encbuf) putBE64int64(x int64) {
	v := uint64(x)
	e.putBE64(v)
}

// putUvarint32 widens x to uint64 and appends it as an unsigned varint.
func (e *encbuf) putUvarint32(x uint32) {
	v := uint64(x)
	e.putUvarint64(v)
}

// putUvarint converts x to uint64 and appends it as an unsigned varint.
func (e *encbuf) putUvarint(x int) {
	v := uint64(x)
	e.putUvarint64(v)
}

// putBE32 appends x to the buffer as 4 bytes in big-endian order.
func (e *encbuf) putBE32(x uint32) {
	scratch := e.c[:4]
	binary.BigEndian.PutUint32(scratch, x)
	e.b = append(e.b, scratch...)
}

// putBE64 appends x to the buffer as 8 bytes in big-endian order.
func (e *encbuf) putBE64(x uint64) {
	scratch := e.c[:8]
	binary.BigEndian.PutUint64(scratch, x)
	e.b = append(e.b, scratch...)
}

// putUvarint64 appends x to the buffer using the unsigned varint encoding
// of encoding/binary.
func (e *encbuf) putUvarint64(x uint64) {
	scratch := e.c[:]
	encoded := scratch[:binary.PutUvarint(scratch, x)]
	e.b = append(e.b, encoded...)
}

// putVarint64 appends x to the buffer using the signed varint encoding
// of encoding/binary.
func (e *encbuf) putVarint64(x int64) {
	scratch := e.c[:]
	encoded := scratch[:binary.PutVarint(scratch, x)]
	e.b = append(e.b, encoded...)
}

// putUvarintStr writes s to the buffer, prefixed by its length in bytes
// (not runes) encoded as an unsigned varint.
func (e *encbuf) putUvarintStr(s string) {
	size := len(s)
	e.putUvarint(size)
	e.b = append(e.b, s...)
}

// putHash resets h, feeds it the buffer's current contents, and appends the
// resulting digest to the buffer. It panics if h.Write reports an error.
func (e *encbuf) putHash(h hash.Hash) {
	h.Reset()
	if _, err := h.Write(e.b); err != nil {
		panic(err) // The CRC32 implementation does not error
	}
	e.b = h.Sum(e.b)
}

// decbuf provides safe methods to extract data from a byte slice. It does all
// necessary bounds checking and advancing of the byte slice.
// Several datums can be extracted without checking for errors. However, before using
// any datum, the err() method must be checked.
type decbuf struct {
	// b holds the bytes not yet consumed; successful reads re-slice it forward.
	b []byte
	// e is the error recorded by a failed read. While it is non-nil the
	// decoding methods return zero values.
	e error
}

// uvarint reads an unsigned varint and converts it to int.
func (d *decbuf) uvarint() int {
	v := d.uvarint64()
	return int(v)
}

// uvarint32 reads an unsigned varint and truncates it to uint32.
func (d *decbuf) uvarint32() uint32 {
	v := d.uvarint64()
	return uint32(v)
}

// be32int reads a big-endian uint32 and converts it to int.
func (d *decbuf) be32int() int {
	v := d.be32()
	return int(v)
}

// be64int64 reads a big-endian uint64 and reinterprets it as int64.
func (d *decbuf) be64int64() int64 {
	v := d.be64()
	return int64(v)
}

// crc32 computes a CRC32 checksum over all bytes that have not yet been
// consumed, using castagnoliTable. It does not advance the buffer.
func (d *decbuf) crc32() uint32 {
	remaining := d.b
	return crc32.Update(0, castagnoliTable, remaining)
}

// uvarintStr reads an unsigned-varint length prefix followed by that many
// bytes and returns them as a string. On any failure it returns "" and the
// error is available via d.e; ErrInvalidSize is recorded when fewer bytes
// remain than the prefix announces.
func (d *decbuf) uvarintStr() string {
	l := d.uvarint64()
	if d.e != nil {
		return ""
	}
	// Compare in uint64: converting l to int first would turn prefixes
	// >= 1<<63 into negative numbers that slip past the bounds check and
	// make the slice expression below panic.
	if l > uint64(len(d.b)) {
		d.e = ErrInvalidSize
		return ""
	}
	n := int(l) // safe: l <= len(d.b)
	s := string(d.b[:n])
	d.b = d.b[n:]
	return s
}

// varint64 decodes a signed varint from the front of the buffer and advances
// past it. It returns 0 if an error is already recorded, and records
// ErrInvalidSize (returning 0) when binary.Varint cannot decode a value.
func (d *decbuf) varint64() int64 {
	if d.e == nil {
		if val, size := binary.Varint(d.b); size > 0 {
			d.b = d.b[size:]
			return val
		}
		d.e = ErrInvalidSize
	}
	return 0
}

// uvarint64 decodes an unsigned varint from the front of the buffer and
// advances past it. It returns 0 if an error is already recorded, and records
// ErrInvalidSize (returning 0) when binary.Uvarint cannot decode a value.
func (d *decbuf) uvarint64() uint64 {
	if d.e == nil {
		if val, size := binary.Uvarint(d.b); size > 0 {
			d.b = d.b[size:]
			return val
		}
		d.e = ErrInvalidSize
	}
	return 0
}

// be64 reads a big-endian uint64 from the front of the buffer and advances
// past its 8 bytes. It returns 0 if an error is already recorded, and records
// ErrInvalidSize (returning 0, consuming nothing) when fewer than 8 bytes
// remain.
func (d *decbuf) be64() uint64 {
	if d.e != nil {
		return 0
	}
	// A uint64 needs 8 bytes; checking for fewer would let short inputs
	// reach Uint64 and the re-slice below, both of which panic.
	if len(d.b) < 8 {
		d.e = ErrInvalidSize
		return 0
	}
	x := binary.BigEndian.Uint64(d.b)
	d.b = d.b[8:]
	return x
}

// be32 reads a big-endian uint32 from the front of the buffer and advances
// past its 4 bytes. It returns 0 if an error is already recorded, and records
// ErrInvalidSize (returning 0) when fewer than 4 bytes remain.
func (d *decbuf) be32() uint32 {
	switch {
	case d.e != nil:
		return 0
	case len(d.b) < 4:
		d.e = ErrInvalidSize
		return 0
	}
	head := d.b[:4]
	d.b = d.b[4:]
	return binary.BigEndian.Uint32(head)
}

// byte reads a single byte from the front of the buffer and advances past it.
// It returns 0 if an error is already recorded, and records ErrInvalidSize
// (returning 0) when the buffer is empty.
func (d *decbuf) byte() byte {
	if d.e != nil {
		return 0
	}
	if len(d.b) == 0 {
		d.e = ErrInvalidSize
		return 0
	}
	first, rest := d.b[0], d.b[1:]
	d.b = rest
	return first
}

// decbuf splits off the next l bytes into a new decbuf and advances d past
// them. The returned buffer shares memory with d.
//
// If d already carries an error, the returned decbuf carries the same error.
// If l is negative or exceeds the remaining bytes, the returned decbuf carries
// ErrInvalidSize; d itself is left unchanged in that case.
func (d *decbuf) decbuf(l int) decbuf {
	if d.e != nil {
		return decbuf{e: d.e}
	}
	// Reject negative lengths too: they can arise from int conversion of a
	// huge decoded uvarint and would make the slice expressions below panic.
	if l < 0 || l > len(d.b) {
		return decbuf{e: ErrInvalidSize}
	}
	r := decbuf{b: d.b[:l]}
	d.b = d.b[l:]
	return r
}

// err returns the error recorded by an earlier failed read, or nil.
func (d *decbuf) err() error {
	return d.e
}

// len reports how many bytes have not been consumed yet.
func (d *decbuf) len() int {
	remaining := len(d.b)
	return remaining
}

// get returns the bytes that have not been consumed yet, without copying.
func (d *decbuf) get() []byte {
	return d.b
}
Loading

0 comments on commit 8f4e12a

Please sign in to comment.