Skip to content

Commit

Permalink
Add progressive erasure shard encoding (#170)
Browse files Browse the repository at this point in the history
* Add progressive erasure shard encoding

Allow progressively building erasure shards.
  • Loading branch information
klauspost authored Oct 26, 2021
1 parent 7761c8f commit 0eef97b
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 1 deletion.
57 changes: 57 additions & 0 deletions examples_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,63 @@ func ExampleEncoder() {
// ok
}

// Simple example of how to use all functions of the EncoderIdx.
// Note that all error checks have been removed to keep it short.
func ExampleEncoder_EncodeIdx() {
const dataShards = 7
const erasureShards = 3

// Create some sample data
var data = make([]byte, 250000)
fillRandom(data)

// Create an encoder with 17 data and 3 parity slices.

This comment has been minimized.

Copy link
@harshavardhana

harshavardhana Nov 15, 2021

Contributor

// Create an encoder with 17 data and 3 parity slices.

Looks like this is '7'

enc, _ := reedsolomon.New(dataShards, erasureShards)

// Split the data into shards
shards, _ := enc.Split(data)

// Zero erasure shards.
for i := 0; i < erasureShards; i++ {
clear := shards[dataShards+i]
for j := range clear {
clear[j] = 0
}
}

for i := 0; i < dataShards; i++ {
// Encode one shard at the time.
// Note how this gives linear access.
// There is however no requirement on shards being delivered in order.
// All parity shards will be updated on each run.
_ = enc.EncodeIdx(shards[i], i, shards[dataShards:])
}

// Verify the parity set
ok, err := enc.Verify(shards)
if ok {
fmt.Println("ok")
} else {
fmt.Println(err)
}

// Delete two shards
shards[dataShards-2], shards[dataShards-2] = nil, nil

// Reconstruct the shards
_ = enc.Reconstruct(shards)

// Verify the data set
ok, err = enc.Verify(shards)
if ok {
fmt.Println("ok")
} else {
fmt.Println(err)
}
// Output: ok
// ok
}

// This demonstrates that shards can be arbitrary sliced and
// merged and still remain valid.
func ExampleEncoder_slicing() {
Expand Down
48 changes: 48 additions & 0 deletions reedsolomon.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ type Encoder interface {
// data shards while this is running.
Encode(shards [][]byte) error

// EncodeIdx will add parity for a single data shard.
// Parity shards should start out as 0. The caller must zero them.
// Data shards must be delivered exactly once. There is no check for this.
// The parity shards will always be updated and the data shards will remain the same.
EncodeIdx(dataShard []byte, idx int, parity [][]byte) error

// Verify returns true if the parity shards contain correct data.
// The data is the same format as Encode. No data is modified, so
// you are allowed to read from data while this is running.
Expand Down Expand Up @@ -396,6 +402,48 @@ func (r *reedSolomon) Encode(shards [][]byte) error {
return nil
}

// EncodeIdx will add parity for a single data shard.
// Parity shards should start out zeroed. The caller must zero them before first call.
// Data shards should only be delivered once. There is no check for this.
// The parity shards will always be updated and the data shards will remain the unchanged.
func (r *reedSolomon) EncodeIdx(dataShard []byte, idx int, parity [][]byte) error {
if len(parity) != r.ParityShards {
return ErrTooFewShards
}
if len(parity) == 0 {
return nil
}
if idx < 0 || idx >= r.DataShards {
return ErrInvShardNum
}
err := checkShards(parity, false)
if err != nil {
return err
}
if len(parity[0]) != len(dataShard) {
return ErrShardSize
}

// Process using no goroutines for now.
start, end := 0, r.o.perRound
if end > len(dataShard) {
end = len(dataShard)
}

for start < len(dataShard) {
in := dataShard[start:end]
for iRow := 0; iRow < r.ParityShards; iRow++ {
galMulSliceXor(r.parity[iRow][idx], in, parity[iRow][start:end], &r.o)
}
start = end
end += r.o.perRound
if end > len(dataShard) {
end = len(dataShard)
}
}
return nil
}

// ErrInvalidInput is returned if invalid input parameter of Update.
var ErrInvalidInput = errors.New("invalid input")

Expand Down
112 changes: 111 additions & 1 deletion reedsolomon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,18 @@ func TestEncoding(t *testing.T) {
t.Run("default", func(t *testing.T) {
testEncoding(t, testOptions()...)
})
t.Run("default-dx", func(t *testing.T) {
testEncodingIdx(t, testOptions()...)
})
for i, o := range testOpts() {
t.Run(fmt.Sprintf("opt-%d", i), func(t *testing.T) {
testEncoding(t, o...)
})
if !testing.Short() {
t.Run(fmt.Sprintf("idx-opt-%d", i), func(t *testing.T) {
testEncodingIdx(t, o...)
})
}
}
}

Expand Down Expand Up @@ -207,7 +215,7 @@ func testEncoding(t *testing.T, o ...Option) {
shards[s] = make([]byte, perShard)
}

for s := 0; s < data; s++ {
for s := 0; s < len(shards); s++ {
rng.Read(shards[s])
}

Expand Down Expand Up @@ -281,6 +289,108 @@ func testEncoding(t *testing.T, o ...Option) {
}
}

func testEncodingIdx(t *testing.T, o ...Option) {
for _, size := range testSizes {
data, parity := size[0], size[1]
rng := rand.New(rand.NewSource(0xabadc0cac01a))
t.Run(fmt.Sprintf("%dx%d", data, parity), func(t *testing.T) {
sz := testDataSizes
if testing.Short() {
sz = testDataSizesShort
}
for _, perShard := range sz {
t.Run(fmt.Sprint(perShard), func(t *testing.T) {

r, err := New(data, parity, testOptions(o...)...)
if err != nil {
t.Fatal(err)
}
shards := make([][]byte, data+parity)
for s := range shards {
shards[s] = make([]byte, perShard)
}
shuffle := make([]int, data)
for i := range shuffle {
shuffle[i] = i
}
rng.Shuffle(len(shuffle), func(i, j int) { shuffle[i], shuffle[j] = shuffle[j], shuffle[i] })

// Send shards in random order.
for s := 0; s < data; s++ {
s := shuffle[s]
rng.Read(shards[s])
err = r.EncodeIdx(shards[s], s, shards[data:])
if err != nil {
t.Fatal(err)
}
}

ok, err := r.Verify(shards)
if err != nil {
t.Fatal(err)
}
if !ok {
t.Fatal("Verification failed")
}

if parity == 0 {
// Check that Reconstruct and ReconstructData do nothing
err = r.ReconstructData(shards)
if err != nil {
t.Fatal(err)
}
err = r.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}

// Skip integrity checks
return
}

// Delete one in data
idx := rng.Intn(data)
want := shards[idx]
shards[idx] = nil

err = r.ReconstructData(shards)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(shards[idx], want) {
t.Fatal("did not ReconstructData correctly")
}

// Delete one randomly
idx = rng.Intn(data + parity)
want = shards[idx]
shards[idx] = nil
err = r.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(shards[idx], want) {
t.Fatal("did not Reconstruct correctly")
}

err = r.Encode(make([][]byte, 1))
if err != ErrTooFewShards {
t.Errorf("expected %v, got %v", ErrTooFewShards, err)
}

// Make one too short.
shards[idx] = shards[idx][:perShard-1]
err = r.Encode(shards)
if err != ErrShardSize {
t.Errorf("expected %v, got %v", ErrShardSize, err)
}
})
}
})

}
}

func TestUpdate(t *testing.T) {
for i, o := range testOpts() {
t.Run(fmt.Sprintf("options %d", i), func(t *testing.T) {
Expand Down

0 comments on commit 0eef97b

Please sign in to comment.