-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix : fix compute hash #9
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,17 +3,17 @@ Support common libs for different repos of greenfield | |
|
||
## Supported Common Functions | ||
|
||
### Erasure encode/decode algorithm | ||
### 1. Erasure encode/decode algorithm | ||
|
||
1. erasure package support RSEncoder which contain basic Encode and Decode reedSolomon APIs | ||
(1) The erasure package supports RSEncoder, which contains the basic Encode and Decode Reed-Solomon APIs | ||
``` | ||
RSEncoderStorage, err := NewRSEncoder(dataShards, parityShards, int64(blockSize)) | ||
// encode data and return the encoded shard number | ||
func (r *RSEncoder) EncodeData(content []byte) ([][]byte, error) | ||
func (r *RSEncoder) EncodeData(content []byte) ([][]byte, error) | ||
// decodes the input erasure encoded data shards data. | ||
func (r *RSEncoder) DecodeDataShards(content [][]byte) error { | ||
func (r *RSEncoder) DecodeDataShards(content [][]byte) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ReconstructData or RecoverData seems clearer. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. DecodeDataShards and DecodeShards correspond to EncodeShards. |
||
``` | ||
2. redundancy package support methods to encode/decode segments data using RSEncoder | ||
(2) The redundancy package supports methods to encode/decode segment data using RSEncoder | ||
``` | ||
// encode segment | ||
func EncodeRawSegment(content []byte, dataShards, parityShards int) ([][]byte, error) | ||
|
@@ -22,7 +22,7 @@ func EncodeRawSegment(content []byte, dataShards, parityShards int) ([][]byte, e | |
func DecodeRawSegment(pieceData [][]byte, segmentSize int64, dataShards, parityShards int) ([]byte, error) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 1. What's the difference between segmentSize and blockSize? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No difference. These two interfaces are used on the SP side, and since the gnfd side calls them segments, the naming is closer to the application layer. |
||
``` | ||
|
||
### Compute sha256 hash of file content | ||
### 2. Compute sha256 hash of file content | ||
|
||
The hash package supports methods to compute the hash roots of greenfield objects; the computation is based on | ||
the redundancy strategy of greenfield | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Strategy-> strategy? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated |
||
|
@@ -36,4 +36,4 @@ func ComputerHashFromFile(filePath string, segmentSize int64, dataShards, parity | |
``` | ||
|
||
|
||
### Generate checksum and integrity hash | ||
### 3. Generate checksum and integrity hash |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,22 +6,26 @@ import ( | |
"os" | ||
"sync" | ||
|
||
"github.com/rs/zerolog/log" | ||
|
||
"github.com/bnb-chain/greenfield-common/go/redundancy" | ||
"github.com/rs/zerolog/log" | ||
) | ||
|
||
// ComputerHash split the reader into segment, ec encode the data, compute the hash roots of pieces | ||
// return the hash result array list and data size | ||
func ComputerHash(reader io.Reader, segmentSize int64, dataShards, parityShards int) ([]string, int64, error) { | ||
func ComputerHash(reader io.Reader, segmentSize int64, dataShards, parityShards int) ([][]byte, int64, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CompluterHash's There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. changed to ComputeIntegrityHash |
||
var segChecksumList [][]byte | ||
var result []string | ||
encodeData := make([][][]byte, dataShards+parityShards) | ||
seg := make([]byte, segmentSize) | ||
var result [][]byte | ||
ecShards := dataShards + parityShards | ||
encodeData := make([][][]byte, ecShards) | ||
|
||
for i := 0; i < ecShards; i++ { | ||
encodeData[i] = make([][]byte, 0) | ||
} | ||
|
||
contentLen := int64(0) | ||
// read the data by segment size | ||
for { | ||
seg := make([]byte, segmentSize) | ||
n, err := reader.Read(seg) | ||
if err != nil { | ||
if err != io.EOF { | ||
|
@@ -32,51 +36,44 @@ func ComputerHash(reader io.Reader, segmentSize int64, dataShards, parityShards | |
} | ||
if n > 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe n != segmentSize and err == nil; in that case we need to retry reading from the io.Reader. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As the API description below says, n may be less than segmentSize (end-of-file condition), so if an error happens, we just return the error. In the end-of-file condition, n just needs to be less than segmentSize. Changed the condition to "if n > 0 && n <= int(segmentSize)". |
||
contentLen += int64(n) | ||
data := seg[:n] | ||
// compute segment hash | ||
segmentReader := bytes.NewReader(seg[:n]) | ||
if segmentReader != nil { | ||
checksum, err := CalcSHA256HashByte(segmentReader) | ||
if err != nil { | ||
log.Error().Msg("compute checksum failed:" + err.Error()) | ||
return nil, 0, err | ||
} | ||
segChecksumList = append(segChecksumList, checksum) | ||
} | ||
|
||
checksum := GenerateChecksum(data) | ||
segChecksumList = append(segChecksumList, checksum) | ||
// get erasure encode bytes | ||
encodeShards, err := redundancy.EncodeRawSegment(seg[:n], dataShards, parityShards) | ||
encodeShards, err := redundancy.EncodeRawSegment(data, dataShards, parityShards) | ||
if err != nil { | ||
return nil, 0, err | ||
} | ||
|
||
for index, shard := range encodeShards { | ||
encodeData[index] = append(encodeData[index], shard) | ||
} | ||
} | ||
} | ||
|
||
// combine the hash root of pieces of the PrimarySP | ||
segBytesTotal := bytes.Join(segChecksumList, []byte("")) | ||
segmentRootHash := CalcSHA256Hex(segBytesTotal) | ||
segmentRootHash := GenerateIntegrityHash(segChecksumList) | ||
result = append(result, segmentRootHash) | ||
|
||
// compute the hash root of pieces of the SecondarySP | ||
wg := &sync.WaitGroup{} | ||
spLen := len(encodeData) | ||
wg.Add(spLen) | ||
hashList := make([]string, spLen) | ||
hashList := make([][]byte, spLen) | ||
for spID, content := range encodeData { | ||
go func(data [][]byte, id int) { | ||
defer wg.Done() | ||
var checksumList [][]byte | ||
for _, pieces := range data { | ||
piecesHash := CalcSHA256(pieces) | ||
piecesHash := GenerateChecksum(pieces) | ||
checksumList = append(checksumList, piecesHash) | ||
} | ||
|
||
piecesBytesTotal := bytes.Join(checksumList, []byte("")) | ||
hashList[id] = CalcSHA256Hex(piecesBytesTotal) | ||
hashList[id] = GenerateIntegrityHash(checksumList) | ||
}(content, spID) | ||
} | ||
|
||
wg.Wait() | ||
|
||
for i := 0; i < spLen; i++ { | ||
|
@@ -86,7 +83,7 @@ func ComputerHash(reader io.Reader, segmentSize int64, dataShards, parityShards | |
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why result = append(result, hashList[i])? Why not return hashList directly? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, some adjustments have been made; the result variable is no longer needed. |
||
|
||
// ComputerHashFromFile open a local file and compute hash result | ||
func ComputerHashFromFile(filePath string, segmentSize int64, dataShards, parityShards int) ([]string, int64, error) { | ||
func ComputerHashFromFile(filePath string, segmentSize int64, dataShards, parityShards int) ([][]byte, int64, error) { | ||
f, err := os.Open(filePath) | ||
// If any error fail quickly here. | ||
if err != nil { | ||
|
@@ -97,3 +94,9 @@ func ComputerHashFromFile(filePath string, segmentSize int64, dataShards, parity | |
|
||
return ComputerHash(f, segmentSize, dataShards, parityShards) | ||
} | ||
|
||
// ComputerHashFromBuffer computes hashes for an in-memory byte buffer. | ||
// It wraps content in a bytes.Reader and returns the results of ComputerHash | ||
// called with that reader and the given segmentSize, dataShards and parityShards. | ||
func ComputerHashFromBuffer(content []byte, segmentSize int64, dataShards, parityShards int) ([][]byte, int64, error) { | ||
reader := bytes.NewReader(content) | ||
return ComputerHash(reader, segmentSize, dataShards, parityShards) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
package hash | ||
|
||
import ( | ||
"bytes" | ||
"encoding/base64" | ||
"fmt" | ||
"math/rand" | ||
"strings" | ||
|
@@ -33,8 +35,9 @@ func TestHash(t *testing.T) { | |
if len(hashResult) != redundancy.DataBlocks+redundancy.ParityBlocks+1 { | ||
t.Errorf("compute hash num not right") | ||
} | ||
|
||
for _, hash := range hashResult { | ||
if len(hash) != 64 { | ||
if len(hash) != 32 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Define constant. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated |
||
t.Errorf("hash length not right") | ||
} | ||
} | ||
|
@@ -44,12 +47,44 @@ func TestHash(t *testing.T) { | |
t.Errorf("compute hash num not right") | ||
} | ||
for _, hash := range hashResult { | ||
if len(hash) != 64 { | ||
if len(hash) != 32 { | ||
t.Errorf("hash length not right") | ||
} | ||
} | ||
} | ||
|
||
// TestHashResult builds an in-memory buffer of formatted rows, runs ComputerHash over it, | ||
// and compares the base64 (StdEncoding) form of each returned hash with a fixed expected list. | ||
func TestHashResult(t *testing.T) { | ||
var buffer bytes.Buffer | ||
line := `1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890` | ||
|
||
// write 1024*1024 rows; each row is "[index] " followed by the 98-character line above and a newline | ||
for i := 0; i < 1024*1024; i++ { | ||
buffer.WriteString(fmt.Sprintf("[%05d] %s\n", i, line)) | ||
} | ||
hashList, _, err := ComputerHash(bytes.NewReader(buffer.Bytes()), int64(segmentSize), redundancy.DataBlocks, redundancy.ParityBlocks) | ||
if err != nil { | ||
t.Errorf(err.Error()) | ||
} | ||
|
||
// expected hashes, base64-encoded; per the original note these were generated on the SP side | ||
expectedHashList := []string{ | ||
"6YA/kt2H0pS6+/tyR20LCqqeWmNCelS4wQcEUIhnAko=", | ||
"C00Wks+pfo6NBQkG8iRGN5M0EtTvUAwMyaQ8+RsG4rA=", | ||
"Z5AW9CvNIsDo9jtxeQysSpn2ayNml3Kr4ksm/2WUu8s=", | ||
"dMlsKDw2dGRUygEgkyHJvOHYn9jVtycpUb7zvIGvEEk=", | ||
"v7vNLlbIg+27zFAOYfT2UDkoAId53Z1gDkcTA7VWT5A=", | ||
"1b7QsyQ8QT+7UoMU7K1SRhKOfIylogIfrSFsKJUfi4U=", | ||
"/7A2gwAnaJ5jFuK6sbov6iFAkhfOga4wdAK/NlCuJBo=", | ||
} | ||
|
||
// NOTE(review): expectedHashList is indexed by hashList's index without first checking that the two lengths match | ||
for id, hash := range hashList { | ||
if base64.StdEncoding.EncodeToString(hash) != expectedHashList[id] { | ||
t.Errorf("compare hash error") | ||
} | ||
} | ||
|
||
} | ||
|
||
func createTestData(size int64) *strings.Reader { | ||
const letterBytes = "abcdefghijklmnopqrstuvwxyz" | ||
buf := make([]byte, size) | ||
|
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
1.int64(blocksize) -> blocksize
2.Explain the meaning of blocksize?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated