Skip to content

Commit

Permalink
fix: fix testcase and improve function
Browse files Browse the repository at this point in the history
  • Loading branch information
flywukong committed Jul 17, 2023
1 parent e2b1a98 commit 5aa27c4
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 9 deletions.
31 changes: 23 additions & 8 deletions go/hash/hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ import (
"github.com/bnb-chain/greenfield-common/go/redundancy"
)

const maxThreadNum = 5
const (
maxThreadNum = 5
jobChannelSize = 100
)

// IntegrityHasher compute integrityHash
type IntegrityHasher struct {
Expand Down Expand Up @@ -150,9 +153,21 @@ func (i *IntegrityHasher) computeBufferHash() error {
return nil
}

// ComputeIntegrityHash split the reader into segment, ec encode the data, compute the hash roots of pieces
// return the hash result array list and data segmentSize
func ComputeIntegrityHash(reader io.Reader, segmentSize int64, dataShards, parityShards int) ([][]byte, int64,
// ComputeIntegrityHash return the integrity hash of file and data size
// If isSerial is true, compute the integrity hash using the serial version
// If isSerial is false or not provided, compute the integrity hash using the parallel version
func ComputeIntegrityHash(reader io.Reader, segmentSize int64, dataShards, parityShards int, isSerial bool) ([][]byte, int64,
storageTypes.RedundancyType, error,
) {
if isSerial {
return ComputeIntegrityHashSerial(reader, segmentSize, dataShards, parityShards)
}
return ComputeIntegrityHashParallel(reader, segmentSize, parityShards, dataShards)
}

// ComputeIntegrityHashSerial split the reader into segment, ec encode the data, compute the hash roots of pieces in a serial way
// return the hash result array list and data size
func ComputeIntegrityHashSerial(reader io.Reader, segmentSize int64, dataShards, parityShards int) ([][]byte, int64,
storageTypes.RedundancyType, error,
) {
var segChecksumList [][]byte
Expand Down Expand Up @@ -234,13 +249,13 @@ func ComputerHashFromFile(filePath string, segmentSize int64, dataShards, parity
}
defer f.Close()

return ComputeIntegrityHash(f, segmentSize, dataShards, parityShards)
return ComputeIntegrityHash(f, segmentSize, dataShards, parityShards, false)
}

// ComputerHashFromBuffer support computing hash and segmentSize from byte buffer
func ComputerHashFromBuffer(content []byte, segmentSize int64, dataShards, parityShards int) ([][]byte, int64, storageTypes.RedundancyType, error) {
reader := bytes.NewReader(content)
return ComputeIntegrityHash(reader, segmentSize, dataShards, parityShards)
return ComputeIntegrityHash(reader, segmentSize, dataShards, parityShards, false)
}

// computePieceHashes encode the segment and return the hashes of ec pieces
Expand Down Expand Up @@ -290,14 +305,14 @@ func ComputeIntegrityHashParallel(reader io.Reader, segmentSize int64, dataShard
contentLen = int64(0)
wg sync.WaitGroup
)

// use sync.map to store the corresponding data of intermediate hash results and segment IDs
segHashMap := &sync.Map{}
pieceHashMap := &sync.Map{}
encodeDataHash := make([][][]byte, ecShards)
// store the result of integrity hash
hashList := make([][]byte, ecShards+1)

jobChan := make(chan SegmentInfo, 100)
jobChan := make(chan SegmentInfo, jobChannelSize)
errChan := make(chan error, 1)
// the thread num should be less than maxThreadNum
threadNum := runtime.NumCPU() / 2
Expand Down
9 changes: 8 additions & 1 deletion go/hash/hash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,15 @@ func TestParallelHashResult(t *testing.T) {
}
}

// TestCompareHashResult compare serial and parallel version function hash results with different file size,
// it is expected that the hash result are same with different version.
func TestCompareHashResult(t *testing.T) {
var buffer bytes.Buffer
line := `1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,123456789012`
// test file less than 16M
for i := 0; i < 1024*100; i++ {
buffer.WriteString(fmt.Sprintf("[%05d] %s\n", i, line))
}

compareHashResult(buffer, t)

buffer.Reset()
Expand All @@ -147,6 +148,12 @@ func TestCompareHashResult(t *testing.T) {
buffer.WriteString(fmt.Sprintf("[%05d] %s\n", i, line))
}
compareHashResult(buffer, t)

// test file 1G
for i := 0; i < 1024*1024*10; i++ {
buffer.WriteString(fmt.Sprintf("[%05d] %s\n", i, line))
}
compareHashResult(buffer, t)
}

// compareHashResult compare serial and parallel version function hash results
Expand Down

0 comments on commit 5aa27c4

Please sign in to comment.