Skip to content

Commit

Permalink
add words count in .seg (breaking change in snapshot format) (#3140)
Browse files Browse the repository at this point in the history
  • Loading branch information
AskAlexSharov authored Dec 21, 2021
1 parent 502e933 commit 82753a6
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 11 deletions.
22 changes: 14 additions & 8 deletions cmd/hack/hack.go
Original file line number Diff line number Diff line change
Expand Up @@ -2171,26 +2171,27 @@ func reducedict(name string, segmentFileName string) error {
wg.Add(1)
go reduceDictWorker(ch, &wg, &pt, collector, inputSize, outputSize, posMap)
}
i := 0
var wordsCount uint64
if err := snapshotsync.ReadSimpleFile(name+".dat", func(v []byte) error {
input := make([]byte, 8+int(len(v)))
binary.BigEndian.PutUint64(input, uint64(i))
binary.BigEndian.PutUint64(input, wordsCount)
copy(input[8:], v)
ch <- input
i++
wordsCount++
select {
default:
case <-logEvery.C:
var m runtime.MemStats
runtime.ReadMemStats(&m)
log.Info("Replacement preprocessing", "processed", fmt.Sprintf("%dK", i/1_000), "input", common.StorageSize(inputSize.Load()), "output", common.StorageSize(outputSize.Load()), "alloc", common.StorageSize(m.Alloc), "sys", common.StorageSize(m.Sys))
log.Info("Replacement preprocessing", "processed", fmt.Sprintf("%dK", wordsCount/1_000), "input", common.StorageSize(inputSize.Load()), "output", common.StorageSize(outputSize.Load()), "alloc", common.StorageSize(m.Alloc), "sys", common.StorageSize(m.Sys))
}
return nil
}); err != nil {
return err
}
close(ch)
wg.Wait()

var m runtime.MemStats
runtime.ReadMemStats(&m)
log.Info("Done", "input", common.StorageSize(inputSize.Load()), "output", common.StorageSize(outputSize.Load()), "alloc", common.StorageSize(m.Alloc), "sys", common.StorageSize(m.Sys))
Expand Down Expand Up @@ -2223,7 +2224,7 @@ func reducedict(name string, segmentFileName string) error {
offset += uint64(n + len(p.w))
}
patternCutoff := offset // All offsets below this will be considered patterns
i = 0
i := 0
log.Info("Effective dictionary", "size", patternList.Len())
// Build Huffman tree for codes
var codeHeap PatternHeap
Expand Down Expand Up @@ -2284,17 +2285,22 @@ func reducedict(name string, segmentFileName string) error {
return err
}
cw := bufio.NewWriterSize(cf, etl.BufIOSize)
// First, output dictionary
// 1-st, output words count
binary.BigEndian.PutUint64(numBuf, wordsCount) // Words count
if _, err = cw.Write(numBuf[:8]); err != nil {
return err
}
// 2-nd, output dictionary size
binary.BigEndian.PutUint64(numBuf, offset) // Dictionary size
if _, err = cw.Write(numBuf[:8]); err != nil {
return err
}
// Secondly, output directory root
// 3-rd, output directory root
binary.BigEndian.PutUint64(numBuf, root.offset)
if _, err = cw.Write(numBuf[:8]); err != nil {
return err
}
// Thirdly, output pattern cutoff offset
// 4-th, output pattern cutoff offset
binary.BigEndian.PutUint64(numBuf, patternCutoff)
if _, err = cw.Write(numBuf[:8]); err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ require (
github.com/json-iterator/go v1.1.12
github.com/julienschmidt/httprouter v1.3.0
github.com/kevinburke/go-bindata v3.21.0+incompatible
github.com/ledgerwatch/erigon-lib v0.0.0-20211217093546-8d06531e4ed3
github.com/ledgerwatch/erigon-lib v0.0.0-20211221034520-583c3f9b5b6f
github.com/ledgerwatch/log/v3 v3.4.0
github.com/ledgerwatch/secp256k1 v1.0.0
github.com/logrusorgru/aurora/v3 v3.0.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -617,8 +617,8 @@ github.com/kylelemons/godebug v0.0.0-20170224010052-a616ab194758 h1:0D5M2HQSGD3P
github.com/kylelemons/godebug v0.0.0-20170224010052-a616ab194758/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c=
github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8=
github.com/ledgerwatch/erigon-lib v0.0.0-20211217093546-8d06531e4ed3 h1:8qDZvisP+6pFiVFd20BTD2y8/rYAe4go//HdBnk6CX8=
github.com/ledgerwatch/erigon-lib v0.0.0-20211217093546-8d06531e4ed3/go.mod h1:lyGP3i0x4CeabdKZ4beycD5xZfHWZwJsAX+70OfGj4Y=
github.com/ledgerwatch/erigon-lib v0.0.0-20211221034520-583c3f9b5b6f h1:MCIljelbCsLcgMzNTsrRg2Nu5DFyNlLxf5ZSWdy3CiM=
github.com/ledgerwatch/erigon-lib v0.0.0-20211221034520-583c3f9b5b6f/go.mod h1:lyGP3i0x4CeabdKZ4beycD5xZfHWZwJsAX+70OfGj4Y=
github.com/ledgerwatch/log/v3 v3.4.0 h1:SEIOcv5a2zkG3PmoT5jeTU9m/0nEUv0BJS5bzsjwKCI=
github.com/ledgerwatch/log/v3 v3.4.0/go.mod h1:VXcz6Ssn6XEeU92dCMc39/g1F0OYAjw1Mt+dGP5DjXY=
github.com/ledgerwatch/secp256k1 v1.0.0 h1:Usvz87YoTG0uePIV8woOof5cQnLXGYa162rFf3YnwaQ=
Expand Down

0 comments on commit 82753a6

Please sign in to comment.