
Commit 6699924

Optimize key creation in hashing (#2899)
1 parent e4b82cf commit 6699924

File tree

2 files changed: +165, -6 lines


x/merkledb/view.go

Lines changed: 60 additions & 6 deletions
@@ -290,12 +290,66 @@ func (v *view) hashChangedNodes(ctx context.Context) {
 // Calculates the ID of all descendants of [n] which need to be recalculated,
 // and then calculates the ID of [n] itself.
 func (v *view) hashChangedNode(n *node) ids.ID {
-    // We use [wg] to wait until all descendants of [n] have been updated.
-    var wg sync.WaitGroup
+    // If there are no children, we can avoid allocating [keyBuffer].
+    if len(n.children) == 0 {
+        return n.calculateID(v.db.metrics)
+    }
+
+    // Calculate the size of the largest child key of this node. This allows
+    // only allocating a single slice for all of the keys.
+    var maxChildBitLength int
+    for _, childEntry := range n.children {
+        maxChildBitLength = max(maxChildBitLength, childEntry.compressedKey.length)
+    }
+
+    var (
+        maxBytesNeeded = bytesNeeded(n.key.length + v.tokenSize + maxChildBitLength)
+        // keyBuffer is allocated onto the heap because it is dynamically sized.
+        keyBuffer = make([]byte, maxBytesNeeded)
+        // childBuffer is allocated on the stack.
+        childBuffer = make([]byte, 1)
+        dualIndex   = dualBitIndex(v.tokenSize)
+        bytesForKey = bytesNeeded(n.key.length)
+        // We track the last byte of [n.key] so that we can reset the value for
+        // each key. This is needed because the child buffer may get ORed at
+        // this byte.
+        lastKeyByte byte
+
+        // We use [wg] to wait until all descendants of [n] have been updated.
+        wg sync.WaitGroup
+    )
+
+    if bytesForKey > 0 {
+        // We only need to copy this node's key once because it does not change
+        // as we iterate over the children.
+        copy(keyBuffer, n.key.value)
+        lastKeyByte = keyBuffer[bytesForKey-1]
+    }
 
     for childIndex, childEntry := range n.children {
-        childEntry := childEntry // New variable so goroutine doesn't capture loop variable.
-        childKey := n.key.Extend(ToToken(childIndex, v.tokenSize), childEntry.compressedKey)
+        childBuffer[0] = childIndex << dualIndex
+        childIndexAsKey := Key{
+            // It is safe to use byteSliceToString because [childBuffer] is not
+            // modified while [childIndexAsKey] is in use.
+            value:  byteSliceToString(childBuffer),
+            length: v.tokenSize,
+        }
+
+        totalBitLength := n.key.length + v.tokenSize + childEntry.compressedKey.length
+        buffer := keyBuffer[:bytesNeeded(totalBitLength)]
+        // Make sure the last byte of the key is originally set correctly
+        if bytesForKey > 0 {
+            buffer[bytesForKey-1] = lastKeyByte
+        }
+        extendIntoBuffer(buffer, childIndexAsKey, n.key.length)
+        extendIntoBuffer(buffer, childEntry.compressedKey, n.key.length+v.tokenSize)
+        childKey := Key{
+            // It is safe to use byteSliceToString because [buffer] is not
+            // modified while [childKey] is in use.
+            value:  byteSliceToString(buffer),
+            length: totalBitLength,
+        }
+
         childNodeChange, ok := v.changes.nodes[childKey]
         if !ok {
             // This child wasn't changed.
@@ -306,11 +360,11 @@ func (v *view) hashChangedNode(n *node) ids.ID {
         // Try updating the child and its descendants in a goroutine.
         if ok := v.db.hashNodesSema.TryAcquire(1); ok {
             wg.Add(1)
-            go func() {
+            go func(childEntry *child) {
                 childEntry.id = v.hashChangedNode(childNodeChange.after)
                 v.db.hashNodesSema.Release(1)
                 wg.Done()
-            }()
+            }(childEntry)
         } else {
             // We're at the goroutine limit; do the work in this goroutine.
             childEntry.id = v.hashChangedNode(childNodeChange.after)
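The core of this change is that each child key is now assembled in one preallocated buffer instead of calling Key.Extend, which allocates a new slice per child. Below is a minimal, byte-aligned sketch of that pattern. The function and names in it (buildAndHashChildKeys, maxChildLen) are hypothetical and not part of merkledb; the real code additionally works at bit granularity via extendIntoBuffer, dualBitIndex, and byteSliceToString, and must restore the last byte of the parent prefix between iterations.

package main

import (
    "crypto/sha256"
    "fmt"
)

// buildAndHashChildKeys assembles every child key in a single reusable buffer
// and consumes (hashes) each key before the next iteration overwrites it.
func buildAndHashChildKeys(parent []byte, children [][]byte) [][32]byte {
    // Size the buffer for the longest possible child key so one allocation
    // covers every iteration.
    maxChildLen := 0
    for _, c := range children {
        if len(c) > maxChildLen {
            maxChildLen = len(c)
        }
    }
    keyBuffer := make([]byte, len(parent)+maxChildLen)

    // The parent prefix never changes, so copy it exactly once.
    copy(keyBuffer, parent)

    hashes := make([][32]byte, 0, len(children))
    for _, c := range children {
        // Re-slice the shared buffer to this child key's exact length and
        // overwrite only the suffix; no per-child allocation happens.
        key := keyBuffer[:len(parent)+len(c)]
        copy(key[len(parent):], c)

        // The key must be used before the buffer is reused; here it is
        // hashed immediately.
        hashes = append(hashes, sha256.Sum256(key))
    }
    return hashes
}

func main() {
    parent := []byte("node/")
    children := [][]byte{[]byte("a"), []byte("bb"), []byte("ccc")}
    for i, h := range buildAndHashChildKeys(parent, children) {
        fmt.Printf("child %d: %x\n", i, h[:4])
    }
}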

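The second hunk also changes how each goroutine receives its child entry: the older childEntry := childEntry re-declaration (which guarded against the pre-Go 1.22 shared loop variable) is replaced by passing the entry as a goroutine argument. A generic sketch of that idiom, with hypothetical names (items, item):

package main

import (
    "fmt"
    "sync"
)

func main() {
    items := []string{"a", "b", "c"}

    var wg sync.WaitGroup
    for _, item := range items {
        wg.Add(1)
        // Passing item as a parameter gives each goroutine its own copy, so
        // no goroutine can observe a later value of the loop variable. This
        // is the same shape as the go func(childEntry *child) change above.
        go func(item string) {
            defer wg.Done()
            fmt.Println("processing", item)
        }(item)
    }
    wg.Wait()
}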
x/merkledb/view_test.go

Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
+// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+package merkledb
+
+import (
+    "context"
+    "encoding/binary"
+    "testing"
+
+    "github.com/stretchr/testify/require"
+
+    "github.com/ava-labs/avalanchego/database"
+    "github.com/ava-labs/avalanchego/database/memdb"
+    "github.com/ava-labs/avalanchego/utils/hashing"
+)
+
+var hashChangedNodesTests = []struct {
+    name             string
+    numKeys          uint64
+    expectedRootHash string
+}{
+    {
+        name:             "1",
+        numKeys:          1,
+        expectedRootHash: "2A4DRkSWbTvSxgA1UMGp1Mpt1yzMFaeMMiDnrijVGJXPcRYiD4",
+    },
+    {
+        name:             "10",
+        numKeys:          10,
+        expectedRootHash: "2PGy7QvbYwVwn5QmLgj4KBgV2BisanZE8Nue2SxK9ffybb4mAn",
+    },
+    {
+        name:             "100",
+        numKeys:          100,
+        expectedRootHash: "LCeS4DWh6TpNKWH4ke9a2piSiwwLbmxGUj8XuaWx1XDGeCMAv",
+    },
+    {
+        name:             "1000",
+        numKeys:          1000,
+        expectedRootHash: "2S6f84wdRHmnx51mj35DF2owzf8wio5pzNJXfEWfFYFNxUB64T",
+    },
+    {
+        name:             "10000",
+        numKeys:          10000,
+        expectedRootHash: "wF6UnhaDoA9fAqiXAcx27xCYBK2aspDBEXkicmC7rs8EzLCD8",
+    },
+    {
+        name:             "100000",
+        numKeys:          100000,
+        expectedRootHash: "2Dy3RWZeNDUnUvzXpruB5xdp1V7xxb14M53ywdZVACDkdM66M1",
+    },
+}
+
+func makeViewForHashChangedNodes(t require.TestingT, numKeys uint64, parallelism uint) *view {
+    config := newDefaultConfig()
+    config.RootGenConcurrency = parallelism
+    db, err := newDatabase(
+        context.Background(),
+        memdb.New(),
+        config,
+        &mockMetrics{},
+    )
+    require.NoError(t, err)
+
+    ops := make([]database.BatchOp, 0, numKeys)
+    for i := uint64(0); i < numKeys; i++ {
+        k := binary.AppendUvarint(nil, i)
+        ops = append(ops, database.BatchOp{
+            Key:   k,
+            Value: hashing.ComputeHash256(k),
+        })
+    }
+
+    ctx := context.Background()
+    viewIntf, err := db.NewView(ctx, ViewChanges{BatchOps: ops})
+    require.NoError(t, err)
+
+    view := viewIntf.(*view)
+    require.NoError(t, view.calculateNodeChanges(ctx))
+    return view
+}
+
+func Test_HashChangedNodes(t *testing.T) {
+    for _, test := range hashChangedNodesTests {
+        t.Run(test.name, func(t *testing.T) {
+            view := makeViewForHashChangedNodes(t, test.numKeys, 16)
+            ctx := context.Background()
+            view.hashChangedNodes(ctx)
+            require.Equal(t, test.expectedRootHash, view.changes.rootID.String())
+        })
+    }
+}
+
+func Benchmark_HashChangedNodes(b *testing.B) {
+    for _, test := range hashChangedNodesTests {
+        view := makeViewForHashChangedNodes(b, test.numKeys, 1)
+        ctx := context.Background()
+        b.Run(test.name, func(b *testing.B) {
+            for i := 0; i < b.N; i++ {
+                view.hashChangedNodes(ctx)
+            }
+        })
+    }
+}
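The new tests are table-driven, pinning the root hash for several key counts, and the benchmark reuses the same cases with a single hashing goroutine (parallelism 1) so it measures the key-construction path rather than scheduling. Assuming a checkout of the repository, they can be exercised with standard Go tooling, for example:

go test -run Test_HashChangedNodes ./x/merkledb
go test -run '^$' -bench Benchmark_HashChangedNodes -benchmem ./x/merkledb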
