Skip to content

Commit 4b783c0

Browse files
authored
trie: improve the node iterator seek operation (#22470)
This change improves the efficiency of the nodeIterator seek operation. Previously, seek essentially ran the iterator forward until it found the matching node. With this change, it skips over fullnode children and avoids resolving them from the database.
1 parent 3e68d62 commit 4b783c0

File tree

2 files changed

+202
-24
lines changed

2 files changed

+202
-24
lines changed

trie/iterator.go

+121-24
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ func (it *nodeIterator) seek(prefix []byte) error {
243243
key = key[:len(key)-1]
244244
// Move forward until we're just before the closest match to key.
245245
for {
246-
state, parentIndex, path, err := it.peek(bytes.HasPrefix(key, it.path))
246+
state, parentIndex, path, err := it.peekSeek(key)
247247
if err == errIteratorEnd {
248248
return errIteratorEnd
249249
} else if err != nil {
@@ -255,16 +255,21 @@ func (it *nodeIterator) seek(prefix []byte) error {
255255
}
256256
}
257257

258+
// init initializes the the iterator.
259+
func (it *nodeIterator) init() (*nodeIteratorState, error) {
260+
root := it.trie.Hash()
261+
state := &nodeIteratorState{node: it.trie.root, index: -1}
262+
if root != emptyRoot {
263+
state.hash = root
264+
}
265+
return state, state.resolve(it.trie, nil)
266+
}
267+
258268
// peek creates the next state of the iterator.
259269
func (it *nodeIterator) peek(descend bool) (*nodeIteratorState, *int, []byte, error) {
270+
// Initialize the iterator if we've just started.
260271
if len(it.stack) == 0 {
261-
// Initialize the iterator if we've just started.
262-
root := it.trie.Hash()
263-
state := &nodeIteratorState{node: it.trie.root, index: -1}
264-
if root != emptyRoot {
265-
state.hash = root
266-
}
267-
err := state.resolve(it.trie, nil)
272+
state, err := it.init()
268273
return state, nil, nil, err
269274
}
270275
if !descend {
@@ -292,6 +297,39 @@ func (it *nodeIterator) peek(descend bool) (*nodeIteratorState, *int, []byte, er
292297
return nil, nil, nil, errIteratorEnd
293298
}
294299

300+
// peekSeek is like peek, but it also tries to skip resolving hashes by skipping
301+
// over the siblings that do not lead towards the desired seek position.
302+
func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []byte, error) {
303+
// Initialize the iterator if we've just started.
304+
if len(it.stack) == 0 {
305+
state, err := it.init()
306+
return state, nil, nil, err
307+
}
308+
if !bytes.HasPrefix(seekKey, it.path) {
309+
// If we're skipping children, pop the current node first
310+
it.pop()
311+
}
312+
313+
// Continue iteration to the next child
314+
for len(it.stack) > 0 {
315+
parent := it.stack[len(it.stack)-1]
316+
ancestor := parent.hash
317+
if (ancestor == common.Hash{}) {
318+
ancestor = parent.parent
319+
}
320+
state, path, ok := it.nextChildAt(parent, ancestor, seekKey)
321+
if ok {
322+
if err := state.resolve(it.trie, path); err != nil {
323+
return parent, &parent.index, path, err
324+
}
325+
return state, &parent.index, path, nil
326+
}
327+
// No more child nodes, move back up.
328+
it.pop()
329+
}
330+
return nil, nil, nil, errIteratorEnd
331+
}
332+
295333
func (st *nodeIteratorState) resolve(tr *Trie, path []byte) error {
296334
if hash, ok := st.node.(hashNode); ok {
297335
resolved, err := tr.resolveHash(hash, path)
@@ -304,25 +342,38 @@ func (st *nodeIteratorState) resolve(tr *Trie, path []byte) error {
304342
return nil
305343
}
306344

345+
func findChild(n *fullNode, index int, path []byte, ancestor common.Hash) (node, *nodeIteratorState, []byte, int) {
346+
var (
347+
child node
348+
state *nodeIteratorState
349+
childPath []byte
350+
)
351+
for ; index < len(n.Children); index++ {
352+
if n.Children[index] != nil {
353+
child = n.Children[index]
354+
hash, _ := child.cache()
355+
state = &nodeIteratorState{
356+
hash: common.BytesToHash(hash),
357+
node: child,
358+
parent: ancestor,
359+
index: -1,
360+
pathlen: len(path),
361+
}
362+
childPath = append(childPath, path...)
363+
childPath = append(childPath, byte(index))
364+
return child, state, childPath, index
365+
}
366+
}
367+
return nil, nil, nil, 0
368+
}
369+
307370
func (it *nodeIterator) nextChild(parent *nodeIteratorState, ancestor common.Hash) (*nodeIteratorState, []byte, bool) {
308371
switch node := parent.node.(type) {
309372
case *fullNode:
310-
// Full node, move to the first non-nil child.
311-
for i := parent.index + 1; i < len(node.Children); i++ {
312-
child := node.Children[i]
313-
if child != nil {
314-
hash, _ := child.cache()
315-
state := &nodeIteratorState{
316-
hash: common.BytesToHash(hash),
317-
node: child,
318-
parent: ancestor,
319-
index: -1,
320-
pathlen: len(it.path),
321-
}
322-
path := append(it.path, byte(i))
323-
parent.index = i - 1
324-
return state, path, true
325-
}
373+
//Full node, move to the first non-nil child.
374+
if child, state, path, index := findChild(node, parent.index+1, it.path, ancestor); child != nil {
375+
parent.index = index - 1
376+
return state, path, true
326377
}
327378
case *shortNode:
328379
// Short node, return the pointer singleton child
@@ -342,6 +393,52 @@ func (it *nodeIterator) nextChild(parent *nodeIteratorState, ancestor common.Has
342393
return parent, it.path, false
343394
}
344395

396+
// nextChildAt is similar to nextChild, except that it targets a child as close to the
397+
// target key as possible, thus skipping siblings.
398+
func (it *nodeIterator) nextChildAt(parent *nodeIteratorState, ancestor common.Hash, key []byte) (*nodeIteratorState, []byte, bool) {
399+
switch n := parent.node.(type) {
400+
case *fullNode:
401+
// Full node, move to the first non-nil child before the desired key position
402+
child, state, path, index := findChild(n, parent.index+1, it.path, ancestor)
403+
if child == nil {
404+
// No more children in this fullnode
405+
return parent, it.path, false
406+
}
407+
// If the child we found is already past the seek position, just return it.
408+
if bytes.Compare(path, key) >= 0 {
409+
parent.index = index - 1
410+
return state, path, true
411+
}
412+
// The child is before the seek position. Try advancing
413+
for {
414+
nextChild, nextState, nextPath, nextIndex := findChild(n, index+1, it.path, ancestor)
415+
// If we run out of children, or skipped past the target, return the
416+
// previous one
417+
if nextChild == nil || bytes.Compare(nextPath, key) >= 0 {
418+
parent.index = index - 1
419+
return state, path, true
420+
}
421+
// We found a better child closer to the target
422+
state, path, index = nextState, nextPath, nextIndex
423+
}
424+
case *shortNode:
425+
// Short node, return the pointer singleton child
426+
if parent.index < 0 {
427+
hash, _ := n.Val.cache()
428+
state := &nodeIteratorState{
429+
hash: common.BytesToHash(hash),
430+
node: n.Val,
431+
parent: ancestor,
432+
index: -1,
433+
pathlen: len(it.path),
434+
}
435+
path := append(it.path, n.Key...)
436+
return state, path, true
437+
}
438+
}
439+
return parent, it.path, false
440+
}
441+
345442
func (it *nodeIterator) push(state *nodeIteratorState, parentIndex *int, path []byte) {
346443
it.path = path
347444
it.stack = append(it.stack, state)

trie/iterator_test.go

+81
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,14 @@ package trie
1818

1919
import (
2020
"bytes"
21+
"encoding/binary"
2122
"fmt"
2223
"math/rand"
2324
"testing"
2425

2526
"github.com/ethereum/go-ethereum/common"
27+
"github.com/ethereum/go-ethereum/crypto"
28+
"github.com/ethereum/go-ethereum/ethdb"
2629
"github.com/ethereum/go-ethereum/ethdb/memorydb"
2730
)
2831

@@ -440,3 +443,81 @@ func checkIteratorNoDups(t *testing.T, it NodeIterator, seen map[string]bool) in
440443
}
441444
return len(seen)
442445
}
446+
447+
type loggingDb struct {
448+
getCount uint64
449+
backend ethdb.KeyValueStore
450+
}
451+
452+
func (l *loggingDb) Has(key []byte) (bool, error) {
453+
return l.backend.Has(key)
454+
}
455+
456+
func (l *loggingDb) Get(key []byte) ([]byte, error) {
457+
l.getCount++
458+
return l.backend.Get(key)
459+
}
460+
461+
func (l *loggingDb) Put(key []byte, value []byte) error {
462+
return l.backend.Put(key, value)
463+
}
464+
465+
func (l *loggingDb) Delete(key []byte) error {
466+
return l.backend.Delete(key)
467+
}
468+
469+
func (l *loggingDb) NewBatch() ethdb.Batch {
470+
return l.backend.NewBatch()
471+
}
472+
473+
func (l *loggingDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator {
474+
fmt.Printf("NewIterator\n")
475+
return l.backend.NewIterator(prefix, start)
476+
}
477+
func (l *loggingDb) Stat(property string) (string, error) {
478+
return l.backend.Stat(property)
479+
}
480+
481+
func (l *loggingDb) Compact(start []byte, limit []byte) error {
482+
return l.backend.Compact(start, limit)
483+
}
484+
485+
func (l *loggingDb) Close() error {
486+
return l.backend.Close()
487+
}
488+
489+
// makeLargeTestTrie create a sample test trie
490+
func makeLargeTestTrie() (*Database, *SecureTrie, *loggingDb) {
491+
// Create an empty trie
492+
logDb := &loggingDb{0, memorydb.New()}
493+
triedb := NewDatabase(logDb)
494+
trie, _ := NewSecure(common.Hash{}, triedb)
495+
496+
// Fill it with some arbitrary data
497+
for i := 0; i < 10000; i++ {
498+
key := make([]byte, 32)
499+
val := make([]byte, 32)
500+
binary.BigEndian.PutUint64(key, uint64(i))
501+
binary.BigEndian.PutUint64(val, uint64(i))
502+
key = crypto.Keccak256(key)
503+
val = crypto.Keccak256(val)
504+
trie.Update(key, val)
505+
}
506+
trie.Commit(nil)
507+
// Return the generated trie
508+
return triedb, trie, logDb
509+
}
510+
511+
// Tests that the node iterator indeed walks over the entire database contents.
512+
func TestNodeIteratorLargeTrie(t *testing.T) {
513+
// Create some arbitrary test trie to iterate
514+
db, trie, logDb := makeLargeTestTrie()
515+
db.Cap(0) // flush everything
516+
// Do a seek operation
517+
trie.NodeIterator(common.FromHex("0x77667766776677766778855885885885"))
518+
// master: 24 get operations
519+
// this pr: 5 get operations
520+
if have, want := logDb.getCount, uint64(5); have != want {
521+
t.Fatalf("Too many lookups during seek, have %d want %d", have, want)
522+
}
523+
}

0 commit comments

Comments
 (0)