Skip to content

Commit 5a9c964

Browse files
authored
trie: separate hashes and committer, collapse on commit
* trie: make db insert use size instead of full data * core/state: minor optimization in state onleaf allocation * trie: implement dedicated committer and hasher * trie: use dedicated committer/hasher * trie: linter nitpicks * core/state, trie: avoid unnecessary storage trie load+commit * trie: review feedback, mainly docs + minor changes * trie: start deprecating old hasher * trie: fix misspell+lint * trie: deprecate hasher.go, make proof framework use new hasher * trie: rename pure_committer/hasher to committer/hasher * trie, core/state: fix review concerns * trie: more review concerns * trie: make commit collapse into hashnode, don't touch dirtyness * trie: goimports fixes * trie: remove panics
1 parent 4cc89a5 commit 5a9c964

File tree

9 files changed

+457
-156
lines changed

9 files changed

+457
-156
lines changed

core/state/state_object.go

+12-4
Original file line numberDiff line numberDiff line change
@@ -272,10 +272,13 @@ func (s *stateObject) finalise() {
272272
}
273273

274274
// updateTrie writes cached storage modifications into the object's storage trie.
275+
// It will return nil if the trie has not been loaded and no changes have been made
275276
func (s *stateObject) updateTrie(db Database) Trie {
276277
// Make sure all dirty slots are finalized into the pending storage area
277278
s.finalise()
278-
279+
if len(s.pendingStorage) == 0 {
280+
return s.trie
281+
}
279282
// Track the amount of time wasted on updating the storage trie
280283
if metrics.EnabledExpensive {
281284
defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now())
@@ -305,8 +308,10 @@ func (s *stateObject) updateTrie(db Database) Trie {
305308

306309
// updateRoot sets the trie root to the current root hash of the storage trie.
307310
func (s *stateObject) updateRoot(db Database) {
308-
s.updateTrie(db)
309-
311+
// If nothing changed, don't bother with hashing anything
312+
if s.updateTrie(db) == nil {
313+
return
314+
}
310315
// Track the amount of time wasted on hashing the storage trie
311316
if metrics.EnabledExpensive {
312317
defer func(start time.Time) { s.db.StorageHashes += time.Since(start) }(time.Now())
@@ -317,7 +322,10 @@ func (s *stateObject) updateRoot(db Database) {
317322
// CommitTrie commits the storage trie of the object to db.
318323
// This updates the trie root.
319324
func (s *stateObject) CommitTrie(db Database) error {
320-
s.updateTrie(db)
325+
// If nothing changed, don't bother with committing anything
326+
if s.updateTrie(db) == nil {
327+
return nil
328+
}
321329
if s.dbErr != nil {
322330
return s.dbErr
323331
}

core/state/statedb.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,8 @@ func (s *StateDB) StorageTrie(addr common.Address) Trie {
330330
return nil
331331
}
332332
cpy := stateObject.deepCopy(s)
333-
return cpy.updateTrie(s.db)
333+
cpy.updateTrie(s.db)
334+
return cpy.getTrie(s.db)
334335
}
335336

336337
func (s *StateDB) HasSuicided(addr common.Address) bool {
@@ -750,8 +751,10 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) {
750751
if metrics.EnabledExpensive {
751752
defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now())
752753
}
754+
// The onleaf func is called _serially_, so we can reuse the same account
755+
// for unmarshalling every time.
756+
var account Account
753757
return s.trie.Commit(func(leaf []byte, parent common.Hash) error {
754-
var account Account
755758
if err := rlp.DecodeBytes(leaf, &account); err != nil {
756759
return nil
757760
}

trie/committer.go

+279
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
// Copyright 2019 The go-ethereum Authors
2+
// This file is part of the go-ethereum library.
3+
//
4+
// The go-ethereum library is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU Lesser General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
//
9+
// The go-ethereum library is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU Lesser General Public License for more details.
13+
//
14+
// You should have received a copy of the GNU Lesser General Public License
15+
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16+
17+
package trie
18+
19+
import (
20+
"errors"
21+
"fmt"
22+
"sync"
23+
24+
"github.com/ethereum/go-ethereum/common"
25+
"github.com/ethereum/go-ethereum/rlp"
26+
"golang.org/x/crypto/sha3"
27+
)
28+
29+
// leafChanSize is the size of the leafCh. It's a pretty arbitrary number, to allow
30+
// some parallelism but not incur too much memory overhead.
31+
const leafChanSize = 200
32+
33+
// leaf represents a trie leaf value
34+
type leaf struct {
35+
size int // size of the rlp data (estimate)
36+
hash common.Hash // hash of rlp data
37+
node node // the node to commit
38+
vnodes bool // set to true if the node (possibly) contains a valueNode
39+
}
40+
41+
// committer is a type used for the trie Commit operation. A committer has some
42+
// internal preallocated temp space, and also a callback that is invoked when
43+
// leaves are committed. The leaves are passed through the `leafCh`, to allow
44+
// some level of parallelism.
45+
// By 'some level' of parallelism, it's still the case that all leaves will be
46+
// processed sequentially - onleaf will never be called in parallel or out of order.
47+
type committer struct {
48+
tmp sliceBuffer
49+
sha keccakState
50+
51+
onleaf LeafCallback
52+
leafCh chan *leaf
53+
}
54+
55+
// committers live in a global sync.Pool
56+
var committerPool = sync.Pool{
57+
New: func() interface{} {
58+
return &committer{
59+
tmp: make(sliceBuffer, 0, 550), // cap is as large as a full fullNode.
60+
sha: sha3.NewLegacyKeccak256().(keccakState),
61+
}
62+
},
63+
}
64+
65+
// newCommitter creates a new committer or picks one from the pool.
66+
func newCommitter() *committer {
67+
return committerPool.Get().(*committer)
68+
}
69+
70+
func returnCommitterToPool(h *committer) {
71+
h.onleaf = nil
72+
h.leafCh = nil
73+
committerPool.Put(h)
74+
}
75+
76+
// commitNeeded returns 'false' if the given node is already in sync with db
77+
func (c *committer) commitNeeded(n node) bool {
78+
hash, dirty := n.cache()
79+
return hash == nil || dirty
80+
}
81+
82+
// Commit collapses a node down into a hash node and inserts it into the database
83+
func (c *committer) Commit(n node, db *Database) (hashNode, error) {
84+
if db == nil {
85+
return nil, errors.New("no db provided")
86+
}
87+
h, err := c.commit(n, db, true)
88+
if err != nil {
89+
return nil, err
90+
}
91+
return h.(hashNode), nil
92+
}
93+
94+
// commit collapses a node down into a hash node and inserts it into the database
95+
func (c *committer) commit(n node, db *Database, force bool) (node, error) {
96+
// if this path is clean, use available cached data
97+
hash, dirty := n.cache()
98+
if hash != nil && !dirty {
99+
return hash, nil
100+
}
101+
// Commit children, then parent, and remove the dirty flag.
102+
switch cn := n.(type) {
103+
case *shortNode:
104+
// Commit child
105+
collapsed := cn.copy()
106+
if _, ok := cn.Val.(valueNode); !ok {
107+
if childV, err := c.commit(cn.Val, db, false); err != nil {
108+
return nil, err
109+
} else {
110+
collapsed.Val = childV
111+
}
112+
}
113+
// The key needs to be copied, since we're delivering it to database
114+
collapsed.Key = hexToCompact(cn.Key)
115+
hashedNode := c.store(collapsed, db, force, true)
116+
if hn, ok := hashedNode.(hashNode); ok {
117+
return hn, nil
118+
} else {
119+
return collapsed, nil
120+
}
121+
case *fullNode:
122+
hashedKids, hasVnodes, err := c.commitChildren(cn, db, force)
123+
if err != nil {
124+
return nil, err
125+
}
126+
collapsed := cn.copy()
127+
collapsed.Children = hashedKids
128+
129+
hashedNode := c.store(collapsed, db, force, hasVnodes)
130+
if hn, ok := hashedNode.(hashNode); ok {
131+
return hn, nil
132+
} else {
133+
return collapsed, nil
134+
}
135+
case valueNode:
136+
return c.store(cn, db, force, false), nil
137+
// hashnodes aren't stored
138+
case hashNode:
139+
return cn, nil
140+
}
141+
return hash, nil
142+
}
143+
144+
// commitChildren commits the children of the given fullnode
145+
func (c *committer) commitChildren(n *fullNode, db *Database, force bool) ([17]node, bool, error) {
146+
var children [17]node
147+
var hasValueNodeChildren = false
148+
for i, child := range n.Children {
149+
if child == nil {
150+
continue
151+
}
152+
hnode, err := c.commit(child, db, false)
153+
if err != nil {
154+
return children, false, err
155+
}
156+
children[i] = hnode
157+
if _, ok := hnode.(valueNode); ok {
158+
hasValueNodeChildren = true
159+
}
160+
}
161+
return children, hasValueNodeChildren, nil
162+
}
163+
164+
// store hashes the node n and if we have a storage layer specified, it writes
165+
// the key/value pair to it and tracks any node->child references as well as any
166+
// node->external trie references.
167+
func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren bool) node {
168+
// Larger nodes are replaced by their hash and stored in the database.
169+
var (
170+
hash, _ = n.cache()
171+
size int
172+
)
173+
if hash == nil {
174+
if vn, ok := n.(valueNode); ok {
175+
c.tmp.Reset()
176+
if err := rlp.Encode(&c.tmp, vn); err != nil {
177+
panic("encode error: " + err.Error())
178+
}
179+
size = len(c.tmp)
180+
if size < 32 && !force {
181+
return n // Nodes smaller than 32 bytes are stored inside their parent
182+
}
183+
hash = c.makeHashNode(c.tmp)
184+
} else {
185+
// This was not generated - must be a small node stored in the parent
186+
// No need to do anything here
187+
return n
188+
}
189+
} else {
190+
// We have the hash already, estimate the RLP encoding-size of the node.
191+
// The size is used for mem tracking, does not need to be exact
192+
size = estimateSize(n)
193+
}
194+
// If we're using channel-based leaf-reporting, send to channel.
195+
// The leaf channel will be active only when there is an active leaf-callback
196+
if c.leafCh != nil {
197+
c.leafCh <- &leaf{
198+
size: size,
199+
hash: common.BytesToHash(hash),
200+
node: n,
201+
vnodes: hasVnodeChildren,
202+
}
203+
} else if db != nil {
204+
// No leaf-callback used, but there's still a database. Do serial
205+
// insertion
206+
db.lock.Lock()
207+
db.insert(common.BytesToHash(hash), size, n)
208+
db.lock.Unlock()
209+
}
210+
return hash
211+
}
212+
213+
// commitLoop does the actual insert + leaf callback for nodes
214+
func (c *committer) commitLoop(db *Database) {
215+
for item := range c.leafCh {
216+
var (
217+
hash = item.hash
218+
size = item.size
219+
n = item.node
220+
hasVnodes = item.vnodes
221+
)
222+
// We are pooling the trie nodes into an intermediate memory cache
223+
db.lock.Lock()
224+
db.insert(hash, size, n)
225+
db.lock.Unlock()
226+
if c.onleaf != nil && hasVnodes {
227+
switch n := n.(type) {
228+
case *shortNode:
229+
if child, ok := n.Val.(valueNode); ok {
230+
c.onleaf(child, hash)
231+
}
232+
case *fullNode:
233+
for i := 0; i < 16; i++ {
234+
if child, ok := n.Children[i].(valueNode); ok {
235+
c.onleaf(child, hash)
236+
}
237+
}
238+
}
239+
}
240+
}
241+
}
242+
243+
func (c *committer) makeHashNode(data []byte) hashNode {
244+
n := make(hashNode, c.sha.Size())
245+
c.sha.Reset()
246+
c.sha.Write(data)
247+
c.sha.Read(n)
248+
return n
249+
}
250+
251+
// estimateSize estimates the size of an rlp-encoded node, without actually
252+
// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
253+
// with 1000 leaves, the only errors above 1% are on small shortnodes, where this
254+
// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
255+
func estimateSize(n node) int {
256+
switch n := n.(type) {
257+
case *shortNode:
258+
// A short node contains a compacted key, and a value.
259+
return 3 + len(n.Key) + estimateSize(n.Val)
260+
case *fullNode:
261+
// A full node contains up to 16 hashes (some nils), and a key
262+
s := 3
263+
for i := 0; i < 16; i++ {
264+
if child := n.Children[i]; child != nil {
265+
s += estimateSize(child)
266+
} else {
267+
s += 1
268+
}
269+
}
270+
return s
271+
case valueNode:
272+
return 1 + len(n)
273+
case hashNode:
274+
return 1 + len(n)
275+
default:
276+
panic(fmt.Sprintf("node type %T", n))
277+
278+
}
279+
}

trie/database.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -310,24 +310,24 @@ func (db *Database) InsertBlob(hash common.Hash, blob []byte) {
310310
db.lock.Lock()
311311
defer db.lock.Unlock()
312312

313-
db.insert(hash, blob, rawNode(blob))
313+
db.insert(hash, len(blob), rawNode(blob))
314314
}
315315

316316
// insert inserts a collapsed trie node into the memory database. This method is
317317
// a more generic version of InsertBlob, supporting both raw blob insertions as
318-
// well ex trie node insertions. The blob must always be specified to allow proper
318+
// well as trie node insertions. The blob size must be specified to allow proper
319319
// size tracking.
320-
func (db *Database) insert(hash common.Hash, blob []byte, node node) {
320+
func (db *Database) insert(hash common.Hash, size int, node node) {
321321
// If the node's already cached, skip
322322
if _, ok := db.dirties[hash]; ok {
323323
return
324324
}
325-
memcacheDirtyWriteMeter.Mark(int64(len(blob)))
325+
memcacheDirtyWriteMeter.Mark(int64(size))
326326

327327
// Create the cached entry for this node
328328
entry := &cachedNode{
329329
node: simplifyNode(node),
330-
size: uint16(len(blob)),
330+
size: uint16(size),
331331
flushPrev: db.newest,
332332
}
333333
entry.forChilds(func(child common.Hash) {

0 commit comments

Comments
 (0)