diff --git a/core/rawdb/accessors_chain.go b/core/rawdb/accessors_chain.go
index 6a1a2eae8f..2b201f3290 100644
--- a/core/rawdb/accessors_chain.go
+++ b/core/rawdb/accessors_chain.go
@@ -316,8 +316,8 @@ func ReadHeaderRange(db ethdb.Reader, number uint64, count uint64) []rlp.RawValu
if count == 0 {
return rlpHeaders
}
- // read remaining from ancients
- data, err := db.AncientRange(ChainFreezerHeaderTable, i+1-count, count, 0)
+ // read remaining from ancients, cap at 2M
+ data, err := db.AncientRange(ChainFreezerHeaderTable, i+1-count, count, 2*1024*1024)
if err != nil {
log.Error("Failed to read headers from freezer", "err", err)
return rlpHeaders
diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go
index 681be7ebc0..8a0fd1989a 100644
--- a/core/state/snapshot/conversion.go
+++ b/core/state/snapshot/conversion.go
@@ -362,15 +362,15 @@ func generateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, accou
}
func stackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan trieKV, out chan common.Hash) {
- options := trie.NewStackTrieOptions()
+ var onTrieNode trie.OnTrieNode
if db != nil {
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ onTrieNode = func(path []byte, hash common.Hash, blob []byte) {
rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme)
- })
+ }
}
- t := trie.NewStackTrie(options)
+ t := trie.NewStackTrie(onTrieNode)
for leaf := range in {
t.Update(leaf.key[:], leaf.value)
}
- out <- t.Commit()
+ out <- t.Hash()
}
diff --git a/core/state/statedb.go b/core/state/statedb.go
index e9328d0aef..9fc529b86b 100644
--- a/core/state/statedb.go
+++ b/core/state/statedb.go
@@ -961,12 +961,10 @@ func (s *StateDB) fastDeleteStorage(addrHash common.Hash, root common.Hash) (boo
nodes = trienode.NewNodeSet(addrHash)
slots = make(map[common.Hash][]byte)
)
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
nodes.AddNode(path, trienode.NewDeleted())
size += common.StorageSize(len(path))
})
- stack := trie.NewStackTrie(options)
for iter.Next() {
if size > storageDeleteLimit {
return true, size, nil, nil, nil
diff --git a/eth/filters/api.go b/eth/filters/api.go
index 8cf701ec57..173e40c972 100644
--- a/eth/filters/api.go
+++ b/eth/filters/api.go
@@ -43,6 +43,9 @@ var (
// The maximum number of topic criteria allowed, vm.LOG4 - vm.LOG0
const maxTopics = 4
+// The maximum number of allowed topics within a topic criteria
+const maxSubTopics = 1000
+
// filter is a helper struct that holds meta information over the filter type
// and associated subscription in the event system.
type filter struct {
@@ -545,6 +548,9 @@ func (args *FilterCriteria) UnmarshalJSON(data []byte) error {
return errors.New("invalid addresses in query")
}
}
+ if len(raw.Topics) > maxTopics {
+ return errExceedMaxTopics
+ }
// topics is an array consisting of strings and/or arrays of strings.
// JSON null values are converted to common.Hash{} and ignored by the filter manager.
@@ -565,6 +571,9 @@ func (args *FilterCriteria) UnmarshalJSON(data []byte) error {
case []interface{}:
// or case e.g. [null, "topic0", "topic1"]
+ if len(topic) > maxSubTopics {
+ return errExceedMaxTopics
+ }
for _, rawTopic := range topic {
if rawTopic == nil {
// null component, match all
diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go
new file mode 100644
index 0000000000..8ef1a00753
--- /dev/null
+++ b/eth/protocols/snap/gentrie.go
@@ -0,0 +1,287 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package snap
+
+import (
+ "bytes"
+
+ "github.com/ethereum/go-ethereum/common"
+ "github.com/ethereum/go-ethereum/core/rawdb"
+ "github.com/ethereum/go-ethereum/ethdb"
+ "github.com/ethereum/go-ethereum/trie"
+)
+
+// genTrie interface is used by the snap syncer to generate merkle tree nodes
+// based on a received batch of states.
+type genTrie interface {
+ // update inserts the state item into generator trie.
+ update(key, value []byte) error
+
+ // commit flushes the right boundary nodes if complete flag is true. This
+ // function must be called before flushing the associated database batch.
+ commit(complete bool) common.Hash
+}
+
+// pathTrie is a wrapper over the stackTrie, incorporating numerous additional
+// logics to handle the semi-completed trie and potential leftover dangling
+// nodes in the database. It is utilized for constructing the merkle tree nodes
+// in path mode during the snap sync process.
+type pathTrie struct {
+ owner common.Hash // identifier of trie owner, empty for account trie
+ tr *trie.StackTrie // underlying raw stack trie
+ first []byte // the path of first committed node by stackTrie
+ last []byte // the path of last committed node by stackTrie
+
+ // This flag indicates whether nodes on the left boundary are skipped for
+ // committing. If set, the left boundary nodes are considered incomplete
+ // due to potentially missing left children.
+ skipLeftBoundary bool
+ db ethdb.KeyValueReader
+ batch ethdb.Batch
+}
+
+// newPathTrie initializes the path trie.
+func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie {
+ tr := &pathTrie{
+ owner: owner,
+ skipLeftBoundary: skipLeftBoundary,
+ db: db,
+ batch: batch,
+ }
+ tr.tr = trie.NewStackTrie(tr.onTrieNode)
+ return tr
+}
+
+// onTrieNode is invoked whenever a new node is committed by the stackTrie.
+//
+// As the committed nodes might be incomplete if they are on the boundaries
+// (left or right), this function has the ability to detect the incomplete
+// ones and filter them out for committing.
+//
+// Additionally, the assumption is made that there may exist leftover dangling
+// nodes in the database. This function has the ability to detect the dangling
+// nodes that fall within the path space of committed nodes (specifically on
+// the path covered by internal extension nodes) and remove them from the
+// database. This property ensures that the entire path space is uniquely
+// occupied by committed nodes.
+//
+// Furthermore, all leftover dangling nodes along the path from committed nodes
+// to the trie root (left and right boundaries) should be removed as well;
+// otherwise, they might potentially disrupt the state healing process.
+func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
+ // Filter out the nodes on the left boundary if skipLeftBoundary is
+ // configured. Nodes are considered to be on the left boundary if
+ // it's the first one to be committed, or the parent/ancestor of the
+ // first committed node.
+ if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) {
+ if t.first == nil {
+ // Memorize the path of first committed node, which is regarded
+ // as left boundary. Deep-copy is necessary as the path given
+ // is volatile.
+ t.first = append([]byte{}, path...)
+
+ // The left boundary can be uniquely determined by the first committed node
+ // from stackTrie (e.g., N_1), as the shared path prefix between the first
+ // two inserted state items is deterministic (the path of N_3). The path
+ // from trie root towards the first committed node is considered the left
+ // boundary. The potential leftover dangling nodes on left boundary should
+ // be cleaned out.
+ //
+ // +-----+
+ // | N_3 | shared path prefix of state_1 and state_2
+ // +-----+
+ // /- -\
+ // +-----+ +-----+
+ // First committed node | N_1 | | N_2 | latest inserted node (contain state_2)
+ // +-----+ +-----+
+ //
+ // The node with the path of the first committed one (e.g, N_1) is not
+ // removed because it's a sibling of the nodes we want to commit, not
+ // the parent or ancestor.
+ for i := 0; i < len(path); i++ {
+ t.delete(path[:i], false)
+ }
+ }
+ return
+ }
+ // If boundary filtering is not configured, or the node is not on the left
+ // boundary, commit it to database.
+ //
+ // Note: If the current committed node is an extension node, then the nodes
+ // falling within the path between itself and its standalone (not embedded
+ // in parent) child should be cleaned out for exclusively occupy the inner
+ // path.
+ //
+ // This is essential in snap sync to avoid leaving dangling nodes within
+ // this range covered by extension node which could potentially break the
+ // state healing.
+ //
+ // The extension node is detected if its path is the prefix of last committed
+ // one and path gap is larger than one. If the path gap is only one byte,
+ // the current node could either be a full node, or a extension with single
+ // byte key. In either case, no gaps will be left in the path.
+ if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 {
+ for i := len(path) + 1; i < len(t.last); i++ {
+ t.delete(t.last[:i], true)
+ }
+ }
+ t.write(path, blob)
+
+ // Update the last flag. Deep-copy is necessary as the provided path is volatile.
+ if t.last == nil {
+ t.last = append([]byte{}, path...)
+ } else {
+ t.last = append(t.last[:0], path...)
+ }
+}
+
+// write commits the node write to provided database batch in path mode.
+func (t *pathTrie) write(path []byte, blob []byte) {
+ if t.owner == (common.Hash{}) {
+ rawdb.WriteAccountTrieNode(t.batch, path, blob)
+ } else {
+ rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob)
+ }
+}
+
+func (t *pathTrie) deleteAccountNode(path []byte, inner bool) {
+ if inner {
+ accountInnerLookupGauge.Inc(1)
+ } else {
+ accountOuterLookupGauge.Inc(1)
+ }
+ if !rawdb.ExistsAccountTrieNode(t.db, path) {
+ return
+ }
+ if inner {
+ accountInnerDeleteGauge.Inc(1)
+ } else {
+ accountOuterDeleteGauge.Inc(1)
+ }
+ rawdb.DeleteAccountTrieNode(t.batch, path)
+}
+
+func (t *pathTrie) deleteStorageNode(path []byte, inner bool) {
+ if inner {
+ storageInnerLookupGauge.Inc(1)
+ } else {
+ storageOuterLookupGauge.Inc(1)
+ }
+ if !rawdb.ExistsStorageTrieNode(t.db, t.owner, path) {
+ return
+ }
+ if inner {
+ storageInnerDeleteGauge.Inc(1)
+ } else {
+ storageOuterDeleteGauge.Inc(1)
+ }
+ rawdb.DeleteStorageTrieNode(t.batch, t.owner, path)
+}
+
+// delete commits the node deletion to provided database batch in path mode.
+func (t *pathTrie) delete(path []byte, inner bool) {
+ if t.owner == (common.Hash{}) {
+ t.deleteAccountNode(path, inner)
+ } else {
+ t.deleteStorageNode(path, inner)
+ }
+}
+
+// update implements genTrie interface, inserting a (key, value) pair into the
+// stack trie.
+func (t *pathTrie) update(key, value []byte) error {
+ return t.tr.Update(key, value)
+}
+
+// commit implements genTrie interface, flushing the right boundary if it's
+// considered as complete. Otherwise, the nodes on the right boundary are
+// discarded and cleaned up.
+//
+// Note, this function must be called before flushing database batch, otherwise,
+// dangling nodes might be left in database.
+func (t *pathTrie) commit(complete bool) common.Hash {
+ // If the right boundary is claimed as complete, flush them out.
+ // The nodes on both left and right boundary will still be filtered
+ // out if left boundary filtering is configured.
+ if complete {
+ // Commit all inserted but not yet committed nodes(on the right
+ // boundary) in the stackTrie.
+ hash := t.tr.Hash()
+ if t.skipLeftBoundary {
+ return common.Hash{} // hash is meaningless if left side is incomplete
+ }
+ return hash
+ }
+ // Discard nodes on the right boundary as it's claimed as incomplete. These
+ // nodes might be incomplete due to missing children on the right side.
+ // Furthermore, the potential leftover nodes on right boundary should also
+ // be cleaned out.
+ //
+ // The right boundary can be uniquely determined by the last committed node
+ // from stackTrie (e.g., N_1), as the shared path prefix between the last
+ // two inserted state items is deterministic (the path of N_3). The path
+ // from trie root towards the last committed node is considered the right
+ // boundary (root to N_3).
+ //
+ // +-----+
+ // | N_3 | shared path prefix of last two states
+ // +-----+
+ // /- -\
+ // +-----+ +-----+
+ // Last committed node | N_1 | | N_2 | latest inserted node (contain last state)
+ // +-----+ +-----+
+ //
+ // Another interesting scenario occurs when the trie is committed due to
+ // too many items being accumulated in the batch. To flush them out to
+ // the database, the path of the last inserted node (N_2) is temporarily
+ // treated as an incomplete right boundary, and nodes on this path are
+ // removed (e.g. from root to N_3).
+ // However, this path will be reclaimed as an internal path by inserting
+ // more items after the batch flush. New nodes on this path can be committed
+ // with no issues as they are actually complete. Also, from a database
+ // perspective, first deleting and then rewriting is a valid data update.
+ for i := 0; i < len(t.last); i++ {
+ t.delete(t.last[:i], false)
+ }
+ return common.Hash{} // the hash is meaningless for incomplete commit
+}
+
+// hashTrie is a wrapper over the stackTrie for implementing genTrie interface.
+type hashTrie struct {
+ tr *trie.StackTrie
+}
+
+// newHashTrie initializes the hash trie.
+func newHashTrie(batch ethdb.Batch) *hashTrie {
+ return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+ rawdb.WriteLegacyTrieNode(batch, hash, blob)
+ })}
+}
+
+// update implements genTrie interface, inserting a (key, value) pair into
+// the stack trie.
+func (t *hashTrie) update(key, value []byte) error {
+ return t.tr.Update(key, value)
+}
+
+// commit implements genTrie interface, committing the nodes on right boundary.
+func (t *hashTrie) commit(complete bool) common.Hash {
+ if !complete {
+ return common.Hash{} // the hash is meaningless for incomplete commit
+ }
+ return t.tr.Hash() // return hash only if it's claimed as complete
+}
diff --git a/eth/protocols/snap/gentrie_test.go b/eth/protocols/snap/gentrie_test.go
new file mode 100644
index 0000000000..1fb2dbce75
--- /dev/null
+++ b/eth/protocols/snap/gentrie_test.go
@@ -0,0 +1,553 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package snap
+
+import (
+ "bytes"
+ "math/rand"
+ "slices"
+ "testing"
+
+ "github.com/ethereum/go-ethereum/common"
+ "github.com/ethereum/go-ethereum/core/rawdb"
+ "github.com/ethereum/go-ethereum/crypto"
+ "github.com/ethereum/go-ethereum/ethdb"
+ "github.com/ethereum/go-ethereum/internal/testrand"
+ "github.com/ethereum/go-ethereum/trie"
+)
+
+type replayer struct {
+ paths []string // sort in fifo order
+ hashes []common.Hash // empty for deletion
+ unknowns int // counter for unknown write
+}
+
+func newBatchReplay() *replayer {
+ return &replayer{}
+}
+
+func (r *replayer) decode(key []byte, value []byte) {
+ account := rawdb.IsAccountTrieNode(key)
+ storage := rawdb.IsStorageTrieNode(key)
+ if !account && !storage {
+ r.unknowns += 1
+ return
+ }
+ var path []byte
+ if account {
+ _, path = rawdb.ResolveAccountTrieNodeKey(key)
+ } else {
+ _, owner, inner := rawdb.ResolveStorageTrieNode(key)
+ path = append(owner.Bytes(), inner...)
+ }
+ r.paths = append(r.paths, string(path))
+
+ if len(value) == 0 {
+ r.hashes = append(r.hashes, common.Hash{})
+ } else {
+ r.hashes = append(r.hashes, crypto.Keccak256Hash(value))
+ }
+}
+
+// updates returns a set of effective mutations. Multiple mutations targeting
+// the same node path will be merged in FIFO order.
+func (r *replayer) modifies() map[string]common.Hash {
+ set := make(map[string]common.Hash)
+ for i, path := range r.paths {
+ set[path] = r.hashes[i]
+ }
+ return set
+}
+
+// updates returns the number of updates.
+func (r *replayer) updates() int {
+ var count int
+ for _, hash := range r.modifies() {
+ if hash == (common.Hash{}) {
+ continue
+ }
+ count++
+ }
+ return count
+}
+
+// Put inserts the given value into the key-value data store.
+func (r *replayer) Put(key []byte, value []byte) error {
+ r.decode(key, value)
+ return nil
+}
+
+// Delete removes the key from the key-value data store.
+func (r *replayer) Delete(key []byte) error {
+ r.decode(key, nil)
+ return nil
+}
+
+func byteToHex(str []byte) []byte {
+ l := len(str) * 2
+ var nibbles = make([]byte, l)
+ for i, b := range str {
+ nibbles[i*2] = b / 16
+ nibbles[i*2+1] = b % 16
+ }
+ return nibbles
+}
+
+// innerNodes returns the internal nodes narrowed by two boundaries along with
+// the leftmost and rightmost sub-trie roots.
+func innerNodes(first, last []byte, includeLeft, includeRight bool, nodes map[string]common.Hash, t *testing.T) (map[string]common.Hash, []byte, []byte) {
+ var (
+ leftRoot []byte
+ rightRoot []byte
+ firstHex = byteToHex(first)
+ lastHex = byteToHex(last)
+ inner = make(map[string]common.Hash)
+ )
+ for path, hash := range nodes {
+ if hash == (common.Hash{}) {
+ t.Fatalf("Unexpected deletion, %v", []byte(path))
+ }
+ // Filter out the siblings on the left side or the left boundary nodes.
+ if !includeLeft && (bytes.Compare(firstHex, []byte(path)) > 0 || bytes.HasPrefix(firstHex, []byte(path))) {
+ continue
+ }
+ // Filter out the siblings on the right side or the right boundary nodes.
+ if !includeRight && (bytes.Compare(lastHex, []byte(path)) < 0 || bytes.HasPrefix(lastHex, []byte(path))) {
+ continue
+ }
+ inner[path] = hash
+
+ // Track the path of the leftmost sub trie root
+ if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 {
+ leftRoot = []byte(path)
+ }
+ // Track the path of the rightmost sub trie root
+ if rightRoot == nil ||
+ (bytes.Compare(rightRoot, []byte(path)) < 0) ||
+ (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) {
+ rightRoot = []byte(path)
+ }
+ }
+ return inner, leftRoot, rightRoot
+}
+
+func buildPartial(owner common.Hash, db ethdb.KeyValueReader, batch ethdb.Batch, entries []*kv, first, last int) *replayer {
+ tr := newPathTrie(owner, first != 0, db, batch)
+ for i := first; i <= last; i++ {
+ tr.update(entries[i].k, entries[i].v)
+ }
+ tr.commit(last == len(entries)-1)
+
+ replay := newBatchReplay()
+ batch.Replay(replay)
+
+ return replay
+}
+
+// TestPartialGentree verifies if the trie constructed with partial states can
+// generate consistent trie nodes that match those of the full trie.
+func TestPartialGentree(t *testing.T) {
+ for round := 0; round < 100; round++ {
+ var (
+ n = rand.Intn(1024) + 10
+ entries []*kv
+ )
+ for i := 0; i < n; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ nodes := make(map[string]common.Hash)
+ tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+ nodes[string(path)] = hash
+ })
+ for i := 0; i < len(entries); i++ {
+ tr.Update(entries[i].k, entries[i].v)
+ }
+ tr.Hash()
+
+ check := func(first, last int) {
+ var (
+ db = rawdb.NewMemoryDatabase()
+ batch = db.NewBatch()
+ )
+ // Build the partial tree with specific boundaries
+ r := buildPartial(common.Hash{}, db, batch, entries, first, last)
+ if r.unknowns > 0 {
+ t.Fatalf("Unknown database write: %d", r.unknowns)
+ }
+
+ // Ensure all the internal nodes are produced
+ var (
+ set = r.modifies()
+ inner, _, _ = innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t)
+ )
+ for path, hash := range inner {
+ if _, ok := set[path]; !ok {
+ t.Fatalf("Missing nodes %v", []byte(path))
+ }
+ if hash != set[path] {
+ t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path])
+ }
+ }
+ if r.updates() != len(inner) {
+ t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates())
+ }
+ }
+ for j := 0; j < 100; j++ {
+ var (
+ first int
+ last int
+ )
+ for {
+ first = rand.Intn(len(entries))
+ last = rand.Intn(len(entries))
+ if first <= last {
+ break
+ }
+ }
+ check(first, last)
+ }
+ var cases = []struct {
+ first int
+ last int
+ }{
+ {0, len(entries) - 1}, // full
+ {1, len(entries) - 1}, // no left
+ {2, len(entries) - 1}, // no left
+ {2, len(entries) - 2}, // no left and right
+ {2, len(entries) - 2}, // no left and right
+ {len(entries) / 2, len(entries) / 2}, // single
+ {0, 0}, // single first
+ {len(entries) - 1, len(entries) - 1}, // single last
+ }
+ for _, c := range cases {
+ check(c.first, c.last)
+ }
+ }
+}
+
+// TestGentreeDanglingClearing tests if the dangling nodes falling within the
+// path space of constructed tree can be correctly removed.
+func TestGentreeDanglingClearing(t *testing.T) {
+ for round := 0; round < 100; round++ {
+ var (
+ n = rand.Intn(1024) + 10
+ entries []*kv
+ )
+ for i := 0; i < n; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ nodes := make(map[string]common.Hash)
+ tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+ nodes[string(path)] = hash
+ })
+ for i := 0; i < len(entries); i++ {
+ tr.Update(entries[i].k, entries[i].v)
+ }
+ tr.Hash()
+
+ check := func(first, last int) {
+ var (
+ db = rawdb.NewMemoryDatabase()
+ batch = db.NewBatch()
+ )
+ // Write the junk nodes as the dangling
+ var injects []string
+ for path := range nodes {
+ for i := 0; i < len(path); i++ {
+ _, ok := nodes[path[:i]]
+ if ok {
+ continue
+ }
+ injects = append(injects, path[:i])
+ }
+ }
+ if len(injects) == 0 {
+ return
+ }
+ for _, path := range injects {
+ rawdb.WriteAccountTrieNode(db, []byte(path), testrand.Bytes(32))
+ }
+
+ // Build the partial tree with specific range
+ replay := buildPartial(common.Hash{}, db, batch, entries, first, last)
+ if replay.unknowns > 0 {
+ t.Fatalf("Unknown database write: %d", replay.unknowns)
+ }
+ set := replay.modifies()
+
+ // Make sure the injected junks falling within the path space of
+ // committed trie nodes are correctly deleted.
+ _, leftRoot, rightRoot := innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t)
+ for _, path := range injects {
+ if bytes.Compare([]byte(path), leftRoot) < 0 && !bytes.HasPrefix(leftRoot, []byte(path)) {
+ continue
+ }
+ if bytes.Compare([]byte(path), rightRoot) > 0 {
+ continue
+ }
+ if hash, ok := set[path]; !ok || hash != (common.Hash{}) {
+ t.Fatalf("Missing delete, %v", []byte(path))
+ }
+ }
+ }
+ for j := 0; j < 100; j++ {
+ var (
+ first int
+ last int
+ )
+ for {
+ first = rand.Intn(len(entries))
+ last = rand.Intn(len(entries))
+ if first <= last {
+ break
+ }
+ }
+ check(first, last)
+ }
+ var cases = []struct {
+ first int
+ last int
+ }{
+ {0, len(entries) - 1}, // full
+ {1, len(entries) - 1}, // no left
+ {2, len(entries) - 1}, // no left
+ {2, len(entries) - 2}, // no left and right
+ {2, len(entries) - 2}, // no left and right
+ {len(entries) / 2, len(entries) / 2}, // single
+ {0, 0}, // single first
+ {len(entries) - 1, len(entries) - 1}, // single last
+ }
+ for _, c := range cases {
+ check(c.first, c.last)
+ }
+ }
+}
+
+// TestFlushPartialTree tests the gentrie can produce complete inner trie nodes
+// even with lots of batch flushes.
+func TestFlushPartialTree(t *testing.T) {
+ var entries []*kv
+ for i := 0; i < 1024; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ nodes := make(map[string]common.Hash)
+ tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+ nodes[string(path)] = hash
+ })
+ for i := 0; i < len(entries); i++ {
+ tr.Update(entries[i].k, entries[i].v)
+ }
+ tr.Hash()
+
+ var cases = []struct {
+ first int
+ last int
+ }{
+ {0, len(entries) - 1}, // full
+ {1, len(entries) - 1}, // no left
+ {10, len(entries) - 1}, // no left
+ {10, len(entries) - 2}, // no left and right
+ {10, len(entries) - 10}, // no left and right
+ {11, 11}, // single
+ {0, 0}, // single first
+ {len(entries) - 1, len(entries) - 1}, // single last
+ }
+ for _, c := range cases {
+ var (
+ db = rawdb.NewMemoryDatabase()
+ batch = db.NewBatch()
+ combined = db.NewBatch()
+ )
+ inner, _, _ := innerNodes(entries[c.first].k, entries[c.last].k, c.first == 0, c.last == len(entries)-1, nodes, t)
+
+ tr := newPathTrie(common.Hash{}, c.first != 0, db, batch)
+ for i := c.first; i <= c.last; i++ {
+ tr.update(entries[i].k, entries[i].v)
+ if rand.Intn(2) == 0 {
+ tr.commit(false)
+
+ batch.Replay(combined)
+ batch.Write()
+ batch.Reset()
+ }
+ }
+ tr.commit(c.last == len(entries)-1)
+
+ batch.Replay(combined)
+ batch.Write()
+ batch.Reset()
+
+ r := newBatchReplay()
+ combined.Replay(r)
+
+ // Ensure all the internal nodes are produced
+ set := r.modifies()
+ for path, hash := range inner {
+ if _, ok := set[path]; !ok {
+ t.Fatalf("Missing nodes %v", []byte(path))
+ }
+ if hash != set[path] {
+ t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path])
+ }
+ }
+ if r.updates() != len(inner) {
+ t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates())
+ }
+ }
+}
+
+// TestBoundSplit ensures two consecutive trie chunks are not overlapped with
+// each other.
+func TestBoundSplit(t *testing.T) {
+ var entries []*kv
+ for i := 0; i < 1024; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ for j := 0; j < 100; j++ {
+ var (
+ next int
+ last int
+ db = rawdb.NewMemoryDatabase()
+
+ lastRightRoot []byte
+ )
+ for {
+ if next == len(entries) {
+ break
+ }
+ last = rand.Intn(len(entries)-next) + next
+
+ r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last)
+ set := r.modifies()
+
+ // Skip if the chunk is zero-size
+ if r.updates() == 0 {
+ next = last + 1
+ continue
+ }
+
+ // Ensure the updates in two consecutive chunks are not overlapped.
+ // The only overlapping part should be deletion.
+ if lastRightRoot != nil && len(set) > 0 {
+ // Derive the path of left-most node in this chunk
+ var leftRoot []byte
+ for path, hash := range r.modifies() {
+ if hash == (common.Hash{}) {
+ t.Fatalf("Unexpected deletion %v", []byte(path))
+ }
+ if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 {
+ leftRoot = []byte(path)
+ }
+ }
+ if bytes.HasPrefix(lastRightRoot, leftRoot) || bytes.HasPrefix(leftRoot, lastRightRoot) {
+ t.Fatalf("Two chunks are not correctly separated, lastRight: %v, left: %v", lastRightRoot, leftRoot)
+ }
+ }
+
+ // Track the updates as the last chunk
+ var rightRoot []byte
+ for path := range set {
+ if rightRoot == nil ||
+ (bytes.Compare(rightRoot, []byte(path)) < 0) ||
+ (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) {
+ rightRoot = []byte(path)
+ }
+ }
+ lastRightRoot = rightRoot
+ next = last + 1
+ }
+ }
+}
+
+// TestTinyPartialTree tests if the partial tree is too tiny(has less than two
+// states), then nothing should be committed.
+func TestTinyPartialTree(t *testing.T) {
+ var entries []*kv
+ for i := 0; i < 1024; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ for i := 0; i < len(entries); i++ {
+ next := i
+ last := i + 1
+ if last >= len(entries) {
+ last = len(entries) - 1
+ }
+ db := rawdb.NewMemoryDatabase()
+ r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last)
+
+ if next != 0 && last != len(entries)-1 {
+ if r.updates() != 0 {
+ t.Fatalf("Unexpected data writes, got: %d", r.updates())
+ }
+ }
+ }
+}
diff --git a/eth/protocols/snap/metrics.go b/eth/protocols/snap/metrics.go
index a7d071953f..25dbcc6386 100644
--- a/eth/protocols/snap/metrics.go
+++ b/eth/protocols/snap/metrics.go
@@ -27,21 +27,28 @@ var (
IngressRegistrationErrorMeter = metrics.NewRegisteredMeter(ingressRegistrationErrorName, nil)
EgressRegistrationErrorMeter = metrics.NewRegisteredMeter(egressRegistrationErrorName, nil)
- // deletionGauge is the metric to track how many trie node deletions
- // are performed in total during the sync process.
- deletionGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete", nil)
+ // accountInnerDeleteGauge is the metric to track how many dangling trie nodes
+ // covered by extension node in account trie are deleted during the sync.
+ accountInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/inner", nil)
- // lookupGauge is the metric to track how many trie node lookups are
- // performed to determine if node needs to be deleted.
- lookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/lookup", nil)
+ // storageInnerDeleteGauge is the metric to track how many dangling trie nodes
+ // covered by extension node in storage trie are deleted during the sync.
+ storageInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/inner", nil)
+
+ // accountOuterDeleteGauge is the metric to track how many dangling trie nodes
+ // above the committed nodes in account trie are deleted during the sync.
+ accountOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/outer", nil)
- // boundaryAccountNodesGauge is the metric to track how many boundary trie
- // nodes in account trie are met.
- boundaryAccountNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/account", nil)
+ // storageOuterDeleteGauge is the metric to track how many dangling trie nodes
+ // above the committed nodes in storage trie are deleted during the sync.
+ storageOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/outer", nil)
- // boundaryAccountNodesGauge is the metric to track how many boundary trie
- // nodes in storage tries are met.
- boundaryStorageNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/storage", nil)
+ // lookupGauge is the metric to track how many trie node lookups are
+ // performed to determine if node needs to be deleted.
+ accountInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/inner", nil)
+ accountOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/outer", nil)
+ storageInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/inner", nil)
+ storageOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/outer", nil)
// smallStorageGauge is the metric to track how many storages are small enough
// to retrieved in one or two request.
@@ -54,4 +61,9 @@ var (
// skipStorageHealingGauge is the metric to track how many storages are retrieved
// in multiple requests but healing is not necessary.
skipStorageHealingGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/noheal", nil)
+
+ // largeStorageDiscardGauge is the metric to track how many chunked storages are
+ // discarded during the snap sync.
+ largeStorageDiscardGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/discard", nil)
+ largeStorageResumedGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/resume", nil)
)
diff --git a/eth/protocols/snap/progress_test.go b/eth/protocols/snap/progress_test.go
new file mode 100644
index 0000000000..9d923bd2f5
--- /dev/null
+++ b/eth/protocols/snap/progress_test.go
@@ -0,0 +1,154 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package snap
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/ethereum/go-ethereum/common"
+)
+
+// Legacy sync progress definitions
+type legacyStorageTask struct {
+ Next common.Hash // Next account to sync in this interval
+ Last common.Hash // Last account to sync in this interval
+}
+
+type legacyAccountTask struct {
+ Next common.Hash // Next account to sync in this interval
+ Last common.Hash // Last account to sync in this interval
+ SubTasks map[common.Hash][]*legacyStorageTask // Storage intervals needing fetching for large contracts
+}
+
+type legacyProgress struct {
+ Tasks []*legacyAccountTask // The suspended account tasks (contract tasks within)
+}
+
+func compareProgress(a legacyProgress, b SyncProgress) bool {
+ if len(a.Tasks) != len(b.Tasks) {
+ return false
+ }
+ for i := 0; i < len(a.Tasks); i++ {
+ if a.Tasks[i].Next != b.Tasks[i].Next {
+ return false
+ }
+ if a.Tasks[i].Last != b.Tasks[i].Last {
+ return false
+ }
+ // new fields are not checked here
+
+ if len(a.Tasks[i].SubTasks) != len(b.Tasks[i].SubTasks) {
+ return false
+ }
+ for addrHash, subTasksA := range a.Tasks[i].SubTasks {
+ subTasksB, ok := b.Tasks[i].SubTasks[addrHash]
+ if !ok || len(subTasksB) != len(subTasksA) {
+ return false
+ }
+ for j := 0; j < len(subTasksA); j++ {
+ if subTasksA[j].Next != subTasksB[j].Next {
+ return false
+ }
+ if subTasksA[j].Last != subTasksB[j].Last {
+ return false
+ }
+ }
+ }
+ }
+ return true
+}
+
+func makeLegacyProgress() legacyProgress {
+ return legacyProgress{
+ Tasks: []*legacyAccountTask{
+ {
+ Next: common.Hash{},
+ Last: common.Hash{0x77},
+ SubTasks: map[common.Hash][]*legacyStorageTask{
+ common.Hash{0x1}: {
+ {
+ Next: common.Hash{},
+ Last: common.Hash{0xff},
+ },
+ },
+ },
+ },
+ {
+ Next: common.Hash{0x88},
+ Last: common.Hash{0xff},
+ },
+ },
+ }
+}
+
+func convertLegacy(legacy legacyProgress) SyncProgress {
+ var progress SyncProgress
+ for i, task := range legacy.Tasks {
+ subTasks := make(map[common.Hash][]*storageTask)
+ for owner, list := range task.SubTasks {
+ var cpy []*storageTask
+ for i := 0; i < len(list); i++ {
+ cpy = append(cpy, &storageTask{
+ Next: list[i].Next,
+ Last: list[i].Last,
+ })
+ }
+ subTasks[owner] = cpy
+ }
+ accountTask := &accountTask{
+ Next: task.Next,
+ Last: task.Last,
+ SubTasks: subTasks,
+ }
+ if i == 0 {
+ accountTask.StorageCompleted = []common.Hash{{0xaa}, {0xbb}} // fulfill new fields
+ }
+ progress.Tasks = append(progress.Tasks, accountTask)
+ }
+ return progress
+}
+
+func TestSyncProgressCompatibility(t *testing.T) {
+ // Decode serialized bytes of legacy progress, backward compatibility
+ legacy := makeLegacyProgress()
+ blob, err := json.Marshal(legacy)
+ if err != nil {
+ t.Fatalf("Failed to marshal progress %v", err)
+ }
+ var dec SyncProgress
+ if err := json.Unmarshal(blob, &dec); err != nil {
+ t.Fatalf("Failed to unmarshal progress %v", err)
+ }
+ if !compareProgress(legacy, dec) {
+ t.Fatal("sync progress is not backward compatible")
+ }
+
+ // Decode serialized bytes of new format progress
+ progress := convertLegacy(legacy)
+ blob, err = json.Marshal(progress)
+ if err != nil {
+ t.Fatalf("Failed to marshal progress %v", err)
+ }
+ var legacyDec legacyProgress
+ if err := json.Unmarshal(blob, &legacyDec); err != nil {
+ t.Fatalf("Failed to unmarshal progress %v", err)
+ }
+ if !compareProgress(legacyDec, progress) {
+ t.Fatal("sync progress is not forward compatible")
+ }
+}
diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go
index 887a50775d..208d3ba3bc 100644
--- a/eth/protocols/snap/sync.go
+++ b/eth/protocols/snap/sync.go
@@ -94,6 +94,9 @@ const (
// trienodeHealThrottleDecrease is the divisor for the throttle when the
// rate of arriving data is lower than the rate of processing it.
trienodeHealThrottleDecrease = 1.25
+
+ // batchSizeThreshold is the maximum size allowed for gentrie batch.
+ batchSizeThreshold = 8 * 1024 * 1024
)
var (
@@ -295,11 +298,19 @@ type bytecodeHealResponse struct {
// accountTask represents the sync task for a chunk of the account snapshot.
type accountTask struct {
- // These fields get serialized to leveldb on shutdown
+ // These fields get serialized to key-value store on shutdown
Next common.Hash // Next account to sync in this interval
Last common.Hash // Last account to sync in this interval
SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts
+ // This is a list of account hashes whose storage are already completed
+ // in this cycle. This field is newly introduced in v1.14 and will be
+ // empty if the task is resolved from legacy progress data. Furthermore,
+ // this additional field will be ignored by legacy Geth. The only side
+ // effect is that these contracts might be resynced in the new cycle,
+ // retaining the legacy behavior.
+ StorageCompleted []common.Hash `json:",omitempty"`
+
// These fields are internals used during runtime
req *accountRequest // Pending request to fill this task
res *accountResponse // Validate response filling this task
@@ -309,15 +320,40 @@ type accountTask struct {
needState []bool // Flags whether the filling accounts need storage retrieval
needHeal []bool // Flags whether the filling accounts's state was chunked and need healing
- codeTasks map[common.Hash]struct{} // Code hashes that need retrieval
- stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
+ codeTasks map[common.Hash]struct{} // Code hashes that need retrieval
+ stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
+ stateCompleted map[common.Hash]struct{} // Account hashes whose storage have been completed
- genBatch ethdb.Batch // Batch used by the node generator
- genTrie *trie.StackTrie // Node generator from storage slots
+ genBatch ethdb.Batch // Batch used by the node generator
+ genTrie genTrie // Node generator from storage slots
done bool // Flag whether the task can be removed
}
+// activeSubTasks returns the set of storage tasks covered by the current account
+// range. Normally this would be the entire subTask set, but on a sync interrupt
+// and later resume it can happen that a shorter account range is retrieved. This
+// method ensures that we only start up the subtasks covered by the latest account
+// response.
+//
+// Nil is returned if the account range is empty.
+func (task *accountTask) activeSubTasks() map[common.Hash][]*storageTask {
+ if len(task.res.hashes) == 0 {
+ return nil
+ }
+ var (
+ tasks = make(map[common.Hash][]*storageTask)
+ last = task.res.hashes[len(task.res.hashes)-1]
+ )
+ for hash, subTasks := range task.SubTasks {
+ subTasks := subTasks // closure
+ if hash.Cmp(last) <= 0 {
+ tasks[hash] = subTasks
+ }
+ }
+ return tasks
+}
+
// storageTask represents the sync task for a chunk of the storage snapshot.
type storageTask struct {
Next common.Hash // Next account to sync in this interval
@@ -327,8 +363,8 @@ type storageTask struct {
root common.Hash // Storage root hash for this instance
req *storageRequest // Pending request to fill this task
- genBatch ethdb.Batch // Batch used by the node generator
- genTrie *trie.StackTrie // Node generator from storage slots
+ genBatch ethdb.Batch // Batch used by the node generator
+ genTrie genTrie // Node generator from storage slots
done bool // Flag whether the task can be removed
}
@@ -716,19 +752,6 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
}
}
-// cleanPath is used to remove the dangling nodes in the stackTrie.
-func (s *Syncer) cleanPath(batch ethdb.Batch, owner common.Hash, path []byte) {
- if owner == (common.Hash{}) && rawdb.ExistsAccountTrieNode(s.db, path) {
- rawdb.DeleteAccountTrieNode(batch, path)
- deletionGauge.Inc(1)
- }
- if owner != (common.Hash{}) && rawdb.ExistsStorageTrieNode(s.db, owner, path) {
- rawdb.DeleteStorageTrieNode(batch, owner, path)
- deletionGauge.Inc(1)
- }
- lookupGauge.Inc(1)
-}
-
// loadSyncStatus retrieves a previously aborted sync status from the database,
// or generates a fresh one if none is available.
func (s *Syncer) loadSyncStatus() {
@@ -745,28 +768,27 @@ func (s *Syncer) loadSyncStatus() {
for _, task := range s.tasks {
task := task // closure for task.genBatch in the stacktrie writer callback
+ // Restore the completed storages
+ task.stateCompleted = make(map[common.Hash]struct{})
+ for _, hash := range task.StorageCompleted {
+ task.stateCompleted[hash] = struct{}{}
+ }
+ task.StorageCompleted = nil
+
+ // Allocate batch for account trie generation
task.genBatch = ethdb.HookedBatch{
Batch: s.db.NewBatch(),
OnPut: func(key []byte, value []byte) {
s.accountBytes += common.StorageSize(len(key) + len(value))
},
}
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, blob, s.scheme)
- })
+ if s.scheme == rawdb.HashScheme {
+ task.genTrie = newHashTrie(task.genBatch)
+ }
if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner and also filter out boundary nodes
- // only in the context of the path scheme. Deletion is forbidden in the
- // hash scheme, as it can disrupt state completeness.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(task.genBatch, common.Hash{}, path)
- })
- // Skip the left boundary if it's not the first range.
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(task.Next != (common.Hash{}), task.Last != common.MaxHash, boundaryAccountNodesGauge)
+ task.genTrie = newPathTrie(common.Hash{}, task.Next != common.Hash{}, s.db, task.genBatch)
}
- task.genTrie = trie.NewStackTrie(options)
+ // Restore leftover storage tasks
for accountHash, subtasks := range task.SubTasks {
for _, subtask := range subtasks {
subtask := subtask // closure for subtask.genBatch in the stacktrie writer callback
@@ -777,23 +799,12 @@ func (s *Syncer) loadSyncStatus() {
s.storageBytes += common.StorageSize(len(key) + len(value))
},
}
- owner := accountHash // local assignment for stacktrie writer closure
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, blob, s.scheme)
- })
+ if s.scheme == rawdb.HashScheme {
+ subtask.genTrie = newHashTrie(subtask.genBatch)
+ }
if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner and also filter out boundary nodes
- // only in the context of the path scheme. Deletion is forbidden in the
- // hash scheme, as it can disrupt state completeness.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(subtask.genBatch, owner, path)
- })
- // Skip the left boundary if it's not the first range.
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(subtask.Next != common.Hash{}, subtask.Last != common.MaxHash, boundaryStorageNodesGauge)
+ subtask.genTrie = newPathTrie(accountHash, subtask.Next != common.Hash{}, s.db, subtask.genBatch)
}
- subtask.genTrie = trie.NewStackTrie(options)
}
}
}
@@ -845,27 +856,20 @@ func (s *Syncer) loadSyncStatus() {
s.accountBytes += common.StorageSize(len(key) + len(value))
},
}
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, blob, s.scheme)
- })
+ var tr genTrie
+ if s.scheme == rawdb.HashScheme {
+ tr = newHashTrie(batch)
+ }
if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner and also filter out boundary nodes
- // only in the context of the path scheme. Deletion is forbidden in the
- // hash scheme, as it can disrupt state completeness.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(batch, common.Hash{}, path)
- })
- // Skip the left boundary if it's not the first range.
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(next != common.Hash{}, last != common.MaxHash, boundaryAccountNodesGauge)
+ tr = newPathTrie(common.Hash{}, next != common.Hash{}, s.db, batch)
}
s.tasks = append(s.tasks, &accountTask{
- Next: next,
- Last: last,
- SubTasks: make(map[common.Hash][]*storageTask),
- genBatch: batch,
- genTrie: trie.NewStackTrie(options),
+ Next: next,
+ Last: last,
+ SubTasks: make(map[common.Hash][]*storageTask),
+ genBatch: batch,
+ stateCompleted: make(map[common.Hash]struct{}),
+ genTrie: tr,
})
log.Debug("Created account sync task", "from", next, "last", last)
next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
@@ -876,16 +880,31 @@ func (s *Syncer) loadSyncStatus() {
func (s *Syncer) saveSyncStatus() {
// Serialize any partial progress to disk before spinning down
for _, task := range s.tasks {
+ // Claim the right boundary as incomplete before flushing the
+ // accumulated nodes in batch, the nodes on right boundary
+ // will be discarded and cleaned up by this call.
+ task.genTrie.commit(false)
if err := task.genBatch.Write(); err != nil {
log.Error("Failed to persist account slots", "err", err)
}
for _, subtasks := range task.SubTasks {
for _, subtask := range subtasks {
+ // Same for account trie, discard and cleanup the
+ // incomplete right boundary.
+ subtask.genTrie.commit(false)
if err := subtask.genBatch.Write(); err != nil {
log.Error("Failed to persist storage slots", "err", err)
}
}
}
+ // Save the account hashes of completed storage.
+ task.StorageCompleted = make([]common.Hash, 0, len(task.stateCompleted))
+ for hash := range task.stateCompleted {
+ task.StorageCompleted = append(task.StorageCompleted, hash)
+ }
+ if len(task.StorageCompleted) > 0 {
+ log.Debug("Leftover completed storages", "number", len(task.StorageCompleted), "next", task.Next, "last", task.Last)
+ }
}
// Store the actual progress markers
progress := &SyncProgress{
@@ -970,6 +989,10 @@ func (s *Syncer) cleanStorageTasks() {
delete(task.SubTasks, account)
task.pend--
+ // Mark the state as complete to prevent resyncing, regardless
+ // if state healing is necessary.
+ task.stateCompleted[account] = struct{}{}
+
// If this was the last pending task, forward the account task
if task.pend == 0 {
s.forwardAccountTask(task)
@@ -1209,7 +1232,8 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
continue
}
// Skip tasks that are already retrieving (or done with) all small states
- if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 {
+ storageTasks := task.activeSubTasks()
+ if len(storageTasks) == 0 && len(task.stateTasks) == 0 {
continue
}
// Task pending retrieval, try to find an idle peer. If no such peer
@@ -1253,7 +1277,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
roots = make([]common.Hash, 0, storageSets)
subtask *storageTask
)
- for account, subtasks := range task.SubTasks {
+ for account, subtasks := range storageTasks {
for _, st := range subtasks {
// Skip any subtasks already filling
if st.req != nil {
@@ -1850,11 +1874,11 @@ func (s *Syncer) processAccountResponse(res *accountResponse) {
res.task.res = res
// Ensure that the response doesn't overflow into the subsequent task
- last := res.task.Last.Big()
+ lastBig := res.task.Last.Big()
for i, hash := range res.hashes {
// Mark the range complete if the last is already included.
// Keep iteration to delete the extra states if exists.
- cmp := hash.Big().Cmp(last)
+ cmp := hash.Big().Cmp(lastBig)
if cmp == 0 {
res.cont = false
continue
@@ -1890,7 +1914,21 @@ func (s *Syncer) processAccountResponse(res *accountResponse) {
}
// Check if the account is a contract with an unknown storage trie
if account.Root != types.EmptyRootHash {
- if !rawdb.HasTrieNode(s.db, res.hashes[i], nil, account.Root, s.scheme) {
+ // If the storage was already retrieved in the last cycle, there's no need
+ // to resync it again, regardless of whether the storage root is consistent
+ // or not.
+ if _, exist := res.task.stateCompleted[res.hashes[i]]; exist {
+ // The leftover storage tasks are not expected, unless system is
+ // very wrong.
+ if _, ok := res.task.SubTasks[res.hashes[i]]; ok {
+ panic(fmt.Errorf("unexpected leftover storage tasks, owner: %x", res.hashes[i]))
+ }
+ // Mark the healing tag if storage root node is inconsistent, or
+ // it's non-existent due to storage chunking.
+ if !rawdb.HasTrieNode(s.db, res.hashes[i], nil, account.Root, s.scheme) {
+ res.task.needHeal[i] = true
+ }
+ } else {
// If there was a previous large state retrieval in progress,
// don't restart it from scratch. This happens if a sync cycle
// is interrupted and resumed later. However, *do* update the
@@ -1902,7 +1940,12 @@ func (s *Syncer) processAccountResponse(res *accountResponse) {
}
res.task.needHeal[i] = true
resumed[res.hashes[i]] = struct{}{}
+ largeStorageResumedGauge.Inc(1)
} else {
+ // It's possible that in the hash scheme, the storage, along
+ // with the trie nodes of the given root, is already present
+ // in the database. Schedule the storage task anyway to simplify
+ // the logic here.
res.task.stateTasks[res.hashes[i]] = account.Root
}
res.task.needState[i] = true
@@ -1910,13 +1953,29 @@ func (s *Syncer) processAccountResponse(res *accountResponse) {
}
}
}
- // Delete any subtasks that have been aborted but not resumed. This may undo
- // some progress if a new peer gives us less accounts than an old one, but for
- // now we have to live with that.
- for hash := range res.task.SubTasks {
- if _, ok := resumed[hash]; !ok {
- log.Debug("Aborting suspended storage retrieval", "account", hash)
- delete(res.task.SubTasks, hash)
+ // Delete any subtasks that have been aborted but not resumed. It's essential
+ // as the corresponding contract might be self-destructed in this cycle(it's
+ // no longer possible in ethereum as self-destruction is disabled in Cancun
+ // Fork, but the condition is still necessary for other networks).
+ //
+ // Keep the leftover storage tasks if they are not covered by the responded
+ // account range which should be picked up in next account wave.
+ if len(res.hashes) > 0 {
+ // The hash of last delivered account in the response
+ last := res.hashes[len(res.hashes)-1]
+ for hash := range res.task.SubTasks {
+ // TODO(rjl493456442) degrade the log level before merging.
+ if hash.Cmp(last) > 0 {
+ log.Info("Keeping suspended storage retrieval", "account", hash)
+ continue
+ }
+ // TODO(rjl493456442) degrade the log level before merging.
+ // It should never happen in ethereum.
+ if _, ok := resumed[hash]; !ok {
+ log.Error("Aborting suspended storage retrieval", "account", hash)
+ delete(res.task.SubTasks, hash)
+ largeStorageDiscardGauge.Inc(1)
+ }
}
}
// If the account range contained no contracts, or all have been fully filled
@@ -2014,6 +2073,7 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) {
res.mainTask.needState[j] = false
res.mainTask.pend--
+ res.mainTask.stateCompleted[account] = struct{}{} // mark it as completed
smallStorageGauge.Inc(1)
}
// If the last contract was chunked, mark it as needing healing
@@ -2062,25 +2122,20 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
s.storageBytes += common.StorageSize(len(key) + len(value))
},
}
- owner := account // local assignment for stacktrie writer closure
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme)
- })
+ var tr genTrie
+ if s.scheme == rawdb.HashScheme {
+ tr = newHashTrie(batch)
+ }
if s.scheme == rawdb.PathScheme {
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(batch, owner, path)
- })
// Keep the left boundary as it's the first range.
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(false, r.End() != common.MaxHash, boundaryStorageNodesGauge)
+ tr = newPathTrie(account, false, s.db, batch)
}
tasks = append(tasks, &storageTask{
Next: common.Hash{},
Last: r.End(),
root: acc.Root,
genBatch: batch,
- genTrie: trie.NewStackTrie(options),
+ genTrie: tr,
})
for r.Next() {
batch := ethdb.HookedBatch{
@@ -2089,27 +2144,19 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
s.storageBytes += common.StorageSize(len(key) + len(value))
},
}
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme)
- })
+ var tr genTrie
+ if s.scheme == rawdb.HashScheme {
+ tr = newHashTrie(batch)
+ }
if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner and also filter out boundary nodes
- // only in the context of the path scheme. Deletion is forbidden in the
- // hash scheme, as it can disrupt state completeness.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(batch, owner, path)
- })
- // Skip the left boundary as it's not the first range
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(true, r.End() != common.MaxHash, boundaryStorageNodesGauge)
+ tr = newPathTrie(account, true, s.db, batch)
}
tasks = append(tasks, &storageTask{
Next: r.Start(),
Last: r.End(),
root: acc.Root,
genBatch: batch,
- genTrie: trie.NewStackTrie(options),
+ genTrie: tr,
})
}
for _, task := range tasks {
@@ -2155,26 +2202,18 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
if i < len(res.hashes)-1 || res.subTask == nil {
// no need to make local reassignment of account: this closure does not outlive the loop
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(batch, account, path, hash, blob, s.scheme)
- })
+ var tr genTrie
+ if s.scheme == rawdb.HashScheme {
+ tr = newHashTrie(batch)
+ }
if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner only in the context of the
- // path scheme. Deletion is forbidden in the hash scheme, as it can
- // disrupt state completeness.
- //
- // Notably, boundary nodes can be also kept because the whole storage
- // trie is complete.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(batch, account, path)
- })
+ // Keep the left boundary as it's complete
+ tr = newPathTrie(account, false, s.db, batch)
}
- tr := trie.NewStackTrie(options)
for j := 0; j < len(res.hashes[i]); j++ {
- tr.Update(res.hashes[i][j][:], res.slots[i][j])
+ tr.update(res.hashes[i][j][:], res.slots[i][j])
}
- tr.Commit()
+ tr.commit(true)
}
// Persist the received storage segments. These flat state maybe
// outdated during the sync, but it can be fixed later during the
@@ -2185,14 +2224,14 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
// If we're storing large contracts, generate the trie nodes
// on the fly to not trash the gluing points
if i == len(res.hashes)-1 && res.subTask != nil {
- res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j])
+ res.subTask.genTrie.update(res.hashes[i][j][:], res.slots[i][j])
}
}
}
// Large contracts could have generated new trie nodes, flush them to disk
if res.subTask != nil {
if res.subTask.done {
- root := res.subTask.genTrie.Commit()
+ root := res.subTask.genTrie.commit(res.subTask.Last == common.MaxHash)
if err := res.subTask.genBatch.Write(); err != nil {
log.Error("Failed to persist stack slots", "err", err)
}
@@ -2209,8 +2248,8 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
}
}
}
- }
- if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize {
+ } else if res.subTask.genBatch.ValueSize() > batchSizeThreshold {
+ res.subTask.genTrie.commit(false)
if err := res.subTask.genBatch.Write(); err != nil {
log.Error("Failed to persist stack slots", "err", err)
}
@@ -2393,7 +2432,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {
if err != nil {
panic(err) // Really shouldn't ever happen
}
- task.genTrie.Update(hash[:], full)
+ task.genTrie.update(hash[:], full)
}
}
// Flush anything written just now and update the stats
@@ -2409,17 +2448,30 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {
return
}
task.Next = incHash(hash)
+
+ // Remove the completion flag once the account range is pushed
+ // forward. The leftover accounts will be skipped in the next
+ // cycle.
+ delete(task.stateCompleted, hash)
}
// All accounts marked as complete, track if the entire task is done
task.done = !res.cont
+ // Error out if there is any leftover completion flag.
+ if task.done && len(task.stateCompleted) != 0 {
+ panic(fmt.Errorf("storage completion flags should be emptied, %d left", len(task.stateCompleted)))
+ }
// Stack trie could have generated trie nodes, push them to disk (we need to
// flush after finalizing task.done. It's fine even if we crash and lose this
// write as it will only cause more data to be downloaded during heal.
if task.done {
- task.genTrie.Commit()
- }
- if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done {
+ task.genTrie.commit(task.Last == common.MaxHash)
+ if err := task.genBatch.Write(); err != nil {
+ log.Error("Failed to persist stack account", "err", err)
+ }
+ task.genBatch.Reset()
+ } else if task.genBatch.ValueSize() > batchSizeThreshold {
+ task.genTrie.commit(false)
if err := task.genBatch.Write(); err != nil {
log.Error("Failed to persist stack account", "err", err)
}
diff --git a/fork.yaml b/fork.yaml
index 15f26bc7e7..14336f78eb 100644
--- a/fork.yaml
+++ b/fork.yaml
@@ -5,7 +5,7 @@ footer: |
base:
name: go-ethereum
url: https://github.com/ethereum/go-ethereum
- hash: 2bd6bd01d2e8561dd7fc21b631f4a34ac16627a1 v1.13.14
+ hash: c5ba367eb6232e3eddd7d6226bfd374449c63164 v1.13.15
fork:
name: op-geth
url: https://github.com/ethereum-optimism/op-geth
diff --git a/internal/testrand/rand.go b/internal/testrand/rand.go
new file mode 100644
index 0000000000..690993de05
--- /dev/null
+++ b/internal/testrand/rand.go
@@ -0,0 +1,53 @@
+// Copyright 2023 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package testrand
+
+import (
+ crand "crypto/rand"
+ "encoding/binary"
+ mrand "math/rand"
+
+ "github.com/ethereum/go-ethereum/common"
+)
+
+// prng is a pseudo random number generator seeded by strong randomness.
+// The randomness is printed on startup in order to make failures reproducible.
+var prng = initRand()
+
+func initRand() *mrand.Rand {
+ var seed [8]byte
+ crand.Read(seed[:])
+ rnd := mrand.New(mrand.NewSource(int64(binary.LittleEndian.Uint64(seed[:]))))
+ return rnd
+}
+
+// Bytes generates a random byte slice with specified length.
+func Bytes(n int) []byte {
+ r := make([]byte, n)
+ prng.Read(r)
+ return r
+}
+
+// Hash generates a random hash.
+func Hash() common.Hash {
+ return common.BytesToHash(Bytes(common.HashLength))
+}
+
+// Address generates a random address.
+func Address() common.Address {
+ return common.BytesToAddress(Bytes(common.AddressLength))
+}
diff --git a/params/version.go b/params/version.go
index bb6f38e9e6..86adc36db7 100644
--- a/params/version.go
+++ b/params/version.go
@@ -26,7 +26,7 @@ import (
const (
VersionMajor = 1 // Major version component of the current release
VersionMinor = 13 // Minor version component of the current release
- VersionPatch = 14 // Patch version component of the current release
+ VersionPatch = 15 // Patch version component of the current release
VersionMeta = "stable" // Version metadata to append to the version string
)
diff --git a/trie/stacktrie.go b/trie/stacktrie.go
index f2f5355c49..9c574db0bf 100644
--- a/trie/stacktrie.go
+++ b/trie/stacktrie.go
@@ -23,8 +23,6 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
- "github.com/ethereum/go-ethereum/log"
- "github.com/ethereum/go-ethereum/metrics"
)
var (
@@ -32,62 +30,32 @@ var (
_ = types.TrieHasher((*StackTrie)(nil))
)
-// StackTrieOptions contains the configured options for manipulating the stackTrie.
-type StackTrieOptions struct {
- Writer func(path []byte, hash common.Hash, blob []byte) // The function to commit the dirty nodes
- Cleaner func(path []byte) // The function to clean up dangling nodes
-
- SkipLeftBoundary bool // Flag whether the nodes on the left boundary are skipped for committing
- SkipRightBoundary bool // Flag whether the nodes on the right boundary are skipped for committing
- boundaryGauge metrics.Gauge // Gauge to track how many boundary nodes are met
-}
-
-// NewStackTrieOptions initializes an empty options for stackTrie.
-func NewStackTrieOptions() *StackTrieOptions { return &StackTrieOptions{} }
-
-// WithWriter configures trie node writer within the options.
-func (o *StackTrieOptions) WithWriter(writer func(path []byte, hash common.Hash, blob []byte)) *StackTrieOptions {
- o.Writer = writer
- return o
-}
-
-// WithCleaner configures the cleaner in the option for removing dangling nodes.
-func (o *StackTrieOptions) WithCleaner(cleaner func(path []byte)) *StackTrieOptions {
- o.Cleaner = cleaner
- return o
-}
-
-// WithSkipBoundary configures whether the left and right boundary nodes are
-// filtered for committing, along with a gauge metrics to track how many
-// boundary nodes are met.
-func (o *StackTrieOptions) WithSkipBoundary(skipLeft, skipRight bool, gauge metrics.Gauge) *StackTrieOptions {
- o.SkipLeftBoundary = skipLeft
- o.SkipRightBoundary = skipRight
- o.boundaryGauge = gauge
- return o
-}
+// OnTrieNode is a callback method invoked when a trie node is committed
+// by the stack trie. The node is only committed if it's considered complete.
+//
+// The caller should not modify the contents of the returned path and blob
+// slice, and their contents may be changed after the call. It is up to the
+// `onTrieNode` receiver function to deep-copy the data if it wants to retain
+// it after the call ends.
+type OnTrieNode func(path []byte, hash common.Hash, blob []byte)
// StackTrie is a trie implementation that expects keys to be inserted
// in order. Once it determines that a subtree will no longer be inserted
// into, it will hash it and free up the memory it uses.
type StackTrie struct {
- options *StackTrieOptions
- root *stNode
- h *hasher
-
- first []byte // The (hex-encoded without terminator) key of first inserted entry, tracked as left boundary.
- last []byte // The (hex-encoded without terminator) key of last inserted entry, tracked as right boundary.
+ root *stNode
+ h *hasher
+ last []byte
+ onTrieNode OnTrieNode
}
-// NewStackTrie allocates and initializes an empty trie.
-func NewStackTrie(options *StackTrieOptions) *StackTrie {
- if options == nil {
- options = NewStackTrieOptions()
- }
+// NewStackTrie allocates and initializes an empty trie. The committed nodes
+// will be discarded immediately if no callback is configured.
+func NewStackTrie(onTrieNode OnTrieNode) *StackTrie {
return &StackTrie{
- options: options,
- root: stPool.Get().(*stNode),
- h: newHasher(false),
+ root: stPool.Get().(*stNode),
+ h: newHasher(false),
+ onTrieNode: onTrieNode,
}
}
@@ -101,10 +69,6 @@ func (t *StackTrie) Update(key, value []byte) error {
if bytes.Compare(t.last, k) >= 0 {
return errors.New("non-ascending key order")
}
- // track the first and last inserted entries.
- if t.first == nil {
- t.first = append([]byte{}, k...)
- }
if t.last == nil {
t.last = append([]byte{}, k...) // allocate key slice
} else {
@@ -114,19 +78,9 @@ func (t *StackTrie) Update(key, value []byte) error {
return nil
}
-// MustUpdate is a wrapper of Update and will omit any encountered error but
-// just print out an error message.
-func (t *StackTrie) MustUpdate(key, value []byte) {
- if err := t.Update(key, value); err != nil {
- log.Error("Unhandled trie error in StackTrie.Update", "err", err)
- }
-}
-
// Reset resets the stack trie object to empty state.
func (t *StackTrie) Reset() {
- t.options = NewStackTrieOptions()
t.root = stPool.Get().(*stNode)
- t.first = nil
t.last = nil
}
@@ -346,10 +300,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
//
// This method also sets 'st.type' to hashedNode, and clears 'st.key'.
func (t *StackTrie) hash(st *stNode, path []byte) {
- var (
- blob []byte // RLP-encoded node blob
- internal [][]byte // List of node paths covered by the extension node
- )
+ var blob []byte // RLP-encoded node blob
switch st.typ {
case hashedNode:
return
@@ -384,15 +335,6 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
// recursively hash and commit child as the first step
t.hash(st.children[0], append(path, st.key...))
- // Collect the path of internal nodes between shortNode and its **in disk**
- // child. This is essential in the case of path mode scheme to avoid leaving
- // danging nodes within the range of this internal path on disk, which would
- // break the guarantee for state healing.
- if len(st.children[0].val) >= 32 && t.options.Cleaner != nil {
- for i := 1; i < len(st.key); i++ {
- internal = append(internal, append(path, st.key[:i]...))
- }
- }
// encode the extension node
n := shortNode{Key: hexToCompactInPlace(st.key)}
if len(st.children[0].val) < 32 {
@@ -416,11 +358,12 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
default:
panic("invalid node type")
}
-
+ // Convert the node type to hashNode and reset the key slice.
st.typ = hashedNode
st.key = st.key[:0]
- // Skip committing the non-root node if the size is smaller than 32 bytes.
+ // Skip committing the non-root node if the size is smaller than 32 bytes
+ // as tiny nodes are always embedded in their parent except root node.
if len(blob) < 32 && len(path) > 0 {
st.val = common.CopyBytes(blob)
return
@@ -429,51 +372,20 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
// input values.
st.val = t.h.hashData(blob)
- // Short circuit if the stack trie is not configured for writing.
- if t.options.Writer == nil {
- return
+ // Invoke the callback it's provided. Notably, the path and blob slices are
+ // volatile, please deep-copy the slices in callback if the contents need
+ // to be retained.
+ if t.onTrieNode != nil {
+ t.onTrieNode(path, common.BytesToHash(st.val), blob)
}
- // Skip committing if the node is on the left boundary and stackTrie is
- // configured to filter the boundary.
- if t.options.SkipLeftBoundary && bytes.HasPrefix(t.first, path) {
- if t.options.boundaryGauge != nil {
- t.options.boundaryGauge.Inc(1)
- }
- return
- }
- // Skip committing if the node is on the right boundary and stackTrie is
- // configured to filter the boundary.
- if t.options.SkipRightBoundary && bytes.HasPrefix(t.last, path) {
- if t.options.boundaryGauge != nil {
- t.options.boundaryGauge.Inc(1)
- }
- return
- }
- // Clean up the internal dangling nodes covered by the extension node.
- // This should be done before writing the node to adhere to the committing
- // order from bottom to top.
- for _, path := range internal {
- t.options.Cleaner(path)
- }
- t.options.Writer(path, common.BytesToHash(st.val), blob)
}
// Hash will firstly hash the entire trie if it's still not hashed and then commit
-// all nodes to the associated database. Actually most of the trie nodes have been
-// committed already. The main purpose here is to commit the nodes on right boundary.
-//
-// For stack trie, Hash and Commit are functionally identical.
+// all leftover nodes to the associated database. Actually most of the trie nodes
+// have been committed already. The main purpose here is to commit the nodes on
+// right boundary.
func (t *StackTrie) Hash() common.Hash {
n := t.root
t.hash(n, nil)
return common.BytesToHash(n.val)
}
-
-// Commit will firstly hash the entire trie if it's still not hashed and then commit
-// all nodes to the associated database. Actually most of the trie nodes have been
-// committed already. The main purpose here is to commit the nodes on right boundary.
-//
-// For stack trie, Hash and Commit are functionally identical.
-func (t *StackTrie) Commit() common.Hash {
- return t.Hash()
-}
diff --git a/trie/stacktrie_fuzzer_test.go b/trie/stacktrie_fuzzer_test.go
index 50b5c4de52..c8e568355c 100644
--- a/trie/stacktrie_fuzzer_test.go
+++ b/trie/stacktrie_fuzzer_test.go
@@ -46,11 +46,9 @@ func fuzz(data []byte, debugging bool) {
trieA = NewEmpty(dbA)
spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
dbB = newTestDatabase(rawdb.NewDatabase(spongeB), rawdb.HashScheme)
-
- options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ trieB = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
rawdb.WriteTrieNode(spongeB, common.Hash{}, path, hash, blob, dbB.Scheme())
})
- trieB = NewStackTrie(options)
vals []*kv
maxElements = 10000
// operate on unique keys only
@@ -99,10 +97,9 @@ func fuzz(data []byte, debugging bool) {
if debugging {
fmt.Printf("{\"%#x\" , \"%#x\"} // stacktrie.Update\n", kv.k, kv.v)
}
- trieB.MustUpdate(kv.k, kv.v)
+ trieB.Update(kv.k, kv.v)
}
rootB := trieB.Hash()
- trieB.Commit()
if rootA != rootB {
panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB))
}
@@ -114,20 +111,19 @@ func fuzz(data []byte, debugging bool) {
// Ensure all the nodes are persisted correctly
var (
- nodeset = make(map[string][]byte) // path -> blob
- optionsC = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ nodeset = make(map[string][]byte) // path -> blob
+ trieC = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
if crypto.Keccak256Hash(blob) != hash {
panic("invalid node blob")
}
nodeset[string(path)] = common.CopyBytes(blob)
})
- trieC = NewStackTrie(optionsC)
checked int
)
for _, kv := range vals {
- trieC.MustUpdate(kv.k, kv.v)
+ trieC.Update(kv.k, kv.v)
}
- rootC := trieC.Commit()
+ rootC := trieC.Hash()
if rootA != rootC {
panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootC))
}
diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go
index 3a0e1cb260..f053b5112d 100644
--- a/trie/stacktrie_test.go
+++ b/trie/stacktrie_test.go
@@ -19,15 +19,12 @@ package trie
import (
"bytes"
"math/big"
- "math/rand"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/crypto"
- "github.com/ethereum/go-ethereum/trie/testutil"
"github.com/stretchr/testify/assert"
- "golang.org/x/exp/slices"
)
func TestStackTrieInsertAndHash(t *testing.T) {
@@ -381,90 +378,6 @@ func TestStacktrieNotModifyValues(t *testing.T) {
}
}
-func buildPartialTree(entries []*kv, t *testing.T) map[string]common.Hash {
- var (
- options = NewStackTrieOptions()
- nodes = make(map[string]common.Hash)
- )
- var (
- first int
- last = len(entries) - 1
-
- noLeft bool
- noRight bool
- )
- // Enter split mode if there are at least two elements
- if rand.Intn(5) != 0 {
- for {
- first = rand.Intn(len(entries))
- last = rand.Intn(len(entries))
- if first <= last {
- break
- }
- }
- if first != 0 {
- noLeft = true
- }
- if last != len(entries)-1 {
- noRight = true
- }
- }
- options = options.WithSkipBoundary(noLeft, noRight, nil)
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- nodes[string(path)] = hash
- })
- tr := NewStackTrie(options)
-
- for i := first; i <= last; i++ {
- tr.MustUpdate(entries[i].k, entries[i].v)
- }
- tr.Commit()
- return nodes
-}
-
-func TestPartialStackTrie(t *testing.T) {
- for round := 0; round < 100; round++ {
- var (
- n = rand.Intn(100) + 1
- entries []*kv
- )
- for i := 0; i < n; i++ {
- var val []byte
- if rand.Intn(3) == 0 {
- val = testutil.RandBytes(3)
- } else {
- val = testutil.RandBytes(32)
- }
- entries = append(entries, &kv{
- k: testutil.RandBytes(32),
- v: val,
- })
- }
- slices.SortFunc(entries, (*kv).cmp)
-
- var (
- nodes = make(map[string]common.Hash)
- options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- nodes[string(path)] = hash
- })
- )
- tr := NewStackTrie(options)
-
- for i := 0; i < len(entries); i++ {
- tr.MustUpdate(entries[i].k, entries[i].v)
- }
- tr.Commit()
-
- for j := 0; j < 100; j++ {
- for path, hash := range buildPartialTree(entries, t) {
- if nodes[path] != hash {
- t.Errorf("%v, want %x, got %x", []byte(path), nodes[path], hash)
- }
- }
- }
- }
-}
-
func TestStackTrieErrors(t *testing.T) {
s := NewStackTrie(nil)
// Deletion
diff --git a/trie/trie_test.go b/trie/trie_test.go
index 379a866f7e..c141c52078 100644
--- a/trie/trie_test.go
+++ b/trie/trie_test.go
@@ -963,11 +963,9 @@ func TestCommitSequenceStackTrie(t *testing.T) {
id: "b",
values: make(map[string]string),
}
- options := NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme())
})
- stTrie := NewStackTrie(options)
// Fill the trie with elements
for i := 0; i < count; i++ {
@@ -993,7 +991,7 @@ func TestCommitSequenceStackTrie(t *testing.T) {
s.Flush()
// And flush stacktrie -> disk
- stRoot := stTrie.Commit()
+ stRoot := stTrie.Hash()
if stRoot != root {
t.Fatalf("root wrong, got %x exp %x", stRoot, root)
}
@@ -1034,12 +1032,9 @@ func TestCommitSequenceSmallRoot(t *testing.T) {
id: "b",
values: make(map[string]string),
}
- options := NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme())
})
- stTrie := NewStackTrie(options)
-
// Add a single small-element to the trie(s)
key := make([]byte, 5)
key[0] = 1
@@ -1053,7 +1048,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) {
db.Commit(root)
// And flush stacktrie -> disk
- stRoot := stTrie.Commit()
+ stRoot := stTrie.Hash()
if stRoot != root {
t.Fatalf("root wrong, got %x exp %x", stRoot, root)
}