diff --git a/core/rawdb/accessors_chain.go b/core/rawdb/accessors_chain.go index 6a1a2eae8f..2b201f3290 100644 --- a/core/rawdb/accessors_chain.go +++ b/core/rawdb/accessors_chain.go @@ -316,8 +316,8 @@ func ReadHeaderRange(db ethdb.Reader, number uint64, count uint64) []rlp.RawValu if count == 0 { return rlpHeaders } - // read remaining from ancients - data, err := db.AncientRange(ChainFreezerHeaderTable, i+1-count, count, 0) + // read remaining from ancients, cap at 2M + data, err := db.AncientRange(ChainFreezerHeaderTable, i+1-count, count, 2*1024*1024) if err != nil { log.Error("Failed to read headers from freezer", "err", err) return rlpHeaders diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go index 681be7ebc0..8a0fd1989a 100644 --- a/core/state/snapshot/conversion.go +++ b/core/state/snapshot/conversion.go @@ -362,15 +362,15 @@ func generateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, accou } func stackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan trieKV, out chan common.Hash) { - options := trie.NewStackTrieOptions() + var onTrieNode trie.OnTrieNode if db != nil { - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + onTrieNode = func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme) - }) + } } - t := trie.NewStackTrie(options) + t := trie.NewStackTrie(onTrieNode) for leaf := range in { t.Update(leaf.key[:], leaf.value) } - out <- t.Commit() + out <- t.Hash() } diff --git a/core/state/statedb.go b/core/state/statedb.go index e9328d0aef..9fc529b86b 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -961,12 +961,10 @@ func (s *StateDB) fastDeleteStorage(addrHash common.Hash, root common.Hash) (boo nodes = trienode.NewNodeSet(addrHash) slots = make(map[common.Hash][]byte) ) - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { nodes.AddNode(path, trienode.NewDeleted()) size += common.StorageSize(len(path)) }) - stack := trie.NewStackTrie(options) for iter.Next() { if size > storageDeleteLimit { return true, size, nil, nil, nil diff --git a/eth/filters/api.go b/eth/filters/api.go index 8cf701ec57..173e40c972 100644 --- a/eth/filters/api.go +++ b/eth/filters/api.go @@ -43,6 +43,9 @@ var ( // The maximum number of topic criteria allowed, vm.LOG4 - vm.LOG0 const maxTopics = 4 +// The maximum number of allowed topics within a topic criteria +const maxSubTopics = 1000 + // filter is a helper struct that holds meta information over the filter type // and associated subscription in the event system. type filter struct { @@ -545,6 +548,9 @@ func (args *FilterCriteria) UnmarshalJSON(data []byte) error { return errors.New("invalid addresses in query") } } + if len(raw.Topics) > maxTopics { + return errExceedMaxTopics + } // topics is an array consisting of strings and/or arrays of strings. // JSON null values are converted to common.Hash{} and ignored by the filter manager. @@ -565,6 +571,9 @@ func (args *FilterCriteria) UnmarshalJSON(data []byte) error { case []interface{}: // or case e.g. [null, "topic0", "topic1"] + if len(topic) > maxSubTopics { + return errExceedMaxTopics + } for _, rawTopic := range topic { if rawTopic == nil { // null component, match all diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go new file mode 100644 index 0000000000..8ef1a00753 --- /dev/null +++ b/eth/protocols/snap/gentrie.go @@ -0,0 +1,287 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie" +) + +// genTrie interface is used by the snap syncer to generate merkle tree nodes +// based on a received batch of states. +type genTrie interface { + // update inserts the state item into generator trie. + update(key, value []byte) error + + // commit flushes the right boundary nodes if complete flag is true. This + // function must be called before flushing the associated database batch. + commit(complete bool) common.Hash +} + +// pathTrie is a wrapper over the stackTrie, incorporating numerous additional +// logics to handle the semi-completed trie and potential leftover dangling +// nodes in the database. It is utilized for constructing the merkle tree nodes +// in path mode during the snap sync process. +type pathTrie struct { + owner common.Hash // identifier of trie owner, empty for account trie + tr *trie.StackTrie // underlying raw stack trie + first []byte // the path of first committed node by stackTrie + last []byte // the path of last committed node by stackTrie + + // This flag indicates whether nodes on the left boundary are skipped for + // committing. If set, the left boundary nodes are considered incomplete + // due to potentially missing left children. + skipLeftBoundary bool + db ethdb.KeyValueReader + batch ethdb.Batch +} + +// newPathTrie initializes the path trie. +func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie { + tr := &pathTrie{ + owner: owner, + skipLeftBoundary: skipLeftBoundary, + db: db, + batch: batch, + } + tr.tr = trie.NewStackTrie(tr.onTrieNode) + return tr +} + +// onTrieNode is invoked whenever a new node is committed by the stackTrie. +// +// As the committed nodes might be incomplete if they are on the boundaries +// (left or right), this function has the ability to detect the incomplete +// ones and filter them out for committing. +// +// Additionally, the assumption is made that there may exist leftover dangling +// nodes in the database. This function has the ability to detect the dangling +// nodes that fall within the path space of committed nodes (specifically on +// the path covered by internal extension nodes) and remove them from the +// database. This property ensures that the entire path space is uniquely +// occupied by committed nodes. +// +// Furthermore, all leftover dangling nodes along the path from committed nodes +// to the trie root (left and right boundaries) should be removed as well; +// otherwise, they might potentially disrupt the state healing process. +func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { + // Filter out the nodes on the left boundary if skipLeftBoundary is + // configured. Nodes are considered to be on the left boundary if + // it's the first one to be committed, or the parent/ancestor of the + // first committed node. + if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) { + if t.first == nil { + // Memorize the path of first committed node, which is regarded + // as left boundary. Deep-copy is necessary as the path given + // is volatile. + t.first = append([]byte{}, path...) + + // The left boundary can be uniquely determined by the first committed node + // from stackTrie (e.g., N_1), as the shared path prefix between the first + // two inserted state items is deterministic (the path of N_3). The path + // from trie root towards the first committed node is considered the left + // boundary. The potential leftover dangling nodes on left boundary should + // be cleaned out. + // + // +-----+ + // | N_3 | shared path prefix of state_1 and state_2 + // +-----+ + // /- -\ + // +-----+ +-----+ + // First committed node | N_1 | | N_2 | latest inserted node (contain state_2) + // +-----+ +-----+ + // + // The node with the path of the first committed one (e.g, N_1) is not + // removed because it's a sibling of the nodes we want to commit, not + // the parent or ancestor. + for i := 0; i < len(path); i++ { + t.delete(path[:i], false) + } + } + return + } + // If boundary filtering is not configured, or the node is not on the left + // boundary, commit it to database. + // + // Note: If the current committed node is an extension node, then the nodes + // falling within the path between itself and its standalone (not embedded + // in parent) child should be cleaned out for exclusively occupy the inner + // path. + // + // This is essential in snap sync to avoid leaving dangling nodes within + // this range covered by extension node which could potentially break the + // state healing. + // + // The extension node is detected if its path is the prefix of last committed + // one and path gap is larger than one. If the path gap is only one byte, + // the current node could either be a full node, or a extension with single + // byte key. In either case, no gaps will be left in the path. + if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { + for i := len(path) + 1; i < len(t.last); i++ { + t.delete(t.last[:i], true) + } + } + t.write(path, blob) + + // Update the last flag. Deep-copy is necessary as the provided path is volatile. + if t.last == nil { + t.last = append([]byte{}, path...) + } else { + t.last = append(t.last[:0], path...) + } +} + +// write commits the node write to provided database batch in path mode. +func (t *pathTrie) write(path []byte, blob []byte) { + if t.owner == (common.Hash{}) { + rawdb.WriteAccountTrieNode(t.batch, path, blob) + } else { + rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob) + } +} + +func (t *pathTrie) deleteAccountNode(path []byte, inner bool) { + if inner { + accountInnerLookupGauge.Inc(1) + } else { + accountOuterLookupGauge.Inc(1) + } + if !rawdb.ExistsAccountTrieNode(t.db, path) { + return + } + if inner { + accountInnerDeleteGauge.Inc(1) + } else { + accountOuterDeleteGauge.Inc(1) + } + rawdb.DeleteAccountTrieNode(t.batch, path) +} + +func (t *pathTrie) deleteStorageNode(path []byte, inner bool) { + if inner { + storageInnerLookupGauge.Inc(1) + } else { + storageOuterLookupGauge.Inc(1) + } + if !rawdb.ExistsStorageTrieNode(t.db, t.owner, path) { + return + } + if inner { + storageInnerDeleteGauge.Inc(1) + } else { + storageOuterDeleteGauge.Inc(1) + } + rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) +} + +// delete commits the node deletion to provided database batch in path mode. +func (t *pathTrie) delete(path []byte, inner bool) { + if t.owner == (common.Hash{}) { + t.deleteAccountNode(path, inner) + } else { + t.deleteStorageNode(path, inner) + } +} + +// update implements genTrie interface, inserting a (key, value) pair into the +// stack trie. +func (t *pathTrie) update(key, value []byte) error { + return t.tr.Update(key, value) +} + +// commit implements genTrie interface, flushing the right boundary if it's +// considered as complete. Otherwise, the nodes on the right boundary are +// discarded and cleaned up. +// +// Note, this function must be called before flushing database batch, otherwise, +// dangling nodes might be left in database. +func (t *pathTrie) commit(complete bool) common.Hash { + // If the right boundary is claimed as complete, flush them out. + // The nodes on both left and right boundary will still be filtered + // out if left boundary filtering is configured. + if complete { + // Commit all inserted but not yet committed nodes(on the right + // boundary) in the stackTrie. + hash := t.tr.Hash() + if t.skipLeftBoundary { + return common.Hash{} // hash is meaningless if left side is incomplete + } + return hash + } + // Discard nodes on the right boundary as it's claimed as incomplete. These + // nodes might be incomplete due to missing children on the right side. + // Furthermore, the potential leftover nodes on right boundary should also + // be cleaned out. + // + // The right boundary can be uniquely determined by the last committed node + // from stackTrie (e.g., N_1), as the shared path prefix between the last + // two inserted state items is deterministic (the path of N_3). The path + // from trie root towards the last committed node is considered the right + // boundary (root to N_3). + // + // +-----+ + // | N_3 | shared path prefix of last two states + // +-----+ + // /- -\ + // +-----+ +-----+ + // Last committed node | N_1 | | N_2 | latest inserted node (contain last state) + // +-----+ +-----+ + // + // Another interesting scenario occurs when the trie is committed due to + // too many items being accumulated in the batch. To flush them out to + // the database, the path of the last inserted node (N_2) is temporarily + // treated as an incomplete right boundary, and nodes on this path are + // removed (e.g. from root to N_3). + // However, this path will be reclaimed as an internal path by inserting + // more items after the batch flush. New nodes on this path can be committed + // with no issues as they are actually complete. Also, from a database + // perspective, first deleting and then rewriting is a valid data update. + for i := 0; i < len(t.last); i++ { + t.delete(t.last[:i], false) + } + return common.Hash{} // the hash is meaningless for incomplete commit +} + +// hashTrie is a wrapper over the stackTrie for implementing genTrie interface. +type hashTrie struct { + tr *trie.StackTrie +} + +// newHashTrie initializes the hash trie. +func newHashTrie(batch ethdb.Batch) *hashTrie { + return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteLegacyTrieNode(batch, hash, blob) + })} +} + +// update implements genTrie interface, inserting a (key, value) pair into +// the stack trie. +func (t *hashTrie) update(key, value []byte) error { + return t.tr.Update(key, value) +} + +// commit implements genTrie interface, committing the nodes on right boundary. +func (t *hashTrie) commit(complete bool) common.Hash { + if !complete { + return common.Hash{} // the hash is meaningless for incomplete commit + } + return t.tr.Hash() // return hash only if it's claimed as complete +} diff --git a/eth/protocols/snap/gentrie_test.go b/eth/protocols/snap/gentrie_test.go new file mode 100644 index 0000000000..1fb2dbce75 --- /dev/null +++ b/eth/protocols/snap/gentrie_test.go @@ -0,0 +1,553 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "bytes" + "math/rand" + "slices" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/internal/testrand" + "github.com/ethereum/go-ethereum/trie" +) + +type replayer struct { + paths []string // sort in fifo order + hashes []common.Hash // empty for deletion + unknowns int // counter for unknown write +} + +func newBatchReplay() *replayer { + return &replayer{} +} + +func (r *replayer) decode(key []byte, value []byte) { + account := rawdb.IsAccountTrieNode(key) + storage := rawdb.IsStorageTrieNode(key) + if !account && !storage { + r.unknowns += 1 + return + } + var path []byte + if account { + _, path = rawdb.ResolveAccountTrieNodeKey(key) + } else { + _, owner, inner := rawdb.ResolveStorageTrieNode(key) + path = append(owner.Bytes(), inner...) + } + r.paths = append(r.paths, string(path)) + + if len(value) == 0 { + r.hashes = append(r.hashes, common.Hash{}) + } else { + r.hashes = append(r.hashes, crypto.Keccak256Hash(value)) + } +} + +// updates returns a set of effective mutations. Multiple mutations targeting +// the same node path will be merged in FIFO order. +func (r *replayer) modifies() map[string]common.Hash { + set := make(map[string]common.Hash) + for i, path := range r.paths { + set[path] = r.hashes[i] + } + return set +} + +// updates returns the number of updates. +func (r *replayer) updates() int { + var count int + for _, hash := range r.modifies() { + if hash == (common.Hash{}) { + continue + } + count++ + } + return count +} + +// Put inserts the given value into the key-value data store. +func (r *replayer) Put(key []byte, value []byte) error { + r.decode(key, value) + return nil +} + +// Delete removes the key from the key-value data store. +func (r *replayer) Delete(key []byte) error { + r.decode(key, nil) + return nil +} + +func byteToHex(str []byte) []byte { + l := len(str) * 2 + var nibbles = make([]byte, l) + for i, b := range str { + nibbles[i*2] = b / 16 + nibbles[i*2+1] = b % 16 + } + return nibbles +} + +// innerNodes returns the internal nodes narrowed by two boundaries along with +// the leftmost and rightmost sub-trie roots. +func innerNodes(first, last []byte, includeLeft, includeRight bool, nodes map[string]common.Hash, t *testing.T) (map[string]common.Hash, []byte, []byte) { + var ( + leftRoot []byte + rightRoot []byte + firstHex = byteToHex(first) + lastHex = byteToHex(last) + inner = make(map[string]common.Hash) + ) + for path, hash := range nodes { + if hash == (common.Hash{}) { + t.Fatalf("Unexpected deletion, %v", []byte(path)) + } + // Filter out the siblings on the left side or the left boundary nodes. + if !includeLeft && (bytes.Compare(firstHex, []byte(path)) > 0 || bytes.HasPrefix(firstHex, []byte(path))) { + continue + } + // Filter out the siblings on the right side or the right boundary nodes. + if !includeRight && (bytes.Compare(lastHex, []byte(path)) < 0 || bytes.HasPrefix(lastHex, []byte(path))) { + continue + } + inner[path] = hash + + // Track the path of the leftmost sub trie root + if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 { + leftRoot = []byte(path) + } + // Track the path of the rightmost sub trie root + if rightRoot == nil || + (bytes.Compare(rightRoot, []byte(path)) < 0) || + (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) { + rightRoot = []byte(path) + } + } + return inner, leftRoot, rightRoot +} + +func buildPartial(owner common.Hash, db ethdb.KeyValueReader, batch ethdb.Batch, entries []*kv, first, last int) *replayer { + tr := newPathTrie(owner, first != 0, db, batch) + for i := first; i <= last; i++ { + tr.update(entries[i].k, entries[i].v) + } + tr.commit(last == len(entries)-1) + + replay := newBatchReplay() + batch.Replay(replay) + + return replay +} + +// TestPartialGentree verifies if the trie constructed with partial states can +// generate consistent trie nodes that match those of the full trie. +func TestPartialGentree(t *testing.T) { + for round := 0; round < 100; round++ { + var ( + n = rand.Intn(1024) + 10 + entries []*kv + ) + for i := 0; i < n; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + check := func(first, last int) { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + ) + // Build the partial tree with specific boundaries + r := buildPartial(common.Hash{}, db, batch, entries, first, last) + if r.unknowns > 0 { + t.Fatalf("Unknown database write: %d", r.unknowns) + } + + // Ensure all the internal nodes are produced + var ( + set = r.modifies() + inner, _, _ = innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t) + ) + for path, hash := range inner { + if _, ok := set[path]; !ok { + t.Fatalf("Missing nodes %v", []byte(path)) + } + if hash != set[path] { + t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path]) + } + } + if r.updates() != len(inner) { + t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates()) + } + } + for j := 0; j < 100; j++ { + var ( + first int + last int + ) + for { + first = rand.Intn(len(entries)) + last = rand.Intn(len(entries)) + if first <= last { + break + } + } + check(first, last) + } + var cases = []struct { + first int + last int + }{ + {0, len(entries) - 1}, // full + {1, len(entries) - 1}, // no left + {2, len(entries) - 1}, // no left + {2, len(entries) - 2}, // no left and right + {2, len(entries) - 2}, // no left and right + {len(entries) / 2, len(entries) / 2}, // single + {0, 0}, // single first + {len(entries) - 1, len(entries) - 1}, // single last + } + for _, c := range cases { + check(c.first, c.last) + } + } +} + +// TestGentreeDanglingClearing tests if the dangling nodes falling within the +// path space of constructed tree can be correctly removed. +func TestGentreeDanglingClearing(t *testing.T) { + for round := 0; round < 100; round++ { + var ( + n = rand.Intn(1024) + 10 + entries []*kv + ) + for i := 0; i < n; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + check := func(first, last int) { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + ) + // Write the junk nodes as the dangling + var injects []string + for path := range nodes { + for i := 0; i < len(path); i++ { + _, ok := nodes[path[:i]] + if ok { + continue + } + injects = append(injects, path[:i]) + } + } + if len(injects) == 0 { + return + } + for _, path := range injects { + rawdb.WriteAccountTrieNode(db, []byte(path), testrand.Bytes(32)) + } + + // Build the partial tree with specific range + replay := buildPartial(common.Hash{}, db, batch, entries, first, last) + if replay.unknowns > 0 { + t.Fatalf("Unknown database write: %d", replay.unknowns) + } + set := replay.modifies() + + // Make sure the injected junks falling within the path space of + // committed trie nodes are correctly deleted. + _, leftRoot, rightRoot := innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t) + for _, path := range injects { + if bytes.Compare([]byte(path), leftRoot) < 0 && !bytes.HasPrefix(leftRoot, []byte(path)) { + continue + } + if bytes.Compare([]byte(path), rightRoot) > 0 { + continue + } + if hash, ok := set[path]; !ok || hash != (common.Hash{}) { + t.Fatalf("Missing delete, %v", []byte(path)) + } + } + } + for j := 0; j < 100; j++ { + var ( + first int + last int + ) + for { + first = rand.Intn(len(entries)) + last = rand.Intn(len(entries)) + if first <= last { + break + } + } + check(first, last) + } + var cases = []struct { + first int + last int + }{ + {0, len(entries) - 1}, // full + {1, len(entries) - 1}, // no left + {2, len(entries) - 1}, // no left + {2, len(entries) - 2}, // no left and right + {2, len(entries) - 2}, // no left and right + {len(entries) / 2, len(entries) / 2}, // single + {0, 0}, // single first + {len(entries) - 1, len(entries) - 1}, // single last + } + for _, c := range cases { + check(c.first, c.last) + } + } +} + +// TestFlushPartialTree tests the gentrie can produce complete inner trie nodes +// even with lots of batch flushes. +func TestFlushPartialTree(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + var cases = []struct { + first int + last int + }{ + {0, len(entries) - 1}, // full + {1, len(entries) - 1}, // no left + {10, len(entries) - 1}, // no left + {10, len(entries) - 2}, // no left and right + {10, len(entries) - 10}, // no left and right + {11, 11}, // single + {0, 0}, // single first + {len(entries) - 1, len(entries) - 1}, // single last + } + for _, c := range cases { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + combined = db.NewBatch() + ) + inner, _, _ := innerNodes(entries[c.first].k, entries[c.last].k, c.first == 0, c.last == len(entries)-1, nodes, t) + + tr := newPathTrie(common.Hash{}, c.first != 0, db, batch) + for i := c.first; i <= c.last; i++ { + tr.update(entries[i].k, entries[i].v) + if rand.Intn(2) == 0 { + tr.commit(false) + + batch.Replay(combined) + batch.Write() + batch.Reset() + } + } + tr.commit(c.last == len(entries)-1) + + batch.Replay(combined) + batch.Write() + batch.Reset() + + r := newBatchReplay() + combined.Replay(r) + + // Ensure all the internal nodes are produced + set := r.modifies() + for path, hash := range inner { + if _, ok := set[path]; !ok { + t.Fatalf("Missing nodes %v", []byte(path)) + } + if hash != set[path] { + t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path]) + } + } + if r.updates() != len(inner) { + t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates()) + } + } +} + +// TestBoundSplit ensures two consecutive trie chunks are not overlapped with +// each other. +func TestBoundSplit(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + for j := 0; j < 100; j++ { + var ( + next int + last int + db = rawdb.NewMemoryDatabase() + + lastRightRoot []byte + ) + for { + if next == len(entries) { + break + } + last = rand.Intn(len(entries)-next) + next + + r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last) + set := r.modifies() + + // Skip if the chunk is zero-size + if r.updates() == 0 { + next = last + 1 + continue + } + + // Ensure the updates in two consecutive chunks are not overlapped. + // The only overlapping part should be deletion. + if lastRightRoot != nil && len(set) > 0 { + // Derive the path of left-most node in this chunk + var leftRoot []byte + for path, hash := range r.modifies() { + if hash == (common.Hash{}) { + t.Fatalf("Unexpected deletion %v", []byte(path)) + } + if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 { + leftRoot = []byte(path) + } + } + if bytes.HasPrefix(lastRightRoot, leftRoot) || bytes.HasPrefix(leftRoot, lastRightRoot) { + t.Fatalf("Two chunks are not correctly separated, lastRight: %v, left: %v", lastRightRoot, leftRoot) + } + } + + // Track the updates as the last chunk + var rightRoot []byte + for path := range set { + if rightRoot == nil || + (bytes.Compare(rightRoot, []byte(path)) < 0) || + (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) { + rightRoot = []byte(path) + } + } + lastRightRoot = rightRoot + next = last + 1 + } + } +} + +// TestTinyPartialTree tests if the partial tree is too tiny(has less than two +// states), then nothing should be committed. +func TestTinyPartialTree(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + for i := 0; i < len(entries); i++ { + next := i + last := i + 1 + if last >= len(entries) { + last = len(entries) - 1 + } + db := rawdb.NewMemoryDatabase() + r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last) + + if next != 0 && last != len(entries)-1 { + if r.updates() != 0 { + t.Fatalf("Unexpected data writes, got: %d", r.updates()) + } + } + } +} diff --git a/eth/protocols/snap/metrics.go b/eth/protocols/snap/metrics.go index a7d071953f..25dbcc6386 100644 --- a/eth/protocols/snap/metrics.go +++ b/eth/protocols/snap/metrics.go @@ -27,21 +27,28 @@ var ( IngressRegistrationErrorMeter = metrics.NewRegisteredMeter(ingressRegistrationErrorName, nil) EgressRegistrationErrorMeter = metrics.NewRegisteredMeter(egressRegistrationErrorName, nil) - // deletionGauge is the metric to track how many trie node deletions - // are performed in total during the sync process. - deletionGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete", nil) + // accountInnerDeleteGauge is the metric to track how many dangling trie nodes + // covered by extension node in account trie are deleted during the sync. + accountInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/inner", nil) - // lookupGauge is the metric to track how many trie node lookups are - // performed to determine if node needs to be deleted. - lookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/lookup", nil) + // storageInnerDeleteGauge is the metric to track how many dangling trie nodes + // covered by extension node in storage trie are deleted during the sync. + storageInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/inner", nil) + + // accountOuterDeleteGauge is the metric to track how many dangling trie nodes + // above the committed nodes in account trie are deleted during the sync. + accountOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/outer", nil) - // boundaryAccountNodesGauge is the metric to track how many boundary trie - // nodes in account trie are met. - boundaryAccountNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/account", nil) + // storageOuterDeleteGauge is the metric to track how many dangling trie nodes + // above the committed nodes in storage trie are deleted during the sync. + storageOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/outer", nil) - // boundaryAccountNodesGauge is the metric to track how many boundary trie - // nodes in storage tries are met. - boundaryStorageNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/storage", nil) + // lookupGauge is the metric to track how many trie node lookups are + // performed to determine if node needs to be deleted. + accountInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/inner", nil) + accountOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/outer", nil) + storageInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/inner", nil) + storageOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/outer", nil) // smallStorageGauge is the metric to track how many storages are small enough // to retrieved in one or two request. @@ -54,4 +61,9 @@ var ( // skipStorageHealingGauge is the metric to track how many storages are retrieved // in multiple requests but healing is not necessary. skipStorageHealingGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/noheal", nil) + + // largeStorageDiscardGauge is the metric to track how many chunked storages are + // discarded during the snap sync. + largeStorageDiscardGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/discard", nil) + largeStorageResumedGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/resume", nil) ) diff --git a/eth/protocols/snap/progress_test.go b/eth/protocols/snap/progress_test.go new file mode 100644 index 0000000000..9d923bd2f5 --- /dev/null +++ b/eth/protocols/snap/progress_test.go @@ -0,0 +1,154 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "encoding/json" + "testing" + + "github.com/ethereum/go-ethereum/common" +) + +// Legacy sync progress definitions +type legacyStorageTask struct { + Next common.Hash // Next account to sync in this interval + Last common.Hash // Last account to sync in this interval +} + +type legacyAccountTask struct { + Next common.Hash // Next account to sync in this interval + Last common.Hash // Last account to sync in this interval + SubTasks map[common.Hash][]*legacyStorageTask // Storage intervals needing fetching for large contracts +} + +type legacyProgress struct { + Tasks []*legacyAccountTask // The suspended account tasks (contract tasks within) +} + +func compareProgress(a legacyProgress, b SyncProgress) bool { + if len(a.Tasks) != len(b.Tasks) { + return false + } + for i := 0; i < len(a.Tasks); i++ { + if a.Tasks[i].Next != b.Tasks[i].Next { + return false + } + if a.Tasks[i].Last != b.Tasks[i].Last { + return false + } + // new fields are not checked here + + if len(a.Tasks[i].SubTasks) != len(b.Tasks[i].SubTasks) { + return false + } + for addrHash, subTasksA := range a.Tasks[i].SubTasks { + subTasksB, ok := b.Tasks[i].SubTasks[addrHash] + if !ok || len(subTasksB) != len(subTasksA) { + return false + } + for j := 0; j < len(subTasksA); j++ { + if subTasksA[j].Next != subTasksB[j].Next { + return false + } + if subTasksA[j].Last != subTasksB[j].Last { + return false + } + } + } + } + return true +} + +func makeLegacyProgress() legacyProgress { + return legacyProgress{ + Tasks: []*legacyAccountTask{ + { + Next: common.Hash{}, + Last: common.Hash{0x77}, + SubTasks: map[common.Hash][]*legacyStorageTask{ + common.Hash{0x1}: { + { + Next: common.Hash{}, + Last: common.Hash{0xff}, + }, + }, + }, + }, + { + Next: common.Hash{0x88}, + Last: common.Hash{0xff}, + }, + }, + } +} + +func convertLegacy(legacy legacyProgress) SyncProgress { + var progress SyncProgress + for i, task := range legacy.Tasks { + subTasks := make(map[common.Hash][]*storageTask) + for owner, list := range task.SubTasks { + var cpy []*storageTask + for i := 0; i < len(list); i++ { + cpy = append(cpy, &storageTask{ + Next: list[i].Next, + Last: list[i].Last, + }) + } + subTasks[owner] = cpy + } + accountTask := &accountTask{ + Next: task.Next, + Last: task.Last, + SubTasks: subTasks, + } + if i == 0 { + accountTask.StorageCompleted = []common.Hash{{0xaa}, {0xbb}} // fulfill new fields + } + progress.Tasks = append(progress.Tasks, accountTask) + } + return progress +} + +func TestSyncProgressCompatibility(t *testing.T) { + // Decode serialized bytes of legacy progress, backward compatibility + legacy := makeLegacyProgress() + blob, err := json.Marshal(legacy) + if err != nil { + t.Fatalf("Failed to marshal progress %v", err) + } + var dec SyncProgress + if err := json.Unmarshal(blob, &dec); err != nil { + t.Fatalf("Failed to unmarshal progress %v", err) + } + if !compareProgress(legacy, dec) { + t.Fatal("sync progress is not backward compatible") + } + + // Decode serialized bytes of new format progress + progress := convertLegacy(legacy) + blob, err = json.Marshal(progress) + if err != nil { + t.Fatalf("Failed to marshal progress %v", err) + } + var legacyDec legacyProgress + if err := json.Unmarshal(blob, &legacyDec); err != nil { + t.Fatalf("Failed to unmarshal progress %v", err) + } + if !compareProgress(legacyDec, progress) { + t.Fatal("sync progress is not forward compatible") + } +} diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 887a50775d..208d3ba3bc 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -94,6 +94,9 @@ const ( // trienodeHealThrottleDecrease is the divisor for the throttle when the // rate of arriving data is lower than the rate of processing it. trienodeHealThrottleDecrease = 1.25 + + // batchSizeThreshold is the maximum size allowed for gentrie batch. + batchSizeThreshold = 8 * 1024 * 1024 ) var ( @@ -295,11 +298,19 @@ type bytecodeHealResponse struct { // accountTask represents the sync task for a chunk of the account snapshot. type accountTask struct { - // These fields get serialized to leveldb on shutdown + // These fields get serialized to key-value store on shutdown Next common.Hash // Next account to sync in this interval Last common.Hash // Last account to sync in this interval SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts + // This is a list of account hashes whose storage are already completed + // in this cycle. This field is newly introduced in v1.14 and will be + // empty if the task is resolved from legacy progress data. Furthermore, + // this additional field will be ignored by legacy Geth. The only side + // effect is that these contracts might be resynced in the new cycle, + // retaining the legacy behavior. + StorageCompleted []common.Hash `json:",omitempty"` + // These fields are internals used during runtime req *accountRequest // Pending request to fill this task res *accountResponse // Validate response filling this task @@ -309,15 +320,40 @@ type accountTask struct { needState []bool // Flags whether the filling accounts need storage retrieval needHeal []bool // Flags whether the filling accounts's state was chunked and need healing - codeTasks map[common.Hash]struct{} // Code hashes that need retrieval - stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval + codeTasks map[common.Hash]struct{} // Code hashes that need retrieval + stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval + stateCompleted map[common.Hash]struct{} // Account hashes whose storage have been completed - genBatch ethdb.Batch // Batch used by the node generator - genTrie *trie.StackTrie // Node generator from storage slots + genBatch ethdb.Batch // Batch used by the node generator + genTrie genTrie // Node generator from storage slots done bool // Flag whether the task can be removed } +// activeSubTasks returns the set of storage tasks covered by the current account +// range. Normally this would be the entire subTask set, but on a sync interrupt +// and later resume it can happen that a shorter account range is retrieved. This +// method ensures that we only start up the subtasks covered by the latest account +// response. +// +// Nil is returned if the account range is empty. +func (task *accountTask) activeSubTasks() map[common.Hash][]*storageTask { + if len(task.res.hashes) == 0 { + return nil + } + var ( + tasks = make(map[common.Hash][]*storageTask) + last = task.res.hashes[len(task.res.hashes)-1] + ) + for hash, subTasks := range task.SubTasks { + subTasks := subTasks // closure + if hash.Cmp(last) <= 0 { + tasks[hash] = subTasks + } + } + return tasks +} + // storageTask represents the sync task for a chunk of the storage snapshot. type storageTask struct { Next common.Hash // Next account to sync in this interval @@ -327,8 +363,8 @@ type storageTask struct { root common.Hash // Storage root hash for this instance req *storageRequest // Pending request to fill this task - genBatch ethdb.Batch // Batch used by the node generator - genTrie *trie.StackTrie // Node generator from storage slots + genBatch ethdb.Batch // Batch used by the node generator + genTrie genTrie // Node generator from storage slots done bool // Flag whether the task can be removed } @@ -716,19 +752,6 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { } } -// cleanPath is used to remove the dangling nodes in the stackTrie. -func (s *Syncer) cleanPath(batch ethdb.Batch, owner common.Hash, path []byte) { - if owner == (common.Hash{}) && rawdb.ExistsAccountTrieNode(s.db, path) { - rawdb.DeleteAccountTrieNode(batch, path) - deletionGauge.Inc(1) - } - if owner != (common.Hash{}) && rawdb.ExistsStorageTrieNode(s.db, owner, path) { - rawdb.DeleteStorageTrieNode(batch, owner, path) - deletionGauge.Inc(1) - } - lookupGauge.Inc(1) -} - // loadSyncStatus retrieves a previously aborted sync status from the database, // or generates a fresh one if none is available. func (s *Syncer) loadSyncStatus() { @@ -745,28 +768,27 @@ func (s *Syncer) loadSyncStatus() { for _, task := range s.tasks { task := task // closure for task.genBatch in the stacktrie writer callback + // Restore the completed storages + task.stateCompleted = make(map[common.Hash]struct{}) + for _, hash := range task.StorageCompleted { + task.stateCompleted[hash] = struct{}{} + } + task.StorageCompleted = nil + + // Allocate batch for account trie generation task.genBatch = ethdb.HookedBatch{ Batch: s.db.NewBatch(), OnPut: func(key []byte, value []byte) { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, blob, s.scheme) - }) + if s.scheme == rawdb.HashScheme { + task.genTrie = newHashTrie(task.genBatch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(task.genBatch, common.Hash{}, path) - }) - // Skip the left boundary if it's not the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(task.Next != (common.Hash{}), task.Last != common.MaxHash, boundaryAccountNodesGauge) + task.genTrie = newPathTrie(common.Hash{}, task.Next != common.Hash{}, s.db, task.genBatch) } - task.genTrie = trie.NewStackTrie(options) + // Restore leftover storage tasks for accountHash, subtasks := range task.SubTasks { for _, subtask := range subtasks { subtask := subtask // closure for subtask.genBatch in the stacktrie writer callback @@ -777,23 +799,12 @@ func (s *Syncer) loadSyncStatus() { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - owner := accountHash // local assignment for stacktrie writer closure - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, blob, s.scheme) - }) + if s.scheme == rawdb.HashScheme { + subtask.genTrie = newHashTrie(subtask.genBatch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(subtask.genBatch, owner, path) - }) - // Skip the left boundary if it's not the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(subtask.Next != common.Hash{}, subtask.Last != common.MaxHash, boundaryStorageNodesGauge) + subtask.genTrie = newPathTrie(accountHash, subtask.Next != common.Hash{}, s.db, subtask.genBatch) } - subtask.genTrie = trie.NewStackTrie(options) } } } @@ -845,27 +856,20 @@ func (s *Syncer) loadSyncStatus() { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, common.Hash{}, path) - }) - // Skip the left boundary if it's not the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(next != common.Hash{}, last != common.MaxHash, boundaryAccountNodesGauge) + tr = newPathTrie(common.Hash{}, next != common.Hash{}, s.db, batch) } s.tasks = append(s.tasks, &accountTask{ - Next: next, - Last: last, - SubTasks: make(map[common.Hash][]*storageTask), - genBatch: batch, - genTrie: trie.NewStackTrie(options), + Next: next, + Last: last, + SubTasks: make(map[common.Hash][]*storageTask), + genBatch: batch, + stateCompleted: make(map[common.Hash]struct{}), + genTrie: tr, }) log.Debug("Created account sync task", "from", next, "last", last) next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) @@ -876,16 +880,31 @@ func (s *Syncer) loadSyncStatus() { func (s *Syncer) saveSyncStatus() { // Serialize any partial progress to disk before spinning down for _, task := range s.tasks { + // Claim the right boundary as incomplete before flushing the + // accumulated nodes in batch, the nodes on right boundary + // will be discarded and cleaned up by this call. + task.genTrie.commit(false) if err := task.genBatch.Write(); err != nil { log.Error("Failed to persist account slots", "err", err) } for _, subtasks := range task.SubTasks { for _, subtask := range subtasks { + // Same for account trie, discard and cleanup the + // incomplete right boundary. + subtask.genTrie.commit(false) if err := subtask.genBatch.Write(); err != nil { log.Error("Failed to persist storage slots", "err", err) } } } + // Save the account hashes of completed storage. + task.StorageCompleted = make([]common.Hash, 0, len(task.stateCompleted)) + for hash := range task.stateCompleted { + task.StorageCompleted = append(task.StorageCompleted, hash) + } + if len(task.StorageCompleted) > 0 { + log.Debug("Leftover completed storages", "number", len(task.StorageCompleted), "next", task.Next, "last", task.Last) + } } // Store the actual progress markers progress := &SyncProgress{ @@ -970,6 +989,10 @@ func (s *Syncer) cleanStorageTasks() { delete(task.SubTasks, account) task.pend-- + // Mark the state as complete to prevent resyncing, regardless + // if state healing is necessary. + task.stateCompleted[account] = struct{}{} + // If this was the last pending task, forward the account task if task.pend == 0 { s.forwardAccountTask(task) @@ -1209,7 +1232,8 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st continue } // Skip tasks that are already retrieving (or done with) all small states - if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 { + storageTasks := task.activeSubTasks() + if len(storageTasks) == 0 && len(task.stateTasks) == 0 { continue } // Task pending retrieval, try to find an idle peer. If no such peer @@ -1253,7 +1277,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st roots = make([]common.Hash, 0, storageSets) subtask *storageTask ) - for account, subtasks := range task.SubTasks { + for account, subtasks := range storageTasks { for _, st := range subtasks { // Skip any subtasks already filling if st.req != nil { @@ -1850,11 +1874,11 @@ func (s *Syncer) processAccountResponse(res *accountResponse) { res.task.res = res // Ensure that the response doesn't overflow into the subsequent task - last := res.task.Last.Big() + lastBig := res.task.Last.Big() for i, hash := range res.hashes { // Mark the range complete if the last is already included. // Keep iteration to delete the extra states if exists. - cmp := hash.Big().Cmp(last) + cmp := hash.Big().Cmp(lastBig) if cmp == 0 { res.cont = false continue @@ -1890,7 +1914,21 @@ func (s *Syncer) processAccountResponse(res *accountResponse) { } // Check if the account is a contract with an unknown storage trie if account.Root != types.EmptyRootHash { - if !rawdb.HasTrieNode(s.db, res.hashes[i], nil, account.Root, s.scheme) { + // If the storage was already retrieved in the last cycle, there's no need + // to resync it again, regardless of whether the storage root is consistent + // or not. + if _, exist := res.task.stateCompleted[res.hashes[i]]; exist { + // The leftover storage tasks are not expected, unless system is + // very wrong. + if _, ok := res.task.SubTasks[res.hashes[i]]; ok { + panic(fmt.Errorf("unexpected leftover storage tasks, owner: %x", res.hashes[i])) + } + // Mark the healing tag if storage root node is inconsistent, or + // it's non-existent due to storage chunking. + if !rawdb.HasTrieNode(s.db, res.hashes[i], nil, account.Root, s.scheme) { + res.task.needHeal[i] = true + } + } else { // If there was a previous large state retrieval in progress, // don't restart it from scratch. This happens if a sync cycle // is interrupted and resumed later. However, *do* update the @@ -1902,7 +1940,12 @@ func (s *Syncer) processAccountResponse(res *accountResponse) { } res.task.needHeal[i] = true resumed[res.hashes[i]] = struct{}{} + largeStorageResumedGauge.Inc(1) } else { + // It's possible that in the hash scheme, the storage, along + // with the trie nodes of the given root, is already present + // in the database. Schedule the storage task anyway to simplify + // the logic here. res.task.stateTasks[res.hashes[i]] = account.Root } res.task.needState[i] = true @@ -1910,13 +1953,29 @@ func (s *Syncer) processAccountResponse(res *accountResponse) { } } } - // Delete any subtasks that have been aborted but not resumed. This may undo - // some progress if a new peer gives us less accounts than an old one, but for - // now we have to live with that. - for hash := range res.task.SubTasks { - if _, ok := resumed[hash]; !ok { - log.Debug("Aborting suspended storage retrieval", "account", hash) - delete(res.task.SubTasks, hash) + // Delete any subtasks that have been aborted but not resumed. It's essential + // as the corresponding contract might be self-destructed in this cycle(it's + // no longer possible in ethereum as self-destruction is disabled in Cancun + // Fork, but the condition is still necessary for other networks). + // + // Keep the leftover storage tasks if they are not covered by the responded + // account range which should be picked up in next account wave. + if len(res.hashes) > 0 { + // The hash of last delivered account in the response + last := res.hashes[len(res.hashes)-1] + for hash := range res.task.SubTasks { + // TODO(rjl493456442) degrade the log level before merging. + if hash.Cmp(last) > 0 { + log.Info("Keeping suspended storage retrieval", "account", hash) + continue + } + // TODO(rjl493456442) degrade the log level before merging. + // It should never happen in ethereum. + if _, ok := resumed[hash]; !ok { + log.Error("Aborting suspended storage retrieval", "account", hash) + delete(res.task.SubTasks, hash) + largeStorageDiscardGauge.Inc(1) + } } } // If the account range contained no contracts, or all have been fully filled @@ -2014,6 +2073,7 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) { res.mainTask.needState[j] = false res.mainTask.pend-- + res.mainTask.stateCompleted[account] = struct{}{} // mark it as completed smallStorageGauge.Inc(1) } // If the last contract was chunked, mark it as needing healing @@ -2062,25 +2122,20 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - owner := account // local assignment for stacktrie writer closure - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, owner, path) - }) // Keep the left boundary as it's the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(false, r.End() != common.MaxHash, boundaryStorageNodesGauge) + tr = newPathTrie(account, false, s.db, batch) } tasks = append(tasks, &storageTask{ Next: common.Hash{}, Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(options), + genTrie: tr, }) for r.Next() { batch := ethdb.HookedBatch{ @@ -2089,27 +2144,19 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, owner, path) - }) - // Skip the left boundary as it's not the first range - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(true, r.End() != common.MaxHash, boundaryStorageNodesGauge) + tr = newPathTrie(account, true, s.db, batch) } tasks = append(tasks, &storageTask{ Next: r.Start(), Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(options), + genTrie: tr, }) } for _, task := range tasks { @@ -2155,26 +2202,18 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { if i < len(res.hashes)-1 || res.subTask == nil { // no need to make local reassignment of account: this closure does not outlive the loop - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, account, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner only in the context of the - // path scheme. Deletion is forbidden in the hash scheme, as it can - // disrupt state completeness. - // - // Notably, boundary nodes can be also kept because the whole storage - // trie is complete. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, account, path) - }) + // Keep the left boundary as it's complete + tr = newPathTrie(account, false, s.db, batch) } - tr := trie.NewStackTrie(options) for j := 0; j < len(res.hashes[i]); j++ { - tr.Update(res.hashes[i][j][:], res.slots[i][j]) + tr.update(res.hashes[i][j][:], res.slots[i][j]) } - tr.Commit() + tr.commit(true) } // Persist the received storage segments. These flat state maybe // outdated during the sync, but it can be fixed later during the @@ -2185,14 +2224,14 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // If we're storing large contracts, generate the trie nodes // on the fly to not trash the gluing points if i == len(res.hashes)-1 && res.subTask != nil { - res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j]) + res.subTask.genTrie.update(res.hashes[i][j][:], res.slots[i][j]) } } } // Large contracts could have generated new trie nodes, flush them to disk if res.subTask != nil { if res.subTask.done { - root := res.subTask.genTrie.Commit() + root := res.subTask.genTrie.commit(res.subTask.Last == common.MaxHash) if err := res.subTask.genBatch.Write(); err != nil { log.Error("Failed to persist stack slots", "err", err) } @@ -2209,8 +2248,8 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { } } } - } - if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize { + } else if res.subTask.genBatch.ValueSize() > batchSizeThreshold { + res.subTask.genTrie.commit(false) if err := res.subTask.genBatch.Write(); err != nil { log.Error("Failed to persist stack slots", "err", err) } @@ -2393,7 +2432,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { if err != nil { panic(err) // Really shouldn't ever happen } - task.genTrie.Update(hash[:], full) + task.genTrie.update(hash[:], full) } } // Flush anything written just now and update the stats @@ -2409,17 +2448,30 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { return } task.Next = incHash(hash) + + // Remove the completion flag once the account range is pushed + // forward. The leftover accounts will be skipped in the next + // cycle. + delete(task.stateCompleted, hash) } // All accounts marked as complete, track if the entire task is done task.done = !res.cont + // Error out if there is any leftover completion flag. + if task.done && len(task.stateCompleted) != 0 { + panic(fmt.Errorf("storage completion flags should be emptied, %d left", len(task.stateCompleted))) + } // Stack trie could have generated trie nodes, push them to disk (we need to // flush after finalizing task.done. It's fine even if we crash and lose this // write as it will only cause more data to be downloaded during heal. if task.done { - task.genTrie.Commit() - } - if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done { + task.genTrie.commit(task.Last == common.MaxHash) + if err := task.genBatch.Write(); err != nil { + log.Error("Failed to persist stack account", "err", err) + } + task.genBatch.Reset() + } else if task.genBatch.ValueSize() > batchSizeThreshold { + task.genTrie.commit(false) if err := task.genBatch.Write(); err != nil { log.Error("Failed to persist stack account", "err", err) } diff --git a/fork.yaml b/fork.yaml index 15f26bc7e7..14336f78eb 100644 --- a/fork.yaml +++ b/fork.yaml @@ -5,7 +5,7 @@ footer: | base: name: go-ethereum url: https://github.com/ethereum/go-ethereum - hash: 2bd6bd01d2e8561dd7fc21b631f4a34ac16627a1 v1.13.14 + hash: c5ba367eb6232e3eddd7d6226bfd374449c63164 v1.13.15 fork: name: op-geth url: https://github.com/ethereum-optimism/op-geth diff --git a/internal/testrand/rand.go b/internal/testrand/rand.go new file mode 100644 index 0000000000..690993de05 --- /dev/null +++ b/internal/testrand/rand.go @@ -0,0 +1,53 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package testrand + +import ( + crand "crypto/rand" + "encoding/binary" + mrand "math/rand" + + "github.com/ethereum/go-ethereum/common" +) + +// prng is a pseudo random number generator seeded by strong randomness. +// The randomness is printed on startup in order to make failures reproducible. +var prng = initRand() + +func initRand() *mrand.Rand { + var seed [8]byte + crand.Read(seed[:]) + rnd := mrand.New(mrand.NewSource(int64(binary.LittleEndian.Uint64(seed[:])))) + return rnd +} + +// Bytes generates a random byte slice with specified length. +func Bytes(n int) []byte { + r := make([]byte, n) + prng.Read(r) + return r +} + +// Hash generates a random hash. +func Hash() common.Hash { + return common.BytesToHash(Bytes(common.HashLength)) +} + +// Address generates a random address. +func Address() common.Address { + return common.BytesToAddress(Bytes(common.AddressLength)) +} diff --git a/params/version.go b/params/version.go index bb6f38e9e6..86adc36db7 100644 --- a/params/version.go +++ b/params/version.go @@ -26,7 +26,7 @@ import ( const ( VersionMajor = 1 // Major version component of the current release VersionMinor = 13 // Minor version component of the current release - VersionPatch = 14 // Patch version component of the current release + VersionPatch = 15 // Patch version component of the current release VersionMeta = "stable" // Version metadata to append to the version string ) diff --git a/trie/stacktrie.go b/trie/stacktrie.go index f2f5355c49..9c574db0bf 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -23,8 +23,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" ) var ( @@ -32,62 +30,32 @@ var ( _ = types.TrieHasher((*StackTrie)(nil)) ) -// StackTrieOptions contains the configured options for manipulating the stackTrie. -type StackTrieOptions struct { - Writer func(path []byte, hash common.Hash, blob []byte) // The function to commit the dirty nodes - Cleaner func(path []byte) // The function to clean up dangling nodes - - SkipLeftBoundary bool // Flag whether the nodes on the left boundary are skipped for committing - SkipRightBoundary bool // Flag whether the nodes on the right boundary are skipped for committing - boundaryGauge metrics.Gauge // Gauge to track how many boundary nodes are met -} - -// NewStackTrieOptions initializes an empty options for stackTrie. -func NewStackTrieOptions() *StackTrieOptions { return &StackTrieOptions{} } - -// WithWriter configures trie node writer within the options. -func (o *StackTrieOptions) WithWriter(writer func(path []byte, hash common.Hash, blob []byte)) *StackTrieOptions { - o.Writer = writer - return o -} - -// WithCleaner configures the cleaner in the option for removing dangling nodes. -func (o *StackTrieOptions) WithCleaner(cleaner func(path []byte)) *StackTrieOptions { - o.Cleaner = cleaner - return o -} - -// WithSkipBoundary configures whether the left and right boundary nodes are -// filtered for committing, along with a gauge metrics to track how many -// boundary nodes are met. -func (o *StackTrieOptions) WithSkipBoundary(skipLeft, skipRight bool, gauge metrics.Gauge) *StackTrieOptions { - o.SkipLeftBoundary = skipLeft - o.SkipRightBoundary = skipRight - o.boundaryGauge = gauge - return o -} +// OnTrieNode is a callback method invoked when a trie node is committed +// by the stack trie. The node is only committed if it's considered complete. +// +// The caller should not modify the contents of the returned path and blob +// slice, and their contents may be changed after the call. It is up to the +// `onTrieNode` receiver function to deep-copy the data if it wants to retain +// it after the call ends. +type OnTrieNode func(path []byte, hash common.Hash, blob []byte) // StackTrie is a trie implementation that expects keys to be inserted // in order. Once it determines that a subtree will no longer be inserted // into, it will hash it and free up the memory it uses. type StackTrie struct { - options *StackTrieOptions - root *stNode - h *hasher - - first []byte // The (hex-encoded without terminator) key of first inserted entry, tracked as left boundary. - last []byte // The (hex-encoded without terminator) key of last inserted entry, tracked as right boundary. + root *stNode + h *hasher + last []byte + onTrieNode OnTrieNode } -// NewStackTrie allocates and initializes an empty trie. -func NewStackTrie(options *StackTrieOptions) *StackTrie { - if options == nil { - options = NewStackTrieOptions() - } +// NewStackTrie allocates and initializes an empty trie. The committed nodes +// will be discarded immediately if no callback is configured. +func NewStackTrie(onTrieNode OnTrieNode) *StackTrie { return &StackTrie{ - options: options, - root: stPool.Get().(*stNode), - h: newHasher(false), + root: stPool.Get().(*stNode), + h: newHasher(false), + onTrieNode: onTrieNode, } } @@ -101,10 +69,6 @@ func (t *StackTrie) Update(key, value []byte) error { if bytes.Compare(t.last, k) >= 0 { return errors.New("non-ascending key order") } - // track the first and last inserted entries. - if t.first == nil { - t.first = append([]byte{}, k...) - } if t.last == nil { t.last = append([]byte{}, k...) // allocate key slice } else { @@ -114,19 +78,9 @@ func (t *StackTrie) Update(key, value []byte) error { return nil } -// MustUpdate is a wrapper of Update and will omit any encountered error but -// just print out an error message. -func (t *StackTrie) MustUpdate(key, value []byte) { - if err := t.Update(key, value); err != nil { - log.Error("Unhandled trie error in StackTrie.Update", "err", err) - } -} - // Reset resets the stack trie object to empty state. func (t *StackTrie) Reset() { - t.options = NewStackTrieOptions() t.root = stPool.Get().(*stNode) - t.first = nil t.last = nil } @@ -346,10 +300,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { // // This method also sets 'st.type' to hashedNode, and clears 'st.key'. func (t *StackTrie) hash(st *stNode, path []byte) { - var ( - blob []byte // RLP-encoded node blob - internal [][]byte // List of node paths covered by the extension node - ) + var blob []byte // RLP-encoded node blob switch st.typ { case hashedNode: return @@ -384,15 +335,6 @@ func (t *StackTrie) hash(st *stNode, path []byte) { // recursively hash and commit child as the first step t.hash(st.children[0], append(path, st.key...)) - // Collect the path of internal nodes between shortNode and its **in disk** - // child. This is essential in the case of path mode scheme to avoid leaving - // danging nodes within the range of this internal path on disk, which would - // break the guarantee for state healing. - if len(st.children[0].val) >= 32 && t.options.Cleaner != nil { - for i := 1; i < len(st.key); i++ { - internal = append(internal, append(path, st.key[:i]...)) - } - } // encode the extension node n := shortNode{Key: hexToCompactInPlace(st.key)} if len(st.children[0].val) < 32 { @@ -416,11 +358,12 @@ func (t *StackTrie) hash(st *stNode, path []byte) { default: panic("invalid node type") } - + // Convert the node type to hashNode and reset the key slice. st.typ = hashedNode st.key = st.key[:0] - // Skip committing the non-root node if the size is smaller than 32 bytes. + // Skip committing the non-root node if the size is smaller than 32 bytes + // as tiny nodes are always embedded in their parent except root node. if len(blob) < 32 && len(path) > 0 { st.val = common.CopyBytes(blob) return @@ -429,51 +372,20 @@ func (t *StackTrie) hash(st *stNode, path []byte) { // input values. st.val = t.h.hashData(blob) - // Short circuit if the stack trie is not configured for writing. - if t.options.Writer == nil { - return + // Invoke the callback it's provided. Notably, the path and blob slices are + // volatile, please deep-copy the slices in callback if the contents need + // to be retained. + if t.onTrieNode != nil { + t.onTrieNode(path, common.BytesToHash(st.val), blob) } - // Skip committing if the node is on the left boundary and stackTrie is - // configured to filter the boundary. - if t.options.SkipLeftBoundary && bytes.HasPrefix(t.first, path) { - if t.options.boundaryGauge != nil { - t.options.boundaryGauge.Inc(1) - } - return - } - // Skip committing if the node is on the right boundary and stackTrie is - // configured to filter the boundary. - if t.options.SkipRightBoundary && bytes.HasPrefix(t.last, path) { - if t.options.boundaryGauge != nil { - t.options.boundaryGauge.Inc(1) - } - return - } - // Clean up the internal dangling nodes covered by the extension node. - // This should be done before writing the node to adhere to the committing - // order from bottom to top. - for _, path := range internal { - t.options.Cleaner(path) - } - t.options.Writer(path, common.BytesToHash(st.val), blob) } // Hash will firstly hash the entire trie if it's still not hashed and then commit -// all nodes to the associated database. Actually most of the trie nodes have been -// committed already. The main purpose here is to commit the nodes on right boundary. -// -// For stack trie, Hash and Commit are functionally identical. +// all leftover nodes to the associated database. Actually most of the trie nodes +// have been committed already. The main purpose here is to commit the nodes on +// right boundary. func (t *StackTrie) Hash() common.Hash { n := t.root t.hash(n, nil) return common.BytesToHash(n.val) } - -// Commit will firstly hash the entire trie if it's still not hashed and then commit -// all nodes to the associated database. Actually most of the trie nodes have been -// committed already. The main purpose here is to commit the nodes on right boundary. -// -// For stack trie, Hash and Commit are functionally identical. -func (t *StackTrie) Commit() common.Hash { - return t.Hash() -} diff --git a/trie/stacktrie_fuzzer_test.go b/trie/stacktrie_fuzzer_test.go index 50b5c4de52..c8e568355c 100644 --- a/trie/stacktrie_fuzzer_test.go +++ b/trie/stacktrie_fuzzer_test.go @@ -46,11 +46,9 @@ func fuzz(data []byte, debugging bool) { trieA = NewEmpty(dbA) spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()} dbB = newTestDatabase(rawdb.NewDatabase(spongeB), rawdb.HashScheme) - - options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + trieB = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(spongeB, common.Hash{}, path, hash, blob, dbB.Scheme()) }) - trieB = NewStackTrie(options) vals []*kv maxElements = 10000 // operate on unique keys only @@ -99,10 +97,9 @@ func fuzz(data []byte, debugging bool) { if debugging { fmt.Printf("{\"%#x\" , \"%#x\"} // stacktrie.Update\n", kv.k, kv.v) } - trieB.MustUpdate(kv.k, kv.v) + trieB.Update(kv.k, kv.v) } rootB := trieB.Hash() - trieB.Commit() if rootA != rootB { panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB)) } @@ -114,20 +111,19 @@ func fuzz(data []byte, debugging bool) { // Ensure all the nodes are persisted correctly var ( - nodeset = make(map[string][]byte) // path -> blob - optionsC = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + nodeset = make(map[string][]byte) // path -> blob + trieC = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { if crypto.Keccak256Hash(blob) != hash { panic("invalid node blob") } nodeset[string(path)] = common.CopyBytes(blob) }) - trieC = NewStackTrie(optionsC) checked int ) for _, kv := range vals { - trieC.MustUpdate(kv.k, kv.v) + trieC.Update(kv.k, kv.v) } - rootC := trieC.Commit() + rootC := trieC.Hash() if rootA != rootC { panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootC)) } diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go index 3a0e1cb260..f053b5112d 100644 --- a/trie/stacktrie_test.go +++ b/trie/stacktrie_test.go @@ -19,15 +19,12 @@ package trie import ( "bytes" "math/big" - "math/rand" "testing" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/trie/testutil" "github.com/stretchr/testify/assert" - "golang.org/x/exp/slices" ) func TestStackTrieInsertAndHash(t *testing.T) { @@ -381,90 +378,6 @@ func TestStacktrieNotModifyValues(t *testing.T) { } } -func buildPartialTree(entries []*kv, t *testing.T) map[string]common.Hash { - var ( - options = NewStackTrieOptions() - nodes = make(map[string]common.Hash) - ) - var ( - first int - last = len(entries) - 1 - - noLeft bool - noRight bool - ) - // Enter split mode if there are at least two elements - if rand.Intn(5) != 0 { - for { - first = rand.Intn(len(entries)) - last = rand.Intn(len(entries)) - if first <= last { - break - } - } - if first != 0 { - noLeft = true - } - if last != len(entries)-1 { - noRight = true - } - } - options = options.WithSkipBoundary(noLeft, noRight, nil) - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - nodes[string(path)] = hash - }) - tr := NewStackTrie(options) - - for i := first; i <= last; i++ { - tr.MustUpdate(entries[i].k, entries[i].v) - } - tr.Commit() - return nodes -} - -func TestPartialStackTrie(t *testing.T) { - for round := 0; round < 100; round++ { - var ( - n = rand.Intn(100) + 1 - entries []*kv - ) - for i := 0; i < n; i++ { - var val []byte - if rand.Intn(3) == 0 { - val = testutil.RandBytes(3) - } else { - val = testutil.RandBytes(32) - } - entries = append(entries, &kv{ - k: testutil.RandBytes(32), - v: val, - }) - } - slices.SortFunc(entries, (*kv).cmp) - - var ( - nodes = make(map[string]common.Hash) - options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { - nodes[string(path)] = hash - }) - ) - tr := NewStackTrie(options) - - for i := 0; i < len(entries); i++ { - tr.MustUpdate(entries[i].k, entries[i].v) - } - tr.Commit() - - for j := 0; j < 100; j++ { - for path, hash := range buildPartialTree(entries, t) { - if nodes[path] != hash { - t.Errorf("%v, want %x, got %x", []byte(path), nodes[path], hash) - } - } - } - } -} - func TestStackTrieErrors(t *testing.T) { s := NewStackTrie(nil) // Deletion diff --git a/trie/trie_test.go b/trie/trie_test.go index 379a866f7e..c141c52078 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -963,11 +963,9 @@ func TestCommitSequenceStackTrie(t *testing.T) { id: "b", values: make(map[string]string), } - options := NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) }) - stTrie := NewStackTrie(options) // Fill the trie with elements for i := 0; i < count; i++ { @@ -993,7 +991,7 @@ func TestCommitSequenceStackTrie(t *testing.T) { s.Flush() // And flush stacktrie -> disk - stRoot := stTrie.Commit() + stRoot := stTrie.Hash() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) } @@ -1034,12 +1032,9 @@ func TestCommitSequenceSmallRoot(t *testing.T) { id: "b", values: make(map[string]string), } - options := NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) }) - stTrie := NewStackTrie(options) - // Add a single small-element to the trie(s) key := make([]byte, 5) key[0] = 1 @@ -1053,7 +1048,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) { db.Commit(root) // And flush stacktrie -> disk - stRoot := stTrie.Commit() + stRoot := stTrie.Hash() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) }