diff --git a/chain/chain/src/flat_storage_creator.rs b/chain/chain/src/flat_storage_creator.rs
index e85616c6962..7c0c15e27fd 100644
--- a/chain/chain/src/flat_storage_creator.rs
+++ b/chain/chain/src/flat_storage_creator.rs
@@ -110,7 +110,7 @@ impl FlatStorageShardCreator {
                     trie_iter.visit_nodes_interval(&path_begin, &path_end).unwrap()
                 {
                     if let Some(key) = key {
-                        let value = trie.storage.retrieve_raw_bytes(&hash).unwrap();
+                        let value = trie.retrieve_value(&hash).unwrap();
                         store_helper::set_flat_state_value(
                             &mut store_update,
                             shard_uid,
diff --git a/core/primitives/src/types.rs b/core/primitives/src/types.rs
index cc8d99c0f6a..b1b4d0f345d 100644
--- a/core/primitives/src/types.rs
+++ b/core/primitives/src/types.rs
@@ -960,7 +960,7 @@ pub trait EpochInfoProvider {
 }
 
 /// Mode of the trie cache.
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub enum TrieCacheMode {
     /// In this mode we put each visited node to LRU cache to optimize performance.
     /// Presence of any exact node is not guaranteed.
diff --git a/core/store/src/metrics.rs b/core/store/src/metrics.rs
index ea8ba309608..f480561e17f 100644
--- a/core/store/src/metrics.rs
+++ b/core/store/src/metrics.rs
@@ -18,6 +18,7 @@ pub(crate) static DATABASE_OP_LATENCY_HIST: Lazy<HistogramVec> = Lazy::new(|| {
     .unwrap()
 });
 
+// TODO(#9054): Rename the metric to be consistent with "accounting cache".
 pub static CHUNK_CACHE_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
     try_create_int_counter_vec(
         "near_chunk_cache_hits",
@@ -27,6 +28,7 @@ pub static CHUNK_CACHE_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
     .unwrap()
 });
 
+// TODO(#9054): Rename the metric to be consistent with "accounting cache".
 pub static CHUNK_CACHE_MISSES: Lazy<IntCounterVec> = Lazy::new(|| {
     try_create_int_counter_vec(
         "near_chunk_cache_misses",
@@ -68,6 +70,7 @@ pub static SHARD_CACHE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
     .unwrap()
 });
 
+// TODO(#9054): Rename the metric to be consistent with "accounting cache".
 pub static CHUNK_CACHE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
     try_create_int_gauge_vec("near_chunk_cache_size", "Chunk cache size", &["shard_id", "is_view"])
         .unwrap()
diff --git a/core/store/src/test_utils.rs b/core/store/src/test_utils.rs
index 5630821853c..66e63089f7f 100644
--- a/core/store/src/test_utils.rs
+++ b/core/store/src/test_utils.rs
@@ -1,15 +1,17 @@
 use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
+use near_primitives::state::{FlatStateValue, ValueRef};
 use near_primitives::trie_key::TrieKey;
 use rand::seq::SliceRandom;
 use rand::Rng;
 
 use crate::db::TestDB;
+use crate::flat::{store_helper, BlockInfo, FlatStorageReadyStatus};
 use crate::metadata::{DbKind, DbVersion, DB_VERSION};
 use crate::{get, get_delayed_receipt_indices, DBCol, NodeStorage, ShardTries, Store};
 use near_primitives::account::id::AccountId;
-use near_primitives::hash::CryptoHash;
+use near_primitives::hash::{hash, CryptoHash};
 use near_primitives::receipt::{DataReceipt, Receipt, ReceiptEnum};
 use near_primitives::shard_layout::{ShardUId, ShardVersion};
 use near_primitives::types::{NumShards, StateRoot};
@@ -87,6 +89,34 @@ pub fn test_populate_trie(
     root
 }
 
+pub fn test_populate_flat_storage(
+    tries: &ShardTries,
+    shard_uid: ShardUId,
+    block_hash: &CryptoHash,
+    prev_block_hash: &CryptoHash,
+    changes: &Vec<(Vec<u8>, Option<Vec<u8>>)>,
+) {
+    let mut store_update = tries.store_update();
+    store_helper::set_flat_storage_status(
+        &mut store_update,
+        shard_uid,
+        crate::flat::FlatStorageStatus::Ready(FlatStorageReadyStatus {
+            flat_head: BlockInfo { hash: *block_hash, prev_hash: *prev_block_hash, height: 1 },
+        }),
+    );
+    for (key, value) in changes {
+        store_helper::set_flat_state_value(
+            &mut store_update,
+            shard_uid,
+            key.clone(),
+            value.as_ref().map(|value| {
+                FlatStateValue::Ref(ValueRef { hash: hash(value), length: value.len() as u32 })
+            }),
+        );
+    }
+    store_update.commit().unwrap();
+}
+
 /// Insert values to non-reference-counted columns in the store.
 pub fn test_populate_store(store: &Store, data: &[(DBCol, Vec<u8>, Vec<u8>)]) {
     let mut update = store.store_update();
diff --git a/core/store/src/trie/accounting_cache.rs b/core/store/src/trie/accounting_cache.rs
new file mode 100644
index 00000000000..125479dfad2
--- /dev/null
+++ b/core/store/src/trie/accounting_cache.rs
@@ -0,0 +1,122 @@
+use near_o11y::metrics::prometheus;
+use near_o11y::metrics::prometheus::core::{GenericCounter, GenericGauge};
+use near_primitives::errors::StorageError;
+use near_primitives::hash::CryptoHash;
+use near_primitives::shard_layout::ShardUId;
+use near_vm_runner::logic::TrieNodesCount;
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use crate::{metrics, TrieStorage};
+
+/// Deterministic cache to store trie nodes that have been accessed so far
+/// during the cache's lifetime. It is used for deterministic gas accounting
+/// so that previously accessed trie nodes and values are charged at a
+/// cheaper gas cost.
+///
+/// This cache's correctness is critical as it contributes to the gas
+/// accounting of storage operations during contract execution. For that
+/// reason, a new TrieAccountingCache must be created at the beginning of a
+/// chunk's execution, and the db_read_nodes and mem_read_nodes must be taken
+/// into account whenever a storage operation is performed to calculate what
+/// kind of operation it was.
+///
+/// Note that we don't have a size limit for values in the accounting cache.
+/// There are two reasons:
+///   - for nodes, value size is an implementation detail. If we change
+///     internal representation of a node (e.g. change `memory_usage` field
+///     from `RawTrieNodeWithSize`), this would have to be a protocol upgrade.
+///   - total size of all values is limited by the runtime fees. More
+///     thoroughly:
+///       - number of nodes is limited by receipt gas limit / touching trie
+///         node fee ~= 500 Tgas / 16 Ggas = 31_250;
+///       - size of trie keys and values is limited by receipt gas limit /
+///         lowest per byte fee (`storage_read_value_byte`) ~=
+///         (500 * 10**12 / 5611005) / 2**20 ~= 85 MB.
+///     All values are given as of 16/03/2022. We may consider a more precise
+///     limit for the accounting cache as well.
+///
+/// Note that in general, it is NOT true that all storage access is either a
+/// db read or mem read. It can also be a flat storage read, which is not
+/// tracked via TrieAccountingCache.
+pub struct TrieAccountingCache {
+    /// Whether the cache is enabled. By default it is not, but it can be
+    /// turned on or off on the fly.
+    enable: bool,
+    /// Cache of trie node hash -> trie node body, or a leaf value hash ->
+    /// leaf value.
+    cache: HashMap<CryptoHash, Arc<[u8]>>,
+    /// The number of times a key was accessed by reading from the underlying
+    /// storage. (This does not necessarily mean it was accessed from *disk*,
+    /// as the underlying storage layer may have a best-effort cache.)
+    db_read_nodes: u64,
+    /// The number of times a key was accessed when it was deterministically
+    /// already cached during the processing of this chunk.
+    mem_read_nodes: u64,
+    /// Prometheus metrics. It's optional - in testing it can be None.
+    metrics: Option<TrieAccountingCacheMetrics>,
+}
+
+struct TrieAccountingCacheMetrics {
+    accounting_cache_hits: GenericCounter<prometheus::core::AtomicU64>,
+    accounting_cache_misses: GenericCounter<prometheus::core::AtomicU64>,
+    accounting_cache_size: GenericGauge<prometheus::core::AtomicI64>,
+}
+
+impl TrieAccountingCache {
+    /// Constructs a new accounting cache. By default it is not enabled.
+    /// The optional parameter is passed in if prometheus metrics are desired.
+    pub fn new(shard_uid_and_is_view: Option<(ShardUId, bool)>) -> Self {
+        let metrics = shard_uid_and_is_view.map(|(shard_uid, is_view)| {
+            let mut buffer = itoa::Buffer::new();
+            let shard_id = buffer.format(shard_uid.shard_id);
+
+            let metrics_labels: [&str; 2] = [&shard_id, if is_view { "1" } else { "0" }];
+            TrieAccountingCacheMetrics {
+                accounting_cache_hits: metrics::CHUNK_CACHE_HITS.with_label_values(&metrics_labels),
+                accounting_cache_misses: metrics::CHUNK_CACHE_MISSES
+                    .with_label_values(&metrics_labels),
+                accounting_cache_size: metrics::CHUNK_CACHE_SIZE.with_label_values(&metrics_labels),
+            }
+        });
+        Self { enable: false, cache: HashMap::new(), db_read_nodes: 0, mem_read_nodes: 0, metrics }
+    }
+
+    pub fn set_enabled(&mut self, enabled: bool) {
+        self.enable = enabled;
+    }
+
+    /// Retrieve raw bytes from the cache if it exists, otherwise retrieve it
+    /// from the given storage, and count it as a db access.
+    pub fn retrieve_raw_bytes_with_accounting(
+        &mut self,
+        hash: &CryptoHash,
+        storage: &dyn TrieStorage,
+    ) -> Result<Arc<[u8]>, StorageError> {
+        if let Some(node) = self.cache.get(hash) {
+            self.mem_read_nodes += 1;
+            if let Some(metrics) = &self.metrics {
+                metrics.accounting_cache_hits.inc();
+            }
+            Ok(node.clone())
+        } else {
+            self.db_read_nodes += 1;
+            if let Some(metrics) = &self.metrics {
+                metrics.accounting_cache_misses.inc();
+            }
+            let node = storage.retrieve_raw_bytes(hash)?;
+
+            if self.enable {
+                self.cache.insert(*hash, node.clone());
+                if let Some(metrics) = &self.metrics {
+                    metrics.accounting_cache_size.set(self.cache.len() as i64);
+                }
+            }
+            Ok(node)
+        }
+    }
+
+    pub fn get_trie_nodes_count(&self) -> TrieNodesCount {
+        TrieNodesCount { db_reads: self.db_read_nodes, mem_reads: self.mem_read_nodes }
+    }
+}
diff --git a/core/store/src/trie/iterator.rs b/core/store/src/trie/iterator.rs
index 200931f7508..41ad99043b7 100644
--- a/core/store/src/trie/iterator.rs
+++ b/core/store/src/trie/iterator.rs
@@ -372,7 +372,7 @@ impl<'a> TrieIterator<'a> {
                 if self.key_nibbles[prefix..] >= path_end[prefix..] {
                     break;
                 }
-                self.trie.storage.retrieve_raw_bytes(&hash)?;
+                self.trie.retrieve_value(&hash)?;
                 nodes_list.push(TrieTraversalItem {
                     hash,
                     key: self.has_value().then(|| self.key()),
@@ -417,10 +417,7 @@ impl<'a> Iterator for TrieIterator<'a> {
                 },
                 (IterStep::Value(hash), true) => {
                     return Some(
-                        self.trie
-                            .storage
-                            .retrieve_raw_bytes(&hash)
-                            .map(|value| (self.key(), value.to_vec())),
+                        self.trie.retrieve_value(&hash).map(|value| (self.key(), value.to_vec())),
                     )
                 }
             }
diff --git a/core/store/src/trie/mod.rs b/core/store/src/trie/mod.rs
index aa6a1efd59a..672b6cfd14e 100644
--- a/core/store/src/trie/mod.rs
+++ b/core/store/src/trie/mod.rs
@@ -11,7 +11,6 @@ pub use crate::trie::shard_tries::{
     KeyForStateChanges, ShardTries, StateSnapshot, StateSnapshotConfig, WrappedTrieChanges,
 };
 pub use crate::trie::trie_storage::{TrieCache, TrieCachingStorage, TrieDBStorage, TrieStorage};
-use crate::trie::trie_storage::{TrieMemoryPartialStorage, TrieRecordingStorage};
 use crate::StorageError;
 use borsh::{BorshDeserialize, BorshSerialize};
 use near_primitives::challenge::PartialState;
@@ -30,7 +29,9 @@ use std::fmt::Write;
 use std::hash::{Hash, Hasher};
 use std::rc::Rc;
 use std::str;
+use std::sync::Arc;
 
+pub mod accounting_cache;
 mod config;
 mod from_flat;
 mod insert_delete;
@@ -41,11 +42,15 @@ mod raw_node;
 mod shard_tries;
 pub mod split_state;
 mod state_parts;
+mod trie_recording;
 mod trie_storage;
 #[cfg(test)]
 mod trie_tests;
 pub mod update;
 
+use self::accounting_cache::TrieAccountingCache;
+use self::trie_recording::TrieRecorder;
+use self::trie_storage::TrieMemoryPartialStorage;
 pub use from_flat::construct_trie_from_flat;
 
 const POISONED_LOCK_ERR: &str = "The lock was poisoned.";
@@ -321,9 +326,31 @@ impl std::fmt::Debug for TrieNode {
 }
 
 pub struct Trie {
-    pub storage: Rc<dyn TrieStorage>,
+    storage: Rc<dyn TrieStorage>,
     root: StateRoot,
-    pub flat_storage_chunk_view: Option<FlatStorageChunkView>,
+    /// If present, flat storage is used to look up keys (if asked for).
+    /// Otherwise, we would crawl through the trie.
+    flat_storage_chunk_view: Option<FlatStorageChunkView>,
+    /// This is the deterministic accounting cache, meaning that for the
+    /// lifetime of this Trie struct, whenever the accounting cache is enabled
+    /// (which can be toggled on the fly), trie nodes that have been looked up
+    /// once will be guaranteed to be cached, and further reads to these nodes
+    /// will encounter less gas cost.
+    accounting_cache: RefCell<TrieAccountingCache>,
+    /// If present, we're capturing all trie nodes that have been accessed
+    /// during the lifetime of this Trie struct. This is used to produce a
+    /// state proof so that the same access pattern can be replayed using only
+    /// the captured result.
+    recorder: Option<RefCell<TrieRecorder>>,
+    /// This can only be true if the storage is based on a recorded partial
+    /// trie, i.e. replaying lookups on a state proof, where flat storage may
+    /// not be available so we always have to go through the trie. If this
+    /// flag is true, trie node lookups will not go through the accounting
+    /// cache, i.e. the access is free from the trie's perspective, just like
+    /// flat storage. (Note that dereferencing ValueRef still has the same cost
+    /// no matter what.) This allows us to accurately calculate storage gas
+    /// costs even with only a state proof.
+    skip_accounting_cache_for_trie_nodes: bool,
 }
 
 /// Trait for reading data from a trie.
@@ -426,44 +453,105 @@ enum NodeOrValue {
 
 impl Trie {
     pub const EMPTY_ROOT: StateRoot = StateRoot::new();
 
+    /// Starts accessing a trie with the given storage.
+    /// By default, the accounting cache is not enabled. To enable or disable it
+    /// (only in this crate), call self.accounting_cache.borrow_mut().set_enabled().
     pub fn new(
         storage: Rc<dyn TrieStorage>,
         root: StateRoot,
         flat_storage_chunk_view: Option<FlatStorageChunkView>,
     ) -> Self {
-        Trie { storage, root, flat_storage_chunk_view }
+        let accounting_cache = match storage.as_caching_storage() {
+            Some(caching_storage) => RefCell::new(TrieAccountingCache::new(Some((
+                caching_storage.shard_uid,
+                caching_storage.is_view,
+            )))),
+            None => RefCell::new(TrieAccountingCache::new(None)),
+        };
+        Trie {
+            storage,
+            root,
+            flat_storage_chunk_view,
+            accounting_cache,
+            recorder: None,
+            skip_accounting_cache_for_trie_nodes: false,
+        }
     }
 
+    /// Makes a new trie that has everything the same except that access
+    /// through that trie accumulates a state proof for all nodes accessed.
     pub fn recording_reads(&self) -> Self {
-        let storage = TrieRecordingStorage {
-            storage: Rc::clone(&self.storage),
-            recorded: RefCell::new(Default::default()),
-        };
-        Trie { storage: Rc::new(storage), root: self.root, flat_storage_chunk_view: None }
+        let mut trie =
+            Self::new(self.storage.clone(), self.root, self.flat_storage_chunk_view.clone());
+        trie.recorder = Some(RefCell::new(TrieRecorder::new()));
+        trie
     }
 
+    /// Takes the recorded state proof out of the trie.
     pub fn recorded_storage(&self) -> Option<PartialStorage> {
-        let storage = self.storage.as_recording_storage()?;
-        let mut nodes: Vec<_> =
-            storage.recorded.borrow_mut().drain().map(|(_key, value)| value).collect();
-        nodes.sort();
-        Some(PartialStorage { nodes: PartialState::TrieValues(nodes) })
+        self.recorder.as_ref().map(|recorder| recorder.borrow_mut().recorded_storage())
     }
 
-    pub fn from_recorded_storage(partial_storage: PartialStorage, root: StateRoot) -> Self {
+    /// Constructs a Trie from the partial storage (i.e. state proof) that
+    /// was returned from recorded_storage(). If used to access the same trie
+    /// nodes as when the partial storage was generated, this trie will behave
+    /// identically.
+    ///
+    /// The flat_storage_used parameter should be true iff originally the trie
+    /// was accessed with flat storage present. It will be used to simulate the
+    /// same costs as if flat storage were present.
+    pub fn from_recorded_storage(
+        partial_storage: PartialStorage,
+        root: StateRoot,
+        flat_storage_used: bool,
+    ) -> Self {
         let PartialState::TrieValues(nodes) = partial_storage.nodes;
         let recorded_storage = nodes.into_iter().map(|value| (hash(&value), value)).collect();
         let storage = Rc::new(TrieMemoryPartialStorage::new(recorded_storage));
-        Self::new(storage, root, None)
+        let mut trie = Self::new(storage, root, None);
+        trie.skip_accounting_cache_for_trie_nodes = flat_storage_used;
+        trie
     }
 
     pub fn get_root(&self) -> &StateRoot {
         &self.root
     }
 
+    pub fn has_flat_storage_chunk_view(&self) -> bool {
+        self.flat_storage_chunk_view.is_some()
+    }
+
+    pub fn internal_get_storage_as_caching_storage(&self) -> Option<&TrieCachingStorage> {
+        self.storage.as_caching_storage()
+    }
+
+    /// All access to trie nodes or values must go through this method, so it
+    /// can be properly cached and recorded.
+    ///
+    /// use_accounting_cache can be false to skip the accounting cache. This is
+    /// used when we're generating a state proof, but the value is supposed to
+    /// be fetched from flat storage.
+    fn internal_retrieve_trie_node(
+        &self,
+        hash: &CryptoHash,
+        use_accounting_cache: bool,
+    ) -> Result<Arc<[u8]>, StorageError> {
+        let result = if use_accounting_cache {
+            self.accounting_cache
+                .borrow_mut()
+                .retrieve_raw_bytes_with_accounting(hash, &*self.storage)?
+        } else {
+            self.storage.retrieve_raw_bytes(hash)?
+        };
+        if let Some(recorder) = &self.recorder {
+            recorder.borrow_mut().record(hash, result.clone());
+        }
+        Ok(result)
+    }
+
     #[cfg(test)]
     fn memory_usage_verify(&self, memory: &NodesStorage, handle: NodeHandle) -> u64 {
-        if self.storage.as_recording_storage().is_some() {
+        if self.recorder.is_some() {
             return 0;
         }
         let TrieNodeWithSize { node, memory_usage } = match handle {
@@ -510,7 +598,7 @@ impl Trie {
     ) -> Result<(), StorageError> {
         match value {
             ValueHandle::HashAndSize(value) => {
-                let bytes = self.storage.retrieve_raw_bytes(&value.hash)?;
+                let bytes = self.internal_retrieve_trie_node(&value.hash, true)?;
                 memory
                     .refcount_changes
                     .entry(value.hash)
@@ -527,7 +615,7 @@ impl Trie {
 
     // Prints the trie nodes starting from hash, up to max_depth depth.
     // The node hash can be any node in the trie.
     pub fn print_recursive(&self, f: &mut dyn std::io::Write, hash: &CryptoHash, max_depth: u32) {
-        match self.retrieve_raw_node_or_value(hash) {
+        match self.debug_retrieve_raw_node_or_value(hash) {
             Ok(NodeOrValue::Node(_)) => {
                 let mut prefix: Vec<u8> = Vec::new();
                 self.print_recursive_internal(f, hash, max_depth, &mut "".to_string(), &mut prefix)
@@ -606,7 +694,7 @@ impl Trie {
             return Ok(());
         }
 
-        let (bytes, raw_node, mem_usage) = match self.retrieve_raw_node(hash) {
+        let (bytes, raw_node, mem_usage) = match self.retrieve_raw_node(hash, true) {
             Ok(Some((bytes, raw_node))) => (bytes, raw_node.node, raw_node.memory_usage),
             Ok(None) => return writeln!(f, "{spaces}EmptyNode"),
             Err(err) => return writeln!(f, "{spaces}error {err}"),
@@ -682,11 +770,12 @@ impl Trie {
     fn retrieve_raw_node(
         &self,
         hash: &CryptoHash,
+        use_accounting_cache: bool,
     ) -> Result<Option<(Arc<[u8]>, RawTrieNodeWithSize)>, StorageError> {
         if hash == &Self::EMPTY_ROOT {
             return Ok(None);
         }
-        let bytes = self.storage.retrieve_raw_bytes(hash)?;
+        let bytes = self.internal_retrieve_trie_node(hash, use_accounting_cache)?;
         let node = RawTrieNodeWithSize::try_from_slice(&bytes).map_err(|err| {
             StorageError::StorageInconsistentState(format!("Failed to decode node {hash}: {err}"))
         })?;
@@ -696,8 +785,11 @@ impl Trie {
     // Similar to retrieve_raw_node but handles the case where there is a Value (and not a Node) in the database.
     // This method is not safe to be used in any real scenario as it can incorrectly interpret a value as a trie node.
     // It's only provided as a convenience for debugging tools.
-    fn retrieve_raw_node_or_value(&self, hash: &CryptoHash) -> Result<NodeOrValue, StorageError> {
-        let bytes = self.storage.retrieve_raw_bytes(hash)?;
+    fn debug_retrieve_raw_node_or_value(
+        &self,
+        hash: &CryptoHash,
+    ) -> Result<NodeOrValue, StorageError> {
+        let bytes = self.internal_retrieve_trie_node(hash, true)?;
         match RawTrieNodeWithSize::try_from_slice(&bytes) {
             Ok(node) => Ok(NodeOrValue::Node(node)),
             Err(_) => Ok(NodeOrValue::Value(bytes)),
@@ -709,7 +801,7 @@ impl Trie {
         memory: &mut NodesStorage,
         hash: &CryptoHash,
     ) -> Result<StorageHandle, StorageError> {
-        match self.retrieve_raw_node(hash)? {
+        match self.retrieve_raw_node(hash, true)? {
             None => Ok(memory.store(TrieNodeWithSize::empty())),
             Some((bytes, node)) => {
                 let result = memory.store(TrieNodeWithSize::from_raw(node));
@@ -729,14 +821,14 @@ impl Trie {
         &self,
         hash: &CryptoHash,
     ) -> Result<(Option<Arc<[u8]>>, TrieNodeWithSize), StorageError> {
-        match self.retrieve_raw_node(hash)? {
+        match self.retrieve_raw_node(hash, true)? {
             None => Ok((None, TrieNodeWithSize::empty())),
             Some((bytes, node)) => Ok((Some(bytes), TrieNodeWithSize::from_raw(node))),
         }
     }
 
     pub fn retrieve_root_node(&self) -> Result<StateRootNode, StorageError> {
-        match self.retrieve_raw_node(&self.root)? {
+        match self.retrieve_raw_node(&self.root, true)? {
             None => Ok(StateRootNode::empty()),
             Some((bytes, node)) => {
                 Ok(StateRootNode { data: bytes, memory_usage: node.memory_usage })
@@ -744,10 +836,14 @@ impl Trie {
         }
     }
 
-    fn lookup(&self, mut key: NibbleSlice<'_>) -> Result<Option<ValueRef>, StorageError> {
+    fn lookup(
+        &self,
+        mut key: NibbleSlice<'_>,
+        use_accounting_cache: bool,
+    ) -> Result<Option<ValueRef>, StorageError> {
         let mut hash = self.root;
         loop {
-            let node = match self.retrieve_raw_node(&hash)? {
+            let node = match self.retrieve_raw_node(&hash, use_accounting_cache)? {
                 None => return Ok(None),
                 Some((_bytes, node)) => node.node,
             };
@@ -822,7 +918,7 @@ impl Trie {
 
         // The rest of the logic is very similar to the standard lookup() function, except
         // we return the raw node and don't expect to hit a leaf.
-        let mut node = self.retrieve_raw_node(&self.root)?;
+        let mut node = self.retrieve_raw_node(&self.root, true)?;
         while !key.is_empty() {
             match node {
                 Some((_, raw_node)) => match raw_node.node {
@@ -834,7 +930,7 @@ impl Trie {
                         let child = children[key.at(0)];
                         match child {
                             Some(child) => {
-                                node = self.retrieve_raw_node(&child)?;
+                                node = self.retrieve_raw_node(&child, true)?;
                                 key = key.mid(1);
                             }
                             None => return Ok(None),
@@ -843,7 +939,7 @@ impl Trie {
                     RawTrieNode::Extension(existing_key, child) => {
                         let existing_key = NibbleSlice::from_encoded(&existing_key).0;
                         if key.starts_with(&existing_key) {
-                            node = self.retrieve_raw_node(&child)?;
+                            node = self.retrieve_raw_node(&child, true)?;
                             key = key.mid(existing_key.len());
                         } else {
                             return Ok(None);
@@ -859,10 +955,10 @@ impl Trie {
         }
     }
 
-    /// For debugging only. Returns the raw bytes corresponding to a ValueRef that came
-    /// from a node with value (either Leaf or BranchWithValue).
-    pub fn debug_get_value(&self, value_ref: &ValueRef) -> Result<Vec<u8>, StorageError> {
-        let bytes = self.storage.retrieve_raw_bytes(&value_ref.hash)?;
+    /// Returns the raw bytes corresponding to a ValueRef that came from a node with
+    /// value (either Leaf or BranchWithValue).
+    pub fn retrieve_value(&self, hash: &CryptoHash) -> Result<Vec<u8>, StorageError> {
+        let bytes = self.internal_retrieve_trie_node(hash, true)?;
         Ok(bytes.to_vec())
     }
 
@@ -885,19 +981,32 @@ impl Trie {
             matches!(mode, KeyLookupMode::FlatStorage) && self.flat_storage_chunk_view.is_some();
 
         if use_flat_storage {
-            let flat_state_value =
-                self.flat_storage_chunk_view.as_ref().unwrap().get_value(&key)?;
-            Ok(flat_state_value.map(|value| value.to_value_ref()))
+            let value_from_flat_storage = self
+                .flat_storage_chunk_view
+                .as_ref()
+                .unwrap()
+                .get_value(&key)?
+                .map(|value| value.to_value_ref());
+            if self.recorder.is_some() {
+                // If recording, we need to look up in the trie as well to record the trie nodes,
+                // as they are needed to prove the value. Also, it's important that this lookup
+                // is done even if the key was not found, because intermediate trie nodes may be
+                // needed to prove the non-existence of the key.
+                let key_nibbles = NibbleSlice::new(key);
+                let value_from_trie = self.lookup(key_nibbles, false)?;
+                assert_eq!(&value_from_flat_storage, &value_from_trie);
+            }
+            Ok(value_from_flat_storage)
         } else {
             let key_nibbles = NibbleSlice::new(key);
-            self.lookup(key_nibbles)
+            self.lookup(key_nibbles, !self.skip_accounting_cache_for_trie_nodes)
         }
     }
 
     pub fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>, StorageError> {
         match self.get_ref(key, KeyLookupMode::FlatStorage)? {
             Some(ValueRef { hash, .. }) => {
-                self.storage.retrieve_raw_bytes(&hash).map(|bytes| Some(bytes.to_vec()))
+                self.internal_retrieve_trie_node(&hash, true).map(|bytes| Some(bytes.to_vec()))
             }
             None => Ok(None),
         }
@@ -972,7 +1081,7 @@ impl Trie {
     }
 
     pub fn get_trie_nodes_count(&self) -> TrieNodesCount {
-        self.storage.get_trie_nodes_count()
+        self.accounting_cache.borrow().get_trie_nodes_count()
     }
 }
 
@@ -1346,7 +1455,7 @@ mod tests {
         trie2.get(b"horse").unwrap();
         let partial_storage = trie2.recorded_storage();
 
-        let trie3 = Trie::from_recorded_storage(partial_storage.unwrap(), root);
+        let trie3 = Trie::from_recorded_storage(partial_storage.unwrap(), root, false);
 
         assert_eq!(trie3.get(b"dog"), Ok(Some(b"puppy".to_vec())));
         assert_eq!(trie3.get(b"horse"), Ok(Some(b"stallion".to_vec())));
diff --git a/core/store/src/trie/prefetching_trie_storage.rs b/core/store/src/trie/prefetching_trie_storage.rs
index 83cf7ac7ac9..6b872dea6c4 100644
--- a/core/store/src/trie/prefetching_trie_storage.rs
+++ b/core/store/src/trie/prefetching_trie_storage.rs
@@ -10,7 +10,7 @@ use near_o11y::tracing::error;
 use near_primitives::hash::CryptoHash;
 use near_primitives::shard_layout::ShardUId;
 use near_primitives::trie_key::TrieKey;
-use near_primitives::types::{AccountId, ShardId, StateRoot, TrieNodesCount};
+use near_primitives::types::{AccountId, ShardId, StateRoot};
 use std::collections::HashMap;
 use std::rc::Rc;
 use std::sync::Arc;
@@ -291,10 +291,6 @@ impl TrieStorage for TriePrefetchingStorage {
             )),
         }
     }
-
-    fn get_trie_nodes_count(&self) -> TrieNodesCount {
-        unimplemented!()
-    }
 }
 
 impl TriePrefetchingStorage {
diff --git a/core/store/src/trie/state_parts.rs b/core/store/src/trie/state_parts.rs
index f72155b8c14..2a0cb3a354f 100644
--- a/core/store/src/trie/state_parts.rs
+++ b/core/store/src/trie/state_parts.rs
@@ -23,7 +23,7 @@ use crate::trie::trie_storage::TrieMemoryPartialStorage;
 use crate::trie::{
     ApplyStatePartResult, NodeHandle, RawTrieNodeWithSize, TrieNode, TrieNodeWithSize,
 };
-use crate::{metrics, PartialStorage, StorageError, Trie, TrieChanges, TrieStorage};
+use crate::{metrics, PartialStorage, StorageError, Trie, TrieChanges};
 use borsh::BorshDeserialize;
 use near_primitives::challenge::PartialState;
 use near_primitives::contract::ContractCode;
@@ -173,14 +173,14 @@ impl Trie {
     /// * part_id - number of the state part, mainly for metrics.
     /// * partial_state - nodes needed to generate and proof state part boundaries.
     /// * nibbles_begin and nibbles_end specify the range of flat storage to be read.
-    /// * state_storage - provides access to State for random lookups of values by hash.
+    /// * state_trie - provides access to State for random lookups of values by hash.
     pub fn get_trie_nodes_for_part_with_flat_storage(
         &self,
         part_id: PartId,
         partial_state: PartialState,
         nibbles_begin: Vec<u8>,
         nibbles_end: Vec<u8>,
-        state_storage: Rc<dyn TrieStorage>,
+        state_trie: &Trie,
     ) -> Result<PartialState, StorageError> {
         let shard_id: ShardId = self.flat_storage_chunk_view.as_ref().map_or(
             ShardId::MAX, // Fake value for metrics.
@@ -230,9 +230,7 @@ impl Trie {
             .start_timer();
         let looked_up_value_refs: Vec<_> = value_refs
             .iter()
-            .map(|(k, hash)| {
-                Ok((k.clone(), Some(state_storage.retrieve_raw_bytes(hash)?.to_vec())))
-            })
+            .map(|(k, hash)| Ok((k.clone(), Some(state_trie.retrieve_value(hash)?.to_vec()))))
            .collect::<Result<_, StorageError>>()
            .unwrap();
         all_state_part_items.extend(looked_up_value_refs.iter().cloned());
@@ -425,8 +423,11 @@ impl Trie {
     ) -> Result<(), StorageError> {
         let PartialState::TrieValues(nodes) = &partial_state;
         let num_nodes = nodes.len();
-        let trie =
-            Trie::from_recorded_storage(PartialStorage { nodes: partial_state }, *state_root);
+        let trie = Trie::from_recorded_storage(
+            PartialStorage { nodes: partial_state },
+            *state_root,
+            false,
+        );
 
         trie.visit_nodes_for_state_part(part_id)?;
         let storage = trie.storage.as_partial_storage().unwrap();
@@ -451,7 +452,7 @@ impl Trie {
                 contract_codes: vec![],
             });
         }
-        let trie = Trie::from_recorded_storage(PartialStorage { nodes: part }, *state_root);
+        let trie = Trie::from_recorded_storage(PartialStorage { nodes: part }, *state_root, false);
         let path_begin = trie.find_state_part_boundary(part_id.idx, part_id.total)?;
         let path_end = trie.find_state_part_boundary(part_id.idx + 1, part_id.total)?;
         let mut iterator = trie.iter()?;
@@ -460,7 +461,7 @@ impl Trie {
         let mut flat_state_delta = FlatStateChanges::default();
         let mut contract_codes = Vec::new();
         for TrieTraversalItem { hash, key } in trie_traversal_items {
-            let value = trie.storage.retrieve_raw_bytes(&hash)?;
+            let value = trie.retrieve_value(&hash)?;
             map.entry(hash).or_insert_with(|| (value.to_vec(), 0)).1 += 1;
             if let Some(trie_key) = key {
                 let flat_state_value = FlatStateValue::on_disk(&value);
@@ -617,13 +618,13 @@ mod tests {
                 .cloned()
                 .collect(),
         );
-        let trie = Trie::from_recorded_storage(PartialStorage { nodes }, *state_root);
+        let trie = Trie::from_recorded_storage(PartialStorage { nodes }, *state_root, false);
         let mut insertions = <HashMap<CryptoHash, (Vec<u8>, u32)>>::new();
         trie.traverse_all_nodes(|hash| {
             if let Some((_bytes, rc)) = insertions.get_mut(hash) {
                 *rc += 1;
             } else {
-                let bytes = trie.storage.retrieve_raw_bytes(hash)?;
+                let bytes = trie.retrieve_value(hash)?;
                 insertions.insert(*hash, (bytes.to_vec(), 1));
             }
             Ok(())
@@ -1190,7 +1191,7 @@ mod tests {
                 partial_state,
                 nibbles_begin,
                 nibbles_end,
-                trie_without_flat.storage.clone(),
+                &trie_without_flat,
             ),
             Err(StorageError::MissingTrieValue)
         );
@@ -1213,7 +1214,7 @@ mod tests {
             partial_state.clone(),
             nibbles_begin.clone(),
             nibbles_end.clone(),
-            trie_without_flat.storage.clone(),
+            &trie_without_flat,
         );
         assert_eq!(state_part_with_flat, Ok(state_part.clone()));
@@ -1237,7 +1238,7 @@ mod tests {
                 partial_state.clone(),
                 nibbles_begin.clone(),
                 nibbles_end.clone(),
-                trie_without_flat.storage.clone(),
+                &trie_without_flat,
             ),
             Ok(state_part)
         );
@@ -1256,7 +1257,7 @@ mod tests {
                 partial_state,
                 nibbles_begin,
                 nibbles_end,
-                trie_without_flat.storage.clone(),
+                &trie_without_flat,
             ),
             Err(StorageError::MissingTrieValue)
         );
diff --git a/core/store/src/trie/trie_recording.rs b/core/store/src/trie/trie_recording.rs
new file mode 100644
index 00000000000..cc7aaff9ef9
--- /dev/null
+++ b/core/store/src/trie/trie_recording.rs
@@ -0,0 +1,253 @@
+use crate::PartialStorage;
+use near_primitives::challenge::PartialState;
+use near_primitives::hash::CryptoHash;
+use std::collections::HashMap;
+use std::sync::Arc;
+
+/// A simple struct to capture a state proof as it's being accumulated.
+pub struct TrieRecorder {
+    recorded: HashMap<CryptoHash, Arc<[u8]>>,
+}
+
+impl TrieRecorder {
+    pub fn new() -> Self {
+        Self { recorded: HashMap::new() }
+    }
+
+    pub fn record(&mut self, hash: &CryptoHash, node: Arc<[u8]>) {
+        self.recorded.insert(*hash, node);
+    }
+
+    pub fn recorded_storage(&mut self) -> PartialStorage {
+        let mut nodes: Vec<_> = self.recorded.drain().map(|(_key, value)| value).collect();
+        nodes.sort();
+        PartialStorage { nodes: PartialState::TrieValues(nodes) }
+    }
+}
+
+#[cfg(test)]
+mod trie_recording_tests {
+    use crate::test_utils::{
+        create_tries_complex, gen_larger_changes, simplify_changes, test_populate_flat_storage,
+        test_populate_trie,
+    };
+    use crate::Trie;
+    use near_primitives::hash::CryptoHash;
+    use near_primitives::shard_layout::ShardUId;
+    use near_vm_runner::logic::TrieNodesCount;
+    use std::collections::HashMap;
+
+    const NUM_ITERATIONS_PER_TEST: usize = 100;
+
+    /// Verifies that when operating on a trie, the results are completely consistent
+    /// regardless of whether we're operating on the real storage (with or without the
+    /// accounting cache), while recording reads, or when operating on recorded partial storage.
+    fn test_trie_recording_consistency(enable_accounting_cache: bool, use_missing_keys: bool) {
+        let mut rng = rand::thread_rng();
+        for _ in 0..NUM_ITERATIONS_PER_TEST {
+            let tries = create_tries_complex(1, 2);
+
+            let shard_uid = ShardUId { version: 1, shard_id: 0 };
+            let trie_changes = gen_larger_changes(&mut rng, 50);
+            let trie_changes = simplify_changes(&trie_changes);
+            if trie_changes.is_empty() {
+                continue;
+            }
+            let state_root =
+                test_populate_trie(&tries, &Trie::EMPTY_ROOT, shard_uid, trie_changes.clone());
+            let data_in_trie = trie_changes
+                .iter()
+                .map(|(key, value)| (key.clone(), value.clone().unwrap()))
+                .collect::<HashMap<_, _>>();
+            let keys_to_test_with = trie_changes
+                .iter()
+                .map(|(key, _)| {
+                    let mut key = key.clone();
+                    if use_missing_keys {
+                        key.push(100);
+                    }
+                    key
+                })
+                .collect::<Vec<_>>();
+
+            // Let's capture the baseline node counts - this is what will happen
+            // in production.
+            let trie = tries.get_trie_for_shard(shard_uid, state_root);
+            trie.accounting_cache.borrow_mut().set_enabled(enable_accounting_cache);
+            for key in &keys_to_test_with {
+                assert_eq!(trie.get(key).unwrap(), data_in_trie.get(key).cloned());
+            }
+            let baseline_trie_nodes_count = trie.get_trie_nodes_count();
+            println!("Baseline trie nodes count: {:?}", baseline_trie_nodes_count);
+
+            // Now let's do this again while recording, and make sure that the counters
+            // we get are exactly the same.
+            let trie = tries.get_trie_for_shard(shard_uid, state_root).recording_reads();
+            trie.accounting_cache.borrow_mut().set_enabled(enable_accounting_cache);
+            for key in &keys_to_test_with {
+                assert_eq!(trie.get(key).unwrap(), data_in_trie.get(key).cloned());
+            }
+            assert_eq!(trie.get_trie_nodes_count(), baseline_trie_nodes_count);
+
+            // Now, let's check that when doing the same lookups with the captured partial storage,
+            // we still get the same counters.
+            let partial_storage = trie.recorded_storage().unwrap();
+            println!(
+                "Partial storage has {} nodes from {} entries",
+                partial_storage.nodes.len(),
+                trie_changes.len()
+            );
+            let trie = Trie::from_recorded_storage(partial_storage, state_root, false);
+            trie.accounting_cache.borrow_mut().set_enabled(enable_accounting_cache);
+            for key in &keys_to_test_with {
+                assert_eq!(trie.get(key).unwrap(), data_in_trie.get(key).cloned());
+            }
+            assert_eq!(trie.get_trie_nodes_count(), baseline_trie_nodes_count);
+        }
+    }
+
+    #[test]
+    fn test_trie_recording_consistency_no_accounting_cache() {
+        test_trie_recording_consistency(false, false);
+    }
+
+    #[test]
+    fn test_trie_recording_consistency_with_accounting_cache() {
+        test_trie_recording_consistency(true, false);
+    }
+
+    #[test]
+    fn test_trie_recording_consistency_no_accounting_cache_with_missing_keys() {
+        test_trie_recording_consistency(false, true);
+    }
+
+    #[test]
+    fn test_trie_recording_consistency_with_accounting_cache_and_missing_keys() {
+        test_trie_recording_consistency(true, true);
+    }
+
+    /// Verifies that when operating on a trie, the results are completely consistent
+    /// regardless of whether we're operating on the real storage (with or without the
+    /// accounting cache), while recording reads, or when operating on recorded partial storage.
+    /// This test additionally verifies this when flat storage is used.
+    fn test_trie_recording_consistency_with_flat_storage(
+        enable_accounting_cache: bool,
+        use_missing_keys: bool,
+    ) {
+        let mut rng = rand::thread_rng();
+        for _ in 0..NUM_ITERATIONS_PER_TEST {
+            let tries = create_tries_complex(1, 2);
+
+            let shard_uid = ShardUId { version: 1, shard_id: 0 };
+            let trie_changes = gen_larger_changes(&mut rng, 50);
+            let trie_changes = simplify_changes(&trie_changes);
+            if trie_changes.is_empty() {
+                continue;
+            }
+            let state_root =
+                test_populate_trie(&tries, &Trie::EMPTY_ROOT, shard_uid, trie_changes.clone());
+            test_populate_flat_storage(
+                &tries,
+                shard_uid,
+                &CryptoHash::default(),
+                &CryptoHash::default(),
+                &trie_changes,
+            );
+
+            let data_in_trie = trie_changes
+                .iter()
+                .map(|(key, value)| (key.clone(), value.clone().unwrap()))
+                .collect::<HashMap<_, _>>();
+            let keys_to_test_with = trie_changes
+                .iter()
+                .map(|(key, _)| {
+                    let mut key = key.clone();
+                    if use_missing_keys {
+                        key.push(100);
+                    }
+                    key
+                })
+                .collect::<Vec<_>>();
+
+            // First, check that the trie is using flat storage, so that counters are all zero.
+            // Only use get_ref(), because get() will actually dereference values which can
+            // cause trie reads.
+            let trie = tries.get_trie_with_block_hash_for_shard(
+                shard_uid,
+                state_root,
+                &CryptoHash::default(),
+                false,
+            );
+            for key in &keys_to_test_with {
+                trie.get_ref(&key, crate::KeyLookupMode::FlatStorage).unwrap();
+            }
+            assert_eq!(trie.get_trie_nodes_count(), TrieNodesCount { db_reads: 0, mem_reads: 0 });
+
+            // Now, let's capture the baseline node counts - this is what will happen
+            // in production.
+            let trie = tries.get_trie_with_block_hash_for_shard(
+                shard_uid,
+                state_root,
+                &CryptoHash::default(),
+                false,
+            );
+            trie.accounting_cache.borrow_mut().set_enabled(enable_accounting_cache);
+            for key in &keys_to_test_with {
+                assert_eq!(trie.get(key).unwrap(), data_in_trie.get(key).cloned());
+            }
+            let baseline_trie_nodes_count = trie.get_trie_nodes_count();
+            println!("Baseline trie nodes count: {:?}", baseline_trie_nodes_count);
+
+            // Let's do this again, but this time recording reads. We'll make sure
+            // the counters are exactly the same even when we're recording.
+            let trie = tries
+                .get_trie_with_block_hash_for_shard(
+                    shard_uid,
+                    state_root,
+                    &CryptoHash::default(),
+                    false,
+                )
+                .recording_reads();
+            trie.accounting_cache.borrow_mut().set_enabled(enable_accounting_cache);
+            for key in &keys_to_test_with {
+                assert_eq!(trie.get(key).unwrap(), data_in_trie.get(key).cloned());
+            }
+            assert_eq!(trie.get_trie_nodes_count(), baseline_trie_nodes_count);
+
+            // Now, let's check that when doing the same lookups with the captured partial storage,
+            // we still get the same counters.
+            let partial_storage = trie.recorded_storage().unwrap();
+            println!(
+                "Partial storage has {} nodes from {} entries",
+                partial_storage.nodes.len(),
+                trie_changes.len()
+            );
+            let trie = Trie::from_recorded_storage(partial_storage, state_root, true);
+            trie.accounting_cache.borrow_mut().set_enabled(enable_accounting_cache);
+            for key in &keys_to_test_with {
+                assert_eq!(trie.get(key).unwrap(), data_in_trie.get(key).cloned());
+            }
+            assert_eq!(trie.get_trie_nodes_count(), baseline_trie_nodes_count);
+        }
+    }
+
+    #[test]
+    fn test_trie_recording_consistency_with_flat_storage_no_accounting_cache() {
+        test_trie_recording_consistency_with_flat_storage(false, false);
+    }
+
+    #[test]
+    fn test_trie_recording_consistency_with_flat_storage_with_accounting_cache() {
+        test_trie_recording_consistency_with_flat_storage(true, false);
+    }
+
+    #[test]
+    fn test_trie_recording_consistency_with_flat_storage_no_accounting_cache_with_missing_keys() {
+        test_trie_recording_consistency_with_flat_storage(false, true);
+    }
+
+    #[test]
+    fn test_trie_recording_consistency_with_flat_storage_with_accounting_cache_and_missing_keys() {
+        test_trie_recording_consistency_with_flat_storage(true, true);
+    }
+}
diff --git a/core/store/src/trie/trie_storage.rs b/core/store/src/trie/trie_storage.rs
index ba94c1c5e74..74ff15ec2ef 100644
--- a/core/store/src/trie/trie_storage.rs
+++ b/core/store/src/trie/trie_storage.rs
@@ -9,10 +9,9 @@ use near_o11y::metrics::prometheus::core::{GenericCounter, GenericGauge};
 use near_primitives::challenge::PartialState;
 use near_primitives::hash::CryptoHash;
 use near_primitives::shard_layout::ShardUId;
-use near_primitives::types::{ShardId, TrieCacheMode, TrieNodesCount};
-use std::cell::{Cell, RefCell};
+use near_primitives::types::ShardId;
+use std::cell::RefCell;
 use std::collections::{HashMap, HashSet, VecDeque};
-use std::rc::Rc;
 use std::sync::{Arc, Mutex};
 
 pub(crate) struct BoundedQueue<T> {
@@ -292,42 +291,9 @@ pub trait TrieStorage {
         None
     }
 
-    fn as_recording_storage(&self) -> Option<&TrieRecordingStorage> {
-        None
-    }
-
     fn as_partial_storage(&self) -> Option<&TrieMemoryPartialStorage> {
         None
     }
-
-    fn get_trie_nodes_count(&self) -> TrieNodesCount;
-}
-
-/// Records every value read by retrieve_raw_bytes.
-/// Used for obtaining state parts (and challenges in the future).
-/// TODO (#6316): implement proper nodes counting logic as in TrieCachingStorage
-pub struct TrieRecordingStorage {
-    pub(crate) storage: Rc<dyn TrieStorage>,
-    pub(crate) recorded: RefCell<HashMap<CryptoHash, Arc<[u8]>>>,
-}
-
-impl TrieStorage for TrieRecordingStorage {
-    fn retrieve_raw_bytes(&self, hash: &CryptoHash) -> Result<Arc<[u8]>, StorageError> {
-        if let Some(val) = self.recorded.borrow().get(hash).cloned() {
-            return Ok(val);
-        }
-        let val = self.storage.retrieve_raw_bytes(hash)?;
-        self.recorded.borrow_mut().insert(*hash, Arc::clone(&val));
-        Ok(val)
-    }
-
-    fn as_recording_storage(&self) -> Option<&TrieRecordingStorage> {
-        Some(self)
-    }
-
-    fn get_trie_nodes_count(&self) -> TrieNodesCount {
-        unimplemented!();
-    }
 }
 
 /// Storage for validating recorded partial storage.
@@ -350,10 +316,6 @@ impl TrieStorage for TrieMemoryPartialStorage {
     fn as_partial_storage(&self) -> Option<&TrieMemoryPartialStorage> {
         Some(self)
     }
-
-    fn get_trie_nodes_count(&self) -> TrieNodesCount {
-        unimplemented!();
-    }
 }
 
 impl TrieMemoryPartialStorage {
@@ -381,43 +343,33 @@ impl TrieMemoryPartialStorage {
 }
 
 /// Storage for reading State nodes and values from DB which caches reads.
+///
+/// Important: The TrieCachingStorage contains the shard cache, which is
+/// different from the accounting cache. The former is a best-effort
+/// optimization to speed up execution, whereas the latter is a deterministic
+/// cache used for gas accounting during contract execution.
 pub struct TrieCachingStorage {
     pub(crate) store: Store,
     pub(crate) shard_uid: ShardUId,
+    pub(crate) is_view: bool,
 
     /// Caches ever requested items for the shard `shard_uid`. Used to speed up DB operations, presence of any item is
     /// not guaranteed.
     pub(crate) shard_cache: TrieCache,
-    /// Caches all items requested in the mode `TrieCacheMode::CachingChunk`. It is created in
-    /// `apply_transactions_with_optional_storage_proof` by calling `get_trie_for_shard`. Before we start to apply
-    /// txs and receipts in the chunk, it must be empty, and all items placed here must remain until applying
-    /// txs/receipts ends. Then cache is removed automatically in `apply_transactions_with_optional_storage_proof` when
-    /// `TrieCachingStorage` is removed.
-    /// Note that for both caches key is the hash of value, so for the fixed key the value is unique.
-    pub(crate) chunk_cache: RefCell<HashMap<CryptoHash, Arc<[u8]>>>,
-    pub(crate) cache_mode: Cell<TrieCacheMode>,
 
     /// The entry point for the runtime to submit prefetch requests.
     pub(crate) prefetch_api: Option<PrefetchApi>,
 
-    /// Counts potentially expensive trie node reads which are served from disk in the worst case. Here we count reads
-    /// from DB or shard cache.
-    pub(crate) db_read_nodes: Cell<u64>,
-    /// Counts trie nodes retrieved from the chunk cache.
-    pub(crate) mem_read_nodes: Cell<u64>,
-
     // Counters tracking operations happening inside the shard cache.
     // Stored here to avoid overhead of looking them up on hot paths.
     metrics: TrieCacheInnerMetrics,
 }
 
 struct TrieCacheInnerMetrics {
-    chunk_cache_hits: GenericCounter<prometheus::core::AtomicU64>,
-    chunk_cache_misses: GenericCounter<prometheus::core::AtomicU64>,
     shard_cache_hits: GenericCounter<prometheus::core::AtomicU64>,
     shard_cache_misses: GenericCounter<prometheus::core::AtomicU64>,
     shard_cache_too_large: GenericCounter<prometheus::core::AtomicU64>,
     shard_cache_size: GenericGauge<prometheus::core::AtomicI64>,
-    chunk_cache_size: GenericGauge<prometheus::core::AtomicI64>,
     shard_cache_current_total_size: GenericGauge<prometheus::core::AtomicI64>,
     prefetch_hits: GenericCounter<prometheus::core::AtomicU64>,
     prefetch_pending: GenericCounter<prometheus::core::AtomicU64>,
@@ -441,14 +393,11 @@ impl TrieCachingStorage {
         let metrics_labels: [&str; 2] = [&shard_id, if is_view { "1" } else { "0" }];
         let metrics = TrieCacheInnerMetrics {
-            chunk_cache_hits: metrics::CHUNK_CACHE_HITS.with_label_values(&metrics_labels),
-            chunk_cache_misses: metrics::CHUNK_CACHE_MISSES.with_label_values(&metrics_labels),
             shard_cache_hits: metrics::SHARD_CACHE_HITS.with_label_values(&metrics_labels),
             shard_cache_misses: metrics::SHARD_CACHE_MISSES.with_label_values(&metrics_labels),
             shard_cache_too_large: metrics::SHARD_CACHE_TOO_LARGE
                 .with_label_values(&metrics_labels),
             shard_cache_size: metrics::SHARD_CACHE_SIZE.with_label_values(&metrics_labels),
-            chunk_cache_size: metrics::CHUNK_CACHE_SIZE.with_label_values(&metrics_labels),
             shard_cache_current_total_size: metrics::SHARD_CACHE_CURRENT_TOTAL_SIZE
                 .with_label_values(&metrics_labels),
             prefetch_hits: metrics::PREFETCH_HITS.with_label_values(&metrics_labels[..1]),
@@ -460,17 +409,7 @@ impl TrieCachingStorage {
             prefetch_retry: metrics::PREFETCH_RETRY.with_label_values(&metrics_labels[..1]),
             prefetch_conflict: metrics::PREFETCH_CONFLICT.with_label_values(&metrics_labels[..1]),
         };
-        TrieCachingStorage {
-            store,
-            shard_uid,
-            shard_cache,
-            cache_mode: Cell::new(TrieCacheMode::CachingShard),
-            prefetch_api,
-            chunk_cache: RefCell::new(Default::default()),
-            db_read_nodes: Cell::new(0),
-            mem_read_nodes: Cell::new(0),
-            metrics,
-        }
+        TrieCachingStorage { store, shard_uid, is_view, shard_cache, prefetch_api, metrics }
     }
 
     pub fn get_key_from_shard_uid_and_hash(shard_uid: ShardUId, hash: &CryptoHash) -> [u8; 40] {
@@ -479,33 +418,10 @@ impl TrieCachingStorage {
         key[8..].copy_from_slice(hash.as_ref());
         key
     }
-
-    fn inc_db_read_nodes(&self) {
-        self.db_read_nodes.set(self.db_read_nodes.get() + 1);
-    }
-
-    fn inc_mem_read_nodes(&self) {
-        self.mem_read_nodes.set(self.mem_read_nodes.get() + 1);
-    }
-
-    /// Set cache mode.
-    pub fn set_mode(&self, state: TrieCacheMode) {
-        self.cache_mode.set(state);
-    }
 }
 
 impl TrieStorage for TrieCachingStorage {
     fn retrieve_raw_bytes(&self, hash: &CryptoHash) -> Result<Arc<[u8]>, StorageError> {
-        self.metrics.chunk_cache_size.set(self.chunk_cache.borrow().len() as i64);
-        // Try to get value from chunk cache containing nodes with cheaper access. We can do it for any `TrieCacheMode`,
-        // because we charge for reading nodes only when `CachingChunk` mode is enabled anyway.
-        if let Some(val) = self.chunk_cache.borrow_mut().get(hash) {
-            self.metrics.chunk_cache_hits.inc();
-            self.inc_mem_read_nodes();
-            return Ok(val.clone());
-        }
-        self.metrics.chunk_cache_misses.inc();
-
         // Try to get value from shard cache containing most recently touched nodes.
         let mut guard = self.shard_cache.lock();
         self.metrics.shard_cache_size.set(guard.len() as i64);
@@ -579,9 +495,9 @@ impl TrieStorage for TrieCachingStorage {
         }
 
         // Insert value to shard cache, if its size is small enough.
-        // It is fine to have a size limit for shard cache and **not** have a limit for chunk cache, because key
+        // It is fine to have a size limit for shard cache and **not** have a limit for accounting cache, because key
         // is always a value hash, so for each key there could be only one value, and it is impossible to have
-        // **different** values for the given key in shard and chunk caches.
+        // **different** values for the given key in shard and accounting caches.
         if val.len() < TrieConfig::max_cached_value_size() {
             let mut guard = self.shard_cache.lock();
             guard.put(*hash, val.clone());
@@ -599,31 +515,12 @@ impl TrieStorage for TrieCachingStorage {
             }
         };
 
-        // Because node is not present in chunk cache, increment the nodes counter and optionally insert it into the
-        // chunk cache.
-        // Note that we don't have a size limit for values in the chunk cache. There are two reasons:
-        // - for nodes, value size is an implementation detail. If we change internal representation of a node (e.g.
-        //   change `memory_usage` field from `RawTrieNodeWithSize`), this would have to be a protocol upgrade.
-        // - total size of all values is limited by the runtime fees. More thoroughly:
-        //   - number of nodes is limited by receipt gas limit / touching trie node fee ~= 500 Tgas / 16 Ggas = 31_250;
-        //   - size of trie keys and values is limited by receipt gas limit / lowest per byte fee
-        //     (`storage_read_value_byte`) ~= (500 * 10**12 / 5611005) / 2**20 ~= 85 MB.
-        // All values are given as of 16/03/2022. We may consider more precise limit for the chunk cache as well.
-        self.inc_db_read_nodes();
-        if let TrieCacheMode::CachingChunk = self.cache_mode.get() {
-            self.chunk_cache.borrow_mut().insert(*hash, val.clone());
-        };
-
         Ok(val)
     }
 
     fn as_caching_storage(&self) -> Option<&TrieCachingStorage> {
         Some(self)
     }
-
-    fn get_trie_nodes_count(&self) -> TrieNodesCount {
-        TrieNodesCount { db_reads: self.db_read_nodes.get(), mem_reads: self.mem_read_nodes.get() }
-    }
 }
 
 fn read_node_from_db(
@@ -669,10 +566,6 @@ impl TrieStorage for TrieDBStorage {
     fn retrieve_raw_bytes(&self, hash: &CryptoHash) -> Result<Arc<[u8]>, StorageError> {
         read_node_from_db(&self.store, self.shard_uid, hash)
     }
-
-    fn get_trie_nodes_count(&self) -> TrieNodesCount {
-        unimplemented!();
-    }
 }
 
 #[cfg(test)]
diff --git a/core/store/src/trie/trie_tests.rs b/core/store/src/trie/trie_tests.rs
index 8ae1aef7f91..3be3f3ba237 100644
--- a/core/store/src/trie/trie_tests.rs
+++ b/core/store/src/trie/trie_tests.rs
@@ -35,16 +35,18 @@ impl IncompletePartialStorage {
 
 impl TrieStorage for IncompletePartialStorage {
     fn retrieve_raw_bytes(&self, hash: &CryptoHash) -> Result<Arc<[u8]>, StorageError> {
-        let result = self.recorded_storage.get(hash).cloned().ok_or(StorageError::MissingTrieValue);
+        let result = self
+            .recorded_storage
+            .get(hash)
+            .cloned()
+            .expect("Recorded storage is missing the given hash");
 
-        if result.is_ok() {
-            self.visited_nodes.borrow_mut().insert(*hash);
-        }
+        self.visited_nodes.borrow_mut().insert(*hash);
 
         if self.visited_nodes.borrow().len() > self.node_count_to_fail_after {
             Err(StorageError::MissingTrieValue)
         } else {
-            result
+            Ok(result)
         }
     }
 
@@ -52,10 +54,6 @@ impl TrieStorage for IncompletePartialStorage {
         // Make sure it's not called - it pretends to be PartialStorage but is not
         unimplemented!()
     }
-
-    fn get_trie_nodes_count(&self) -> TrieNodesCount {
-        unimplemented!();
-    }
 }
 
 fn setup_storage<F, Out>(trie: Trie, test: &mut F) -> (PartialStorage, Trie, Out)
 where
@@ -75,14 +73,10 @@ where
 {
     let (storage, trie, expected) = setup_storage(trie, &mut test);
     let size = storage.nodes.len();
-    print!("Test touches {} nodes, expected result {:?}...", size, expected);
+    println!("Test touches {} nodes, expected result {:?}...", size, expected);
     for i in 0..(size + 1) {
         let storage = IncompletePartialStorage::new(storage.clone(), i);
-        let new_trie = Trie {
-            storage: Rc::new(storage),
-            root: *trie.get_root(),
-            flat_storage_chunk_view: None,
-        };
+        let new_trie = Trie::new(Rc::new(storage), *trie.get_root(), None);
         let expected_result =
             if i < size { Err(&StorageError::MissingTrieValue) } else { Ok(&expected) };
         assert_eq!(test(new_trie).map(|v| v.1).as_ref(), expected_result);
@@ -199,12 +193,12 @@ mod nodes_counter_tests {
 mod trie_storage_tests {
     use super::*;
     use crate::test_utils::{create_test_store, create_tries};
+    use crate::trie::accounting_cache::TrieAccountingCache;
     use crate::trie::trie_storage::{TrieCache, TrieCachingStorage, TrieDBStorage};
     use crate::trie::TrieRefcountChange;
     use crate::{Store, TrieChanges, TrieConfig};
     use assert_matches::assert_matches;
     use near_primitives::hash::hash;
-    use near_primitives::types::TrieCacheMode;
 
     fn create_store_with_values(values: &[Vec<u8>], shard_uid: ShardUId) -> Store {
         let tries = create_tries();
@@ -247,14 +241,16 @@ mod trie_storage_tests {
         let trie_cache = TrieCache::new(&TrieConfig::default(), shard_uid, false);
         let trie_caching_storage =
             TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false, None);
+        let mut accounting_cache = TrieAccountingCache::new(None);
         let key = hash(&value);
         assert_eq!(trie_cache.get(&key), None);
 
         for _ in 0..2 {
-            let count_before = trie_caching_storage.get_trie_nodes_count();
-            let result = trie_caching_storage.retrieve_raw_bytes(&key);
+            let count_before = accounting_cache.get_trie_nodes_count();
+            let result =
+                accounting_cache.retrieve_raw_bytes_with_accounting(&key, &trie_caching_storage);
             let count_delta =
-                trie_caching_storage.get_trie_nodes_count().checked_sub(&count_before).unwrap();
+                accounting_cache.get_trie_nodes_count().checked_sub(&count_before).unwrap();
             assert_eq!(result.unwrap().as_ref(), value);
             assert_eq!(count_delta.db_reads, 1);
             assert_eq!(count_delta.mem_reads, 0);
@@ -281,7 +277,7 @@ mod trie_storage_tests {
         assert_matches!(result, Err(StorageError::MissingTrieValue));
     }
 
-    /// Check that large values does not fall into shard cache, but fall into chunk cache.
+    /// Check that large values do not fall into the shard cache, but do fall into the accounting cache.
     #[test]
     fn test_large_value() {
         let value = vec![1u8].repeat(TrieConfig::max_cached_value_size() + 1);
@@ -291,15 +287,17 @@ mod trie_storage_tests {
         let trie_cache = TrieCache::new(&TrieConfig::default(), shard_uid, false);
         let trie_caching_storage =
             TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false, None);
+        let mut accounting_cache = TrieAccountingCache::new(None);
         let key = hash(&value);
 
-        trie_caching_storage.set_mode(TrieCacheMode::CachingChunk);
-        let _ = trie_caching_storage.retrieve_raw_bytes(&key);
+        accounting_cache.set_enabled(true);
+        let _ = accounting_cache.retrieve_raw_bytes_with_accounting(&key, &trie_caching_storage);
 
-        let count_before = trie_caching_storage.get_trie_nodes_count();
-        let result = trie_caching_storage.retrieve_raw_bytes(&key);
+        let count_before: TrieNodesCount = accounting_cache.get_trie_nodes_count();
+        let result =
+            accounting_cache.retrieve_raw_bytes_with_accounting(&key, &trie_caching_storage);
         let count_delta =
-            trie_caching_storage.get_trie_nodes_count().checked_sub(&count_before).unwrap();
+            accounting_cache.get_trie_nodes_count().checked_sub(&count_before).unwrap();
         assert_eq!(trie_cache.get(&key), None);
         assert_eq!(result.unwrap().as_ref(), value);
         assert_eq!(count_delta.db_reads, 0);
@@ -315,6 +313,7 @@ mod trie_storage_tests {
         let trie_cache = TrieCache::new(&TrieConfig::default(), shard_uid, false);
         let trie_caching_storage =
             TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false, None);
+        let mut accounting_cache = TrieAccountingCache::new(None);
         let value = &values[0];
         let key = hash(&value);
 
@@ -327,39 +326,43 @@ mod trie_storage_tests {
 
         // Move to CachingChunk mode. Retrieval should increment the counter, because it is the first time we accessed
        // item while caching chunk.
-        trie_caching_storage.set_mode(TrieCacheMode::CachingChunk);
-        let count_before = trie_caching_storage.get_trie_nodes_count();
-        let result = trie_caching_storage.retrieve_raw_bytes(&key);
+        accounting_cache.set_enabled(true);
+        let count_before = accounting_cache.get_trie_nodes_count();
+        let result =
+            accounting_cache.retrieve_raw_bytes_with_accounting(&key, &trie_caching_storage);
         let count_delta =
-            trie_caching_storage.get_trie_nodes_count().checked_sub(&count_before).unwrap();
+            accounting_cache.get_trie_nodes_count().checked_sub(&count_before).unwrap();
         assert_eq!(result.unwrap().as_ref(), value);
         assert_eq!(count_delta.db_reads, 1);
         assert_eq!(count_delta.mem_reads, 0);
 
-        // After previous retrieval, item must be copied to chunk cache. Retrieval shouldn't increment the counter.
-        let count_before = trie_caching_storage.get_trie_nodes_count();
-        let result = trie_caching_storage.retrieve_raw_bytes(&key);
+        // After previous retrieval, item must be copied to accounting cache. Retrieval shouldn't increment the counter.
+        let count_before = accounting_cache.get_trie_nodes_count();
+        let result =
+            accounting_cache.retrieve_raw_bytes_with_accounting(&key, &trie_caching_storage);
         let count_delta =
-            trie_caching_storage.get_trie_nodes_count().checked_sub(&count_before).unwrap();
+            accounting_cache.get_trie_nodes_count().checked_sub(&count_before).unwrap();
         assert_eq!(result.unwrap().as_ref(), value);
         assert_eq!(count_delta.db_reads, 0);
         assert_eq!(count_delta.mem_reads, 1);
 
-        // Even if we switch to caching shard, retrieval shouldn't increment the counter. Chunk cache only grows and is
+        // Even if we switch to caching shard, retrieval shouldn't increment the counter. Accounting cache only grows and is
         // dropped only when trie caching storage is dropped.
-        trie_caching_storage.set_mode(TrieCacheMode::CachingShard);
-        let count_before = trie_caching_storage.get_trie_nodes_count();
-        let result = trie_caching_storage.retrieve_raw_bytes(&key);
+        accounting_cache.set_enabled(true);
+        let count_before = accounting_cache.get_trie_nodes_count();
+        let result =
+            accounting_cache.retrieve_raw_bytes_with_accounting(&key, &trie_caching_storage);
         let count_delta =
-            trie_caching_storage.get_trie_nodes_count().checked_sub(&count_before).unwrap();
+            accounting_cache.get_trie_nodes_count().checked_sub(&count_before).unwrap();
         assert_eq!(result.unwrap().as_ref(), value);
         assert_eq!(count_delta.db_reads, 0);
         assert_eq!(count_delta.mem_reads, 1);
     }
 
-    /// Check that if an item present in chunk cache gets evicted from the shard cache, it stays in the chunk cache.
+    /// Check that if an item present in accounting cache gets evicted from the shard cache,
+    /// it stays in the accounting cache.
     #[test]
-    fn test_chunk_cache_presence() {
+    fn test_accounting_cache_presence() {
         let shard_cache_size = 5;
         let values: Vec<Vec<u8>> = (0..shard_cache_size as u8 + 1).map(|i| vec![i]).collect();
         let shard_uid = ShardUId::single_shard();
@@ -369,26 +372,30 @@ mod trie_storage_tests {
         let trie_cache = TrieCache::new(&trie_config, shard_uid, false);
         let trie_caching_storage =
             TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false, None);
+        let mut accounting_cache = TrieAccountingCache::new(None);
         let value = &values[0];
         let key = hash(&value);
 
-        trie_caching_storage.set_mode(TrieCacheMode::CachingChunk);
-        let result = trie_caching_storage.retrieve_raw_bytes(&key);
+        accounting_cache.set_enabled(true);
+        let result =
+            accounting_cache.retrieve_raw_bytes_with_accounting(&key, &trie_caching_storage);
         assert_eq!(result.unwrap().as_ref(), value);
 
-        trie_caching_storage.set_mode(TrieCacheMode::CachingShard);
+        accounting_cache.set_enabled(true);
         for value in values[1..].iter() {
-            let result = trie_caching_storage.retrieve_raw_bytes(&hash(value));
+            let result = accounting_cache
+                .retrieve_raw_bytes_with_accounting(&hash(value), &trie_caching_storage);
             assert_eq!(result.unwrap().as_ref(), value);
         }
 
         // Check that the first element gets evicted, but the counter is not incremented.
         assert_eq!(trie_cache.get(&key), None);
-        let count_before = trie_caching_storage.get_trie_nodes_count();
-        let result = trie_caching_storage.retrieve_raw_bytes(&key);
+        let count_before = accounting_cache.get_trie_nodes_count();
+        let result =
+            accounting_cache.retrieve_raw_bytes_with_accounting(&key, &trie_caching_storage);
         let count_delta =
-            trie_caching_storage.get_trie_nodes_count().checked_sub(&count_before).unwrap();
+            accounting_cache.get_trie_nodes_count().checked_sub(&count_before).unwrap();
         assert_eq!(result.unwrap().as_ref(), value);
         assert_eq!(count_delta.db_reads, 0);
         assert_eq!(count_delta.mem_reads, 1);
diff --git a/core/store/src/trie/update.rs b/core/store/src/trie/update.rs
index ada4b76b28c..28de5a37839 100644
--- a/core/store/src/trie/update.rs
+++ b/core/store/src/trie/update.rs
@@ -46,7 +46,7 @@ impl<'a> TrieUpdateValuePtr<'a> {
         match self {
             TrieUpdateValuePtr::MemoryRef(value) => Ok(value.to_vec()),
             TrieUpdateValuePtr::HashAndSize(trie, _, hash) => {
-                trie.storage.retrieve_raw_bytes(hash).map(|bytes| bytes.to_vec())
+                trie.internal_retrieve_trie_node(hash, true).map(|bytes| bytes.to_vec())
             }
         }
     }
@@ -157,9 +157,7 @@ impl TrieUpdate {
     }

     pub fn set_trie_cache_mode(&self, state: TrieCacheMode) {
-        if let Some(storage) = self.trie.storage.as_caching_storage() {
-            storage.set_mode(state);
-        }
+        self.trie.accounting_cache.borrow_mut().set_enabled(state == TrieCacheMode::CachingChunk);
     }
 }
diff --git a/docs/architecture/storage/trie.md b/docs/architecture/storage/trie.md
index 83667fbcf0c..0b5c13e7636 100644
--- a/docs/architecture/storage/trie.md
+++ b/docs/architecture/storage/trie.md
@@ -61,12 +61,9 @@ when `ShardTries::apply_insertions` is called, which puts new values to
 Stores all `Trie` nodes and allows to get serialized nodes by `TrieKey` hash
 using the `retrieve_raw_bytes` method.

-There are three implementations of `TrieStorage`:
+There are two major implementations of `TrieStorage`:

 * `TrieCachingStorage` - caches all big values ever read by `retrieve_raw_bytes`.
-* `TrieRecordingStorage` - records all key-value pairs ever read by
-  `retrieve_raw_bytes`. Used for obtaining state parts (and challenges in the
-  future).
 * `TrieMemoryPartialStorage` - used for validating recorded partial storage.

 Note that these storages use database keys, which are retrieved using hashes of
diff --git a/docs/practices/workflows/io_trace.md b/docs/practices/workflows/io_trace.md
index 8aba4b5f872..b295c42f074 100644
--- a/docs/practices/workflows/io_trace.md
+++ b/docs/practices/workflows/io_trace.md
@@ -216,7 +216,7 @@ memory.

 Then the SDK reads the serialized contract state from the hardcoded key
 `"STATE"`. Note that we charge 20 `tn_db_reads` for it, since we missed the
-chunk cache, but we hit everything in the shard cache. Thus, there are no DB
+accounting cache, but we hit everything in the shard cache. Thus, there are no DB
 requests. If there were DB requests for this `tn_db_reads`, you would see them
 listed.
@@ -234,10 +234,10 @@ The `sha256` call here is used to shorten implicit account ids.

 Afterwards, a value with 16 bytes (a `u128`) is fetched from the trie state. To
 serve this, it required reading 30 trie nodes, 19 of them were cached in the
-chunk cache and were not charged the full gas cost. And the remaining 11 missed
-the chunk cache but they hit the shard cache. Nothing needed to be fetched from
-DB because the Sweatcoin specific prefetcher has already loaded everything into
-the shard cache.
+accounting cache and were not charged the full gas cost. And the remaining 11
+missed the accounting cache but they hit the shard cache. Nothing needed to be
+fetched from DB because the Sweatcoin specific prefetcher has already loaded
+everything into the shard cache.

 *Note: We see trie node requests despite flat state being used. This is because
 the trace was collected with a binary that performed a read on both the trie and
diff --git a/integration-tests/src/tests/client/features/chunk_nodes_cache.rs b/integration-tests/src/tests/client/features/chunk_nodes_cache.rs
index 7e32e167a98..0809c27ecea 100644
--- a/integration-tests/src/tests/client/features/chunk_nodes_cache.rs
+++ b/integration-tests/src/tests/client/features/chunk_nodes_cache.rs
@@ -66,7 +66,8 @@ fn process_transaction(

 /// Compare charged node accesses before and after protocol upgrade to the protocol version of `ChunkNodesCache`.
 /// This upgrade during chunk processing saves each node for which we charge touching trie node cost to a special
-/// chunk cache, and such cost is charged only once on the first access. This effect doesn't persist across chunks.
+/// accounting cache (used to be called "chunk cache"), and such cost is charged only once on the first access.
+/// This effect doesn't persist across chunks.
 ///
 /// We run the same transaction 4 times and compare resulting costs. This transaction writes two different key-value
 /// pairs to the contract storage.
@@ -78,8 +79,8 @@ fn process_transaction(
 ///
 /// 2nd run should count 12 regular db reads - for 6 nodes per each value, because protocol is not upgraded yet.
 /// 3nd run follows the upgraded protocol and it should count 8 db and 4 memory reads, which comes from 6 db reads
-/// for `Value 1` and only 2 db reads for `Value 2`, because first 4 nodes were already put into the chunk cache.
-/// 4nd run should give the same results, because caching must not affect different chunks.
+/// for `Value 1` and only 2 db reads for `Value 2`, because first 4 nodes were already put into the accounting
+/// cache. 4th run should give the same results, because caching must not affect different chunks.
 #[test]
 fn compare_node_counts() {
     let mut genesis = Genesis::test(vec!["test0".parse().unwrap(), "test1".parse().unwrap()], 1);
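The rule described in the doc comment above is simple to state: within one chunk, the first access to a trie node is charged at the full `touching_trie_node` cost (a db read), and every repeated access to the same node is charged at the much cheaper `read_cached_trie_node` cost (a mem read). A minimal, self-contained model of that rule; the `Counts` and `AccountingModel` types below are illustrative stand-ins, not the real `TrieNodesCount` or `TrieAccountingCache`:

```rust
use std::collections::HashSet;

/// Stand-in for `TrieNodesCount`: how many accesses were charged as full
/// db reads vs. cheaper in-memory reads.
#[derive(Debug, Default, PartialEq)]
struct Counts {
    db_reads: u64,
    mem_reads: u64,
}

/// First access to a node hash is a db read; every repeat is a mem read.
#[derive(Default)]
struct AccountingModel {
    seen: HashSet<[u8; 32]>,
    counts: Counts,
}

impl AccountingModel {
    fn access(&mut self, hash: [u8; 32]) {
        // `insert` returns true only for hashes not seen before.
        if self.seen.insert(hash) {
            self.counts.db_reads += 1;
        } else {
            self.counts.mem_reads += 1;
        }
    }
}

fn main() {
    let mut chunk = AccountingModel::default();
    let node = [1u8; 32];
    chunk.access(node); // first touch: charged as a db read
    chunk.access(node); // repeat touch: charged as a mem read
    assert_eq!(chunk.counts, Counts { db_reads: 1, mem_reads: 1 });
}
```

Because the model, like the real cache, is recreated for every chunk, the 4th run in `compare_node_counts` is expected to repeat the 3rd run's counts rather than get cheaper.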
diff --git a/integration-tests/src/tests/client/flat_storage.rs b/integration-tests/src/tests/client/flat_storage.rs
index 1a2df88c022..2f371e79d80 100644
--- a/integration-tests/src/tests/client/flat_storage.rs
+++ b/integration-tests/src/tests/client/flat_storage.rs
@@ -15,6 +15,7 @@ use near_store::flat::{
 };
 use near_store::test_utils::create_test_store;
 use near_store::{KeyLookupMode, Store, TrieTraversalItem};
+use near_vm_runner::logic::TrieNodesCount;
 use nearcore::config::GenesisExt;
 use std::str::FromStr;
 use std::thread;
@@ -360,12 +361,12 @@ fn test_flat_storage_creation_start_from_state_part() {
             .runtime_adapter
             .get_trie_for_shard(0, &block_hash, state_root, true)
             .unwrap();
-        let chunk_view = trie.flat_storage_chunk_view.unwrap();
         for part_trie_keys in trie_keys.iter() {
             for trie_key in part_trie_keys.iter() {
-                assert_matches!(chunk_view.get_value(trie_key), Ok(Some(_)));
+                assert_matches!(trie.get_ref(&trie_key, KeyLookupMode::FlatStorage), Ok(Some(_)));
             }
         }
+        assert_eq!(trie.get_trie_nodes_count(), TrieNodesCount { db_reads: 0, mem_reads: 0 });
     }
 }
diff --git a/integration-tests/src/tests/standard_cases/mod.rs b/integration-tests/src/tests/standard_cases/mod.rs
index 65a91e5ee20..f7dd2586282 100644
--- a/integration-tests/src/tests/standard_cases/mod.rs
+++ b/integration-tests/src/tests/standard_cases/mod.rs
@@ -1457,8 +1457,8 @@ fn check_trie_nodes_count(
 ///
 /// 1st receipt should count 6 db reads.
 /// 2nd and 3rd receipts should count 2 db and 4 memory reads, because for them first 4 nodes were already put into the
-/// chunk cache.
-pub fn test_chunk_nodes_cache_common_parent(node: impl Node, runtime_config: RuntimeConfig) {
+/// accounting cache.
+pub fn test_accounting_cache_common_parent(node: impl Node, runtime_config: RuntimeConfig) {
     let receipts: Vec<Receipt> = (0..3)
         .map(|i| {
             make_receipt(
@@ -1477,14 +1477,14 @@ pub fn test_chunk_nodes_cache_common_parent(node: impl Node, runtime_config: Run
     check_trie_nodes_count(&node, &runtime_config, receipts, results, true);
 }

-/// This test is similar to `test_chunk_nodes_cache_common_parent` but checks another trie structure:
+/// This test is similar to `test_accounting_cache_common_parent` but checks another trie structure:
 ///
 ///                                                     --> (Value 1)
 /// (Extension) -> (Branch) -> (Extension) -> (Branch) |-> (Leaf) -> (Value 2)
 ///
 /// 1st receipt should count 5 db reads.
 /// 2nd receipt should count 2 db and 4 memory reads.
-pub fn test_chunk_nodes_cache_branch_value(node: impl Node, runtime_config: RuntimeConfig) {
+pub fn test_accounting_cache_branch_value(node: impl Node, runtime_config: RuntimeConfig) {
     let receipts: Vec<Receipt> = (0..2)
         .map(|i| {
             make_receipt(
@@ -1502,23 +1502,23 @@ pub fn test_chunk_nodes_cache_branch_value(node: impl Node, runtime_config: Runt
     check_trie_nodes_count(&node, &runtime_config, receipts, results, true);
 }

-/// This test is similar to `test_chunk_nodes_cache_common_parent` but checks another trie structure:
+/// This test is similar to `test_accounting_cache_common_parent` but checks another trie structure:
 ///
 ///                                                     --> (Leaf) -> (Value 1)
 /// (Extension) -> (Branch) --> (Extension) -> (Branch) |-> (Leaf) -> (Value 2)
 ///                                                     |-> (Leaf) -> (Value 2)
 ///
-/// Here we check that chunk cache is enabled *only during function calls execution*.
+/// Here we check that accounting cache is enabled *only during function calls execution*.
 /// 1st receipt writes `Value 1` and should count 6 db reads.
 /// 2nd receipt deploys a new contract which *code* is the same as `Value 2`. But this value shouldn't be put into the
-/// chunk cache.
+/// accounting cache.
 /// 3rd receipt writes `Value 2` and should count 2 db and 4 memory reads.
 ///
-/// We have checked manually that if chunk cache mode is not disabled, then the following scenario happens:
-/// - 1st receipt enables chunk cache mode but doesn't disable it
-/// - 2nd receipt triggers insertion of `Value 2` into the chunk cache
-/// - 3rd receipt reads it from the chunk cache, so it incorrectly charges user for 1 db and 5 memory reads.
-pub fn test_chunk_nodes_cache_mode(node: impl Node, runtime_config: RuntimeConfig) {
+/// We have checked manually that if accounting cache mode is not disabled, then the following scenario happens:
+/// - 1st receipt enables accounting cache mode but doesn't disable it
+/// - 2nd receipt triggers insertion of `Value 2` into the accounting cache
+/// - 3rd receipt reads it from the accounting cache, so it incorrectly charges the user for 1 db and 5 memory reads.
+pub fn test_accounting_cache_mode(node: impl Node, runtime_config: RuntimeConfig) {
     let receipts: Vec<Receipt> = vec![
         make_receipt(&node, vec![make_write_key_value_action(vec![1], vec![1])], bob_account()),
         make_receipt(
diff --git a/integration-tests/src/tests/standard_cases/runtime.rs b/integration-tests/src/tests/standard_cases/runtime.rs
index a5cae097047..68060498834 100644
--- a/integration-tests/src/tests/standard_cases/runtime.rs
+++ b/integration-tests/src/tests/standard_cases/runtime.rs
@@ -318,24 +318,24 @@ fn test_contract_write_key_value_cost_runtime() {
 }

 #[test]
-fn test_chunk_nodes_cache_same_common_parent() {
+fn test_accounting_cache_same_common_parent() {
     let node = create_runtime_node();
     let runtime_config = node.client.as_ref().read().unwrap().runtime_config.clone();
-    test_chunk_nodes_cache_common_parent(node, runtime_config);
+    test_accounting_cache_common_parent(node, runtime_config);
 }

 #[test]
-fn test_chunk_nodes_cache_branch_value_runtime() {
+fn test_accounting_cache_branch_value_runtime() {
     let node = create_runtime_node();
     let runtime_config = node.client.as_ref().read().unwrap().runtime_config.clone();
-    test_chunk_nodes_cache_branch_value(node, runtime_config);
+    test_accounting_cache_branch_value(node, runtime_config);
 }

 #[test]
-fn test_chunk_nodes_cache_mode_runtime() {
+fn test_accounting_cache_mode_runtime() {
     let node = create_runtime_node();
     let runtime_config = node.client.as_ref().read().unwrap().runtime_config.clone();
-    test_chunk_nodes_cache_mode(node, runtime_config);
+    test_accounting_cache_mode(node, runtime_config);
 }

 #[test]
diff --git a/nearcore/src/entity_debug.rs b/nearcore/src/entity_debug.rs
index 3dbd2b4c1bd..fb9ddbfccef 100644
--- a/nearcore/src/entity_debug.rs
+++ b/nearcore/src/entity_debug.rs
@@ -238,7 +238,7 @@ impl EntityDebugHandlerImpl {
                     .copied()
                     .chain(extension_nibbles.0.iter())
                     .collect::<Vec<_>>();
-                let data = trie.debug_get_value(&value)?;
+                let data = trie.retrieve_value(&value.hash)?;
                 entity_data.entries.push(EntityDataEntry {
                     name: "leaf_path".to_owned(),
                     value: EntityDataValue::String(TriePath::nibbles_to_hex(&leaf_nibbles)),
@@ -264,7 +264,7 @@ impl EntityDebugHandlerImpl {
                 }
             }
             near_store::RawTrieNode::BranchWithValue(value, children) => {
-                let data = trie.debug_get_value(&value)?;
+                let data = trie.retrieve_value(&value.hash)?;
                 entity_data.entries.push(EntityDataEntry {
                     name: "leaf_path".to_owned(),
                     value: EntityDataValue::String(TriePath::nibbles_to_hex(
diff --git a/nearcore/src/runtime/mod.rs b/nearcore/src/runtime/mod.rs
index ea082fba377..f313a0b352b 100644
--- a/nearcore/src/runtime/mod.rs
+++ b/nearcore/src/runtime/mod.rs
@@ -550,7 +550,7 @@ impl NightshadeRuntime {
             .tries
             .get_trie_with_block_hash_for_shard_from_snapshot(shard_uid, *state_root, &prev_hash)
             .map_err(|err| Error::Other(err.to_string()))?;
-        let state_part = match snapshot_trie.get_trie_nodes_for_part_with_flat_storage(part_id, partial_state, nibbles_begin, nibbles_end, trie_with_state.storage.clone()) {
+        let state_part = match snapshot_trie.get_trie_nodes_for_part_with_flat_storage(part_id, partial_state, nibbles_begin, nibbles_end, &trie_with_state) {
             Ok(partial_state) => partial_state,
             Err(err) => {
                 error!(target: "runtime", ?err, part_id.idx, part_id.total, %prev_hash, %state_root, %shard_id, "Can't get trie nodes for state part");
@@ -860,7 +860,7 @@ impl RuntimeAdapter for NightshadeRuntime {
         is_new_chunk: bool,
         is_first_block_with_chunk_of_version: bool,
     ) -> Result<ApplyTransactionResult, Error> {
-        let trie = Trie::from_recorded_storage(partial_storage, *state_root);
+        let trie = Trie::from_recorded_storage(partial_storage, *state_root, true);
         self.process_state_update(
             trie,
             shard_id,
@@ -2695,13 +2695,13 @@ mod test {
             .runtime
             .get_trie_for_shard(0, &env.head.prev_block_hash, Trie::EMPTY_ROOT, true)
             .unwrap();
-        assert!(trie.flat_storage_chunk_view.is_some());
+        assert!(trie.has_flat_storage_chunk_view());

         let trie = env
             .runtime
             .get_view_trie_for_shard(0, &env.head.prev_block_hash, Trie::EMPTY_ROOT)
             .unwrap();
-        assert!(trie.flat_storage_chunk_view.is_none());
+        assert!(!trie.has_flat_storage_chunk_view());
     }

     /// Check that querying trie and flat state gives the same result.
diff --git a/runtime/runtime-params-estimator/README.md b/runtime/runtime-params-estimator/README.md
index 34d19d1a05b..4c9ff748c97 100644
--- a/runtime/runtime-params-estimator/README.md
+++ b/runtime/runtime-params-estimator/README.md
@@ -52,7 +52,7 @@ cargo run -p runtime-params-estimator -- replay my_trace.log cache-stats
     STORAGE WRITE 151412 requests for a total of 2512012 B
     TRIE NODES 8878276 /375708 /27383 (chunk-cache/shard-cache/DB)
     SHARD CACHE 93.21% hit rate, 93.21% if removing 15 too large nodes from total
-    CHUNK CACHE 95.66% hit rate, 99.69% if removing 375708 shard cache hits from total
+    ACCOUNTING CACHE 95.66% hit rate, 99.69% if removing 375708 shard cache hits from total
 ```

 For a list of all options, run `cargo run -p runtime-params-estimator -- replay --help`.
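As a sanity check on the sample output above, both percentages in the `ACCOUNTING CACHE` line follow from the counters in the `TRIE NODES` line (8878276 accounting-cache hits, 375708 shard-cache hits, 27383 DB reads). A standalone back-of-the-envelope sketch, not the estimator's actual code:

```rust
fn main() {
    // Counters from the sample `TRIE NODES` line above.
    let (acc_hits, shard_hits, db_reads) = (8_878_276_f64, 375_708_f64, 27_383_f64);

    // Hit rate over all trie node accesses.
    let total = acc_hits + shard_hits + db_reads;
    println!("{:.2}% hit rate", 100.0 * acc_hits / total); // 95.66%

    // Hit rate if the 375708 shard cache hits are removed from the total.
    println!("{:.2}% adjusted", 100.0 * acc_hits / (acc_hits + db_reads)); // 99.69%
}
```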
diff --git a/runtime/runtime-params-estimator/src/lib.rs b/runtime/runtime-params-estimator/src/lib.rs
index 35e37370f33..a93d6967fbc 100644
--- a/runtime/runtime-params-estimator/src/lib.rs
+++ b/runtime/runtime-params-estimator/src/lib.rs
@@ -1192,7 +1192,7 @@ fn read_cached_trie_node(ctx: &mut EstimatorContext) -> GasCost {
     let mut testbed = ctx.testbed();

     let results = (0..(warmup_iters + iters))
-        .map(|_| trie::read_node_from_chunk_cache(&mut testbed))
+        .map(|_| trie::read_node_from_accounting_cache(&mut testbed))
        .skip(warmup_iters)
        .collect::<Vec<_>>();
     average_cost(results)
diff --git a/runtime/runtime-params-estimator/src/replay.rs b/runtime/runtime-params-estimator/src/replay.rs
index 3a2ab4588c8..35d6f67599c 100644
--- a/runtime/runtime-params-estimator/src/replay.rs
+++ b/runtime/runtime-params-estimator/src/replay.rs
@@ -277,7 +277,7 @@ GET State "'stateKey10'" size=500
     }

     #[test]
-    fn test_chunk_cache_stats() {
+    fn test_accounting_cache_stats() {
         check_replay_mode(ReplayMode::ChunkCacheStats);
     }

diff --git a/runtime/runtime-params-estimator/src/replay/cache_stats.rs b/runtime/runtime-params-estimator/src/replay/cache_stats.rs
index 3acfce466bd..b2b94ee88a2 100644
--- a/runtime/runtime-params-estimator/src/replay/cache_stats.rs
+++ b/runtime/runtime-params-estimator/src/replay/cache_stats.rs
@@ -23,14 +23,14 @@ pub(super) struct CacheStats {
     /// Sum of all storage writes sizes. (can only be from inside guest program)
     total_size_write: u64,

-    /// Hits in the chunk cache. (can only be from inside guest program)
-    num_tn_chunk_cache_hit: u64,
+    /// Hits in the accounting cache. (can only be from inside guest program)
+    num_tn_accounting_cache_hit: u64,
     /// Hits in the shard cache, from inside guest program.
     num_tn_shard_cache_hit_guest: u64,
     /// Misses in the shard cache, from inside guest program.
     num_tn_shard_cache_miss_guest: u64,
     /// All trie node accesses that the user pays for as being fetched from DB.
-    /// Includes shard cache misses and hits, but no chunk cache hits.
+    /// Includes shard cache misses and hits, but no accounting cache hits.
     num_tn_db_paid: u64,

     /// Hits in the shard cache, requested by host.
@@ -110,7 +110,7 @@ impl CacheStats {
             _ => {}
         }

-        self.num_tn_chunk_cache_hit += tn_mem_reads;
+        self.num_tn_accounting_cache_hit += tn_mem_reads;
         self.num_tn_shard_cache_hit_guest += tn_shard_cache_hits;
         self.num_tn_db_paid += tn_db_reads;
         self.num_tn_shard_cache_too_large += tn_shard_cache_too_large;
@@ -132,7 +132,7 @@ impl CacheStats {
         let tn_shard_cache_too_large =
             dict.get("shard_cache_too_large").map(|s| s.parse().unwrap()).unwrap_or(0);

-        // there is no chunk cache update here, as we are not in a smart contract execution
+        // there is no accounting cache update here, as we are not in a smart contract execution
         self.num_tn_shard_cache_hit_host += tn_shard_cache_hits;
         self.num_tn_shard_cache_too_large += tn_shard_cache_too_large;
         self.num_tn_shard_cache_miss_host += tn_shard_cache_misses;
@@ -163,7 +163,7 @@ impl CacheStats {
             out,
             "{:indent$}TRIE NODES (guest) {:>4} /{:>4} /{:>4} (chunk-cache/shard-cache/DB)",
             "",
-            self.num_tn_chunk_cache_hit,
+            self.num_tn_accounting_cache_hit,
             self.num_tn_shard_cache_hit_guest,
             self.num_tn_shard_cache_miss_guest
         )?;
@@ -180,11 +180,12 @@ impl CacheStats {
             self.num_tn_shard_cache_miss_guest + self.num_tn_shard_cache_miss_host,
             Some((self.num_tn_shard_cache_too_large, "too large nodes")),
         )?;
+        // TODO(#9054): Rename this to ACCOUNTING CACHE.
         Self::print_cache_rate(
             out,
             indent,
             "CHUNK CACHE",
-            self.num_tn_chunk_cache_hit,
+            self.num_tn_accounting_cache_hit,
             self.num_tn_db_paid,
             None,
         )?;
diff --git a/runtime/runtime-params-estimator/src/trie.rs b/runtime/runtime-params-estimator/src/trie.rs
index ca606bd96ff..ef72f4f11ee 100644
--- a/runtime/runtime-params-estimator/src/trie.rs
+++ b/runtime/runtime-params-estimator/src/trie.rs
@@ -2,8 +2,8 @@
 use crate::estimator_context::{EstimatorContext, Testbed};
 use crate::gas_cost::{GasCost, NonNegativeTolerance};
 use crate::utils::{aggregate_per_block_measurements, overhead_per_measured_block, percentiles};
 use near_primitives::hash::hash;
-use near_primitives::types::TrieCacheMode;
-use near_store::{TrieCachingStorage, TrieStorage};
+use near_store::trie::accounting_cache::TrieAccountingCache;
+use near_store::TrieCachingStorage;
 use near_vm_runner::logic::ExtCosts;
 use std::sync::atomic::{AtomicUsize, Ordering};

@@ -68,7 +68,7 @@ pub(crate) fn write_node(
     cost
 }

-pub(crate) fn read_node_from_chunk_cache(testbed: &mut Testbed) -> GasCost {
+pub(crate) fn read_node_from_accounting_cache(testbed: &mut Testbed) -> GasCost {
     let debug = testbed.config.debug;
     let iters = 200;
     let percentiles_of_interest = &[0.5, 0.9, 0.99, 0.999];
@@ -89,7 +89,7 @@ pub(crate) fn read_node_from_chunk_cache(testbed: &mut Testbed) -> GasCost {
          num_warmup_values: usize,
          data_spread_factor: usize,
          spoil_l3: bool| {
-            let results = read_node_from_chunk_cache_ext(
+            let results = read_node_from_accounting_cache_ext(
                 testbed,
                 iters,
                 num_values,
@@ -189,7 +189,7 @@ pub(crate) fn read_node_from_chunk_cache(testbed: &mut Testbed) -> GasCost {
     base_case
 }

-fn read_node_from_chunk_cache_ext(
+fn read_node_from_accounting_cache_ext(
     testbed: &mut Testbed,
     iters: usize,
     // How many values are read after each other. The higher the number, the
@@ -248,8 +248,13 @@ fn read_node_from_chunk_cache_ext(
     // Create a new cache and load nodes into it as preparation.
     let caching_storage = testbed.trie_caching_storage();
-    caching_storage.set_mode(TrieCacheMode::CachingChunk);
-    let _dummy_sum = read_raw_nodes_from_storage(&caching_storage, &all_value_hashes);
+    let mut accounting_cache = TrieAccountingCache::new(None);
+    accounting_cache.set_enabled(true);
+    let _dummy_sum = read_raw_nodes_from_storage(
+        &caching_storage,
+        &mut accounting_cache,
+        &all_value_hashes,
+    );

     // Remove trie nodes from CPU caches by filling the caches with useless data.
     // (To measure latency from main memory, not CPU caches)
@@ -261,11 +266,19 @@ fn read_node_from_chunk_cache_ext(
         // Read some nodes from the cache, to warm up caches again. (We only
         // want the trie node to come from main memory, the data structures
         // around that are expected to always be in cache)
-        let dummy_sum = read_raw_nodes_from_storage(&caching_storage, unmeasured_value_hashes);
+        let dummy_sum = read_raw_nodes_from_storage(
+            &caching_storage,
+            &mut accounting_cache,
+            unmeasured_value_hashes,
+        );
         SINK.fetch_add(dummy_sum, Ordering::SeqCst);

         let start = GasCost::measure(testbed.config.metric);
-        let dummy_sum = read_raw_nodes_from_storage(&caching_storage, &measured_value_hashes);
+        let dummy_sum = read_raw_nodes_from_storage(
+            &caching_storage,
+            &mut accounting_cache,
+            &measured_value_hashes,
+        );
         let cost = start.elapsed();

         SINK.fetch_add(dummy_sum, Ordering::SeqCst);
@@ -280,11 +293,13 @@ fn read_node_from_chunk_cache_ext(
 /// compiler.
 fn read_raw_nodes_from_storage(
     caching_storage: &TrieCachingStorage,
+    accounting_cache: &mut TrieAccountingCache,
     keys: &[near_primitives::hash::CryptoHash],
 ) -> usize {
     keys.iter()
         .map(|key| {
-            let bytes = caching_storage.retrieve_raw_bytes(key).unwrap();
+            let bytes =
+                accounting_cache.retrieve_raw_bytes_with_accounting(key, caching_storage).unwrap();
             near_store::estimator::decode_extension_node(&bytes).len()
         })
         .sum()
diff --git a/runtime/runtime/src/prefetch.rs b/runtime/runtime/src/prefetch.rs
index 3e959759eca..91c7de9b2dc 100644
--- a/runtime/runtime/src/prefetch.rs
+++ b/runtime/runtime/src/prefetch.rs
@@ -63,7 +63,7 @@ pub(crate) struct TriePrefetcher {

 impl TriePrefetcher {
     pub(crate) fn new_if_enabled(trie: &Trie) -> Option<Self> {
-        if let Some(caching_storage) = trie.storage.as_caching_storage() {
+        if let Some(caching_storage) = trie.internal_get_storage_as_caching_storage() {
             if let Some(prefetch_api) = caching_storage.prefetch_api().clone() {
                 let trie_root = *trie.get_root();
                 let shard_uid = prefetch_api.shard_uid;
@@ -320,7 +320,7 @@ mod tests {
         }
         let root = test_populate_trie(&tries, &Trie::EMPTY_ROOT, ShardUId::single_shard(), kvs);
         let trie = tries.get_trie_for_shard(ShardUId::single_shard(), root);
-        trie.storage.as_caching_storage().unwrap().clear_cache();
+        trie.internal_get_storage_as_caching_storage().unwrap().clear_cache();

         let prefetcher =
             TriePrefetcher::new_if_enabled(&trie).expect("caching storage should have prefetcher");
diff --git a/tools/state-viewer/src/contract_accounts.rs b/tools/state-viewer/src/contract_accounts.rs
index 2fb38582b23..84e9a898dda 100644
--- a/tools/state-viewer/src/contract_accounts.rs
+++ b/tools/state-viewer/src/contract_accounts.rs
@@ -174,8 +174,7 @@ impl ContractAccount {
     ) -> Result<Self, ContractAccountError> {
         let code = if filter.code_size {
             Some(
-                trie.storage
-                    .retrieve_raw_bytes(&value_hash)
+                trie.retrieve_value(&value_hash)
                     .map_err(|err| ContractAccountError::NoCode(err, account_id.clone()))?,
             )
         } else {
diff --git a/tools/state-viewer/src/state_parts.rs b/tools/state-viewer/src/state_parts.rs
index cd019cc397c..1f3da815048 100644
--- a/tools/state-viewer/src/state_parts.rs
+++ b/tools/state-viewer/src/state_parts.rs
@@ -394,7 +394,8 @@ async fn load_state_parts(

 fn print_state_part(state_root: &StateRoot, _part_id: PartId, data: &[u8]) {
     let trie_nodes: PartialState = BorshDeserialize::try_from_slice(data).unwrap();
-    let trie = Trie::from_recorded_storage(PartialStorage { nodes: trie_nodes }, *state_root);
+    let trie =
+        Trie::from_recorded_storage(PartialStorage { nodes: trie_nodes }, *state_root, false);
     trie.print_recursive(&mut std::io::stdout().lock(), &state_root, u32::MAX);
 }

@@ -473,7 +474,8 @@ async fn dump_state_parts(
 /// Returns the first `StateRecord` encountered while iterating over a sub-trie in the state part.
 fn get_first_state_record(state_root: &StateRoot, data: &[u8]) -> Option<StateRecord> {
     let trie_nodes = BorshDeserialize::try_from_slice(data).unwrap();
-    let trie = Trie::from_recorded_storage(PartialStorage { nodes: trie_nodes }, *state_root);
+    let trie =
+        Trie::from_recorded_storage(PartialStorage { nodes: trie_nodes }, *state_root, false);
     for (key, value) in trie.iter().unwrap().flatten() {
         if let Some(sr) = StateRecord::from_raw_key_value(key, value) {