From abe0cfaa1f795b3ed47741c8a1e76d49a0547fe5 Mon Sep 17 00:00:00 2001 From: Qyriad Date: Mon, 12 Aug 2024 15:56:07 -0600 Subject: [PATCH] feat(smt): impl hashing leaves that don't yet exist This commit implements 'prospective leaf hashing' -- computing what the hash of a sparse Merkle tree leaf *would* be for a key-value insertion without actually performing that insertion. For SimpleSmt, this is trivial, since the leaf hash and its payload are the same. For the full Smt, the new leaf payload (and thus, its hash) depend on the existing payload in that leaf, making the prospective hash logic a combination of the normal insertion logic and the normal hash logic. But because we're only interested in the hash and not the intermediate value, we can skip allocations and sorts for the payload itself. --- src/merkle/smt/full/leaf.rs | 4 +- src/merkle/smt/full/mod.rs | 87 ++++++++++++++++++++++++++++++++++ src/merkle/smt/full/tests.rs | 92 +++++++++++++++++++++++++++++++++++- src/merkle/smt/mod.rs | 9 ++++ src/merkle/smt/simple/mod.rs | 4 ++ 5 files changed, 193 insertions(+), 3 deletions(-) diff --git a/src/merkle/smt/full/leaf.rs b/src/merkle/smt/full/leaf.rs index 095a4fb..585fc40 100644 --- a/src/merkle/smt/full/leaf.rs +++ b/src/merkle/smt/full/leaf.rs @@ -350,7 +350,7 @@ impl Deserializable for SmtLeaf { // ================================================================================================ /// Converts a key-value tuple to an iterator of `Felt`s -fn kv_to_elements((key, value): (RpoDigest, Word)) -> impl Iterator { +pub(crate) fn kv_to_elements((key, value): (RpoDigest, Word)) -> impl Iterator { let key_elements = key.into_iter(); let value_elements = value.into_iter(); @@ -359,7 +359,7 @@ fn kv_to_elements((key, value): (RpoDigest, Word)) -> impl Iterator /// Compares two keys, compared element-by-element using their integer representations starting with /// the most significant element. -fn cmp_keys(key_1: RpoDigest, key_2: RpoDigest) -> Ordering { +pub(crate) fn cmp_keys(key_1: RpoDigest, key_2: RpoDigest) -> Ordering { for (v1, v2) in key_1.iter().zip(key_2.iter()).rev() { let v1 = v1.as_int(); let v2 = v2.as_int(); diff --git a/src/merkle/smt/full/mod.rs b/src/merkle/smt/full/mod.rs index 5cd510b..626be36 100644 --- a/src/merkle/smt/full/mod.rs +++ b/src/merkle/smt/full/mod.rs @@ -3,6 +3,7 @@ use alloc::{ string::ToString, vec::Vec, }; +use core::iter; use super::{ EmptySubtreeRoots, Felt, InnerNode, InnerNodeInfo, LeafIndex, MerkleError, MerklePath, @@ -263,6 +264,92 @@ impl SparseMerkleTree for Smt { leaf.hash() } + fn hash_prospective_leaf(&self, key: &RpoDigest, value: &Word) -> RpoDigest { + // This function combines logic from SmtLeaf::insert() and SmtLeaf::hash() to determine what + // the hash of a leaf would be with the `(key, value)` pair inserted into it, without simply + // cloning the leaf which could be expensive for some leaves, and is easily avoidable when + // we can combine the insertion and hashing operations. + let new_pair = (*key, *value); + let is_removal: bool = *value == EMPTY_WORD; + + let leaf_index: LeafIndex = Self::key_to_leaf_index(key); + match self.leaves.get(&leaf_index.value()) { + // If this key doesn't have a value, our job is very simple. + None => SmtLeaf::Single(new_pair).hash(), + + // If this key already has a value, then the hash will be based off a prospective + // mutation on the leaf. + Some(existing_leaf) => match existing_leaf { + // Inserting an empty value into an empty leaf or a single leaf both do the same + // thing. + SmtLeaf::Empty(_) | SmtLeaf::Single(_) if is_removal => { + SmtLeaf::new_empty(key.into()).hash() + }, + + SmtLeaf::Empty(_) => SmtLeaf::Single(new_pair).hash(), + + SmtLeaf::Single(pair) => { + if pair.0 == *key { + SmtLeaf::Single(new_pair).hash() + } else { + // Inserting a non-empty value into a new key would change this to a + // multi-leaf. + // TODO: mini-optimization: use an array with each key's and value's Felts + // flattened inline to avoid the Vec allocation. + let elements: Vec = + [*pair, new_pair].into_iter().flat_map(leaf::kv_to_elements).collect(); + + Rpo256::hash_elements(&elements) + } + }, + + SmtLeaf::Multiple(pairs) => { + match pairs.binary_search_by(|(cur_key, _)| leaf::cmp_keys(*cur_key, *key)) { + Ok(pos) => { + if is_removal && pairs.len() == 2 { + // This removal would convert this Multi into a Single, so we can + // just stop here. + return SmtLeaf::Single(pairs[0]).hash(); + } + + let (before_pos, rest) = pairs.split_at(pos); + let with_pos_removed = rest.iter().copied().skip(1); + let middle = if !is_removal { Some(new_pair) } else { None }; + let elements: Vec = before_pos + .iter() + .copied() + .chain(middle) + .chain(with_pos_removed) + .flat_map(leaf::kv_to_elements) + .collect(); + + Rpo256::hash_elements(&elements) + }, + Err(pos_for_insert) => { + if is_removal { + // The only values are at other keys, so we just hash the leaf + // as-is. + return existing_leaf.hash(); + } + + let (before_pos, rest) = pairs.split_at(pos_for_insert); + let middle = iter::once(new_pair); + let elements: Vec = before_pos + .iter() + .copied() + .chain(middle) + .chain(rest.iter().copied()) + .flat_map(leaf::kv_to_elements) + .collect(); + + Rpo256::hash_elements(&elements) + }, + } + }, + }, + } + } + fn key_to_leaf_index(key: &RpoDigest) -> LeafIndex { let most_significant_felt = key[3]; LeafIndex::new_max_depth(most_significant_felt.as_int()) diff --git a/src/merkle/smt/full/tests.rs b/src/merkle/smt/full/tests.rs index 66cd203..1ee7f9d 100644 --- a/src/merkle/smt/full/tests.rs +++ b/src/merkle/smt/full/tests.rs @@ -2,7 +2,7 @@ use alloc::vec::Vec; use super::{Felt, LeafIndex, NodeIndex, Rpo256, RpoDigest, Smt, SmtLeaf, EMPTY_WORD, SMT_DEPTH}; use crate::{ - merkle::{EmptySubtreeRoots, MerkleStore}, + merkle::{smt::SparseMerkleTree, EmptySubtreeRoots, MerkleStore}, utils::{Deserializable, Serializable}, Word, ONE, WORD_SIZE, }; @@ -258,6 +258,96 @@ fn test_smt_removal() { } } +#[test] +fn test_prospective_hash() { + let mut smt = Smt::default(); + + let raw = 0b_01101001_01101100_00011111_11111111_10010110_10010011_11100000_00000000_u64; + + let key_1: RpoDigest = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw)]); + let key_2: RpoDigest = + RpoDigest::from([2_u32.into(), 2_u32.into(), 2_u32.into(), Felt::new(raw)]); + let key_3: RpoDigest = + RpoDigest::from([3_u32.into(), 3_u32.into(), 3_u32.into(), Felt::new(raw)]); + + let value_1 = [ONE; WORD_SIZE]; + let value_2 = [2_u32.into(); WORD_SIZE]; + let value_3: [Felt; 4] = [3_u32.into(); WORD_SIZE]; + + // insert key-value 1 + { + let prospective = smt.hash_prospective_leaf(&key_1, &value_1); + let old_value_1 = smt.insert(key_1, value_1); + assert_eq!(old_value_1, EMPTY_WORD); + + assert_eq!(prospective, smt.get_leaf(&key_1).hash()); + + assert_eq!(smt.get_leaf(&key_1), SmtLeaf::Single((key_1, value_1))); + + } + + // insert key-value 2 + { + let prospective = smt.hash_prospective_leaf(&key_2, &value_2); + let old_value_2 = smt.insert(key_2, value_2); + assert_eq!(old_value_2, EMPTY_WORD); + + assert_eq!(prospective, smt.get_leaf(&key_2).hash()); + + assert_eq!( + smt.get_leaf(&key_2), + SmtLeaf::Multiple(vec![(key_1, value_1), (key_2, value_2)]) + ); + } + + // insert key-value 3 + { + let prospective = smt.hash_prospective_leaf(&key_3, &value_3); + let old_value_3 = smt.insert(key_3, value_3); + assert_eq!(old_value_3, EMPTY_WORD); + + assert_eq!(prospective, smt.get_leaf(&key_3).hash()); + + assert_eq!( + smt.get_leaf(&key_3), + SmtLeaf::Multiple(vec![(key_1, value_1), (key_2, value_2), (key_3, value_3)]) + ); + } + + // remove key 3 + { + let old_hash = smt.get_leaf(&key_3).hash(); + let old_value_3 = smt.insert(key_3, EMPTY_WORD); + assert_eq!(old_value_3, value_3); + assert_eq!(old_hash, smt.hash_prospective_leaf(&key_3, &old_value_3)); + + assert_eq!( + smt.get_leaf(&key_3), + SmtLeaf::Multiple(vec![(key_1, value_1), (key_2, value_2)]) + ); + } + + // remove key 2 + { + let old_hash = smt.get_leaf(&key_2).hash(); + let old_value_2 = smt.insert(key_2, EMPTY_WORD); + assert_eq!(old_value_2, value_2); + assert_eq!(old_hash, smt.hash_prospective_leaf(&key_2, &old_value_2)); + + assert_eq!(smt.get_leaf(&key_2), SmtLeaf::Single((key_1, value_1))); + } + + // remove key 1 + { + let old_hash = smt.get_leaf(&key_1).hash(); + let old_value_1 = smt.insert(key_1, EMPTY_WORD); + assert_eq!(old_value_1, value_1); + assert_eq!(old_hash, smt.hash_prospective_leaf(&key_1, &old_value_1)); + + assert_eq!(smt.get_leaf(&key_1), SmtLeaf::new_empty(key_1.into())); + } +} + /// Tests that 2 key-value pairs stored in the same leaf have the same path #[test] fn test_smt_path_to_keys_in_same_leaf_are_equal() { diff --git a/src/merkle/smt/mod.rs b/src/merkle/smt/mod.rs index d7d42da..9c67485 100644 --- a/src/merkle/smt/mod.rs +++ b/src/merkle/smt/mod.rs @@ -167,6 +167,15 @@ pub(crate) trait SparseMerkleTree { /// Returns the hash of a leaf fn hash_leaf(leaf: &Self::Leaf) -> RpoDigest; + /// Returns the hash of a leaf if the leaf WERE inserted into the tree, + /// without performing any insertion or other mutation. + /// + /// Note: calling this function after actually performing an insert with + /// the same arguments will *not* return the same result, as inserting + /// multiple times with the same key mutates the leaf each time. + #[cfg_attr(not(test), allow(dead_code))] + fn hash_prospective_leaf(&self, key: &Self::Key, value: &Self::Value) -> RpoDigest; + /// Maps a key to a leaf index fn key_to_leaf_index(key: &Self::Key) -> LeafIndex; diff --git a/src/merkle/smt/simple/mod.rs b/src/merkle/smt/simple/mod.rs index f1ff0dc..53bc797 100644 --- a/src/merkle/smt/simple/mod.rs +++ b/src/merkle/smt/simple/mod.rs @@ -302,6 +302,10 @@ impl SparseMerkleTree for SimpleSmt { leaf.into() } + fn hash_prospective_leaf(&self, _key: &LeafIndex, value: &Word) -> RpoDigest { + Self::hash_leaf(value) + } + fn key_to_leaf_index(key: &LeafIndex) -> LeafIndex { *key }