From 124807bf8bf0ab1bf55f570ec7e7a851511daa1f Mon Sep 17 00:00:00 2001
From: Qyriad
Date: Wed, 21 Aug 2024 20:25:50 -0600
Subject: [PATCH] WIP: remove a *bunch* of allocations and clones in hash_prospective_leaf

---
 src/merkle/smt/full/mod.rs | 98 +++++++++++++++++++++++++++++++-------
 1 file changed, 82 insertions(+), 16 deletions(-)

diff --git a/src/merkle/smt/full/mod.rs b/src/merkle/smt/full/mod.rs
index 4d042e7..87291f1 100644
--- a/src/merkle/smt/full/mod.rs
+++ b/src/merkle/smt/full/mod.rs
@@ -3,6 +3,7 @@ use alloc::{
     string::ToString,
     vec::Vec,
 };
+use core::iter;
 
 use super::{
     EmptySubtreeRoots, Felt, InnerNode, InnerNodeInfo, LeafIndex, MerkleError, MerklePath,
@@ -287,25 +288,90 @@ impl SparseMerkleTree<SMT_DEPTH> for Smt {
     }
 
     fn hash_prospective_leaf(&self, key: &RpoDigest, value: &Word) -> RpoDigest {
-        // If this key already has a value, then the hash will be based off a
-        // prospective mutation on the leaf.
-        let leaf_index: LeafIndex<SMT_DEPTH> = Self::key_to_leaf_index(&key);
+        // This function combines logic from SmtLeaf::insert() and SmtLeaf::hash() to determine what
+        // the hash of a leaf would be with the `(key, value)` pair inserted into it, without simply
+        // cloning the leaf which could be expensive for some leaves, and is easily avoidable when
+        // we can combine the insertion and hashing operations.
+        let new_pair = (*key, *value);
+        let is_removal: bool = *value == EMPTY_WORD;
+
+        let leaf_index: LeafIndex<SMT_DEPTH> = Self::key_to_leaf_index(key);
         match self.leaves.get(&leaf_index.value()) {
-            Some(existing_leaf) => {
-                if value == &Self::EMPTY_VALUE {
-                    // A leaf with an empty value is conceptually a removal the
-                    // value in that leaf with this key.
-                    // TODO: avoid cloning the leaf.
-                    let mut cloned = existing_leaf.clone();
-                    cloned.remove(*key);
-                    return cloned.hash();
+            // If this key doesn't have a value, our job is very simple.
+            None => SmtLeaf::Single(new_pair).hash(),
+
+            // If this key already has a value, then the hash will be based off a prospective
+            // mutation on the leaf.
+            Some(existing_leaf) => match existing_leaf {
+                // Inserting an empty value into an empty leaf or a single leaf both do the same
+                // thing.
+                SmtLeaf::Empty(_) | SmtLeaf::Single(_) if is_removal => {
+                    SmtLeaf::new_empty(key.into()).hash()
+                },
+
+                SmtLeaf::Empty(_) => SmtLeaf::Single(new_pair).hash(),
+
+                SmtLeaf::Single(pair) => {
+                    if pair.0 == *key {
+                        SmtLeaf::Single(new_pair).hash()
+                    } else {
+                        // Inserting a non-empty value into a new key would change this to a
+                        // multi-leaf.
+                        // TODO: mini-optimization: use an array with each key's and value's Felts
+                        // flattened inline to avoid the Vec allocation.
+                        let elements: Vec<Felt> = [*pair, new_pair]
+                            .into_iter()
+                            .flat_map(leaf::kv_to_elements)
+                            .collect();
+
+                        Rpo256::hash_elements(&elements)
+                    }
+                },
+
+                SmtLeaf::Multiple(pairs) => {
+                    match pairs.binary_search_by(|&(cur_key, _)| leaf::cmp_keys(cur_key, *key)) {
+                        Ok(pos) => {
+                            if is_removal && pairs.len() == 2 {
+                                // This removal would convert this Multi into a Single, so we can
+                                // just stop here.
+                                return SmtLeaf::Single(pairs[0]).hash();
+                            }
+
+                            let (before_pos, rest) = pairs.split_at(pos);
+                            let with_pos_removed = rest.iter().copied().skip(1);
+                            let middle = iter::once(new_pair).filter(|_| !is_removal);
+                            let elements: Vec<Felt> = before_pos
+                                .iter()
+                                .copied()
+                                .chain(middle)
+                                .chain(with_pos_removed)
+                                .flat_map(leaf::kv_to_elements)
+                                .collect();
+
+                            Rpo256::hash_elements(&elements)
+                        }
+                        Err(pos_for_insert) => {
+                            if is_removal {
+                                // The only values are at other keys, so we just hash the leaf
+                                // as-is.
+                                return existing_leaf.hash();
+                            }
+
+                            let (before_pos, rest) = pairs.split_at(pos_for_insert);
+                            let middle = iter::once(new_pair);
+                            let elements: Vec<Felt> = before_pos
+                                .iter()
+                                .copied()
+                                .chain(middle)
+                                .chain(rest.iter().copied())
+                                .flat_map(leaf::kv_to_elements)
+                                .collect();
+
+                            Rpo256::hash_elements(&elements)
+                        }
+                    }
                 }
-                // TODO: avoid cloning the leaf.
-                let mut cloned = existing_leaf.clone();
-                cloned.insert(*key, *value);
-                cloned.hash()
             },
-            None => SmtLeaf::new_single(*key, *value).hash(),
         }
     }
 