feat: impl hashing Merkle leaves that don't yet exist

This commit implements 'prospective leaf hashing' -- computing what the
hash of a sparse Merkle tree leaf *would* be for a key-value insertion
without actually performing that insertion.

For SimpleSmt, this is trivial, since the leaf hash and its payload are
the same.

For the full Smt, the new leaf payload (and thus, its hash) depends on
the existing payload in that leaf, making the prospective hash logic a
combination of the normal insertion logic and the normal hash logic. But
because we're only interested in the hash and not the intermediate
value, we can skip allocations and sorts for the payload itself.
This commit is contained in:
Qyriad 2024-08-12 15:56:07 -06:00
parent f4a9d5b027
commit 4c19f364ca
5 changed files with 214 additions and 3 deletions

View file

@ -350,7 +350,7 @@ impl Deserializable for SmtLeaf {
// ================================================================================================
/// Converts a key-value tuple to an iterator of `Felt`s
fn kv_to_elements((key, value): (RpoDigest, Word)) -> impl Iterator<Item = Felt> {
pub(crate) fn kv_to_elements((key, value): (RpoDigest, Word)) -> impl Iterator<Item = Felt> {
let key_elements = key.into_iter();
let value_elements = value.into_iter();
@ -359,7 +359,7 @@ fn kv_to_elements((key, value): (RpoDigest, Word)) -> impl Iterator<Item = Felt>
/// Compares two keys, compared element-by-element using their integer representations starting with
/// the most significant element.
fn cmp_keys(key_1: RpoDigest, key_2: RpoDigest) -> Ordering {
pub(crate) fn cmp_keys(key_1: RpoDigest, key_2: RpoDigest) -> Ordering {
for (v1, v2) in key_1.iter().zip(key_2.iter()).rev() {
let v1 = v1.as_int();
let v2 = v2.as_int();

View file

@ -3,6 +3,7 @@ use alloc::{
string::ToString,
vec::Vec,
};
use core::iter;
use super::{
EmptySubtreeRoots, Felt, InnerNode, InnerNodeInfo, LeafIndex, MerkleError, MerklePath,
@ -263,6 +264,92 @@ impl SparseMerkleTree<SMT_DEPTH> for Smt {
leaf.hash()
}
/// Computes the hash that the leaf holding `key` would have after `(key, value)` is inserted,
/// without modifying the tree.
///
/// A `value` equal to `EMPTY_WORD` is treated as a removal of `key`. The result is derived from
/// the leaf currently stored at the key's leaf index (if any), so it is only meaningful until
/// that leaf is next mutated.
///
/// The leaf is never cloned: the entries that would make up the resulting leaf are streamed
/// straight into the hasher input, in the key order used by `leaf::cmp_keys`.
fn hash_prospective_leaf(&self, key: &RpoDigest, value: &Word) -> RpoDigest {
    /// Hashes the flattened field elements of `pairs`, in the order given.
    fn hash_pairs(pairs: impl Iterator<Item = (RpoDigest, Word)>) -> RpoDigest {
        let elements: Vec<Felt> = pairs.flat_map(leaf::kv_to_elements).collect();
        Rpo256::hash_elements(&elements)
    }

    let new_pair = (*key, *value);
    let is_removal: bool = *value == EMPTY_WORD;
    let leaf_index: LeafIndex<SMT_DEPTH> = Self::key_to_leaf_index(key);

    // Hash of the leaf once it holds no entries at all.
    let empty_leaf_hash = || SmtLeaf::new_empty(key.into()).hash();

    match self.leaves.get(&leaf_index.value()) {
        // Nothing is stored at this index: a removal leaves the leaf empty, while an insertion
        // produces a single-entry leaf.
        None => {
            if is_removal {
                empty_leaf_hash()
            } else {
                SmtLeaf::Single(new_pair).hash()
            }
        },
        Some(existing_leaf) => match existing_leaf {
            // An explicitly empty leaf behaves exactly like a missing one.
            SmtLeaf::Empty(_) => {
                if is_removal {
                    empty_leaf_hash()
                } else {
                    SmtLeaf::Single(new_pair).hash()
                }
            },
            SmtLeaf::Single(pair) => {
                if pair.0 == *key {
                    // Same key: the sole entry is either dropped or overwritten.
                    if is_removal {
                        empty_leaf_hash()
                    } else {
                        SmtLeaf::Single(new_pair).hash()
                    }
                } else if is_removal {
                    // Removing a key that is not stored here leaves the leaf untouched.
                    existing_leaf.hash()
                } else {
                    // A second key turns this into a multi-entry leaf, whose entries are kept
                    // sorted by key, so the two pairs must be hashed in key order.
                    // TODO: mini-optimization: use an array with each key's and value's Felts
                    // flattened inline to avoid the Vec allocation.
                    let new_key_sorts_first =
                        leaf::cmp_keys(pair.0, *key) == core::cmp::Ordering::Greater;
                    let ordered =
                        if new_key_sorts_first { [new_pair, *pair] } else { [*pair, new_pair] };
                    hash_pairs(ordered.into_iter())
                }
            },
            SmtLeaf::Multiple(pairs) => {
                match pairs.binary_search_by(|(cur_key, _)| leaf::cmp_keys(*cur_key, *key)) {
                    Ok(pos) => {
                        if is_removal && pairs.len() == 2 {
                            // Dropping one of two entries collapses the leaf to a single-entry
                            // leaf holding the *other* entry (`pos` is 0 or 1 here).
                            SmtLeaf::Single(pairs[1 - pos]).hash()
                        } else {
                            // Keep everything around `pos`; the entry at `pos` is replaced by
                            // the new pair, or omitted entirely for a removal.
                            let (before_pos, rest) = pairs.split_at(pos);
                            let after_pos = rest.iter().copied().skip(1);
                            let replacement = iter::once(new_pair).filter(|_| !is_removal);
                            hash_pairs(
                                before_pos.iter().copied().chain(replacement).chain(after_pos),
                            )
                        }
                    },
                    Err(pos_for_insert) => {
                        if is_removal {
                            // The key is not stored in this leaf, so removing it changes
                            // nothing.
                            existing_leaf.hash()
                        } else {
                            // Splice the new pair in at its sorted position.
                            let (before_pos, rest) = pairs.split_at(pos_for_insert);
                            hash_pairs(
                                before_pos
                                    .iter()
                                    .copied()
                                    .chain(iter::once(new_pair))
                                    .chain(rest.iter().copied()),
                            )
                        }
                    },
                }
            },
        },
    }
}
fn key_to_leaf_index(key: &RpoDigest) -> LeafIndex<SMT_DEPTH> {
let most_significant_felt = key[3];
LeafIndex::new_max_depth(most_significant_felt.as_int())
@ -357,3 +444,25 @@ fn test_smt_serialization_deserialization() {
let bytes = smt.to_bytes();
assert_eq!(smt, Smt::read_from_bytes(&bytes).unwrap());
}
/// Checks that the prospective hash of a pair which is already stored in the tree equals the
/// hash of the leaf that currently holds it.
#[test]
fn test_prospective_hash() {
    // Two entries whose keys differ in every element.
    let first_key =
        RpoDigest::new([Felt::new(101), Felt::new(102), Felt::new(103), Felt::new(104)]);
    let first_value: Word =
        [Felt::new(1_u64), Felt::new(2_u64), Felt::new(3_u64), Felt::new(4_u64)];
    let second_key =
        RpoDigest::new([Felt::new(105), Felt::new(106), Felt::new(107), Felt::new(108)]);
    let second_value: Word =
        [Felt::new(5_u64), Felt::new(6_u64), Felt::new(7_u64), Felt::new(8_u64)];

    let entries: [(RpoDigest, Word); 2] = [(first_key, first_value), (second_key, second_value)];
    let smt = Smt::with_entries(entries).unwrap();

    entries.iter().for_each(|(key, value)| {
        let expected = smt.get_leaf(key).hash();
        let actual = smt.hash_prospective_leaf(key, value);
        assert_eq!(expected, actual);
    });
}

View file

@ -2,7 +2,7 @@ use alloc::vec::Vec;
use super::{Felt, LeafIndex, NodeIndex, Rpo256, RpoDigest, Smt, SmtLeaf, EMPTY_WORD, SMT_DEPTH};
use crate::{
merkle::{EmptySubtreeRoots, MerkleStore},
merkle::{smt::SparseMerkleTree, EmptySubtreeRoots, MerkleStore},
utils::{Deserializable, Serializable},
Word, ONE, WORD_SIZE,
};
@ -258,6 +258,96 @@ fn test_smt_removal() {
}
}
/// Walks a single leaf through empty -> single -> multiple -> single -> empty and checks, at
/// every step, that the prospective hash computed *before* a mutation matches the hash of the
/// leaf *after* that mutation. Removals are additionally checked in reverse: re-inserting the
/// removed value must prospectively reproduce the pre-removal hash.
#[test]
fn test_prospective_hash() {
    let mut smt = Smt::default();

    // All three keys share the same most significant element, so they land in the same leaf.
    let raw = 0b_01101001_01101100_00011111_11111111_10010110_10010011_11100000_00000000_u64;

    let key_1: RpoDigest = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw)]);
    let key_2: RpoDigest =
        RpoDigest::from([2_u32.into(), 2_u32.into(), 2_u32.into(), Felt::new(raw)]);
    let key_3: RpoDigest =
        RpoDigest::from([3_u32.into(), 3_u32.into(), 3_u32.into(), Felt::new(raw)]);

    let value_1 = [ONE; WORD_SIZE];
    let value_2 = [2_u32.into(); WORD_SIZE];
    let value_3: [Felt; 4] = [3_u32.into(); WORD_SIZE];

    // insert key-value 1
    {
        let prospective = smt.hash_prospective_leaf(&key_1, &value_1);
        let old_value_1 = smt.insert(key_1, value_1);
        assert_eq!(old_value_1, EMPTY_WORD);

        assert_eq!(smt.get_leaf(&key_1).hash(), prospective);

        assert_eq!(smt.get_leaf(&key_1), SmtLeaf::Single((key_1, value_1)));
    }

    // insert key-value 2
    {
        let prospective = smt.hash_prospective_leaf(&key_2, &value_2);
        let old_value_2 = smt.insert(key_2, value_2);
        assert_eq!(old_value_2, EMPTY_WORD);

        assert_eq!(smt.get_leaf(&key_2).hash(), prospective);

        assert_eq!(
            smt.get_leaf(&key_2),
            SmtLeaf::Multiple(vec![(key_1, value_1), (key_2, value_2)])
        );
    }

    // insert key-value 3
    {
        let prospective_hash = smt.hash_prospective_leaf(&key_3, &value_3);
        let old_value_3 = smt.insert(key_3, value_3);
        assert_eq!(old_value_3, EMPTY_WORD);

        assert_eq!(smt.get_leaf(&key_3).hash(), prospective_hash);

        assert_eq!(
            smt.get_leaf(&key_3),
            SmtLeaf::Multiple(vec![(key_1, value_1), (key_2, value_2), (key_3, value_3)])
        );
    }

    // remove key 3
    {
        let old_hash = smt.get_leaf(&key_3).hash();
        // Prospective hash of the removal itself, taken before the tree is mutated.
        let prospective_removal = smt.hash_prospective_leaf(&key_3, &EMPTY_WORD);
        let old_value_3 = smt.insert(key_3, EMPTY_WORD);
        assert_eq!(old_value_3, value_3);

        assert_eq!(smt.get_leaf(&key_3).hash(), prospective_removal);
        assert_eq!(old_hash, smt.hash_prospective_leaf(&key_3, &old_value_3));

        assert_eq!(
            smt.get_leaf(&key_3),
            SmtLeaf::Multiple(vec![(key_1, value_1), (key_2, value_2)])
        );
    }

    // remove key 2
    {
        let old_hash = smt.get_leaf(&key_2).hash();
        let prospective_removal = smt.hash_prospective_leaf(&key_2, &EMPTY_WORD);
        let old_value_2 = smt.insert(key_2, EMPTY_WORD);
        assert_eq!(old_value_2, value_2);

        assert_eq!(smt.get_leaf(&key_2).hash(), prospective_removal);
        assert_eq!(old_hash, smt.hash_prospective_leaf(&key_2, &old_value_2));

        assert_eq!(smt.get_leaf(&key_2), SmtLeaf::Single((key_1, value_1)));
    }

    // remove key 1
    {
        let old_hash = smt.get_leaf(&key_1).hash();
        let prospective_removal = smt.hash_prospective_leaf(&key_1, &EMPTY_WORD);
        let old_value_1 = smt.insert(key_1, EMPTY_WORD);
        assert_eq!(old_value_1, value_1);

        assert_eq!(smt.get_leaf(&key_1).hash(), prospective_removal);
        assert_eq!(old_hash, smt.hash_prospective_leaf(&key_1, &old_value_1));

        assert_eq!(smt.get_leaf(&key_1), SmtLeaf::new_empty(key_1.into()));
    }
}
/// Tests that 2 key-value pairs stored in the same leaf have the same path
#[test]
fn test_smt_path_to_keys_in_same_leaf_are_equal() {

View file

@ -167,6 +167,14 @@ pub(crate) trait SparseMerkleTree<const DEPTH: u8> {
/// Returns the hash of a leaf
fn hash_leaf(leaf: &Self::Leaf) -> RpoDigest;
/// Returns the hash that the leaf for `key` would have if `(key, value)` were inserted into
/// the tree, without performing any insertion or other mutation.
///
/// Note: the result is computed against the tree's current contents, so it should be
/// obtained *before* the corresponding insert and is only meaningful until the affected
/// leaf is next mutated.
fn hash_prospective_leaf(&self, key: &Self::Key, value: &Self::Value) -> RpoDigest;
/// Maps a key to a leaf index
fn key_to_leaf_index(key: &Self::Key) -> LeafIndex<DEPTH>;

View file

@ -302,6 +302,10 @@ impl<const DEPTH: u8> SparseMerkleTree<DEPTH> for SimpleSmt<DEPTH> {
leaf.into()
}
/// Returns the hash the leaf would have if `value` were stored in it.
///
/// The key is ignored: the prospective leaf is just `value`, which is handed to
/// `Self::hash_leaf` unchanged.
fn hash_prospective_leaf(&self, _key: &LeafIndex<DEPTH>, value: &Word) -> RpoDigest {
    let prospective_leaf: &Word = value;
    Self::hash_leaf(prospective_leaf)
}
/// Maps a key to a leaf index. Keys of this tree are already leaf indices, so the key is
/// returned as-is (by copy).
fn key_to_leaf_index(key: &LeafIndex<DEPTH>) -> LeafIndex<DEPTH> {
*key
}