refactor sorted_pairs_to_leaves() to also group subtrees

This commit is contained in:
Qyriad 2024-10-31 13:20:53 -06:00
parent 6d530b52c1
commit f2480b8c75

View file

@ -352,41 +352,43 @@ pub(crate) trait SparseMerkleTree<const DEPTH: u8> {
fn sorted_pairs_to_leaves( fn sorted_pairs_to_leaves(
pairs: Vec<(Self::Key, Self::Value)>, pairs: Vec<(Self::Key, Self::Value)>,
) -> PrecomputedLeaves<Self::Leaf> { ) -> PrecomputedSubtrees<Self::Leaf> {
let mut all_leaves = PrecomputedLeaves::default(); let mut accumulator: PrecomputedSubtrees<Self::Leaf> = Default::default();
let mut buffer: Vec<(Self::Key, Self::Value)> = Default::default(); // The kv-pairs we've seen so far that correspond to a single leaf.
let mut current_leaf_buffer: Vec<(Self::Key, Self::Value)> = Default::default();
let mut iter = pairs.into_iter().peekable(); let mut iter = pairs.into_iter().peekable();
while let Some((key, value)) = iter.next() { while let Some((key, value)) = iter.next() {
let col = Self::key_to_leaf_index(&key).index.value(); let col = Self::key_to_leaf_index(&key).index.value();
let next_col = iter.peek().map(|(key, _)| { let peeked_col = iter.peek().map(|(key, _v)| {
let index = Self::key_to_leaf_index(key); let index = Self::key_to_leaf_index(key);
index.index.value() let next_col = index.index.value();
// We panic if `pairs` is not sorted by column.
debug_assert!(next_col >= col);
next_col
}); });
current_leaf_buffer.push((key, value));
buffer.push((key, value)); // If the next pair is the same column as this one, then we're done after adding this
// pair to the buffer.
if let Some(next_col) = next_col { if peeked_col == Some(col) {
assert!(next_col >= col);
}
if next_col == Some(col) {
// Keep going in our buffer.
continue; continue;
} }
// Whether the next pair is a different column, or non-existent, we break off. // Otherwise, the next pair is a different column, or there is no next pair. Either way
let leaf_pairs = mem::take(&mut buffer); // it's time to swap out our buffer.
let leaf_pairs = mem::take(&mut current_leaf_buffer);
let leaf = Self::pairs_to_leaf(leaf_pairs); let leaf = Self::pairs_to_leaf(leaf_pairs);
let hash = Self::hash_leaf(&leaf); let hash = Self::hash_leaf(&leaf);
all_leaves.nodes.insert(col, leaf); accumulator.nodes.insert(col, leaf);
all_leaves.subtrees.push(SubtreeLeaf { col, hash }); accumulator.add_leaf(SubtreeLeaf { col, hash });
}
assert_eq!(buffer.len(), 0);
all_leaves debug_assert!(current_leaf_buffer.is_empty());
}
accumulator
} }
/// Builds Merkle nodes from a bottom layer of tuples of horizontal indices and their hashes, /// Builds Merkle nodes from a bottom layer of tuples of horizontal indices and their hashes,
@ -615,48 +617,64 @@ pub struct SubtreeLeaf {
pub hash: RpoDigest, pub hash: RpoDigest,
} }
impl SubtreeLeaf {
#[cfg_attr(not(test), allow(dead_code))]
fn from_smt_leaf(leaf: &crate::merkle::SmtLeaf) -> Self {
Self {
col: leaf.index().index.value(),
hash: leaf.hash(),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrecomputedLeaves<L> { pub struct PrecomputedSubtrees<L> {
/// Literal leaves to be added to the sparse Merkle tree's internal mapping. /// Literal leaves to be added to the sparse Merkle tree's internal mapping.
pub nodes: BTreeMap<u64, L>, pub nodes: BTreeMap<u64, L>,
/// "Conceptual" leaves that will be used for computations. /// "Conceptual" leaves that will be used for computations.
pub subtrees: Vec<SubtreeLeaf>, pub leaves: Vec<Vec<SubtreeLeaf>>,
} }
impl<L> PrecomputedLeaves<L> { impl<L> PrecomputedSubtrees<L> {
#[cfg_attr(not(test), allow(dead_code))] pub fn add_leaf(&mut self, leaf: SubtreeLeaf) {
pub fn split_at_column(mut self, col: u64) -> (Self, Self) { // A depth-8 subtree contains 256 "columns" that can possibly be occupied.
let split_point = match self.subtrees.binary_search_by_key(&col, |key| key.col) { const COLS_PER_SUBTREE: u64 = u64::pow(2, 8);
// Surprisingly, Result<T, E> has no method like `unwrap_or_unwrap_err() where T == E`.
// Probably because there's no way to write that where bound. let last_subtree = match self.leaves.last_mut() {
Ok(split_point) | Err(split_point) => split_point, // Base case.
None => {
self.leaves.push(vec![leaf]);
return;
},
Some(last_subtree) => last_subtree,
}; };
let subtrees_right = self.subtrees.split_off(split_point); debug_assert!(!last_subtree.is_empty());
let subtrees_left = self.subtrees; debug_assert!(last_subtree.len() <= COLS_PER_SUBTREE as usize);
let nodes_right = self.nodes.split_off(&col); // The multiple of 256 after 0 is 1, but 0 and 1 do not belong to different subtrees.
let nodes_left = self.nodes; let last_subtree_col = u64::max(1, last_subtree.last().unwrap().col);
let next_subtree_col = if last_subtree_col.is_multiple_of(&COLS_PER_SUBTREE) {
let left = Self { u64::next_multiple_of(last_subtree_col + 1, COLS_PER_SUBTREE)
nodes: nodes_left, } else {
subtrees: subtrees_left, last_subtree_col.next_multiple_of(COLS_PER_SUBTREE)
}; };
let right = Self { if leaf.col < next_subtree_col {
nodes: nodes_right, last_subtree.push(leaf);
subtrees: subtrees_right, } else {
}; //std::eprintln!("\tcreating new subtree for column {}", leaf.col);
let next_subtree = vec![leaf];
(left, right) self.leaves.push(next_subtree);
}
} }
} }
// Derive requires `L` to impl Default, even though we don't actually need that. // Derive requires `L` to impl Default, even though we don't actually need that.
impl<L> Default for PrecomputedLeaves<L> { impl<L> Default for PrecomputedSubtrees<L> {
fn default() -> Self { fn default() -> Self {
Self { Self {
nodes: Default::default(), nodes: Default::default(),
subtrees: Default::default(), leaves: Default::default(),
} }
} }
} }
@ -665,50 +683,88 @@ impl<L> Default for PrecomputedLeaves<L> {
// ================================================================================================ // ================================================================================================
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use alloc::vec::Vec; use alloc::{collections::BTreeMap, vec::Vec};
use super::SparseMerkleTree; use super::SparseMerkleTree;
use crate::{ use crate::{
hash::rpo::RpoDigest, hash::rpo::RpoDigest,
merkle::{smt::SubtreeLeaf, Smt, SmtLeaf, SMT_DEPTH}, merkle::{smt::SubtreeLeaf, Smt, SmtLeaf, SMT_DEPTH},
Felt, Word, EMPTY_WORD, ONE, Felt, Word, ONE,
}; };
#[test] #[test]
fn test_sorted_pairs_to_leaves() { fn test_sorted_pairs_to_leaves() {
let entries: Vec<(RpoDigest, Word)> = vec![ let entries: Vec<(RpoDigest, Word)> = vec![
// Subtree 0.
(RpoDigest::new([ONE, ONE, ONE, Felt::new(16)]), [ONE; 4]), (RpoDigest::new([ONE, ONE, ONE, Felt::new(16)]), [ONE; 4]),
(RpoDigest::new([ONE, ONE, ONE, Felt::new(17)]), [ONE; 4]), (RpoDigest::new([ONE, ONE, ONE, Felt::new(17)]), [ONE; 4]),
// Leaf index collision. // Leaf index collision.
(RpoDigest::new([ONE, ONE, Felt::new(10), Felt::new(20)]), [ONE; 4]), (RpoDigest::new([ONE, ONE, Felt::new(10), Felt::new(20)]), [ONE; 4]),
(RpoDigest::new([ONE, ONE, Felt::new(20), Felt::new(20)]), [ONE; 4]), (RpoDigest::new([ONE, ONE, Felt::new(20), Felt::new(20)]), [ONE; 4]),
// Normal single leaf again. // Subtree 1. Normal single leaf again.
(RpoDigest::new([ONE, ONE, ONE, Felt::new(400)]), [ONE; 4]), (RpoDigest::new([ONE, ONE, ONE, Felt::new(400)]), [ONE; 4]), // Subtree boundary.
// Empty leaf. (RpoDigest::new([ONE, ONE, ONE, Felt::new(401)]), [ONE; 4]),
(RpoDigest::new([ONE, ONE, ONE, Felt::new(500)]), EMPTY_WORD), // Subtree 2. Another normal leaf.
]; (RpoDigest::new([ONE, ONE, ONE, Felt::new(1024)]), [ONE; 4]),
let mut entries_iter = entries.iter().cloned();
let mut next_entry = || entries_iter.next().unwrap();
let control_leaves: Vec<SmtLeaf> = vec![
SmtLeaf::Single(next_entry()),
SmtLeaf::Single(next_entry()),
SmtLeaf::new_multiple(vec![next_entry(), next_entry()]).unwrap(),
SmtLeaf::Single(next_entry()),
SmtLeaf::new_empty(Smt::key_to_leaf_index(&next_entry().0)),
]; ];
let control_subtree_leaves: Vec<SubtreeLeaf> = control_leaves let control = Smt::with_entries(entries.clone()).unwrap();
.iter()
.map(|leaf| { let control_leaves: Vec<SmtLeaf> = {
let col = leaf.index().index.value(); let mut entries_iter = entries.iter().cloned();
let hash = leaf.hash(); let mut next_entry = || entries_iter.next().unwrap();
SubtreeLeaf { col, hash } let control_leaves = vec![
}) // Subtree 0.
SmtLeaf::Single(next_entry()),
SmtLeaf::Single(next_entry()),
SmtLeaf::new_multiple(vec![next_entry(), next_entry()]).unwrap(),
// Subtree 1.
SmtLeaf::Single(next_entry()),
SmtLeaf::Single(next_entry()),
// Subtree 2.
SmtLeaf::Single(next_entry()),
];
assert_eq!(entries_iter.next(), None);
control_leaves
};
let control_subtree_leaves: Vec<Vec<SubtreeLeaf>> = {
let mut control_leaves_iter = control_leaves.iter();
let mut next_leaf = || control_leaves_iter.next().unwrap();
let control_subtree_leaves: Vec<Vec<SubtreeLeaf>> = [
// Subtree 0.
vec![next_leaf(), next_leaf(), next_leaf()],
// Subtree 1.
vec![next_leaf(), next_leaf()],
// Subtree 2.
vec![next_leaf()],
]
.map(|subtree| subtree.into_iter().map(SubtreeLeaf::from_smt_leaf).collect())
.to_vec();
assert_eq!(control_leaves_iter.next(), None);
control_subtree_leaves
};
let subtrees = Smt::sorted_pairs_to_leaves(entries);
// This will check that the hashes, columns, and subtree assignments all match.
assert_eq!(subtrees.leaves, control_subtree_leaves);
// Then finally we might as well check the computed leaf nodes too.
let control_leaves: BTreeMap<u64, SmtLeaf> = control
.leaves()
.map(|(index, value)| (index.index.value(), value.clone()))
.collect(); .collect();
let test_subtree_leaves = Smt::sorted_pairs_to_leaves(entries).subtrees; for (column, test_leaf) in subtrees.nodes {
assert_eq!(control_subtree_leaves, test_subtree_leaves); if test_leaf.is_empty() {
continue;
}
let control_leaf = control_leaves
.get(&column)
.expect(&format!("no leaf node found for column {column}"));
assert_eq!(control_leaf, &test_leaf);
}
} }
// Helper for the below tests. // Helper for the below tests.
@ -733,7 +789,8 @@ mod test {
let control = Smt::with_entries(entries.clone()).unwrap(); let control = Smt::with_entries(entries.clone()).unwrap();
// `entries` should already be sorted by nature of how we constructed it. // `entries` should already be sorted by nature of how we constructed it.
let leaves = Smt::sorted_pairs_to_leaves(entries).subtrees; let leaves = Smt::sorted_pairs_to_leaves(entries).leaves;
let leaves = leaves.into_iter().next().unwrap();
let (first_subtree, _) = Smt::build_subtree(leaves, SMT_DEPTH); let (first_subtree, _) = Smt::build_subtree(leaves, SMT_DEPTH);
assert!(!first_subtree.is_empty()); assert!(!first_subtree.is_empty());
@ -756,17 +813,19 @@ mod test {
let control = Smt::with_entries(entries.clone()).unwrap(); let control = Smt::with_entries(entries.clone()).unwrap();
let leaves = Smt::sorted_pairs_to_leaves(entries); let PairComputations { leaves, .. } = Smt::sorted_pairs_to_leaves(entries);
let (first, second) = leaves.split_at_column(PAIR_COUNT / 2); // With two subtrees' worth of leaves, we should have exactly two subtrees.
assert_eq!(first.subtrees.len(), second.subtrees.len()); let [first, second]: [_; 2] = leaves.try_into().unwrap();
assert_eq!(first.len() as u64, PAIR_COUNT / 2);
assert_eq!(first.len(), second.len());
let mut current_depth = SMT_DEPTH; let mut current_depth = SMT_DEPTH;
let mut next_leaves: Vec<SubtreeLeaf> = Default::default(); let mut next_leaves: Vec<SubtreeLeaf> = Default::default();
let (first_nodes, leaves) = Smt::build_subtree(first.subtrees, current_depth); let (first_nodes, leaves) = Smt::build_subtree(first, current_depth);
next_leaves.extend(leaves); next_leaves.extend(leaves);
let (second_nodes, leaves) = Smt::build_subtree(second.subtrees, current_depth); let (second_nodes, leaves) = Smt::build_subtree(second, current_depth);
next_leaves.extend(leaves); next_leaves.extend(leaves);
// All new inner nodes + the new subtree-leaves should be 512, for one depth-cycle. // All new inner nodes + the new subtree-leaves should be 512, for one depth-cycle.