From b184d301e4d676e2e2df2df46874505f58960c2d Mon Sep 17 00:00:00 2001 From: Qyriad Date: Thu, 14 Nov 2024 19:01:58 -0700 Subject: [PATCH] smt: add parallel constructors to Smt and SimpleSmt What the previous few commits have been leading up to: SparseMerkleTree now has a function to construct the tree from existing data in parallel. This is significantly faster than the singlethreaded equivalent. Benchmarks incoming! --- src/merkle/smt/full/mod.rs | 12 ++++++ src/merkle/smt/mod.rs | 84 ++++++++++++++++++++++++++++++++++++ src/merkle/smt/simple/mod.rs | 15 +++++++ 3 files changed, 111 insertions(+) diff --git a/src/merkle/smt/full/mod.rs b/src/merkle/smt/full/mod.rs index 4f6ec62..ade92e6 100644 --- a/src/merkle/smt/full/mod.rs +++ b/src/merkle/smt/full/mod.rs @@ -101,6 +101,18 @@ impl Smt { Ok(tree) } + /// The parallel version of [`Smt::with_entries()`]. + /// + /// Returns a new [`Smt`] instantiated with leaves set as specified by the provided entries, + /// constructed in parallel. + /// + /// All leaves omitted from the entries list are set to [Self::EMPTY_VALUE]. The entries are collected into a `Vec` and handed to a qualified `with_entries_par()` call, whose result is returned as is. + pub fn with_entries_par( + entries: impl IntoIterator, + ) -> Result { + >::with_entries_par(Vec::from_iter(entries)) + } + /// Returns a new [`Smt`] instantiated from already computed leaves and nodes. /// /// This function performs minimal consistency checking. It is the caller's responsibility to diff --git a/src/merkle/smt/mod.rs b/src/merkle/smt/mod.rs index adeecd9..8aa16b0 100644 --- a/src/merkle/smt/mod.rs +++ b/src/merkle/smt/mod.rs @@ -65,6 +65,18 @@ pub(crate) trait SparseMerkleTree { // PROVIDED METHODS // --------------------------------------------------------------------------------------------- + /// Creates a new sparse Merkle tree from an existing set of key-value pairs, in parallel. The inner nodes and leaves are obtained from `build_subtrees()`, the leaves are re-keyed by `index.value()`, the root hash is taken from the inner node stored at `NodeIndex::root()`, and everything is passed to `from_raw_parts()`. Panics if no inner node is stored at the root index.
+ fn with_entries_par(entries: Vec<(Self::Key, Self::Value)>) -> Result + where + Self: Sized, + { + let (inner_nodes, leaves) = Self::build_subtrees(entries); + let leaves: BTreeMap = + leaves.into_iter().map(|(index, leaf)| (index.value(), leaf)).collect(); + let root = inner_nodes.get(&NodeIndex::root()).unwrap().hash(); + Self::from_raw_parts(inner_nodes, leaves, root) + } + /// Returns an opening of the leaf associated with `key`. Conceptually, an opening is a Merkle /// path to the leaf, as well as the leaf itself. fn open(&self, key: &Self::Key) -> Self::Opening { @@ -429,6 +441,8 @@ pub(crate) trait SparseMerkleTree { /// the "leaves" for the next 8-deep subtree, so this function can effectively be chained into /// itself. /// + /// This function is mostly an implementation detail of [`SparseMerkleTree::build_subtrees()`]. + /// /// # Panics /// With debug assertions on, this function panics under invalid inputs: if `leaves` contains /// more entries than can fit in a depth-8 subtree, if `leaves` contains leaves belonging to @@ -522,6 +536,63 @@ pub(crate) trait SparseMerkleTree { (inner_nodes, leaves) } + + /// Computes the raw parts (the inner-node map and the leaf map) for a new sparse Merkle tree from a set of key-value pairs. + /// + /// `entries` need not be sorted; this function sorts them by leaf index value before building, then processes the depths `SUBTREE_DEPTH..=DEPTH` in steps of `SUBTREE_DEPTH`, deepest first, building the subtrees of each step in parallel. + /// + /// This function is mostly an implementation detail of + /// [`SparseMerkleTree::with_entries_par()`].
+ fn build_subtrees( + mut entries: Vec<(Self::Key, Self::Value)>, + ) -> (BTreeMap, BTreeMap, Self::Leaf>) { + use rayon::prelude::*; + + entries.sort_by_key(|item| { + let index = Self::key_to_leaf_index(&item.0); + index.value() + }); + + let mut accumulated_nodes: BTreeMap = Default::default(); + + let PairComputations { + leaves: mut leaf_subtrees, + nodes: initial_leaves, + } = Self::sorted_pairs_to_leaves(entries); + + for current_depth in (SUBTREE_DEPTH..=DEPTH).step_by(SUBTREE_DEPTH as usize).rev() { + let (nodes, subtrees): (Vec>, Vec>) = leaf_subtrees + .into_par_iter() + .map(|subtree| { + debug_assert!(subtree.is_sorted()); + debug_assert!(!subtree.is_empty()); + + let (nodes, next_leaves) = Self::build_subtree(subtree, current_depth); + + debug_assert!(next_leaves.is_sorted()); + + (nodes, next_leaves) + }) + .unzip(); + + let mut all_leaves: Vec = subtrees.into_iter().flatten().collect(); + leaf_subtrees = SubtreeLeavesIter::from_leaves(&mut all_leaves).collect(); + accumulated_nodes.extend(nodes.into_iter().flatten()); + + debug_assert!(!leaf_subtrees.is_empty()); + } + + let leaves: BTreeMap, Self::Leaf> = initial_leaves + .into_iter() + .map(|(key, value)| { + // This unwrap *should* be unreachable. NOTE(review): presumably every key in `initial_leaves` is already a valid leaf index; confirm against `sorted_pairs_to_leaves()`. + let key = LeafIndex::::new(key).unwrap(); + (key, value) + }) + .collect(); + + (accumulated_nodes, leaves) // inner nodes gathered over all depth steps, plus the leaves re-keyed by `LeafIndex` + } } // INNER NODE @@ -1150,4 +1221,17 @@ mod test { // And of course the root we got from each place should match.
assert_eq!(control.root(), root_leaf.hash); } + + #[test] + fn test_with_entries_par() { + const PAIR_COUNT: u64 = COLS_PER_SUBTREE * 64; + + let entries = generate_entries(PAIR_COUNT); + + let control = Smt::with_entries(entries.clone()).unwrap(); + + let smt = Smt::with_entries_par(entries.clone()).unwrap(); + assert_eq!(smt.root(), control.root()); + assert_eq!(smt, control); + } } diff --git a/src/merkle/smt/simple/mod.rs b/src/merkle/smt/simple/mod.rs index 1ded87f..7c90daa 100644 --- a/src/merkle/smt/simple/mod.rs +++ b/src/merkle/smt/simple/mod.rs @@ -100,6 +100,21 @@ impl SimpleSmt { Ok(tree) } + /// The parallel version of [`SimpleSmt::with_leaves()`]. + /// + /// Returns a new [`SimpleSmt`] instantiated with leaves set as specified by the provided entries: each column index is converted with `LeafIndex::new()` and the resulting pairs are handed to a qualified `with_entries_par()` call. + /// + /// All leaves omitted from the entries list are set to [ZERO; 4]. Panics if `LeafIndex::new()` rejects a column index, because the conversion result is unwrapped. + pub fn with_leaves_par( + entries: impl IntoIterator, + ) -> Result { + let entries: Vec<_> = entries + .into_iter() + .map(|(col, value)| (LeafIndex::::new(col).unwrap(), value)) + .collect(); + >::with_entries_par(entries) + } + /// Returns a new [`SimpleSmt`] instantiated from already computed leaves and nodes. /// /// This function performs minimal consistency checking. It is the caller's responsibility to