Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: introduce DecoratorSpan to reduce MastNode size #1621

Draft
wants to merge 1 commit into
base: next
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 5 additions & 14 deletions assembly/src/assembler/basic_block_builder.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
use alloc::{borrow::Borrow, string::ToString, vec::Vec};

use vm_core::{
mast::{DecoratorId, MastNodeId},
sys_events::SystemEvent,
AssemblyOp, Decorator, Operation,
};

use super::{mast_forest_builder::MastForestBuilder, BodyWrapper, DecoratorList, ProcedureContext};
use vm_core::{mast::{DecoratorId, MastNodeId}, sys_events::SystemEvent, AssemblyOp, Decorator, Operation};
use vm_core::mast::DecoratorSpan;
use super::{mast_forest_builder::MastForestBuilder, BodyWrapper, ProcedureContext};
use crate::{ast::Instruction, AssemblyError, Span};

// BASIC BLOCK BUILDER
Expand All @@ -24,7 +19,7 @@ use crate::{ast::Instruction, AssemblyError, Span};
#[derive(Debug)]
pub struct BasicBlockBuilder<'a> {
ops: Vec<Operation>,
decorators: DecoratorList,
decorators: Vec<(usize, DecoratorId)>,
epilogue: Vec<Operation>,
last_asmop_pos: usize,
mast_forest_builder: &'a mut MastForestBuilder,
Expand Down Expand Up @@ -174,11 +169,7 @@ impl BasicBlockBuilder<'_> {
pub fn make_basic_block(&mut self) -> Result<Option<MastNodeId>, AssemblyError> {
if !self.ops.is_empty() {
let ops = self.ops.drain(..).collect();
let decorators = if !self.decorators.is_empty() {
Some(self.decorators.drain(..).collect())
} else {
None
};
let decorators = DecoratorSpan::from_raw_ops_decorators(self.decorators.drain(..).collect());

let basic_block_node_id = self.mast_forest_builder.ensure_block(ops, decorators)?;

Expand Down
10 changes: 7 additions & 3 deletions assembly/src/assembler/mast_forest_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use vm_core::{
},
Decorator, DecoratorList, Operation,
};

use vm_core::mast::DecoratorSpan;
use super::{GlobalProcedureIndex, Procedure};
use crate::AssemblyError;

Expand Down Expand Up @@ -456,14 +456,18 @@ impl MastForestBuilder {
}

pub fn set_before_enter(&mut self, node_id: MastNodeId, decorator_ids: Vec<DecoratorId>) {
self.mast_forest[node_id].set_before_enter(decorator_ids);
// Does this need to have a Vec<DecoratorSpan>?
let span = DecoratorSpan::new_collection(decorator_ids).into_iter().next().unwrap();
Copy link
Contributor Author

@yasonk yasonk Jan 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure having a collection of spans is needed, but new_collection method creates a collection because it seems that it may be possible to have non-contiguous decorator spans. See this comment: https://github.com/0xPolygonMiden/miden-vm/pull/1621/files#r1912588483

self.mast_forest[node_id].set_before_enter(span);

let new_node_fingerprint = self.fingerprint_for_node(&self[node_id]);
self.hash_by_node_id.insert(node_id, new_node_fingerprint);
}

pub fn set_after_exit(&mut self, node_id: MastNodeId, decorator_ids: Vec<DecoratorId>) {
self.mast_forest[node_id].set_after_exit(decorator_ids);
// Does this need to have a Vec<DecoratorSpan>?
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure having a collection of spans is needed, but new_collection method creates a collection because it seems that it may be possible to have non-contiguous decorator spans. See this comment: https://github.com/0xPolygonMiden/miden-vm/pull/1621/files#r1912588483

let span = DecoratorSpan::new_collection(decorator_ids).into_iter().next().unwrap();
self.mast_forest[node_id].set_after_exit(span);

let new_node_fingerprint = self.fingerprint_for_node(&self[node_id]);
self.hash_by_node_id.insert(node_id, new_node_fingerprint);
Expand Down
9 changes: 6 additions & 3 deletions assembly/src/assembler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ use vm_core::{
crypto::hash::RpoDigest,
debuginfo::SourceSpan,
mast::{DecoratorId, MastNodeId},
DecoratorList, Felt, Kernel, Operation, Program,
Felt, Kernel, Operation, Program,
};

use vm_core::mast::DecoratorSpan;
use crate::{
ast::{self, Export, InvocationTarget, InvokeKind, ModuleKind, QualifiedProcedureName},
diagnostics::Report,
Expand Down Expand Up @@ -642,10 +642,13 @@ impl Assembler {
)?;

if let Some(decorator_ids) = block_builder.drain_decorators() {
// This just picks off the first decorator span, but should there be more than one?
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again not sure if there will ever be a case for more than one span here

let span = DecoratorSpan::new_collection(decorator_ids).into_iter().next().unwrap();

// Attach the decorators before the first instance of the repeated node
let mut first_repeat_node =
block_builder.mast_forest_builder_mut()[repeat_node_id].clone();
first_repeat_node.set_before_enter(decorator_ids);
first_repeat_node.set_before_enter(span);
let first_repeat_node_id = block_builder
.mast_forest_builder_mut()
.ensure_node(first_repeat_node)?;
Expand Down
33 changes: 22 additions & 11 deletions core/src/mast/merger/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@ use alloc::{collections::BTreeMap, vec::Vec};

use miden_crypto::{hash::blake::Blake3Digest, utils::collections::KvMap};

use crate::mast::{
DecoratorId, MastForest, MastForestError, MastNode, MastNodeFingerprint, MastNodeId,
MultiMastForestIteratorItem, MultiMastForestNodeIter,
};
use crate::mast::{DecoratorId, DecoratorSpan, MastForest, MastForestError, MastNode, MastNodeFingerprint, MastNodeId, MultiMastForestIteratorItem, MultiMastForestNodeIter};

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -151,6 +148,8 @@ impl MastForestMerger {
let new_decorator_id = if let Some(existing_decorator) =
self.decorators_by_hash.get(&merging_decorator_hash)
{
// This implies that in some cases it is possible to have intersecting decorator sets
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This implies that in some cases it is possible to have intersecting decorator sets, meaning a DecoratorSpan will potentially need to be split up into multiple DecoratorSpan objects. Not sure if this is a real scenario or what prevents it from becoming real.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think the original proposal didn't account for this. We definitely don't want to split a DecoratorSpan into multiple spans for a single node/operation. So, I think we have 2 options here:

  1. Either we drop the requirement that decorators in the decorator list are unique. Then, if two nodes use the same decorator, we'd have such decorators duplicated so that each of them has a different ID.
  2. Or, we add a level of indirection where DecoratorSpan points to a list of decorator IDs, and then these IDs point to a unique list of decorators.

Option 1 could make sense if we expect vast majority of decorators to be unique anyway - so, duplicating a small number of them may not be an issue. On the other hand, option 2 is a more generic approach and is probably less work. So, my intuition is that option 2 is probably a better approach here (assuming in fact is is not more complicated to implement than options 1).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bobbinth I don't fully understand Option 2. But just to understand the picture better.
This is my understanding:
When you add nodes and operations each decorator is assigned a unique DecoratorID. Therefore there is no overlap of DecoratorId between nodes (I didn't see the check for existing decorators when adding decorators).

The only place where I found a potential of overlap is this code checking for existing decorators when merging.
I'm not familiar with how impactful or frequent merging is, but if we remove the code to check for existing decorator, it will mean decorator spans for an operation are contiguous.

If they are not contiguous and we need another list to track that, then the initial goal of reducing memory footprint will not be met. (We were trying to replace a Vec with a smaller structure. Or does Option 2 simply push the memory burden away from MastForest?

If we have DecoratorSpan -> Vec(oldDecoratorID, newDecoratorId) -> DecoratorID

I don't understand where the middle part would live, and why would this be better than what is currently in main.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you add nodes and operations each decorator is assigned a unique DecoratorID.

This is not exactly correct: only unique decorators get uniq decorator IDs. That is, if we want to add two identical decorators to the same MastForest, both of them would get the same ID. This happens in MastForestBuilder::ensure_decorator().

If they are not contiguous and we need another list to track that, then the initial goal of reducing memory footprint will not be met. (We were trying to replace a Vec with a smaller structure. Or does Option 2 simply push the memory burden away from MastForest?

If we have DecoratorSpan -> Vec(oldDecoratorID, newDecoratorId) -> DecoratorID

I don't understand where the middle part would live, and why would this be better than what is currently in main.

The main idea of option 2 is to have a vector of decorator IDs which could contain duplicate IDs. So, MastForest could look like so:

pub struct MastForest {
    nodes: Vec<MastNode>,
    roots: Vec<MastNodeId>,
    decorator_ids: Vec<DecoratorId>, // new filed, open to use a different name for it
    decorators: Vec<Decorator>,
    advice_map: AdviceMap,
}

Then, DecoratorSpan would point to the decorator_ids field, and then we could use individual IDs from there to look up decorators.

To illustrate this, let's say we have a MastForest with 2 nodes. Node 1 would have decorators a and b attached to it, and node 2 would have decorators a and c attached to it. Then, this would result in the following:

  • decorators would look something like this: [a, b, c] - i.e., ID of a is 0, ID of b is 1, and ID of c is 2.
  • decorator_ids would look as follows [0, 1, 0, 2].
  • The decorator span for node 1 would then be (0, 2), and for node 2 it would be (2, 2).

And so, when we want to look up decorators for node 1, we look at its decorator span that says that we need to grab the first two IDs from the decorator_ids. These IDs are 0 and 1 and so then we can look up these decorators in the decorators vector.

This does increase the size of the MastForest somewhat and also creates an extra lookup "hop". But the main thing we are able to avoid is creating 2 vectors per node (which would require 24 bytes per vector, not counting the actual vector data). Instead, each node now can contain 2 decorator spans (which requires 16 bytes total - and we can compress this to 10). So, we still get some memory savings, but again, the main benefit is avoiding creating thousands (and potentially millions) of vectors - which is a significant bottleneck.

// meaning a DecoratorSpan will potentially need to be split up
*existing_decorator
} else {
let new_decorator_id = self.mast_forest.add_decorator(merging_decorator.clone())?;
Expand Down Expand Up @@ -263,8 +262,18 @@ impl MastForestMerger {
)
})
};
let map_decorators = |decorators: &[DecoratorId]| -> Result<Vec<_>, MastForestError> {
decorators.iter().map(map_decorator_id).collect()

let remap_decorator_span = |decorator_span: &DecoratorSpan| -> Result<DecoratorSpan, MastForestError>{
let new_decorator_ids = decorator_span
.iter()
.map(|decorator_id| map_decorator_id(&decorator_id))
.collect::<Result<Vec<_>, _>>()?;
let new_spans = DecoratorSpan::new_collection(new_decorator_ids);

new_spans
.into_iter()
.next()
.ok_or_else(|| MastForestError::DecoratorSpanMissing(decorator_span.offset, decorator_span.num_decorators))
};

let map_node_id = |node_id: MastNodeId| {
Expand Down Expand Up @@ -311,9 +320,11 @@ impl MastForestMerger {
basic_block_node
.decorators()
.iter()
.map(|(idx, decorator_id)| match map_decorator_id(decorator_id) {
Ok(mapped_decorator) => Ok((*idx, mapped_decorator)),
Err(err) => Err(err),
.map(|(idx, decorator_span)| {
match remap_decorator_span(decorator_span) {
Ok(mapped_decorator) => Ok((*idx, mapped_decorator)),
Err(err) => Err(err),
}
})
.collect::<Result<Vec<_>, _>>()?,
),
Expand All @@ -327,8 +338,8 @@ impl MastForestMerger {
// Decorators must be handled specially for basic block nodes.
// For other node types we can handle it centrally.
if !mapped_node.is_basic_block() {
mapped_node.set_before_enter(map_decorators(node.before_enter())?);
mapped_node.set_after_exit(map_decorators(node.after_exit())?);
mapped_node.set_before_enter(remap_decorator_span(node.before_enter())?);
mapped_node.set_after_exit(remap_decorator_span(node.after_exit())?);
}

Ok(mapped_node)
Expand Down
134 changes: 131 additions & 3 deletions core/src/mast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub use node::{
BasicBlockNode, CallNode, DynNode, ExternalNode, JoinNode, LoopNode, MastNode, OpBatch,
OperationOrDecorator, SplitNode, OP_BATCH_SIZE, OP_GROUP_SIZE,
};
use winter_utils::{ByteWriter, DeserializationError, Serializable};
use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable};

use crate::{AdviceMap, Decorator, DecoratorList, Operation};

Expand Down Expand Up @@ -199,11 +199,11 @@ impl MastForest {
Some(id_remappings)
}

pub fn set_before_enter(&mut self, node_id: MastNodeId, decorator_ids: Vec<DecoratorId>) {
pub fn set_before_enter(&mut self, node_id: MastNodeId, decorator_ids: DecoratorSpan) {
self[node_id].set_before_enter(decorator_ids)
}

pub fn set_after_exit(&mut self, node_id: MastNodeId, decorator_ids: Vec<DecoratorId>) {
pub fn set_after_exit(&mut self, node_id: MastNodeId, decorator_ids: DecoratorSpan) {
self[node_id].set_after_exit(decorator_ids)
}

Expand Down Expand Up @@ -621,6 +621,132 @@ impl fmt::Display for MastNodeId {
}
}

#[derive(Debug, Clone, PartialEq, Eq, Default, Copy)]
pub struct DecoratorSpan {
/// Offset into the decorators vector of the mast forest.
offset: u32,

/// The number of decorators in this span.
num_decorators: u32,
}

impl DecoratorSpan {
pub fn new_collection(decorator_ids: Vec<DecoratorId>) -> Vec<DecoratorSpan> {
// Test that this returns an empty decoratorspan if the DecoratorId list is empty
let mut sorted_ids = decorator_ids;
sorted_ids.sort_unstable();

sorted_ids
.into_iter()
.fold(Vec::new(), |mut spans: Vec<DecoratorSpan>, id| {
if let Some(last) = spans.last_mut() {
if last.offset + last.num_decorators == id.as_u32() {
last.num_decorators += 1;
return spans;
}
}
spans.push(DecoratorSpan {
offset: id.as_u32(),
num_decorators: 1,
});
spans
})
}

pub fn from_raw_ops_decorators(decorators: Vec<(usize, DecoratorId)>) -> Option<DecoratorList> {
if !decorators.is_empty() {
let mut decorators_list: DecoratorList = Vec::new();
let mut current_index: Option<usize> = None;
let mut current_group: Vec<DecoratorId> = Vec::new();

for (index, decorator_id) in decorators{
if Some(index) != current_index {
//process the previous group
if !current_group.is_empty() {
let spans = DecoratorSpan::new_collection(current_group);
if let Some(span) = spans.into_iter().next() {
decorators_list.push((index, span));
}
}
// Start a new group
current_index = Some(index);
current_group = Vec::new();
}
current_group.push(decorator_id);
}

// Process the last group
if let Some(idx) = current_index {
if !current_group.is_empty() {
let spans = DecoratorSpan::new_collection(current_group);
if let Some(span) = spans.into_iter().next() {
decorators_list.push((idx, span));
}
}
}
Some(decorators_list)
} else {
None
}
}

pub fn iter(&self) -> impl Iterator<Item = DecoratorId> { //Should this return a reference?
(self.offset..self.offset + self.num_decorators).map(DecoratorId)
}

pub fn is_empty(&self) -> bool {
self.num_decorators == 0
}

pub fn read_safe<R: ByteReader>(
source: &mut R,
mast_forest: &MastForest,
) -> Result<Self, DeserializationError> {
let value = Self::read_from(source)?;
let last_decorator_id = (value.offset + value.num_decorators) as usize;
if last_decorator_id < mast_forest.decorators.len() {
Ok(Self { offset: value.offset, num_decorators: value.num_decorators })
} else {
Err(DeserializationError::InvalidValue(format!(
"Invalid deserialized MAST decorator span with offset'{}' and num_decorators'{}', but only {} decorators in the forest",
value.offset, value.num_decorators,
mast_forest.nodes.len(),
)))
}
}
}

impl Deserializable for DecoratorSpan {
fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
let offset = source.read_u32()?;
let num_decorators = source.read_u32()?;
Ok(DecoratorSpan { offset, num_decorators })
}
}
impl From<Vec<DecoratorId>> for DecoratorSpan {
fn from(decorator_ids: Vec<DecoratorId>) -> Self {
// Create a DecoratorSpan based on the size of the list and the first offset
let num_decorators = decorator_ids.len() as u32;
let offset = if num_decorators > 0 {
decorator_ids[0].into()
} else {
0
};

DecoratorSpan {
offset,
num_decorators,
}
}
}

impl Serializable for DecoratorSpan {
fn write_into<W: ByteWriter>(&self, target: &mut W) {
self.offset.write_into(target);
self.num_decorators.write_into(target);
}
}

// DECORATOR ID
// ================================================================================================

Expand Down Expand Up @@ -707,6 +833,8 @@ pub enum MastForestError {
NodeIdOverflow(MastNodeId, usize),
#[error("decorator id {0} is greater than or equal to decorator count {1}")]
DecoratorIdOverflow(DecoratorId, usize),
#[error("decorator span with offset '{0}' and num_decorators '{1}' should remap when merging, but was not")]
DecoratorSpanMissing(u32, u32),
#[error("basic block cannot be created from an empty list of operations")]
EmptyBasicBlock,
#[error("decorator root of child with node id {0} is missing but is required for fingerprint computation")]
Expand Down
Loading
Loading