From 1d7af290223532e54461a3ae2d0e4e5b5d47d8f0 Mon Sep 17 00:00:00 2001 From: Johannes Hostert Date: Sat, 26 Oct 2024 16:35:26 +0200 Subject: [PATCH] Add GC heuristic --- src/bin/miri.rs | 10 ++++++++-- src/borrow_tracker/tree_borrows/mod.rs | 14 ++++++++++++++ src/borrow_tracker/tree_borrows/tree.rs | 21 ++++++++++++++++++++- src/eval.rs | 18 +++++++++++++++--- src/lib.rs | 4 ++-- src/machine.rs | 18 ++++++++++++------ src/provenance_gc.rs | 15 +++++++++++++++ 7 files changed, 86 insertions(+), 14 deletions(-) diff --git a/src/bin/miri.rs b/src/bin/miri.rs index 717229ba8b..9285ac21b1 100644 --- a/src/bin/miri.rs +++ b/src/bin/miri.rs @@ -28,7 +28,10 @@ use std::num::NonZero; use std::path::PathBuf; use std::str::FromStr; -use miri::{BacktraceStyle, BorrowTrackerMethod, ProvenanceMode, RetagFields, ValidationMode}; +use miri::{ + BacktraceStyle, BorrowTrackerMethod, ProvenanceGcSettings, ProvenanceMode, RetagFields, + ValidationMode, +}; use rustc_data_structures::sync::Lrc; use rustc_driver::Compilation; use rustc_hir::def_id::LOCAL_CRATE; @@ -607,7 +610,10 @@ fn main() { let interval = param.parse::().unwrap_or_else(|err| { show_error!("-Zmiri-provenance-gc requires a `u32`: {}", err) }); - miri_config.gc_interval = interval; + miri_config.gc_settings = match interval { + 0 => ProvenanceGcSettings::Disabled, + interval => ProvenanceGcSettings::Regularly { interval }, + }; } else if let Some(param) = arg.strip_prefix("-Zmiri-measureme=") { miri_config.measureme_out = Some(param.to_string()); } else if let Some(param) = arg.strip_prefix("-Zmiri-backtrace=") { diff --git a/src/borrow_tracker/tree_borrows/mod.rs b/src/borrow_tracker/tree_borrows/mod.rs index acfb76030f..ef91d19f9c 100644 --- a/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/borrow_tracker/tree_borrows/mod.rs @@ -60,6 +60,9 @@ impl<'tcx> Tree { }; let global = machine.borrow_tracker.as_ref().unwrap(); let span = machine.current_span(); + if self.tree_grew_significantly_since_last_gc() { + machine.request_gc(); + } self.perform_access( tag, Some((range, access_kind, diagnostics::AccessCause::Explicit(access_kind))), @@ -85,6 +88,9 @@ impl<'tcx> Tree { }; let global = machine.borrow_tracker.as_ref().unwrap(); let span = machine.current_span(); + if self.tree_grew_significantly_since_last_gc() { + machine.request_gc(); + } self.dealloc(tag, alloc_range(Size::ZERO, size), global, alloc_id, span) } @@ -106,6 +112,9 @@ impl<'tcx> Tree { alloc_id: AllocId, // diagnostics ) -> InterpResult<'tcx> { let span = machine.current_span(); + if self.tree_grew_significantly_since_last_gc() { + machine.request_gc(); + } // `None` makes it the magic on-protector-end operation self.perform_access(tag, None, global, alloc_id, span) } @@ -297,6 +306,11 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { )?; // Record the parent-child pair in the tree. tree_borrows.new_child(orig_tag, new_tag, new_perm.initial_state, range, span)?; + + // Also request a GC if things are getting too large. + if tree_borrows.tree_grew_significantly_since_last_gc() { + this.machine.request_gc(); + } drop(tree_borrows); // Also inform the data race model (but only if any bytes are actually affected). diff --git a/src/borrow_tracker/tree_borrows/tree.rs b/src/borrow_tracker/tree_borrows/tree.rs index a551b017df..faa9d2b8f2 100644 --- a/src/borrow_tracker/tree_borrows/tree.rs +++ b/src/borrow_tracker/tree_borrows/tree.rs @@ -259,6 +259,8 @@ pub struct Tree { pub(super) rperms: RangeMap>, /// The index of the root node. pub(super) root: UniIndex, + /// The number of nodes when we last ran the Garbage Collector + nodes_at_last_gc: usize, } /// A node in the borrow tree. Each node is uniquely identified by a tag via @@ -604,7 +606,7 @@ impl Tree { perms.insert(root_idx, LocationState::new_init(Permission::new_active())); RangeMap::new(size, perms) }; - Self { root: root_idx, nodes, rperms, tag_mapping } + Self { root: root_idx, nodes, rperms, tag_mapping, nodes_at_last_gc: 1 } } } @@ -833,6 +835,22 @@ impl<'tcx> Tree { /// Integration with the BorTag garbage collector impl Tree { + /// The number of nodes in this tree + pub fn nodes_count(&self) -> usize { + self.tag_mapping.len() + } + + /// Whether the tree grew significantly since the last provenance GC run + pub fn tree_grew_significantly_since_last_gc(&self) -> bool { + let current = self.nodes_count(); + // do not trigger the GC for small nodes + let last = self.nodes_at_last_gc.max(50); + // trigger the GC if the tree doubled since the last run, + // or otherwise got "significantly" larger. + // Note that for trees < 100 nodes, nothing happens. + current > 2 * last || current > last + 1500 + } + pub fn remove_unreachable_tags(&mut self, live_tags: &FxHashSet) { self.remove_useless_children(self.root, live_tags); // Right after the GC runs is a good moment to check if we can @@ -840,6 +858,7 @@ impl Tree { // tags (this does not necessarily mean that they have identical internal representations, // see the `PartialEq` impl for `UniValMap`) self.rperms.merge_adjacent_thorough(); + self.nodes_at_last_gc = self.nodes_count(); } /// Checks if a node is useless and should be GC'ed. diff --git a/src/eval.rs b/src/eval.rs index 9f93f15166..e6de420723 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -83,6 +83,18 @@ pub enum ValidationMode { Deep, } +/// Settings for the provenance GC +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ProvenanceGcSettings { + /// The GC is disabled + Disabled, + /// The GC is to run regularly, every `inverval` basic blocks + Regularly { interval: u32 }, + /// The GC runs as needed, determined by heuristics. + /// See `should_run_provenance_gc` + Heuristically, +} + /// Configuration needed to spawn a Miri instance. #[derive(Clone)] pub struct MiriConfig { @@ -145,8 +157,8 @@ pub struct MiriConfig { /// The location of a shared object file to load when calling external functions /// FIXME! consider allowing users to specify paths to multiple files, or to a directory pub native_lib: Option, - /// Run a garbage collector for BorTags every N basic blocks. - pub gc_interval: u32, + /// Settings for the provenance GC (e.g. how often it runs) + pub gc_settings: ProvenanceGcSettings, /// The number of CPUs to be reported by miri. pub num_cpus: u32, /// Requires Miri to emulate pages of a certain size @@ -188,7 +200,7 @@ impl Default for MiriConfig { report_progress: None, retag_fields: RetagFields::Yes, native_lib: None, - gc_interval: 10_000, + gc_settings: ProvenanceGcSettings::Heuristically, num_cpus: 1, page_size: None, collect_leak_backtraces: true, diff --git a/src/lib.rs b/src/lib.rs index 938d1ca319..28c0e124c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -131,8 +131,8 @@ pub use crate::diagnostics::{ EvalContextExt as _, NonHaltingDiagnostic, TerminationInfo, report_error, }; pub use crate::eval::{ - AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith, ValidationMode, - create_ecx, eval_entry, + AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, ProvenanceGcSettings, RejectOpWith, + ValidationMode, create_ecx, eval_entry, }; pub use crate::helpers::{AccessKind, EvalContextExt as _}; pub use crate::intrinsics::EvalContextExt as _; diff --git a/src/machine.rs b/src/machine.rs index 60d096b92f..b4dc03461c 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -3,7 +3,7 @@ use std::any::Any; use std::borrow::Cow; -use std::cell::RefCell; +use std::cell::{Cell, RefCell}; use std::collections::hash_map::Entry; use std::path::Path; use std::{fmt, process}; @@ -556,9 +556,10 @@ pub struct MiriMachine<'tcx> { pub native_lib: Option, /// Run a garbage collector for BorTags every N basic blocks. - pub(crate) gc_interval: u32, + pub(crate) gc_settings: ProvenanceGcSettings, /// The number of blocks that passed since the last BorTag GC pass. pub(crate) since_gc: u32, + pub(crate) gc_requested: Cell, /// The number of CPUs to be reported by miri. pub(crate) num_cpus: u32, @@ -716,8 +717,9 @@ impl<'tcx> MiriMachine<'tcx> { native_lib: config.native_lib.as_ref().map(|_| { panic!("calling functions from native libraries via FFI is only supported on Unix") }), - gc_interval: config.gc_interval, + gc_settings: config.gc_settings, since_gc: 0, + gc_requested: Cell::new(false), num_cpus: config.num_cpus, page_size, stack_addr, @@ -784,6 +786,10 @@ impl<'tcx> MiriMachine<'tcx> { .and_then(|(_allocated, deallocated)| *deallocated) .map(Span::data) } + + pub(crate) fn request_gc(&self) { + self.gc_requested.set(true) + } } impl VisitProvenance for MiriMachine<'_> { @@ -828,8 +834,9 @@ impl VisitProvenance for MiriMachine<'_> { report_progress: _, basic_block_count: _, native_lib: _, - gc_interval: _, + gc_settings: _, since_gc: _, + gc_requested: _, num_cpus: _, page_size: _, stack_addr: _, @@ -1491,8 +1498,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { // stacks. // When debug assertions are enabled, run the GC as often as possible so that any cases // where it mistakenly removes an important tag become visible. - if ecx.machine.gc_interval > 0 && ecx.machine.since_gc >= ecx.machine.gc_interval { - ecx.machine.since_gc = 0; + if ecx.should_run_provenance_gc() { ecx.run_provenance_gc(); } diff --git a/src/provenance_gc.rs b/src/provenance_gc.rs index c5a35bc14f..1d1b62d0b7 100644 --- a/src/provenance_gc.rs +++ b/src/provenance_gc.rs @@ -208,8 +208,23 @@ fn remove_unreachable_allocs<'tcx>(this: &mut MiriInterpCx<'tcx>, allocs: FxHash impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> { + fn should_run_provenance_gc(&self) -> bool { + let this = self.eval_context_ref(); + match this.machine.gc_settings { + ProvenanceGcSettings::Disabled => false, + ProvenanceGcSettings::Regularly { interval } => this.machine.since_gc >= interval, + ProvenanceGcSettings::Heuristically => + // no GC run if the last one was recently + this.machine.since_gc >= 75 + // run GC if requested, or every 10000 blocks otherwise + && (this.machine.since_gc >= 10_000 || this.machine.gc_requested.get()), + } + } + fn run_provenance_gc(&mut self) { let this = self.eval_context_mut(); + this.machine.since_gc = 0; + this.machine.gc_requested.set(false); // We collect all tags and AllocId from every part of the interpreter. let mut tags = FxHashSet::default();