BurntSushi · BurntSushi · Aug 10, 2023 · Aug 10, 2023 · Aug 10, 2023 · Aug 10, 2023
diff --git a/src/ahocorasick.rs b/src/ahocorasick.rs
@@ -1996,7 +1996,7 @@ impl AhoCorasick {
     ///     .ascii_case_insensitive(true)
     ///     .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"])
     ///     .unwrap();
-    /// assert_eq!(9_128, ac.memory_usage());
+    /// assert_eq!(9_144, ac.memory_usage());
     ///
     /// let ac = AhoCorasick::builder()
     ///     .kind(Some(AhoCorasickKind::ContiguousNFA))

diff --git a/src/dfa.rs b/src/dfa.rs
@@ -93,15 +93,9 @@ pub struct DFA {
     /// instead of the IDs being 0, 1, 2, 3, ..., they are 0*stride, 1*stride,
     /// 2*stride, 3*stride, ...
     trans: Vec<StateID>,
-    /// The matches for every match state in this DFA. This is indexed by order
-    /// of match states in the DFA. Namely, as constructed, match states are
-    /// always laid out sequentially and contiguously in memory. Thus, after
-    /// converting a match state ID to a match state index, the indices are
-    /// all adjacent.
-    ///
-    /// More concretely, when a search enters a match state with id 'sid', then
-    /// the matching patterns are at 'matches[(sid >> stride2) - 2]'. The '- 2'
-    /// is to offset the first two states of a DFA: the dead and fail states.
+    /// The matches for every match state in this DFA. This is first indexed by
+    /// state index (so that's `sid >> stride2`) and then by the order in which
+    /// the matches are meant to occur.
     matches: Vec<Vec<PatternID>>,
     /// The amount of heap memory used, in bytes, by the inner Vecs of
     /// 'matches'.
@@ -524,6 +518,18 @@ impl Builder {
             dfa.byte_classes.alphabet_len(),
             dfa.byte_classes.stride(),
         );
+        // The vectors can grow ~twice as big during construction because a
+        // Vec amortizes growth. But here, let's shrink things back down to
+        // what we actually need since we're never going to add more to it.
+        dfa.trans.shrink_to_fit();
+        dfa.pattern_lens.shrink_to_fit();
+        dfa.matches.shrink_to_fit();
+        // TODO: We might also want to shrink each Vec inside of `dfa.matches`,
+        // or even better, convert it to one contiguous allocation. But I think
+        // I went with nested allocs for good reason (can't remember), so this
+        // may be tricky to do. I decided not to shrink them here because it
+        // might require a fair bit of work to do. It's unclear whether it's
+        // worth it.
         Ok(dfa)
     }
 

diff --git a/src/nfa/contiguous.rs b/src/nfa/contiguous.rs
@@ -992,6 +992,11 @@ impl Builder {
             nfa.memory_usage(),
             nfa.byte_classes.alphabet_len(),
         );
+        // The vectors can grow ~twice as big during construction because a
+        // Vec amortizes growth. But here, let's shrink things back down to
+        // what we actually need since we're never going to add more to it.
+        nfa.repr.shrink_to_fit();
+        nfa.pattern_lens.shrink_to_fit();
         Ok(nfa)
     }
 

diff --git a/src/nfa/noncontiguous.rs b/src/nfa/noncontiguous.rs
@@ -342,6 +342,7 @@ unsafe impl Automaton for NFA {
     #[inline(always)]
     fn memory_usage(&self) -> usize {
         self.memory_usage
+            + self.pattern_lens.len() * SmallIndex::SIZE
             + self.prefilter.as_ref().map_or(0, |p| p.memory_usage())
     }
 
@@ -1218,7 +1219,8 @@ impl<'a> Compiler<'a> {
     fn calculate_memory_usage(&mut self) {
         use core::mem::size_of;
 
-        for state in self.nfa.states.iter() {
+        self.nfa.states.shrink_to_fit();
+        for state in self.nfa.states.iter_mut() {
             self.nfa.memory_usage += size_of::<State>() + state.memory_usage();
         }
     }