From 251ee75afba035244c87c93df66e4e288b61210c Mon Sep 17 00:00:00 2001
From: Innes Anderson-Morrison <innes.andersonmorrison@gmail.com>
Date: Wed, 19 Feb 2025 09:13:57 +0000
Subject: [PATCH] fix: improving the performance of tree-sitter highlighting

The initial implementation of tree-sitter based highlighting was
knowingly simple and naive and worked well enough, but had a number
of issues. The main one (other than incorrect highlighting, which has
already been addressed) was that performance took a massive hit once you
started working with larger files. The implementation now is a big
improvement but it's still not ideal and will need more work in the
future.
---
 src/buffer/buffers.rs |  6 ------
 src/buffer/mod.rs     |  4 ++--
 src/editor/mod.rs     |  2 +-
 src/ts.rs             | 46 ++++++++++++++++++++++++++++++++-----------
 src/ui/layout.rs      | 18 +++++++++++++----
 5 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/src/buffer/buffers.rs b/src/buffer/buffers.rs
index 160d941..f5babfd 100644
--- a/src/buffer/buffers.rs
+++ b/src/buffer/buffers.rs
@@ -142,12 +142,6 @@ impl Buffers {
         self.inner.iter().map(|(_, b)| b)
     }
 
-    pub fn update_ts_state(&mut self) {
-        for b in self.inner.iter_mut().map(|(_, b)| b) {
-            b.update_ts_state();
-        }
-    }
-
     pub fn close_buffer(&mut self, id: BufferId) {
         let removed = self
             .inner
diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs
index 3fca449..e9aa6a4 100644
--- a/src/buffer/mod.rs
+++ b/src/buffer/mod.rs
@@ -427,9 +427,9 @@ impl Buffer {
             .collect()
     }
 
-    pub fn update_ts_state(&mut self) {
+    pub fn update_ts_state(&mut self, from: usize, n_rows: usize) {
         if let Some(ts) = self.ts_state.as_mut() {
-            ts.update(&self.txt);
+            ts.update(&self.txt, from, n_rows);
         }
     }
 
diff --git a/src/editor/mod.rs b/src/editor/mod.rs
index 6134ac1..305cc1a 100644
--- a/src/editor/mod.rs
+++ b/src/editor/mod.rs
@@ -172,7 +172,7 @@ where
 
     pub(super) fn refresh_screen_w_minibuffer(&mut self, mb: Option<MiniBufferState<'_>>) {
         self.layout.clamp_scroll();
-        self.layout.buffers_mut().update_ts_state();
+        self.layout.update_visible_ts_state();
         self.ui.refresh(
             &self.modes[0].name,
             &self.layout,
diff --git a/src/ts.rs b/src/ts.rs
index 0c28d2d..fe5ef86 100644
--- a/src/ts.rs
+++ b/src/ts.rs
@@ -90,8 +90,9 @@ impl TsState {
         match tree {
             Some(tree) => {
                 let mut t = p.new_tokenizer(query)?;
-                t.update(tree.root_node(), gb);
+                t.update(tree.root_node(), gb, 0, usize::MAX - 1);
                 info!("TS loaded for {}", p.lang_name);
+
                 Ok(Self { p, t, tree })
             }
             None => Err("failed to parse file".to_owned()),
@@ -127,14 +128,25 @@ impl TsState {
         );
 
         if let Some(tree) = new_tree {
+            // TODO: it might be worth looking at self.tree.changed_ranges(&tree) to optimise this
+            // so that we only tokenize the regions we're missing
             self.tree = tree;
-            self.t.stale = true;
+            self.t.ranges.clear();
         }
     }
 
-    pub fn update(&mut self, gb: &GapBuffer) {
-        if self.t.stale {
-            self.t.update(self.tree.root_node(), gb);
+    pub fn update(&mut self, gb: &GapBuffer, from: usize, n_rows: usize) {
+        let byte_from = gb.char_to_byte(gb.line_to_char(from));
+        let byte_to = gb.char_to_byte(gb.line_to_char(min(from + n_rows + 1, gb.len_lines() - 1)));
+        let need_tokens = if self.t.ranges.is_empty() {
+            true
+        } else {
+            self.t.ranges.first().unwrap().r.from > byte_from
+                || self.t.ranges.last().unwrap().r.to < byte_to
+        };
+
+        if need_tokens {
+            self.t.update(self.tree.root_node(), gb, from, n_rows);
         }
     }
 
@@ -314,16 +326,16 @@ impl Parser {
             q,
             cur,
             ranges: Vec::new(),
-            stale: true,
         })
     }
 }
 
 pub struct Tokenizer {
+    // Tree-sitter state
     q: ts::Query,
     cur: ts::QueryCursor,
+    // Cache of computed syntax tokens for passing to LineIter
     ranges: Vec<SyntaxRange>,
-    stale: bool,
 }
 
 impl fmt::Debug for Tokenizer {
@@ -333,11 +345,19 @@ impl fmt::Debug for Tokenizer {
 }
 
 impl Tokenizer {
-    pub fn update(&mut self, root: ts::Node<'_>, gb: &GapBuffer) {
+    pub fn update(&mut self, root: ts::Node<'_>, gb: &GapBuffer, from: usize, n_rows: usize) {
+        self.cur.set_point_range(
+            ts::Point {
+                row: from,
+                column: 0,
+            }..ts::Point {
+                row: from + n_rows,
+                column: 0,
+            },
+        );
+
         // This is a streaming-iterator not an interator, hence the odd while-let that follows
         let mut it = self.cur.captures(&self.q, root, gb);
-        // FIXME: this is really inefficient. Ideally we should be able to apply a diff here
-        self.ranges.clear();
 
         while let Some((m, idx)) = it.next() {
             let cap = m.captures[*idx];
@@ -360,7 +380,6 @@ impl Tokenizer {
 
         self.ranges.sort_unstable();
         self.ranges.dedup();
-        self.stale = false;
     }
 
     #[inline]
@@ -1240,7 +1259,10 @@ mod tests {
 
         b.dot = Dot::Cur { c: Cur { idx: 9 } };
         b.handle_action(Action::Delete, Source::Fsys);
-        b.ts_state.as_mut().unwrap().update(&b.txt);
+        b.ts_state
+            .as_mut()
+            .unwrap()
+            .update(&b.txt, 0, usize::MAX - 1);
         let ranges = b.ts_state.as_ref().unwrap().t.range_tokens();
 
         assert_eq!(b.str_contents(), "fn main(){}\n");
diff --git a/src/ui/layout.rs b/src/ui/layout.rs
index 7907c76..3b41e3a 100644
--- a/src/ui/layout.rs
+++ b/src/ui/layout.rs
@@ -50,10 +50,6 @@ impl Layout {
         &self.buffers
     }
 
-    pub(crate) fn buffers_mut(&mut self) -> &mut Buffers {
-        &mut self.buffers
-    }
-
     pub(crate) fn ensure_file_is_open(&mut self, path: &str) {
         self.buffers.ensure_file_is_open(path)
     }
@@ -664,6 +660,20 @@ impl Layout {
 
         bufid == current_bufid
     }
+
+    /// Update tree-sitter tokens for each window's visible rows, skipping unknown buffer ids.
+    pub(crate) fn update_visible_ts_state(&mut self) {
+        let it = self.cols.iter().flat_map(|(_, c)| {
+            c.wins
+                .iter()
+                .map(|(_, w)| (w.view.bufid, w.view.row_off, w.n_rows))
+        });
+        for (bufid, from, n_rows) in it {
+            if let Some(b) = self.buffers.with_id_mut(bufid) {
+                b.update_ts_state(from, n_rows);
+            }
+        }
+    }
 }
 
 #[derive(Debug, Clone)]