From 251ee75afba035244c87c93df66e4e288b61210c Mon Sep 17 00:00:00 2001 From: Innes Anderson-Morrison Date: Wed, 19 Feb 2025 09:13:57 +0000 Subject: [PATCH] fix: improving the performance of tree-sitter highlighting The initial implementation of tree-sitter based highlighting was knowingly simple and naive and worked well enough, but had a number of issues. The main one (other than incorrect highlighting which has already been addressed) was that performance took a massive hit once you started working with larger files. The implementation now is a big improvement but it's still not ideal and will need more work in the future. --- src/buffer/buffers.rs | 6 ------ src/buffer/mod.rs | 4 ++-- src/editor/mod.rs | 2 +- src/ts.rs | 46 ++++++++++++++++++++++++++++++++----------- src/ui/layout.rs | 18 +++++++++++++---- 5 files changed, 51 insertions(+), 25 deletions(-) diff --git a/src/buffer/buffers.rs b/src/buffer/buffers.rs index 160d941..f5babfd 100644 --- a/src/buffer/buffers.rs +++ b/src/buffer/buffers.rs @@ -142,12 +142,6 @@ impl Buffers { self.inner.iter().map(|(_, b)| b) } - pub fn update_ts_state(&mut self) { - for b in self.inner.iter_mut().map(|(_, b)| b) { - b.update_ts_state(); - } - } - pub fn close_buffer(&mut self, id: BufferId) { let removed = self .inner diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs index 3fca449..e9aa6a4 100644 --- a/src/buffer/mod.rs +++ b/src/buffer/mod.rs @@ -427,9 +427,9 @@ impl Buffer { .collect() } - pub fn update_ts_state(&mut self) { + pub fn update_ts_state(&mut self, from: usize, n_rows: usize) { if let Some(ts) = self.ts_state.as_mut() { - ts.update(&self.txt); + ts.update(&self.txt, from, n_rows); } } diff --git a/src/editor/mod.rs b/src/editor/mod.rs index 6134ac1..305cc1a 100644 --- a/src/editor/mod.rs +++ b/src/editor/mod.rs @@ -172,7 +172,7 @@ where pub(super) fn refresh_screen_w_minibuffer(&mut self, mb: Option>) { self.layout.clamp_scroll(); - self.layout.buffers_mut().update_ts_state(); + 
self.layout.update_visible_ts_state(); self.ui.refresh( &self.modes[0].name, &self.layout, diff --git a/src/ts.rs b/src/ts.rs index 0c28d2d..fe5ef86 100644 --- a/src/ts.rs +++ b/src/ts.rs @@ -90,8 +90,9 @@ impl TsState { match tree { Some(tree) => { let mut t = p.new_tokenizer(query)?; - t.update(tree.root_node(), gb); + t.update(tree.root_node(), gb, 0, usize::MAX - 1); info!("TS loaded for {}", p.lang_name); + Ok(Self { p, t, tree }) } None => Err("failed to parse file".to_owned()), @@ -127,14 +128,25 @@ impl TsState { ); if let Some(tree) = new_tree { + // TODO: it might be worth looking at self.tree.changed_ranges(&tree) to optimise being able + // to only tokenize regions we're missing self.tree = tree; - self.t.stale = true; + self.t.ranges.clear(); } } - pub fn update(&mut self, gb: &GapBuffer) { - if self.t.stale { - self.t.update(self.tree.root_node(), gb); + pub fn update(&mut self, gb: &GapBuffer, from: usize, n_rows: usize) { + let byte_from = gb.char_to_byte(gb.line_to_char(from)); + let byte_to = gb.char_to_byte(gb.line_to_char(min(from + n_rows + 1, gb.len_lines() - 1))); + let need_tokens = if self.t.ranges.is_empty() { + true + } else { + self.t.ranges.first().unwrap().r.from > byte_from + || self.t.ranges.last().unwrap().r.to < byte_to + }; + + if need_tokens { + self.t.update(self.tree.root_node(), gb, from, n_rows); } } @@ -314,16 +326,16 @@ impl Parser { q, cur, ranges: Vec::new(), - stale: true, }) } } pub struct Tokenizer { + // Tree-sitter state q: ts::Query, cur: ts::QueryCursor, + // Cache of computed syntax tokens for passing to LineIter ranges: Vec, - stale: bool, } impl fmt::Debug for Tokenizer { @@ -333,11 +345,19 @@ impl fmt::Debug for Tokenizer { } impl Tokenizer { - pub fn update(&mut self, root: ts::Node<'_>, gb: &GapBuffer) { + pub fn update(&mut self, root: ts::Node<'_>, gb: &GapBuffer, from: usize, n_rows: usize) { + self.cur.set_point_range( + ts::Point { + row: from, + column: 0, + }..ts::Point { + row: from + n_rows, + column: 0, + 
}, + ); + // This is a streaming-iterator not an iterator, hence the odd while-let that follows let mut it = self.cur.captures(&self.q, root, gb); - // FIXME: this is really inefficient. Ideally we should be able to apply a diff here - self.ranges.clear(); while let Some((m, idx)) = it.next() { let cap = m.captures[*idx]; @@ -360,7 +380,6 @@ impl Tokenizer { self.ranges.sort_unstable(); self.ranges.dedup(); - self.stale = false; } #[inline] @@ -1240,7 +1259,10 @@ mod tests { b.dot = Dot::Cur { c: Cur { idx: 9 } }; b.handle_action(Action::Delete, Source::Fsys); - b.ts_state.as_mut().unwrap().update(&b.txt); + b.ts_state + .as_mut() + .unwrap() + .update(&b.txt, 0, usize::MAX - 1); let ranges = b.ts_state.as_ref().unwrap().t.range_tokens(); assert_eq!(b.str_contents(), "fn main(){}\n"); diff --git a/src/ui/layout.rs b/src/ui/layout.rs index 7907c76..3b41e3a 100644 --- a/src/ui/layout.rs +++ b/src/ui/layout.rs @@ -50,10 +50,6 @@ impl Layout { &self.buffers } - pub(crate) fn buffers_mut(&mut self) -> &mut Buffers { - &mut self.buffers - } - pub(crate) fn ensure_file_is_open(&mut self, path: &str) { self.buffers.ensure_file_is_open(path) } @@ -664,6 +660,20 @@ impl Layout { bufid == current_bufid } + + pub(crate) fn update_visible_ts_state(&mut self) { + let it = self.cols.iter().flat_map(|(_, c)| { + c.wins + .iter() + .map(|(_, w)| (w.view.bufid, w.view.row_off, w.n_rows)) + }); + + for (bufid, from, n_rows) in it { + // SAFETY: we know this id is valid + let b = unsafe { self.buffers.with_id_mut(bufid).unwrap_unchecked() }; + b.update_ts_state(from, n_rows); + } + } } #[derive(Debug, Clone)]