Skip to content

Commit

Permalink
create turbo-static for compile time graph analysis (vercel/turborepo…
Browse files Browse the repository at this point in the history
…#8037)

### Description

<!--
  ✍️ Write a short summary of your work.
  If necessary, include relevant screenshots.
-->

### Testing Instructions

<!--
  Give a quick description of steps to test your changes.
-->


Closes TURBO-2877
  • Loading branch information
arlyon authored Jun 4, 2024
1 parent e3eb595 commit 51285ef
Show file tree
Hide file tree
Showing 8 changed files with 1,059 additions and 0 deletions.
2 changes: 2 additions & 0 deletions crates/turbo-static/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
call_resolver.bincode
graph.cypherl
25 changes: 25 additions & 0 deletions crates/turbo-static/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[package]
name = "turbo-static"
version = "0.1.0"
edition = "2021"
license = "MPL-2.0"

[dependencies]
bincode = "1.3.3"
clap = { workspace = true, features = ["derive"] }
ctrlc = "3.4.4"
ignore = "0.4.22"
itertools.workspace = true
lsp-server = "0.7.6"
lsp-types = "0.95.1"
proc-macro2 = { workspace = true, features = ["span-locations"] }
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
serde_path_to_error = "0.1.16"
syn = { version = "2", features = ["parsing", "full", "visit", "extra-traits"] }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing.workspace = true
walkdir = "2.5.0"

[lints]
workspace = true
33 changes: 33 additions & 0 deletions crates/turbo-static/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Turbo Static

Leverages rust-analyzer to build a complete view into the static dependency
graph for your turbo tasks project.

## How it works

- find all occurrences of `#[turbo_tasks::function]` across all the packages you
want to query
- for each of the tasks we find, query rust analyzer to see which tasks call
them
- apply some very basic control flow analysis to determine whether the call is
made 1 time, 0/1 times, or 0+ times, corresponding to direct calls,
conditionals, or for loops
- produce a cypher file that can be loaded into a graph database to query the
static dependency graph

## Usage

This uses an in-memory database, persisted to disk, to cache rust-analyzer queries.
To reset the cache, pass the `--reindex` flag. Running will produce a
`graph.cypherl` file which can be loaded into any cypher-compatible database.

```bash
# pass in the root folders you want to analyze. the system will recursively
# parse all rust code looking for turbo tasks functions
cargo run --release -- ../../../turbo ../../../next.js
# now you can load graph.cypherl into your database of choice, such as neo4j
docker run \
--publish=7474:7474 --publish=7687:7687 \
--volume=$HOME/neo4j/data:/data \
neo4j
```
165 changes: 165 additions & 0 deletions crates/turbo-static/src/call_resolver.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
use std::{collections::HashMap, fs::OpenOptions, path::PathBuf};

use crate::{lsp_client::RAClient, Identifier, IdentifierReference};

/// A wrapper around a rust-analyzer client that can resolve call references.
/// This is quite expensive so we cache the results in an in-memory map that is
/// loaded from a bincode-encoded file on construction and written back to it
/// when the resolver is dropped.
pub struct CallResolver<'a> {
/// rust-analyzer client used to issue the LSP requests
client: &'a mut RAClient,
/// cached resolutions: identifier -> the references resolved for it
state: HashMap<Identifier, Vec<IdentifierReference>>,
/// location of the bincode cache file, if one was supplied
path: Option<PathBuf>,
}

/// On drop, serialize the state to disk
impl<'a> Drop for CallResolver<'a> {
fn drop(&mut self) {
let file = OpenOptions::new()
.create(true)
.truncate(false)
.write(true)
.open(self.path.as_ref().unwrap())
.unwrap();
bincode::serialize_into(file, &self.state).unwrap();
}
}

impl<'a> CallResolver<'a> {
/// Creates a resolver backed by `client`, pre-populated from the
/// bincode-encoded cache at `path` when one is given.
///
/// The cache file is created if it does not exist. Any failure to open or
/// decode it is logged and the resolver starts with an empty cache.
pub fn new(client: &'a mut RAClient, path: Option<PathBuf>) -> Self {
    // load bincode-encoded HashMap from path
    let state = path
        .as_ref()
        .and_then(|path| {
            let file = match OpenOptions::new()
                .create(true)
                .truncate(false)
                .read(true)
                .write(true)
                .open(path)
            {
                Ok(file) => file,
                Err(e) => {
                    tracing::warn!("failed to open cache file {}: {}", path.display(), e);
                    return None;
                }
            };

            // A file we just created (or an empty one) is not a corrupt
            // cache; skip decoding so we don't emit a misleading warning.
            if file.metadata().map(|meta| meta.len() == 0).unwrap_or(false) {
                return None;
            }

            let reader = std::io::BufReader::new(file);
            match bincode::deserialize_from::<_, HashMap<Identifier, Vec<IdentifierReference>>>(
                reader,
            ) {
                Ok(state) => Some(state),
                Err(e) => {
                    tracing::warn!("failed to load existing cache, restarting: {}", e);
                    None
                }
            }
        })
        .unwrap_or_default();

    Self {
        client,
        state,
        path,
    }
}

pub fn cached_count(&self) -> usize {
self.state.len()
}

/// Empties the in-memory cache and deletes the cache file if it exists,
/// returning the resolver for further use.
///
/// # Panics
///
/// Panics if the cache file exists but cannot be removed.
pub fn cleared(mut self) -> Self {
    self.state.clear();
    if let Some(path) = self.path.as_ref() {
        match std::fs::remove_file(path) {
            Ok(()) => {}
            // "delete if exists": an already-missing file is the desired end state
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => panic!("failed to remove cache file {}: {}", path.display(), e),
        }
    }
    self
}

pub fn resolve(&mut self, ident: &Identifier) -> Vec<IdentifierReference> {
if let Some(data) = self.state.get(ident) {
tracing::info!("skipping {}", ident);
return data.to_owned();
};

tracing::info!("checking {}", ident);

let mut count = 0;
let _response = loop {
let Some(response) = self.client.request(lsp_server::Request {
id: 1.into(),
method: "textDocument/prepareCallHierarchy".to_string(),
params: serde_json::to_value(&lsp_types::CallHierarchyPrepareParams {
text_document_position_params: lsp_types::TextDocumentPositionParams {
position: ident.range.start,
text_document: lsp_types::TextDocumentIdentifier {
uri: lsp_types::Url::from_file_path(&ident.path).unwrap(),
},
},
work_done_progress_params: lsp_types::WorkDoneProgressParams {
work_done_token: Some(lsp_types::ProgressToken::String(
"prepare".to_string(),
)),
},
})
.unwrap(),
}) else {
tracing::warn!("RA server shut down");
return vec![];
};

if let Some(Some(value)) = response.result.as_ref().map(|r| r.as_array()) {
if !value.is_empty() {
break value.to_owned();
}
count += 1;
}

// textDocument/prepareCallHierarchy will sometimes return an empty array so try
// at most 5 times
if count > 5 {
tracing::warn!("discovered isolated task {}", ident);
break vec![];
}

std::thread::sleep(std::time::Duration::from_secs(1));
};

// callHierarchy/incomingCalls
let Some(response) = self.client.request(lsp_server::Request {
id: 1.into(),
method: "callHierarchy/incomingCalls".to_string(),
params: serde_json::to_value(lsp_types::CallHierarchyIncomingCallsParams {
partial_result_params: lsp_types::PartialResultParams::default(),
item: lsp_types::CallHierarchyItem {
name: ident.name.to_owned(),
kind: lsp_types::SymbolKind::FUNCTION,
data: None,
tags: None,
detail: None,
uri: lsp_types::Url::from_file_path(&ident.path).unwrap(),
range: ident.range,
selection_range: ident.range,
},
work_done_progress_params: lsp_types::WorkDoneProgressParams {
work_done_token: Some(lsp_types::ProgressToken::String("prepare".to_string())),
},
})
.unwrap(),
}) else {
tracing::warn!("RA server shut down");
return vec![];
};

let links = if let Some(e) = response.error {
tracing::warn!("unable to resolve {}: {:?}", ident, e);
vec![]
} else {
let response: Result<Vec<lsp_types::CallHierarchyIncomingCall>, _> =
serde_path_to_error::deserialize(response.result.unwrap());

response
.unwrap()
.into_iter()
.map(|i| i.into())
.collect::<Vec<IdentifierReference>>()
};

tracing::debug!("links: {:?}", links);

self.state.insert(ident.to_owned(), links.clone());
links
}
}
95 changes: 95 additions & 0 deletions crates/turbo-static/src/identifier.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
use std::{fs, path::PathBuf};

use lsp_types::{CallHierarchyIncomingCall, CallHierarchyItem, Range};

/// A task that references another, with the range of the reference
///
/// Built from a `CallHierarchyIncomingCall`: `identifier` is the calling item
/// and `references` are the ranges reported for the calls.
#[derive(Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize, Clone, Debug)]
pub struct IdentifierReference {
/// the task that makes the reference
pub identifier: Identifier,
pub references: Vec<Range>, // the places where this identifier is used
}

/// identifies a task by its file, and range in the file
///
/// `range` is an LSP range; when built from a `syn::Ident` the 1-based span
/// line is converted by subtracting one.
#[derive(Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize, Clone)]
pub struct Identifier {
/// path of the file containing the identifier, stored as a string
pub path: String,
// technically you can derive this from the name and range but it's easier to just store it
pub name: String,
// post_transform_name: Option<String>,
/// range of the identifier's name within the file
pub range: lsp_types::Range,
}

impl Identifier {
/// check the span matches and the text matches
///
/// `same_location` is used to check if the location of the identifier is
/// the same as the other
pub fn equals_ident(&self, other: &syn::Ident, match_location: bool) -> bool {
*other == self.name
&& (!match_location
|| (self.range.start.line == other.span().start().line as u32
&& self.range.start.character == other.span().start().column as u32))
}

/// We cannot use `item.name` here in all cases as, during testing, the name
/// does not always align with the exact text in the range.
fn get_name(item: &CallHierarchyItem) -> String {
// open file, find range inside, extract text
let file = fs::read_to_string(item.uri.path()).unwrap();
let start = item.selection_range.start;
let end = item.selection_range.end;
file.lines()
.nth(start.line as usize)
.unwrap()
.chars()
.skip(start.character as usize)
.take(end.character as usize - start.character as usize)
.collect()
}
}

impl From<(PathBuf, syn::Ident)> for Identifier {
fn from((path, ident): (PathBuf, syn::Ident)) -> Self {
Self {
path: path.display().to_string(),
name: ident.to_string(),
// post_transform_name: None,
range: Range {
start: lsp_types::Position {
line: ident.span().start().line as u32 - 1,
character: ident.span().start().column as u32,
},
end: lsp_types::Position {
line: ident.span().end().line as u32 - 1,
character: ident.span().end().column as u32,
},
},
}
}
}

/// Converts an incoming call into a reference: the calling item becomes the
/// identifier and the call's `from_ranges` become the reference ranges.
impl From<CallHierarchyIncomingCall> for IdentifierReference {
    fn from(item: CallHierarchyIncomingCall) -> Self {
        let caller = item.from;

        // `uri.path()` is percent-encoded; store the decoded filesystem path
        // so it matches paths produced from a `PathBuf`. Fall back to the raw
        // URL path if the URL is not a valid file URL.
        let path = caller
            .uri
            .to_file_path()
            .map(|file| file.display().to_string())
            .unwrap_or_else(|_| caller.uri.path().to_owned());

        Self {
            identifier: Identifier {
                name: Identifier::get_name(&caller),
                // post_transform_name: Some(item.from.name),
                path,
                range: caller.selection_range,
            },
            references: item.from_ranges,
        }
    }
}

/// Debug output is deliberately the same as the `Display` output.
impl std::fmt::Debug for Identifier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        <Self as std::fmt::Display>::fmt(self, f)
    }
}

/// Formats the identifier as `path:line#name`, where `line` is the start line
/// of the stored range.
impl std::fmt::Display for Identifier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { path, name, range } = self;
        let line = range.start.line;
        write!(f, "{}:{}#{}", path, line, name)
    }
}
Loading

0 comments on commit 51285ef

Please sign in to comment.