-
Notifications
You must be signed in to change notification settings - Fork 26.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
create turbo-static for compile time graph analysis (vercel/turborepo…
…#8037) ### Description <!-- ✍️ Write a short summary of your work. If necessary, include relevant screenshots. --> ### Testing Instructions <!-- Give a quick description of steps to test your changes. --> Closes TURBO-2877
- Loading branch information
Showing
8 changed files
with
1,059 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
call_resolver.bincode | ||
graph.cypherl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
[package] | ||
name = "turbo-static" | ||
version = "0.1.0" | ||
edition = "2021" | ||
license = "MPL-2.0" | ||
|
||
[dependencies] | ||
bincode = "1.3.3" | ||
clap = { workspace = true, features = ["derive"] } | ||
ctrlc = "3.4.4" | ||
ignore = "0.4.22" | ||
itertools.workspace = true | ||
lsp-server = "0.7.6" | ||
lsp-types = "0.95.1" | ||
proc-macro2 = { workspace = true, features = ["span-locations"] } | ||
serde = { workspace = true, features = ["derive"] } | ||
serde_json.workspace = true | ||
serde_path_to_error = "0.1.16" | ||
syn = { version = "2", features = ["parsing", "full", "visit", "extra-traits"] } | ||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } | ||
tracing.workspace = true | ||
walkdir = "2.5.0" | ||
|
||
[lints] | ||
workspace = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Turbo Static | ||
|
||
Leverages rust-analyzer to build a complete view into the static dependency | ||
graph for your turbo tasks project. | ||
|
||
## How it works | ||
|
||
- find all occurrences of #[turbo_tasks::function] across all the packages you | ||
want to query | ||
- for each of the tasks we find, query rust analyzer to see which tasks call | ||
them | ||
- apply some very basic control flow analysis to determine whether the call is | ||
made 1 time, 0/1 times, or 0+ times, corresponding to direct calls, | ||
conditionals, or for loops | ||
- produce a cypher file that can be loaded into a graph database to query the | ||
static dependency graph | ||
|
||
## Usage | ||
|
||
This uses an in-memory database, persisted to disk, to cache rust-analyzer queries. | ||
To reset the cache, pass the `--reindex` flag. Running will produce a | ||
`graph.cypherl` file which can be loaded into any cypher-compatible database. | ||
|
||
```bash | ||
# pass in the root folders you want to analyze. the system will recursively | ||
# parse all rust code looking for turbo tasks functions | ||
cargo run --release -- ../../../turbo ../../../next.js | ||
# now you can load graph.cypherl into your database of choice, such as neo4j | ||
docker run \ | ||
--publish=7474:7474 --publish=7687:7687 \ | ||
--volume=$HOME/neo4j/data:/data \ | ||
neo4j | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
use std::{collections::HashMap, fs::OpenOptions, path::PathBuf}; | ||
|
||
use crate::{lsp_client::RAClient, Identifier, IdentifierReference}; | ||
|
||
/// A wrapper around a rust-analyzer client that can resolve call references. | ||
/// This is quite expensive so we cache the results in an on-disk key-value | ||
/// store. | ||
pub struct CallResolver<'a> { | ||
client: &'a mut RAClient, | ||
state: HashMap<Identifier, Vec<IdentifierReference>>, | ||
path: Option<PathBuf>, | ||
} | ||
|
||
/// On drop, serialize the state to disk | ||
impl<'a> Drop for CallResolver<'a> { | ||
fn drop(&mut self) { | ||
let file = OpenOptions::new() | ||
.create(true) | ||
.truncate(false) | ||
.write(true) | ||
.open(self.path.as_ref().unwrap()) | ||
.unwrap(); | ||
bincode::serialize_into(file, &self.state).unwrap(); | ||
} | ||
} | ||
|
||
impl<'a> CallResolver<'a> { | ||
pub fn new(client: &'a mut RAClient, path: Option<PathBuf>) -> Self { | ||
// load bincode-encoded HashMap from path | ||
let state = path | ||
.as_ref() | ||
.and_then(|path| { | ||
let file = OpenOptions::new() | ||
.create(true) | ||
.truncate(false) | ||
.read(true) | ||
.write(true) | ||
.open(path) | ||
.unwrap(); | ||
let reader = std::io::BufReader::new(file); | ||
bincode::deserialize_from::<_, HashMap<Identifier, Vec<IdentifierReference>>>( | ||
reader, | ||
) | ||
.map_err(|e| { | ||
tracing::warn!("failed to load existing cache, restarting"); | ||
e | ||
}) | ||
.ok() | ||
}) | ||
.unwrap_or_default(); | ||
Self { | ||
client, | ||
state, | ||
path, | ||
} | ||
} | ||
|
||
pub fn cached_count(&self) -> usize { | ||
self.state.len() | ||
} | ||
|
||
pub fn cleared(mut self) -> Self { | ||
// delete file if exists and clear state | ||
self.state = Default::default(); | ||
if let Some(path) = self.path.as_ref() { | ||
std::fs::remove_file(path).unwrap(); | ||
} | ||
self | ||
} | ||
|
||
pub fn resolve(&mut self, ident: &Identifier) -> Vec<IdentifierReference> { | ||
if let Some(data) = self.state.get(ident) { | ||
tracing::info!("skipping {}", ident); | ||
return data.to_owned(); | ||
}; | ||
|
||
tracing::info!("checking {}", ident); | ||
|
||
let mut count = 0; | ||
let _response = loop { | ||
let Some(response) = self.client.request(lsp_server::Request { | ||
id: 1.into(), | ||
method: "textDocument/prepareCallHierarchy".to_string(), | ||
params: serde_json::to_value(&lsp_types::CallHierarchyPrepareParams { | ||
text_document_position_params: lsp_types::TextDocumentPositionParams { | ||
position: ident.range.start, | ||
text_document: lsp_types::TextDocumentIdentifier { | ||
uri: lsp_types::Url::from_file_path(&ident.path).unwrap(), | ||
}, | ||
}, | ||
work_done_progress_params: lsp_types::WorkDoneProgressParams { | ||
work_done_token: Some(lsp_types::ProgressToken::String( | ||
"prepare".to_string(), | ||
)), | ||
}, | ||
}) | ||
.unwrap(), | ||
}) else { | ||
tracing::warn!("RA server shut down"); | ||
return vec![]; | ||
}; | ||
|
||
if let Some(Some(value)) = response.result.as_ref().map(|r| r.as_array()) { | ||
if !value.is_empty() { | ||
break value.to_owned(); | ||
} | ||
count += 1; | ||
} | ||
|
||
// textDocument/prepareCallHierarchy will sometimes return an empty array so try | ||
// at most 5 times | ||
if count > 5 { | ||
tracing::warn!("discovered isolated task {}", ident); | ||
break vec![]; | ||
} | ||
|
||
std::thread::sleep(std::time::Duration::from_secs(1)); | ||
}; | ||
|
||
// callHierarchy/incomingCalls | ||
let Some(response) = self.client.request(lsp_server::Request { | ||
id: 1.into(), | ||
method: "callHierarchy/incomingCalls".to_string(), | ||
params: serde_json::to_value(lsp_types::CallHierarchyIncomingCallsParams { | ||
partial_result_params: lsp_types::PartialResultParams::default(), | ||
item: lsp_types::CallHierarchyItem { | ||
name: ident.name.to_owned(), | ||
kind: lsp_types::SymbolKind::FUNCTION, | ||
data: None, | ||
tags: None, | ||
detail: None, | ||
uri: lsp_types::Url::from_file_path(&ident.path).unwrap(), | ||
range: ident.range, | ||
selection_range: ident.range, | ||
}, | ||
work_done_progress_params: lsp_types::WorkDoneProgressParams { | ||
work_done_token: Some(lsp_types::ProgressToken::String("prepare".to_string())), | ||
}, | ||
}) | ||
.unwrap(), | ||
}) else { | ||
tracing::warn!("RA server shut down"); | ||
return vec![]; | ||
}; | ||
|
||
let links = if let Some(e) = response.error { | ||
tracing::warn!("unable to resolve {}: {:?}", ident, e); | ||
vec![] | ||
} else { | ||
let response: Result<Vec<lsp_types::CallHierarchyIncomingCall>, _> = | ||
serde_path_to_error::deserialize(response.result.unwrap()); | ||
|
||
response | ||
.unwrap() | ||
.into_iter() | ||
.map(|i| i.into()) | ||
.collect::<Vec<IdentifierReference>>() | ||
}; | ||
|
||
tracing::debug!("links: {:?}", links); | ||
|
||
self.state.insert(ident.to_owned(), links.clone()); | ||
links | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
use std::{fs, path::PathBuf}; | ||
|
||
use lsp_types::{CallHierarchyIncomingCall, CallHierarchyItem, Range}; | ||
|
||
/// A task that references another, with the range of the reference | ||
#[derive(Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize, Clone, Debug)] | ||
pub struct IdentifierReference { | ||
pub identifier: Identifier, | ||
pub references: Vec<Range>, // the places where this identifier is used | ||
} | ||
|
||
/// identifies a task by its file, and range in the file | ||
#[derive(Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize, Clone)] | ||
pub struct Identifier { | ||
pub path: String, | ||
// technically you can derive this from the name and range but it's easier to just store it | ||
pub name: String, | ||
// post_transform_name: Option<String>, | ||
pub range: lsp_types::Range, | ||
} | ||
|
||
impl Identifier { | ||
/// check the span matches and the text matches | ||
/// | ||
/// `same_location` is used to check if the location of the identifier is | ||
/// the same as the other | ||
pub fn equals_ident(&self, other: &syn::Ident, match_location: bool) -> bool { | ||
*other == self.name | ||
&& (!match_location | ||
|| (self.range.start.line == other.span().start().line as u32 | ||
&& self.range.start.character == other.span().start().column as u32)) | ||
} | ||
|
||
/// We cannot use `item.name` here in all cases as, during testing, the name | ||
/// does not always align with the exact text in the range. | ||
fn get_name(item: &CallHierarchyItem) -> String { | ||
// open file, find range inside, extract text | ||
let file = fs::read_to_string(item.uri.path()).unwrap(); | ||
let start = item.selection_range.start; | ||
let end = item.selection_range.end; | ||
file.lines() | ||
.nth(start.line as usize) | ||
.unwrap() | ||
.chars() | ||
.skip(start.character as usize) | ||
.take(end.character as usize - start.character as usize) | ||
.collect() | ||
} | ||
} | ||
|
||
impl From<(PathBuf, syn::Ident)> for Identifier { | ||
fn from((path, ident): (PathBuf, syn::Ident)) -> Self { | ||
Self { | ||
path: path.display().to_string(), | ||
name: ident.to_string(), | ||
// post_transform_name: None, | ||
range: Range { | ||
start: lsp_types::Position { | ||
line: ident.span().start().line as u32 - 1, | ||
character: ident.span().start().column as u32, | ||
}, | ||
end: lsp_types::Position { | ||
line: ident.span().end().line as u32 - 1, | ||
character: ident.span().end().column as u32, | ||
}, | ||
}, | ||
} | ||
} | ||
} | ||
|
||
impl From<CallHierarchyIncomingCall> for IdentifierReference { | ||
fn from(item: CallHierarchyIncomingCall) -> Self { | ||
Self { | ||
identifier: Identifier { | ||
name: Identifier::get_name(&item.from), | ||
// post_transform_name: Some(item.from.name), | ||
path: item.from.uri.path().to_owned(), | ||
range: item.from.selection_range, | ||
}, | ||
references: item.from_ranges, | ||
} | ||
} | ||
} | ||
|
||
impl std::fmt::Debug for Identifier { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
std::fmt::Display::fmt(self, f) | ||
} | ||
} | ||
|
||
impl std::fmt::Display for Identifier { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
write!(f, "{}:{}#{}", self.path, self.range.start.line, self.name,) | ||
} | ||
} |
Oops, something went wrong.