Skip to content

Commit

Permalink
WIP code
Browse files Browse the repository at this point in the history
ghstack-source-id: 686c0e7911e549984aaeb4a7d9fdb38b7a56cda8
Pull Request resolved: #536
  • Loading branch information
ketkarameya committed Jul 10, 2023
1 parent 73aa81a commit c23b103
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 4 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ getset = "0.1.2"
pyo3 = "0.19.0"
pyo3-log = "0.8.1"
glob = "0.3.1"
ast-grep-core = "0.7.2"

[features]
extension-module = ["pyo3/extension-module"]
Expand Down
16 changes: 12 additions & 4 deletions src/models/capture_group_patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,23 @@ Copyright (c) 2023 Uber Technologies, Inc.
use crate::{
models::Validator,
utilities::{
ast_grep_utilities::get_all_matches_for_ast_grep_pattern,
regex_utilities::get_all_matches_for_regex,
tree_sitter_utilities::{get_all_matches_for_query, get_ts_query_parser, number_of_errors},
Instantiate,
},
};
use ast_grep_core::{language::TSLanguage, Pattern, StrDoc};
use pyo3::prelude::pyclass;
use regex::Regex;
use serde_derive::Deserialize;
use std::collections::HashMap;
use tree_sitter::{Node, Query};

use super::{default_configs::REGEX_QUERY_PREFIX, matches::Match};
use super::{
default_configs::{AST_GREP_PREFIX, REGEX_QUERY_PREFIX},
matches::Match,
};

#[pyclass]
#[derive(Deserialize, Debug, Clone, Default, PartialEq, Hash, Eq)]
Expand Down Expand Up @@ -54,6 +59,9 @@ impl Validator for CGPattern {
.map(|_| Ok(()))
.unwrap_or(Err(format!("Cannot parse the regex - {}", self.pattern())));
}
if self.pattern().starts_with(AST_GREP_PREFIX) {
return Ok(());
}
let mut parser = get_ts_query_parser();
parser
.parse(self.pattern(), None)
Expand All @@ -78,6 +86,7 @@ impl Instantiate for CGPattern {

/// A capture-group pattern in its compiled, ready-to-match form.
///
/// Each variant wraps the compiled artifact of a different matching backend.
#[derive(Debug)]
pub(crate) enum CompiledCGPattern {
/// An ast-grep structural pattern (`ast_grep_core::Pattern`).
P(Pattern<StrDoc<TSLanguage>>),
/// A tree-sitter query (`tree_sitter::Query`).
Q(Query),
/// A regular expression (`regex::Regex`).
R(Regex),
}
Expand All @@ -104,6 +113,7 @@ impl CompiledCGPattern {
replace_node_idx: Option<u8>,
) -> Vec<Match> {
match self {
CompiledCGPattern::P(_pattern) => panic!("ast-grep pattern is not supported"),
CompiledCGPattern::Q(query) => get_all_matches_for_query(
node,
source_code,
Expand All @@ -112,9 +122,7 @@ impl CompiledCGPattern {
replace_node,
replace_node_idx,
),
CompiledCGPattern::R(regex) => {
get_all_matches_for_regex(node, source_code, regex, recursive, replace_node)
}
CompiledCGPattern::R(regex) => get_all_matches_for_ast_grep_pattern(),
}
}
}
1 change: 1 addition & 0 deletions src/models/default_configs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub const STRINGS: &str = "strings";
pub const TS_SCHEME: &str = "scm"; // We support scheme files that contain tree-sitter query

/// Prefix carried by pattern strings that use the regex backend.
/// NOTE(review): presumably stripped before the regex is compiled — the check is outside this view; confirm.
pub const REGEX_QUERY_PREFIX: &str = "rgx ";
/// Prefix carried by pattern strings that use the ast-grep backend.
/// Patterns starting with this prefix are accepted by `CGPattern` validation without being
/// parsed as a tree-sitter query.
pub const AST_GREP_PREFIX: &str = "sg ";

#[cfg(test)]
//FIXME: Remove this hack by not passing PiranhaArguments to SourceCodeUnit
Expand Down
23 changes: 23 additions & 0 deletions src/models/matches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Copyright (c) 2023 Uber Technologies, Inc.

use std::collections::HashMap;

use ast_grep_core::{language::TSLanguage, StrDoc};
use getset::{Getters, MutGetters};
use itertools::Itertools;
use log::trace;
Expand Down Expand Up @@ -67,6 +68,19 @@ impl Match {
}
}

pub(crate) fn from_ast_grep_captures(
captures: &ast_grep_core::Node<'_, StrDoc<TSLanguage>>, matches: HashMap<String, String>,
source_code: &str,
) -> Self {
Match {
matched_string: captures.text().to_string(),
range: Range::from_range(&captures.range(), source_code),
matches,
associated_comma: None,
associated_comments: Vec::new(),
}
}

pub(crate) fn new(
matched_string: String, range: tree_sitter::Range, matches: HashMap<String, String>,
) -> Self {
Expand Down Expand Up @@ -281,6 +295,15 @@ impl Range {
end_point: position_for_offset(source_code.as_bytes(), mtch.end()),
}
}

/// Creates a `Range` from a byte-offset range over `source_code`.
///
/// The start and end bytes are copied from `range`; the start and end points are
/// derived by looking up each offset in the bytes of `source_code`.
pub(crate) fn from_range(range: &std::ops::Range<usize>, source_code: &str) -> Self {
  let bytes = source_code.as_bytes();
  let (start_byte, end_byte) = (range.start, range.end);
  Self {
    start_byte,
    end_byte,
    start_point: position_for_offset(bytes, start_byte),
    end_point: position_for_offset(bytes, end_byte),
  }
}
}

// Finds the position (col and row number) for a given offset.
Expand Down
81 changes: 81 additions & 0 deletions src/utilities/ast_grep_utilities.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use std::{collections::HashMap, hash::Hash};

/*
Copyright (c) 2023 Uber Technologies, Inc.
<p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
<p>http://www.apache.org/licenses/LICENSE-2.0
<p>Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
express or implied. See the License for the specific language governing permissions and
limitations under the License.
*/
use ast_grep_core::{language::TSLanguage, AstGrep, Matcher, Pattern, StrDoc};
use tree_sitter::Node;

use crate::models::matches::Match;

/// Applies the ast-grep `pattern` to `source_code` and gets all the matches that fall on
/// (or, when `recursive`, inside) the given `node`.
///
/// The whole of `source_code` is parsed with ast-grep for `language`; every pattern match is
/// then filtered by comparing its byte range against the byte range of `node`.
///
/// # Arguments
/// * `node` - the tree-sitter node that scopes which matches are kept
/// * `source_code` - the corresponding source code string for the node
/// * `pattern` - the compiled ast-grep pattern to search for
/// * `recursive` - if `true` matches spanning `node` or lying anywhere inside it are kept,
///   else only matches whose range is exactly the range of `node`
/// * `replace_node` - name of the captured variable whose node should be reported as the
///   match; when `None` the whole matched node is reported
/// * `language` - the tree-sitter language used to parse `source_code`
///
/// # Returns
/// List containing all the matches against `node`
///
/// # Panics
/// Panics if `replace_node` names a variable that the pattern did not capture.
pub(crate) fn get_all_matches_for_ast_grep_pattern(
  node: &Node, source_code: String, pattern: &Pattern<StrDoc<TSLanguage>>, recursive: bool,
  replace_node: Option<String>, language: tree_sitter::Language,
) -> Vec<Match> {
  let ast_grep = AstGrep::new(&source_code, TSLanguage::from(language));
  let root = ast_grep.root();
  let (node_start, node_end) = (node.start_byte(), node.end_byte());

  let mut all_matches = vec![];
  for captures in root.find_all(pattern) {
    let captured_range = captures.range();
    let range_matches_node =
      node_start == captured_range.start && node_end == captured_range.end;
    let range_matches_inside_node =
      node_start <= captured_range.start && node_end >= captured_range.end;
    if (recursive && range_matches_inside_node) || range_matches_node {
      // Report either the explicitly requested captured node or the whole match.
      let replace_node_match = if let Some(ref rn) = replace_node {
        captures
          .get_env()
          .get_match(rn)
          .unwrap_or_else(|| panic!("The tag {rn} provided in the replace node is not present"))
      } else {
        captures.get_node()
      };
      let matches = extract_captures(&captures);
      all_matches.push(Match::from_ast_grep_captures(
        &replace_node_match,
        matches,
        &source_code,
      ));
    }
  }
  // Return the collected matches (previously an empty vector was returned unconditionally).
  all_matches
}

/// Collects the named variables captured by an ast-grep match into a map from variable name
/// to the source text of the node bound to it.
///
/// Anonymous and unnamed-ellipsis variables carry no name and are ignored. A named variable
/// for which the environment has no single-node binding is skipped instead of panicking.
// NOTE(review): named ellipsis captures may bind several nodes and so may never have a
// single-node binding — confirm whether their text should be gathered another way.
fn extract_captures(
  captures: &ast_grep_core::NodeMatch<'_, StrDoc<TSLanguage>>,
) -> HashMap<String, String> {
  use ast_grep_core::meta_var::MetaVariable;

  let env = captures.get_env();
  env
    .get_matched_variables()
    .filter_map(|variable| match variable {
      MetaVariable::Named(name, _) | MetaVariable::NamedEllipsis(name) => Some(name),
      MetaVariable::Anonymous(_) | MetaVariable::Ellipsis => None,
    })
    .filter_map(|name| {
      // `?` drops variables without a single-node binding rather than unwrapping `None`.
      let text = env.get_match(&name)?.text().to_string();
      Some((name, text))
    })
    .collect()
}
1 change: 1 addition & 0 deletions src/utilities/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Copyright (c) 2023 Uber Technologies, Inc.
limitations under the License.
*/

pub(crate) mod ast_grep_utilities;
pub(crate) mod regex_utilities;
pub(crate) mod tree_sitter_utilities;
use std::collections::HashMap;
Expand Down
13 changes: 13 additions & 0 deletions utilities/ast_grep_utilities.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/*
Copyright (c) 2023 Uber Technologies, Inc.

<p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
<p>http://www.apache.org/licenses/LICENSE-2.0

<p>Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
express or implied. See the License for the specific language governing permissions and
limitations under the License.
*/

0 comments on commit c23b103

Please sign in to comment.