Skip to content

Commit 754f412

Browse files
committed
Add support for JavaScript
Fixes #51

Add support for JavaScript code analysis using tree-sitter.

* Add `api/analyzers/javascript/analyzer.py` implementing the `JavaScriptAnalyzer` class using tree-sitter for JavaScript.
  - Implement methods for first and second pass analysis.
  - Use tree-sitter to parse JavaScript code.
  - Extract functions and classes from JavaScript code.
  - Connect entities in the graph.
* Update `api/analyzers/source_analyzer.py` to include `JavaScriptAnalyzer` in the analyzers list.
* Add `tree-sitter-javascript` dependency to `pyproject.toml`.
* Add utility functions for JavaScript analysis in `api/analyzers/utils.py`.

---

For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/FalkorDB/code-graph-backend/issues/51?shareId=XXXX-XXXX-XXXX-XXXX).
1 parent c8ec9a4 commit 754f412

File tree

4 files changed

+254
-1
lines changed

4 files changed

+254
-1
lines changed

api/analyzers/javascript/analyzer.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
import io
2+
import os
3+
from ..utils import *
4+
from pathlib import Path
5+
from ...entities import *
6+
from ...graph import Graph
7+
from typing import Optional
8+
from ..analyzer import AbstractAnalyzer
9+
10+
import tree_sitter_javascript as tsjs
11+
from tree_sitter import Language, Parser, Node
12+
13+
# JavaScript grammar wrapped as a tree-sitter Language; used below both to
# construct the parser and to compile queries.
JS_LANGUAGE = Language(tsjs.language())
14+
15+
import logging
16+
# Module-level logger, obtained under the 'code_graph' name.
logger = logging.getLogger('code_graph')
17+
18+
class JavaScriptAnalyzer(AbstractAnalyzer):
    """
    Analyzer for JavaScript source files built on tree-sitter.

    The first pass records the file together with its function and class
    declarations in the graph; the second pass adds CALLS edges from each
    declared function to the functions it invokes by plain identifier.
    """

    def __init__(self) -> None:
        # A single parser bound to the JavaScript grammar is reused for
        # every file handled by this analyzer.
        self.parser = Parser(JS_LANGUAGE)

    @staticmethod
    def _read_source(f) -> bytes:
        """
        Read the whole file object and return its content as bytes.

        The parser is fed bytes, but the public methods are annotated as
        receiving a text wrapper; accepting both keeps either kind of file
        object working.

        Args:
            f: An opened file object, in binary or text mode.

        Returns:
            bytes: The file content, UTF-8 encoded if it was read as text.
        """

        data = f.read()
        if isinstance(data, str):
            data = data.encode('utf-8')
        return data

    def process_function_declaration(self, node: Node, path: Path, source_code: str) -> Optional[Function]:
        """
        Processes a function declaration node to extract function details.

        Args:
            node (Node): The AST node representing a function declaration.
            path (Path): The file path where the function is defined.
            source_code (str): Decoded text of the file. When empty (for
                example because decoding failed) the function is recorded
                without source text.

        Returns:
            Optional[Function]: A Function object containing details about the function, or None if the function name cannot be determined.
        """

        # Extract function name (first identifier child of the declaration)
        res = find_child_of_type(node, 'identifier')
        if res is None:
            return None

        function_name = res[0].text.decode('utf-8')
        logger.info(f"Function declaration: {function_name}")

        # Extract function parameters. Only plain identifier parameters are
        # collected, and no type information is extracted, so each type is
        # recorded as 'Unknown'.
        args = []
        res = find_child_of_type(node, 'formal_parameters')
        if res is not None:
            args = [(child.text.decode('utf-8'), 'Unknown')
                    for child in res[0].children
                    if child.type == 'identifier']

        # Extract function definition line numbers
        start_line = node.start_point[0]
        end_line = node.end_point[0]

        # start_byte/end_byte are byte offsets, so slicing the decoded string
        # with them yields the wrong text as soon as the file contains a
        # multi-byte character. Decode the node's own bytes instead.
        src = ''
        if source_code and node.text is not None:
            src = node.text.decode('utf-8', errors='replace')

        # Create Function object
        docs = ''
        f = Function(str(path), function_name, docs, 'Unknown', src, start_line, end_line)

        # Add arguments to Function object
        for name, type_ in args:
            f.add_argument(name, type_)

        return f

    def process_class_declaration(self, node: Node, path: Path) -> Optional[Class]:
        """
        Processes a class declaration node to extract class details.

        Args:
            node (Node): The AST node representing a class declaration.
            path (Path): The file path where the class is defined.

        Returns:
            Optional[Class]: A Class object containing details about the class, or None if the class name cannot be determined.
        """

        # Extract class name (first identifier child of the declaration)
        res = find_child_of_type(node, 'identifier')
        if res is None:
            return None

        class_name = res[0].text.decode('utf-8')
        logger.info(f"Class declaration: {class_name}")

        # Extract class definition line numbers
        start_line = node.start_point[0]
        end_line = node.end_point[0]

        # Create Class object
        docs = ''
        return Class(str(path), class_name, docs, start_line, end_line)

    def first_pass(self, path: Path, f: io.TextIOWrapper, graph: Graph) -> None:
        """
        Perform the first pass processing of a JavaScript source file.

        Adds a File entity for the source file, then a Function or Class
        entity for every function/class declaration found in it, each linked
        to the file with a DEFINES edge.

        Args:
            path (Path): The path to the JavaScript source file.
            f (io.TextIOWrapper): The file object representing the opened JavaScript source file.
            graph (Graph): The Graph object where entities will be added.

        Returns:
            None
        """

        if path.suffix != '.js':
            logger.debug(f"Skipping none JavaScript file {path}")
            return

        logger.info(f"Processing {path}")

        # Create file entity
        file = File(os.path.dirname(path), path.name, path.suffix)
        graph.add_file(file)

        # Parse file
        raw = self._read_source(f)
        tree = self.parser.parse(raw)
        try:
            source_code = raw.decode('utf-8')
        except UnicodeDecodeError as e:
            # Entities are still recorded, only without their source text
            logger.error(f"Failed decoding source code: {e}")
            source_code = ''

        # Process function declarations
        query = JS_LANGUAGE.query("(function_declaration) @function")
        captures = query.captures(tree.root_node)
        if 'function' in captures:
            for node in captures['function']:
                entity = self.process_function_declaration(node, path, source_code)
                if entity is not None:
                    graph.add_function(entity)
                    graph.connect_entities('DEFINES', file.id, entity.id)

        # Process class declarations
        query = JS_LANGUAGE.query("(class_declaration) @class")
        captures = query.captures(tree.root_node)
        if 'class' in captures:
            for node in captures['class']:
                entity = self.process_class_declaration(node, path)
                if entity is not None:
                    graph.add_class(entity)
                    graph.connect_entities('DEFINES', file.id, entity.id)

    def second_pass(self, path: Path, f: io.TextIOWrapper, graph: Graph) -> None:
        """
        Perform the second pass processing of a JavaScript source file to establish function call relationships.

        For every function declaration in the file, each call whose callee
        is a plain identifier is connected to the caller with a CALLS edge.
        Callees that are not yet in the graph are added as placeholder
        functions.

        Args:
            path (Path): The path to the JavaScript source file.
            f (io.TextIOWrapper): The file object representing the opened JavaScript source file.
            graph (Graph): The Graph object containing entities (functions and files) to establish relationships.

        Returns:
            None
        """

        if path.suffix != '.js':
            logger.debug(f"Skipping none JavaScript file {path}")
            return

        logger.info(f"Processing {path}")

        # Get file entity
        file = graph.get_file(os.path.dirname(path), path.name, path.suffix)
        if file is None:
            logger.error(f"File entity not found for: {path}")
            return

        try:
            # Parse file
            content = self._read_source(f)
            tree = self.parser.parse(content)
        except Exception as e:
            logger.error(f"Failed to process file {path}: {e}")
            return

        # Locate function invocation
        query_call_exp = JS_LANGUAGE.query("(call_expression function: (identifier) @callee)")

        # Locate function definitions. The JavaScript grammar exposes the
        # function name through the `name` field.
        query_function_def = JS_LANGUAGE.query(
            "(function_declaration name: (identifier) @function_name)")

        # captures() is keyed by capture name, as in first_pass
        function_defs = query_function_def.captures(tree.root_node)
        if 'function_name' not in function_defs:
            return

        for caller in function_defs['function_name']:
            caller_name = caller.text.decode('utf-8')
            caller_f = graph.get_function_by_name(caller_name)
            if caller_f is None:
                # Expected to have been added by first_pass; skip rather
                # than abort the rest of the file.
                logger.error(f"Function entity not found for: {caller_name}")
                continue

            # The name identifier is a direct child of the declaration, so
            # its parent is the function whose body is searched for calls.
            function_calls = query_call_exp.captures(caller.parent)
            if 'callee' not in function_calls:
                continue

            for callee in function_calls['callee']:
                callee_name = callee.text.decode('utf-8')
                callee_f = graph.get_function_by_name(callee_name)

                if callee_f is None:
                    # Create missing function
                    # Assuming this is a call to a native function
                    callee_f = Function('/', callee_name, None, None, None, 0, 0)
                    graph.add_function(callee_f)

                # Connect the caller and callee in the graph
                graph.connect_entities('CALLS', caller_f.id, callee_f.id)

api/analyzers/source_analyzer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from ..graph import Graph
99
from .c.analyzer import CAnalyzer
1010
from .python.analyzer import PythonAnalyzer
11+
from .javascript.analyzer import JavaScriptAnalyzer
1112

1213
import logging
1314
# Configure logging
@@ -16,7 +17,8 @@
1617
# List of available analyzers
1718
analyzers = {'.c': CAnalyzer(),
1819
'.h': CAnalyzer(),
19-
'.py': PythonAnalyzer()}
20+
'.py': PythonAnalyzer(),
21+
'.js': JavaScriptAnalyzer()}
2022

2123
class SourceAnalyzer():
2224

api/analyzers/utils.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,33 @@ def find_child_of_type(node: Node, child_type: str) -> Union[tuple[Node, int], N
2121
return (child, idx)
2222

2323
return None
24+
25+
def extract_js_function_name(node: Node) -> str:
    """
    Return the name of a JavaScript function node.

    The name is taken from the first direct child of type 'identifier'.

    Args:
        node (Node): The AST node representing the function.

    Returns:
        str: The function name decoded as UTF-8, or an empty string when
        the node has no identifier child.
    """
    identifiers = (child for child in node.children if child.type == 'identifier')
    first = next(identifiers, None)
    if first is None:
        return ''
    return first.text.decode('utf-8')
39+
40+
def extract_js_class_name(node: Node) -> str:
    """
    Return the name of a JavaScript class node.

    The name is taken from the first direct child of type 'identifier'.

    Args:
        node (Node): The AST node representing the class.

    Returns:
        str: The class name decoded as UTF-8, or an empty string when the
        node has no identifier child.
    """
    # The generator is lazy, so only the first matching child is decoded
    decoded_names = (
        child.text.decode('utf-8')
        for child in node.children
        if child.type == 'identifier'
    )
    return next(decoded_names, '')

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ validators = "^0.34.0"
1313
falkordb = "^1.0.5"
1414
tree-sitter-c = "^0.23.2"
1515
tree-sitter-python = "^0.23.6"
16+
tree-sitter-javascript = "^0.23.2"
1617
flask = "^3.1.0"
1718
python-dotenv = "^1.0.1"
1819

0 commit comments

Comments
 (0)