Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
172 changes: 172 additions & 0 deletions api/analyzers/javascript/analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
"""JavaScript analyzer using tree-sitter for code entity extraction."""

from pathlib import Path
from typing import Optional

from multilspy import SyncLanguageServer
from ...entities.entity import Entity
from ...entities.file import File
from ..analyzer import AbstractAnalyzer

import tree_sitter_javascript as tsjs
from tree_sitter import Language, Node

import logging
logger = logging.getLogger('code_graph')


class JavaScriptAnalyzer(AbstractAnalyzer):
    """Analyzer for JavaScript source files using tree-sitter.

    Extracts function declarations, class declarations, and method
    definitions from JavaScript code, and records the symbols needed to
    resolve class inheritance (``extends``) and function/method calls.
    """

    # Single source of truth for the node types treated as entities and the
    # graph label each one maps to. Insertion order is the order returned by
    # ``get_entity_types``.
    _ENTITY_LABELS = {
        'function_declaration': "Function",
        'class_declaration': "Class",
        'method_definition': "Method",
    }

    def __init__(self) -> None:
        """Initialize the analyzer with the tree-sitter JavaScript grammar."""
        super().__init__(Language(tsjs.language()))

    def _require_entity(self, node: Node) -> None:
        """Raise ``ValueError`` unless *node* is one of the entity node types."""
        if node.type not in self._ENTITY_LABELS:
            raise ValueError(f"Unknown entity type: {node.type}")

    @staticmethod
    def _base_class_node(node: Node) -> Optional[Node]:
        """Return the node naming the base class inside a heritage clause.

        Args:
            node: A direct child of a ``class_heritage`` node.

        Returns:
            *node* itself when it is a plain ``identifier``; the trailing
            ``property`` node when it is a ``member_expression`` (e.g. the
            ``Base`` in ``extends ns.Base``); otherwise None, which covers
            the ``extends`` keyword token and any other expression form.
        """
        if node.type == 'identifier':
            return node
        if node.type == 'member_expression':
            # Same unwrapping resolve_method applies to member calls: the
            # trailing property is the name that has to be looked up.
            return node.child_by_field_name('property')
        return None

    def add_dependencies(self, path: Path, files: list[Path]) -> None:
        """Detect and register JavaScript project dependencies.

        Currently a no-op; npm dependency resolution is not yet implemented.
        """

    def get_entity_label(self, node: Node) -> str:
        """Return the graph label for a given AST node type.

        Args:
            node: A tree-sitter AST node representing a JavaScript entity.

        Returns:
            One of 'Function', 'Class', or 'Method'.

        Raises:
            ValueError: If the node type is not a recognised entity.
        """
        self._require_entity(node)
        return self._ENTITY_LABELS[node.type]

    def get_entity_name(self, node: Node) -> str:
        """Extract the declared name from a JavaScript entity node.

        Args:
            node: A tree-sitter AST node for a function, class, or method.

        Returns:
            The text of the node's ``name`` field decoded as UTF-8, or an
            empty string if the node has no ``name`` field.

        Raises:
            ValueError: If the node type is not a recognised entity.
        """
        self._require_entity(node)
        name_node = node.child_by_field_name('name')
        if name_node is None:
            return ''
        return name_node.text.decode('utf-8')

    def get_entity_docstring(self, node: Node) -> Optional[str]:
        """Extract a leading comment as a docstring for the entity.

        Only the sibling immediately preceding the entity is inspected.

        Args:
            node: A tree-sitter AST node for a function, class, or method.

        Returns:
            The comment text decoded as UTF-8, or None if the previous
            sibling is missing or is not a comment.

        Raises:
            ValueError: If the node type is not a recognised entity.
        """
        self._require_entity(node)
        previous = node.prev_sibling
        if previous and previous.type == 'comment':
            return previous.text.decode('utf-8')
        return None

    def get_entity_types(self) -> list[str]:
        """Return the tree-sitter node types recognised as JavaScript entities."""
        # Build a fresh list on every call so callers cannot mutate the table.
        return list(self._ENTITY_LABELS)

    def add_symbols(self, entity: Entity) -> None:
        """Extract symbols (references) from a JavaScript entity.

        For classes: records a ``base_class`` symbol for the class named in
        the ``extends`` clause, whether it is written as a bare identifier
        (``extends Base``) or as a member expression (``extends ns.Base``).
        For functions/methods: records a ``call`` symbol for every call
        expression captured inside the entity.

        Note:
            JavaScript parameters are untyped, so they are not captured as
            symbols, unlike typed languages (Java, Python) where parameter
            type annotations are meaningful for resolution.

        Args:
            entity: The entity whose AST node is scanned and which receives
                the extracted symbols.
        """
        node_type = entity.node.type
        if node_type == 'class_declaration':
            for child in entity.node.children:
                if child.type != 'class_heritage':
                    continue
                for heritage_child in child.children:
                    base = self._base_class_node(heritage_child)
                    if base is not None:
                        entity.add_symbol("base_class", base)
        elif node_type in ('function_declaration', 'method_definition'):
            captures = self._captures("(call_expression) @reference.call", entity.node)
            # The capture name is absent from the result when nothing matched.
            for call_node in captures.get('reference.call', []):
                entity.add_symbol("call", call_node)

    def is_dependency(self, file_path: str) -> bool:
        """Check whether a file path belongs to an external dependency.

        Uses path-segment matching so that directories merely containing
        'node_modules' in their name (e.g. ``node_modules_utils``) are not
        treated as dependencies.

        Args:
            file_path: Path of the file to classify.

        Returns:
            True if any path segment is exactly ``node_modules``.
        """
        return "node_modules" in Path(file_path).parts

    def resolve_path(self, file_path: str, path: Path) -> str:
        """Return *file_path* unchanged.

        No JavaScript-specific path rewriting is performed; *path* is
        accepted for interface compatibility and is not used.
        """
        return file_path

    def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
        """Resolve a type reference to its class declaration entity.

        Returns:
            The entities registered for the ``class_declaration`` enclosing
            each location that ``resolve`` yields for *node*; locations
            whose enclosing class is not a known entity are dropped.
        """
        res = []
        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
            type_dec = self.find_parent(resolved_node, ['class_declaration'])
            if type_dec in file.entities:
                res.append(file.entities[type_dec])
        return res

    def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
        """Resolve a call expression to the target function or method entity.

        Returns:
            The entities registered for the function or method declaration
            enclosing each location that ``resolve`` yields for the callee.
        """
        res = []
        if node.type == 'call_expression':
            func_node = node.child_by_field_name('function')
            # For ``obj.method()`` look up the property (the method name)
            # rather than the whole member expression.
            if func_node and func_node.type == 'member_expression':
                func_node = func_node.child_by_field_name('property')
            if func_node:
                node = func_node
        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
            method_dec = self.find_parent(resolved_node, ['function_declaration', 'method_definition', 'class_declaration'])
            # A class as the nearest enclosing declaration means the location
            # is not inside any function or method, so there is no callable
            # entity to report.
            if method_dec and method_dec.type == 'class_declaration':
                continue
            if method_dec in file.entities:
                res.append(file.entities[method_dec])
        return res

    def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
        """Dispatch symbol resolution based on the symbol category.

        Routes ``base_class`` symbols to type resolution and ``call`` symbols
        to method resolution.

        Raises:
            ValueError: If *key* is neither ``base_class`` nor ``call``.
        """
        if key == "base_class":
            return self.resolve_type(files, lsp, file_path, path, symbol)
        elif key == "call":
            return self.resolve_method(files, lsp, file_path, path, symbol)
        else:
            raise ValueError(f"Unknown key {key}")
14 changes: 11 additions & 3 deletions api/analyzers/source_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .java.analyzer import JavaAnalyzer
from .python.analyzer import PythonAnalyzer
from .csharp.analyzer import CSharpAnalyzer
from .javascript.analyzer import JavaScriptAnalyzer

from multilspy import SyncLanguageServer
from multilspy.multilspy_config import MultilspyConfig
Expand All @@ -26,7 +27,8 @@
# '.h': CAnalyzer(),
'.py': PythonAnalyzer(),
'.java': JavaAnalyzer(),
'.cs': CSharpAnalyzer()}
'.cs': CSharpAnalyzer(),
'.js': JavaScriptAnalyzer()}

class NullLanguageServer:
def start_server(self):
Expand Down Expand Up @@ -143,9 +145,15 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path))
else:
lsps[".cs"] = NullLanguageServer()
with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server():
lsps[".js"] = NullLanguageServer()
with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server():
files_len = len(self.files)
for i, file_path in enumerate(files):
if file_path not in self.files:
continue
# Skip symbol resolution when no real LSP is available
if isinstance(lsps.get(file_path.suffix), NullLanguageServer):
continue
file = self.files[file_path]
logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}')
for _, entity in file.entities.items():
Expand Down Expand Up @@ -174,7 +182,7 @@ def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None:

def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
path = path.resolve()
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs"))
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + [f for f in path.rglob("*.js") if "node_modules" not in f.parts]
# First pass analysis of the source code
self.first_pass(path, files, ignore, graph)

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies = [
"tree-sitter-c>=0.24.1,<0.25.0",
"tree-sitter-python>=0.25.0,<0.26.0",
"tree-sitter-java>=0.23.5,<0.24.0",
"tree-sitter-javascript>=0.23.0,<0.24.0",
"tree-sitter-c-sharp>=0.23.1,<0.24.0",
"fastapi>=0.115.0,<1.0.0",
"uvicorn[standard]>=0.34.0,<1.0.0",
Expand Down
31 changes: 31 additions & 0 deletions tests/source_files/javascript/sample.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
* Base class for shapes.
*
* Stores the name it is constructed with and provides a default `area()`
* that returns 0; Circle overrides `area()` with a real computation.
*/
class Shape {
// Keep the caller-supplied name on the instance.
constructor(name) {
this.name = name;
}

// Default area for a shape with no geometry of its own: always 0.
area() {
return 0;
}
}

/**
 * Circle shape defined by its radius.
 */
class Circle extends Shape {
  /**
   * @param {number} radius - radius of the circle
   */
  constructor(radius) {
    // Shape's constructor takes a display name, so pass one here rather
    // than the radius; the radius is stored separately below.
    super('Circle');
    this.radius = radius;
  }

  /**
   * @returns {number} the circle's area, PI * radius * radius
   */
  area() {
    return Math.PI * this.radius * this.radius;
  }
}

/**
* Sum the areas of a collection of shapes.
*
* @param shapes - iterable whose items each expose an `area()` method
* @returns the sum of every `shape.area()` result, or 0 when `shapes`
* yields no items
*/
function calculateTotal(shapes) {
let total = 0;
for (const shape of shapes) {
total += shape.area();
}
return total;
}
Loading
Loading