From 4e44a0a0a342957428449e0b4942d245f0f1af08 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Tue, 10 Mar 2026 10:58:20 +0200 Subject: [PATCH 1/5] Add support for JavaScript Migrated from FalkorDB/code-graph-backend PR #59. Original issue: FalkorDB/code-graph-backend#51 Resolves #540 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/javascript/__init__.py | 0 api/analyzers/javascript/analyzer.py | 110 +++++++++++++++++++++++++++ api/analyzers/source_analyzer.py | 9 ++- pyproject.toml | 1 + 4 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 api/analyzers/javascript/__init__.py create mode 100644 api/analyzers/javascript/analyzer.py diff --git a/api/analyzers/javascript/__init__.py b/api/analyzers/javascript/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/api/analyzers/javascript/analyzer.py b/api/analyzers/javascript/analyzer.py new file mode 100644 index 00000000..76fe6923 --- /dev/null +++ b/api/analyzers/javascript/analyzer.py @@ -0,0 +1,110 @@ +from pathlib import Path +from typing import Optional + +from multilspy import SyncLanguageServer +from ...entities.entity import Entity +from ...entities.file import File +from ..analyzer import AbstractAnalyzer + +import tree_sitter_javascript as tsjs +from tree_sitter import Language, Node + +import logging +logger = logging.getLogger('code_graph') + + +class JavaScriptAnalyzer(AbstractAnalyzer): + def __init__(self) -> None: + super().__init__(Language(tsjs.language())) + + def add_dependencies(self, path: Path, files: list[Path]): + pass + + def get_entity_label(self, node: Node) -> str: + if node.type == 'function_declaration': + return "Function" + elif node.type == 'class_declaration': + return "Class" + elif node.type == 'method_definition': + return "Method" + raise ValueError(f"Unknown entity type: {node.type}") + + def get_entity_name(self, node: Node) -> str: + if node.type in ['function_declaration', 'class_declaration', 'method_definition']: + name_node = node.child_by_field_name('name') + if name_node is None: + return '' + return name_node.text.decode('utf-8') + raise ValueError(f"Unknown entity type: {node.type}") + + def get_entity_docstring(self, node: Node) -> Optional[str]: + if node.type in ['function_declaration', 'class_declaration', 'method_definition']: + if node.prev_sibling and node.prev_sibling.type == 'comment': + return node.prev_sibling.text.decode('utf-8') + return None + raise ValueError(f"Unknown entity type: {node.type}") + + def get_entity_types(self) -> list[str]: + return ['function_declaration', 'class_declaration', 'method_definition'] + + def add_symbols(self, entity: Entity) -> None: + if entity.node.type == 'class_declaration': + heritage = entity.node.child_by_field_name('body') + if heritage is None: + return + superclass_node = entity.node.child_by_field_name('name') + # Check for `extends` clause via class_heritage + for child in entity.node.children: + if child.type == 'class_heritage': + for heritage_child in child.children: + if heritage_child.type == 'identifier': + entity.add_symbol("base_class", heritage_child) + elif entity.node.type in ['function_declaration', 'method_definition']: + query = self.language.query("(call_expression) @reference.call") + captures = query.captures(entity.node) + if 'reference.call' in captures: + for caller in captures['reference.call']: + entity.add_symbol("call", caller) + query = self.language.query("(formal_parameters (identifier) @parameter)") + captures = query.captures(entity.node) + if 'parameter' in captures: + for parameter in captures['parameter']: + entity.add_symbol("parameters", parameter) + + def is_dependency(self, file_path: str) -> bool: + return "node_modules" in file_path + + def resolve_path(self, file_path: str, path: Path) -> str: + return file_path + + def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]: + res = [] + for file, resolved_node in self.resolve(files, lsp, file_path, path, node): + type_dec = self.find_parent(resolved_node, ['class_declaration']) + if type_dec in file.entities: + res.append(file.entities[type_dec]) + return res + + def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]: + res = [] + if node.type == 'call_expression': + func_node = node.child_by_field_name('function') + if func_node and func_node.type == 'member_expression': + func_node = func_node.child_by_field_name('property') + if func_node: + node = func_node + for file, resolved_node in self.resolve(files, lsp, file_path, path, node): + method_dec = self.find_parent(resolved_node, ['function_declaration', 'method_definition', 'class_declaration']) + if method_dec and method_dec.type == 'class_declaration': + continue + if method_dec in file.entities: + res.append(file.entities[method_dec]) + return res + + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: + if key in ["base_class", "parameters"]: + return self.resolve_type(files, lsp, file_path, path, symbol) + elif key in ["call"]: + return self.resolve_method(files, lsp, file_path, path, symbol) + else: + raise ValueError(f"Unknown key {key}") diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 4186f358..7a242eb9 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -11,6 +11,7 @@ from .java.analyzer import JavaAnalyzer from .python.analyzer import PythonAnalyzer from .csharp.analyzer import CSharpAnalyzer +from .javascript.analyzer import JavaScriptAnalyzer from multilspy import SyncLanguageServer from multilspy.multilspy_config import MultilspyConfig @@ -26,7 +27,8 @@ # '.h': CAnalyzer(), '.py': PythonAnalyzer(), '.java': JavaAnalyzer(), - '.cs': CSharpAnalyzer()} + '.cs': CSharpAnalyzer(), + '.js': JavaScriptAnalyzer()} class NullLanguageServer: def start_server(self): @@ -143,7 +145,8 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path)) else: lsps[".cs"] = NullLanguageServer() - with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(): + lsps[".js"] = NullLanguageServer() + with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server(): files_len = len(self.files) for i, file_path in enumerate(files): file = self.files[file_path] @@ -174,7 +177,7 @@ def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None: def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None: path = path.resolve() - files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + list(path.rglob("*.js")) # First pass analysis of the source code self.first_pass(path, files, ignore, graph) diff --git a/pyproject.toml b/pyproject.toml index 49438fb9..05cfebfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "tree-sitter-c>=0.24.1,<0.25.0", "tree-sitter-python>=0.25.0,<0.26.0", "tree-sitter-java>=0.23.5,<0.24.0", +"tree-sitter-javascript>=0.23.0", "tree-sitter-c-sharp>=0.23.1,<0.24.0", "fastapi>=0.115.0,<1.0.0", "uvicorn[standard]>=0.34.0,<1.0.0", From 0f193ce137ca09aa3ead282e5738bca01404f499 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sat, 21 Mar 2026 13:16:09 +0200 Subject: [PATCH 2/5] fix: address review feedback for JavaScript analyzer - Fix pyproject.toml: align indentation and add upper bound (<0.24.0) for tree-sitter-javascript - Remove unused variables (heritage, superclass_node) in add_symbols - Switch from query.captures() to self._captures() for correct QueryCursor usage - Filter out node_modules when rglob'ing for *.js files in analyze_sources - Add unit tests (tests/test_javascript_analyzer.py) and fixture (tests/source_files/javascript/sample.js) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/javascript/analyzer.py | 10 +-- api/analyzers/source_analyzer.py | 2 +- pyproject.toml | 2 +- tests/source_files/javascript/sample.js | 31 +++++++ tests/test_javascript_analyzer.py | 102 ++++++++++++++++++++++++ uv.lock | 17 ++++ 6 files changed, 154 insertions(+), 10 deletions(-) create mode 100644 tests/source_files/javascript/sample.js create mode 100644 tests/test_javascript_analyzer.py diff --git a/api/analyzers/javascript/analyzer.py b/api/analyzers/javascript/analyzer.py index 76fe6923..529fdafb 100644 --- a/api/analyzers/javascript/analyzer.py +++ b/api/analyzers/javascript/analyzer.py @@ -49,10 +49,6 @@ def get_entity_types(self) -> list[str]: def add_symbols(self, entity: Entity) -> None: if entity.node.type == 'class_declaration': - heritage = entity.node.child_by_field_name('body') - if heritage is None: - return - superclass_node = entity.node.child_by_field_name('name') # Check for `extends` clause via class_heritage for child in entity.node.children: if child.type == 'class_heritage': @@ -60,13 +56,11 @@ def add_symbols(self, entity: Entity) -> None: if heritage_child.type == 'identifier': entity.add_symbol("base_class", heritage_child) elif entity.node.type in ['function_declaration', 'method_definition']: - query = self.language.query("(call_expression) @reference.call") - captures = query.captures(entity.node) + captures = self._captures("(call_expression) @reference.call", entity.node) if 'reference.call' in captures: for caller in captures['reference.call']: entity.add_symbol("call", caller) - query = self.language.query("(formal_parameters (identifier) @parameter)") - captures = query.captures(entity.node) + captures = self._captures("(formal_parameters (identifier) @parameter)", entity.node) if 'parameter' in captures: for parameter in captures['parameter']: entity.add_symbol("parameters", parameter) diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 7a242eb9..23c25893 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -177,7 +177,7 @@ def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None: def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None: path = path.resolve() - files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + list(path.rglob("*.js")) + files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + [f for f in path.rglob("*.js") if "node_modules" not in f.parts] # First pass analysis of the source code self.first_pass(path, files, ignore, graph) diff --git a/pyproject.toml b/pyproject.toml index 05cfebfa..67fcf521 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "tree-sitter-c>=0.24.1,<0.25.0", "tree-sitter-python>=0.25.0,<0.26.0", "tree-sitter-java>=0.23.5,<0.24.0", -"tree-sitter-javascript>=0.23.0", + "tree-sitter-javascript>=0.23.0,<0.24.0", "tree-sitter-c-sharp>=0.23.1,<0.24.0", "fastapi>=0.115.0,<1.0.0", "uvicorn[standard]>=0.34.0,<1.0.0", diff --git a/tests/source_files/javascript/sample.js b/tests/source_files/javascript/sample.js new file mode 100644 index 00000000..24e6ef56 --- /dev/null +++ b/tests/source_files/javascript/sample.js @@ -0,0 +1,31 @@ +/** + * Base class for shapes + */ +class Shape { + constructor(name) { + this.name = name; + } + + area() { + return 0; + } +} + +class Circle extends Shape { + constructor(radius) { + super(radius); + this.radius = radius; + } + + area() { + return Math.PI * this.radius * this.radius; + } +} + +function calculateTotal(shapes) { + let total = 0; + for (const shape of shapes) { + total += shape.area(); + } + return total; +} diff --git a/tests/test_javascript_analyzer.py b/tests/test_javascript_analyzer.py new file mode 100644 index 00000000..44d875ec --- /dev/null +++ b/tests/test_javascript_analyzer.py @@ -0,0 +1,102 @@ +"""Tests for the JavaScript analyzer - extraction only (no DB required).""" + +import unittest +from pathlib import Path + +from api.analyzers.javascript.analyzer import JavaScriptAnalyzer +from api.entities.entity import Entity +from api.entities.file import File + + +def _entity_name(analyzer, entity): + """Get the name of an entity using the analyzer.""" + return analyzer.get_entity_name(entity.node) + + +class TestJavaScriptAnalyzer(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.analyzer = JavaScriptAnalyzer() + source_dir = Path(__file__).parent / "source_files" / "javascript" + cls.sample_path = source_dir / "sample.js" + source = cls.sample_path.read_bytes() + tree = cls.analyzer.parser.parse(source) + cls.file = File(cls.sample_path, tree) + + # Walk AST and extract entities (mirrors create_hierarchy without Graph) + types = cls.analyzer.get_entity_types() + stack = [tree.root_node] + while stack: + node = stack.pop() + if node.type in types: + entity = Entity(node) + cls.analyzer.add_symbols(entity) + cls.file.add_entity(entity) + # Also recurse into entity children (e.g., class body methods) + stack.extend(node.children) + else: + stack.extend(node.children) + + def _entity_names(self): + return [_entity_name(self.analyzer, e) for e in self.file.entities.values()] + + def test_discovers_js_files(self): + """SourceAnalyzer should enumerate .js files.""" + source_dir = Path(__file__).parent / "source_files" / "javascript" + js_files = list(source_dir.rglob("*.js")) + self.assertTrue(len(js_files) > 0, "Should find .js files") + + def test_entity_types(self): + """Analyzer should recognise JS entity types.""" + self.assertEqual( + self.analyzer.get_entity_types(), + ['function_declaration', 'class_declaration', 'method_definition'], + ) + + def test_class_extraction(self): + """Classes should be extracted from sample.js.""" + names = self._entity_names() + self.assertIn("Shape", names) + self.assertIn("Circle", names) + + def test_function_extraction(self): + """Top-level functions should be extracted.""" + names = self._entity_names() + self.assertIn("calculateTotal", names) + + def test_method_extraction(self): + """Class methods should be extracted.""" + names = self._entity_names() + self.assertIn("area", names) + self.assertIn("constructor", names) + + def test_class_labels(self): + """Classes should get the 'Class' label.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) in ("Shape", "Circle"): + self.assertEqual(self.analyzer.get_entity_label(entity.node), "Class") + + def test_function_label(self): + """Functions should get the 'Function' label.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "calculateTotal": + self.assertEqual(self.analyzer.get_entity_label(entity.node), "Function") + + def test_base_class_symbol(self): + """Circle should have Shape as a base_class symbol.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "Circle": + base_names = [ + s.symbol.text.decode("utf-8") + for s in entity.symbols.get("base_class", []) + ] + self.assertIn("Shape", base_names) + + def test_is_dependency(self): + """node_modules paths should be flagged as dependencies.""" + self.assertTrue(self.analyzer.is_dependency("foo/node_modules/bar/index.js")) + self.assertFalse(self.analyzer.is_dependency("src/utils.js")) + + +if __name__ == "__main__": + unittest.main() diff --git a/uv.lock b/uv.lock index 44c773aa..349ffd28 100644 --- a/uv.lock +++ b/uv.lock @@ -263,6 +263,7 @@ dependencies = [ { name = "tree-sitter-c" }, { name = "tree-sitter-c-sharp" }, { name = "tree-sitter-java" }, + { name = "tree-sitter-javascript" }, { name = "tree-sitter-python" }, { name = "uvicorn", extra = ["standard"] }, { name = "validators" }, @@ -291,6 +292,7 @@ requires-dist = [ { name = "tree-sitter-c", specifier = ">=0.24.1,<0.25.0" }, { name = "tree-sitter-c-sharp", specifier = ">=0.23.1,<0.24.0" }, { name = "tree-sitter-java", specifier = ">=0.23.5,<0.24.0" }, + { name = "tree-sitter-javascript", specifier = ">=0.23.0,<0.24.0" }, { name = "tree-sitter-python", specifier = ">=0.25.0,<0.26.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0,<1.0.0" }, { name = "validators", specifier = ">=0.35.0,<0.36.0" }, @@ -1627,6 +1629,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/57/5bab54d23179350356515526fff3cc0f3ac23bfbc1a1d518a15978d4880e/tree_sitter_java-0.23.5-cp39-abi3-win_arm64.whl", hash = "sha256:402efe136104c5603b429dc26c7e75ae14faaca54cfd319ecc41c8f2534750f4", size = 59059, upload-time = "2024-12-21T18:24:24.934Z" }, ] +[[package]] +name = "tree-sitter-javascript" +version = "0.23.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/dc/1c55c33cc6bbe754359b330534cf9f261c1b9b2c26ddf23aef3c5fa67759/tree_sitter_javascript-0.23.1.tar.gz", hash = "sha256:b2059ce8b150162cda05a457ca3920450adbf915119c04b8c67b5241cd7fcfed", size = 110058, upload-time = "2024-11-10T05:40:42.357Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/d3/c67d7d49967344b51208ad19f105233be1afdf07d3dcb35b471900265227/tree_sitter_javascript-0.23.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6ca583dad4bd79d3053c310b9f7208cd597fd85f9947e4ab2294658bb5c11e35", size = 59333, upload-time = "2024-11-10T05:40:31.988Z" }, + { url = "https://files.pythonhosted.org/packages/a5/db/ea0ee1547679d1750e80a0c4bc60b3520b166eeaf048764cfdd1ba3fd5e5/tree_sitter_javascript-0.23.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:94100e491a6a247aa4d14caf61230c171b6376c863039b6d9cd71255c2d815ec", size = 61071, upload-time = "2024-11-10T05:40:33.458Z" }, + { url = "https://files.pythonhosted.org/packages/67/6e/07c4857e08be37bfb55bfb269863df8ec908b2f6a3f1893cd852b893ecab/tree_sitter_javascript-0.23.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6bc1055b061c5055ec58f39ee9b2e9efb8e6e0ae970838af74da0afb811f0a", size = 96999, upload-time = "2024-11-10T05:40:34.869Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f5/4de730afe8b9422845bc2064020a8a8f49ebd1695c04261c38d1b3e3edec/tree_sitter_javascript-0.23.1-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:056dc04fb6b24293f8c5fec43c14e7e16ba2075b3009c643abf8c85edc4c7c3c", size = 94020, upload-time = "2024-11-10T05:40:35.735Z" }, + { url = "https://files.pythonhosted.org/packages/77/0a/f980520da86c4eff8392867840a945578ef43372c9d4a37922baa6b121fe/tree_sitter_javascript-0.23.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a11ca1c0f736da42967586b568dff8a465ee148a986c15ebdc9382806e0ce871", size = 92927, upload-time = "2024-11-10T05:40:37.92Z" }, + { url = "https://files.pythonhosted.org/packages/ff/5c/36a98d512aa1d1082409d6b7eda5d26b820bd4477a54100ad9f62212bc55/tree_sitter_javascript-0.23.1-cp39-abi3-win_amd64.whl", hash = "sha256:041fa22b34250ea6eb313d33104d5303f79504cb259d374d691e38bbdc49145b", size = 58824, upload-time = "2024-11-10T05:40:39.903Z" }, + { url = "https://files.pythonhosted.org/packages/dc/79/ceb21988e6de615355a63eebcf806cd2a0fe875bec27b429d58b63e7fb5f/tree_sitter_javascript-0.23.1-cp39-abi3-win_arm64.whl", hash = "sha256:eb28130cd2fb30d702d614cbf61ef44d1c7f6869e7d864a9cc17111e370be8f7", size = 57027, upload-time = "2024-11-10T05:40:40.841Z" }, +] + [[package]] name = "tree-sitter-python" version = "0.25.0" From 1de114e49ec170804a73aaee14c15f6b0f83d047 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 22 Mar 2026 16:50:05 +0200 Subject: [PATCH 3/5] fix(javascript): address review comments and improve test coverage - Remove untyped parameter capture from add_symbols; JS params have no type annotations, so resolving them as types (like Java/Python do) is incorrect - Fix is_dependency to use Path.parts instead of substring matching, avoiding false positives on paths like 'node_modules_utils' - Add comprehensive docstrings to all JavaScriptAnalyzer public methods (docstring coverage was 39% vs 80% threshold) - Expand test suite from 9 to 22 tests covering: - Docstring extraction and missing docstrings - Method labels and unknown entity type errors - Absence of parameter symbols (regression guard) - Call symbol extraction - Path-segment is_dependency matching - SourceAnalyzer integration (registration, supported_types) - resolve_symbol error handling Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/javascript/analyzer.py | 86 ++++++++++++++++-- tests/test_javascript_analyzer.py | 126 ++++++++++++++++++++++++++- 2 files changed, 202 insertions(+), 10 deletions(-) diff --git a/api/analyzers/javascript/analyzer.py b/api/analyzers/javascript/analyzer.py index 529fdafb..abc2879f 100644 --- a/api/analyzers/javascript/analyzer.py +++ b/api/analyzers/javascript/analyzer.py @@ -1,3 +1,5 @@ +"""JavaScript analyzer using tree-sitter for code entity extraction.""" + from pathlib import Path from typing import Optional @@ -14,13 +16,35 @@ class JavaScriptAnalyzer(AbstractAnalyzer): + """Analyzer for JavaScript source files using tree-sitter. + + Extracts functions, classes, and methods from JavaScript code. + Resolves class inheritance (extends) and function/method call references. + """ + def __init__(self) -> None: + """Initialize the JavaScript analyzer with the tree-sitter JS grammar.""" super().__init__(Language(tsjs.language())) - def add_dependencies(self, path: Path, files: list[Path]): + def add_dependencies(self, path: Path, files: list[Path]) -> None: + """Detect and register JavaScript project dependencies. + + Currently a no-op; npm dependency resolution is not yet implemented. + """ pass def get_entity_label(self, node: Node) -> str: + """Return the graph label for a given AST node type. + + Args: + node: A tree-sitter AST node representing a JavaScript entity. + + Returns: + One of 'Function', 'Class', or 'Method'. + + Raises: + ValueError: If the node type is not a recognised entity. + """ if node.type == 'function_declaration': return "Function" elif node.type == 'class_declaration': @@ -30,6 +54,17 @@ def get_entity_label(self, node: Node) -> str: raise ValueError(f"Unknown entity type: {node.type}") def get_entity_name(self, node: Node) -> str: + """Extract the declared name from a JavaScript entity node. + + Args: + node: A tree-sitter AST node for a function, class, or method. + + Returns: + The entity name, or an empty string if no name node is found. + + Raises: + ValueError: If the node type is not a recognised entity. + """ if node.type in ['function_declaration', 'class_declaration', 'method_definition']: name_node = node.child_by_field_name('name') if name_node is None: @@ -38,6 +73,19 @@ def get_entity_name(self, node: Node) -> str: raise ValueError(f"Unknown entity type: {node.type}") def get_entity_docstring(self, node: Node) -> Optional[str]: + """Extract a leading comment as a docstring for the entity. + + Looks for a comment node immediately preceding the entity in the AST. + + Args: + node: A tree-sitter AST node for a function, class, or method. + + Returns: + The comment text, or None if no leading comment exists. + + Raises: + ValueError: If the node type is not a recognised entity. + """ if node.type in ['function_declaration', 'class_declaration', 'method_definition']: if node.prev_sibling and node.prev_sibling.type == 'comment': return node.prev_sibling.text.decode('utf-8') @@ -45,11 +93,21 @@ def get_entity_docstring(self, node: Node) -> Optional[str]: raise ValueError(f"Unknown entity type: {node.type}") def get_entity_types(self) -> list[str]: + """Return the tree-sitter node types recognised as JavaScript entities.""" return ['function_declaration', 'class_declaration', 'method_definition'] def add_symbols(self, entity: Entity) -> None: + """Extract symbols (references) from a JavaScript entity. + + For classes: extracts base-class identifiers from ``extends`` clauses. + For functions/methods: extracts call-expression references. + + Note: + JavaScript parameters are untyped, so they are not captured as + symbols — unlike typed languages (Java, Python) where parameter + type annotations are meaningful for resolution. + """ if entity.node.type == 'class_declaration': - # Check for `extends` clause via class_heritage for child in entity.node.children: if child.type == 'class_heritage': for heritage_child in child.children: @@ -60,18 +118,22 @@ def add_symbols(self, entity: Entity) -> None: if 'reference.call' in captures: for caller in captures['reference.call']: entity.add_symbol("call", caller) - captures = self._captures("(formal_parameters (identifier) @parameter)", entity.node) - if 'parameter' in captures: - for parameter in captures['parameter']: - entity.add_symbol("parameters", parameter) def is_dependency(self, file_path: str) -> bool: - return "node_modules" in file_path + """Check whether a file path belongs to an external dependency. + + Uses path-segment matching so that directories merely containing + 'node_modules' in their name (e.g. ``node_modules_utils``) are not + treated as dependencies. + """ + return "node_modules" in Path(file_path).parts def resolve_path(self, file_path: str, path: Path) -> str: + """Resolve an import path relative to the project root.""" return file_path def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]: + """Resolve a type reference to its class declaration entity.""" res = [] for file, resolved_node in self.resolve(files, lsp, file_path, path, node): type_dec = self.find_parent(resolved_node, ['class_declaration']) @@ -80,6 +142,7 @@ def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_pa return res def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]: + """Resolve a call expression to the target function or method entity.""" res = [] if node.type == 'call_expression': func_node = node.child_by_field_name('function') @@ -96,9 +159,14 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ return res def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: - if key in ["base_class", "parameters"]: + """Dispatch symbol resolution based on the symbol category. + + Routes ``base_class`` symbols to type resolution and ``call`` symbols + to method resolution. + """ + if key == "base_class": return self.resolve_type(files, lsp, file_path, path, symbol) - elif key in ["call"]: + elif key == "call": return self.resolve_method(files, lsp, file_path, path, symbol) else: raise ValueError(f"Unknown key {key}") diff --git a/tests/test_javascript_analyzer.py b/tests/test_javascript_analyzer.py index 44d875ec..beca0021 100644 --- a/tests/test_javascript_analyzer.py +++ b/tests/test_javascript_analyzer.py @@ -4,6 +4,7 @@ from pathlib import Path from api.analyzers.javascript.analyzer import JavaScriptAnalyzer +from api.analyzers.source_analyzer import SourceAnalyzer, analyzers from api.entities.entity import Entity from api.entities.file import File @@ -14,8 +15,11 @@ def _entity_name(analyzer, entity): class TestJavaScriptAnalyzer(unittest.TestCase): + """Unit tests for JavaScriptAnalyzer entity extraction and classification.""" + @classmethod def setUpClass(cls): + """Parse sample.js and populate entities for all tests.""" cls.analyzer = JavaScriptAnalyzer() source_dir = Path(__file__).parent / "source_files" / "javascript" cls.sample_path = source_dir / "sample.js" @@ -32,20 +36,29 @@ def setUpClass(cls): entity = Entity(node) cls.analyzer.add_symbols(entity) cls.file.add_entity(entity) - # Also recurse into entity children (e.g., class body methods) stack.extend(node.children) else: stack.extend(node.children) def _entity_names(self): + """Return all entity names discovered in the sample file.""" return [_entity_name(self.analyzer, e) for e in self.file.entities.values()] + # -- Registration ---------------------------------------------------------- + + def test_js_extension_registered(self): + """The .js extension should be registered in the analyzers map.""" + self.assertIn(".js", analyzers) + self.assertIsInstance(analyzers[".js"], JavaScriptAnalyzer) + def test_discovers_js_files(self): """SourceAnalyzer should enumerate .js files.""" source_dir = Path(__file__).parent / "source_files" / "javascript" js_files = list(source_dir.rglob("*.js")) self.assertTrue(len(js_files) > 0, "Should find .js files") + # -- Entity types ---------------------------------------------------------- + def test_entity_types(self): """Analyzer should recognise JS entity types.""" self.assertEqual( @@ -53,6 +66,8 @@ def test_entity_types(self): ['function_declaration', 'class_declaration', 'method_definition'], ) + # -- Entity extraction ----------------------------------------------------- + def test_class_extraction(self): """Classes should be extracted from sample.js.""" names = self._entity_names() @@ -70,6 +85,8 @@ def test_method_extraction(self): self.assertIn("area", names) self.assertIn("constructor", names) + # -- Labels ---------------------------------------------------------------- + def test_class_labels(self): """Classes should get the 'Class' label.""" for entity in self.file.entities.values(): @@ -82,6 +99,58 @@ def test_function_label(self): if _entity_name(self.analyzer, entity) == "calculateTotal": self.assertEqual(self.analyzer.get_entity_label(entity.node), "Function") + def test_method_label(self): + """Methods should get the 'Method' label.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "area": + self.assertEqual(self.analyzer.get_entity_label(entity.node), "Method") + break + + def test_unknown_entity_label_raises(self): + """get_entity_label should raise ValueError for unknown node types.""" + source = b"let x = 1;" + tree = self.analyzer.parser.parse(source) + node = tree.root_node + with self.assertRaises(ValueError): + self.analyzer.get_entity_label(node) + + def test_unknown_entity_name_raises(self): + """get_entity_name should raise ValueError for unknown node types.""" + source = b"let x = 1;" + tree = self.analyzer.parser.parse(source) + node = tree.root_node + with self.assertRaises(ValueError): + self.analyzer.get_entity_name(node) + + # -- Docstrings ------------------------------------------------------------ + + def test_class_docstring(self): + """Shape class should have a leading comment as docstring.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "Shape": + doc = self.analyzer.get_entity_docstring(entity.node) + self.assertIsNotNone(doc) + self.assertIn("Base class for shapes", doc) + break + + def test_no_docstring(self): + """Entities without a leading comment should return None.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "Circle": + doc = self.analyzer.get_entity_docstring(entity.node) + self.assertIsNone(doc) + break + + def test_unknown_entity_docstring_raises(self): + """get_entity_docstring should raise ValueError for unknown node types.""" + source = b"let x = 1;" + tree = self.analyzer.parser.parse(source) + node = tree.root_node + with self.assertRaises(ValueError): + self.analyzer.get_entity_docstring(node) + + # -- Symbols --------------------------------------------------------------- + def test_base_class_symbol(self): """Circle should have Shape as a base_class symbol.""" for entity in self.file.entities.values(): @@ -92,11 +161,66 @@ def test_base_class_symbol(self): ] self.assertIn("Shape", base_names) + def test_no_parameters_symbol(self): + """JS functions should NOT capture untyped parameters as symbols. + + Unlike typed languages (Java, Python), plain JS parameter names are + not meaningful type references and should not be extracted. + """ + for entity in self.file.entities.values(): + self.assertNotIn( + "parameters", entity.symbols, + f"Entity '{_entity_name(self.analyzer, entity)}' should not have " + f"parameter symbols — JS params are untyped", + ) + + def test_call_symbols_extracted(self): + """Functions with call expressions should have 'call' symbols.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "calculateTotal": + self.assertIn("call", entity.symbols) + break + + def test_class_without_extends_has_no_base_class(self): + """Shape (no extends) should have no base_class symbols.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "Shape": + self.assertEqual(len(entity.symbols.get("base_class", [])), 0) + + # -- resolve_symbol dispatch ----------------------------------------------- + + def test_resolve_symbol_unknown_key_raises(self): + """resolve_symbol should raise ValueError for unknown symbol keys.""" + with self.assertRaises(ValueError): + self.analyzer.resolve_symbol({}, None, Path("f.js"), Path("."), "unknown_key", None) + + # -- Dependency detection -------------------------------------------------- + def test_is_dependency(self): """node_modules paths should be flagged as dependencies.""" self.assertTrue(self.analyzer.is_dependency("foo/node_modules/bar/index.js")) self.assertFalse(self.analyzer.is_dependency("src/utils.js")) + def test_is_dependency_path_segment_matching(self): + """is_dependency should use path-segment matching, not substring. + + A directory named 'node_modules_utils' should NOT be treated as a + dependency — only actual 'node_modules' path segments count. + """ + self.assertFalse( + self.analyzer.is_dependency("src/node_modules_utils/helper.js") + ) + self.assertTrue( + self.analyzer.is_dependency("lib/node_modules/lodash/index.js") + ) + + # -- SourceAnalyzer integration -------------------------------------------- + + def test_source_analyzer_supported_types_includes_js(self): + """SourceAnalyzer.supported_types() should include '.js'.""" + sa = SourceAnalyzer() + self.assertIn(".js", sa.supported_types()) + if __name__ == "__main__": unittest.main() From bbad52dc5f61e32e52295dfffb4ee10c14b5204a Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 22 Mar 2026 16:51:59 +0200 Subject: [PATCH 4/5] fix(javascript): skip NullLanguageServer in second pass, add integration test - Skip symbol resolution for files whose LSP is NullLanguageServer, avoiding unnecessary exception handling during second-pass (addresses coderabbit review on source_analyzer.py) - Add SourceAnalyzer.create_hierarchy integration test with MockGraph to verify the production code path for JS entity extraction and edge creation without requiring a database connection (addresses coderabbit review on test coverage) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/source_analyzer.py | 3 +++ tests/test_javascript_analyzer.py | 44 +++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 23c25893..da62a93b 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -151,6 +151,9 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: for i, file_path in enumerate(files): file = self.files[file_path] logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}') + # Skip symbol resolution when no real LSP is available + if isinstance(lsps.get(file_path.suffix), NullLanguageServer): + continue for _, entity in file.entities.items(): entity.resolved_symbol(lambda key, symbol, fp=file_path: analyzers[fp.suffix].resolve_symbol(self.files, lsps[fp.suffix], fp, path, key, symbol)) for key, symbols in entity.symbols.items(): diff --git a/tests/test_javascript_analyzer.py b/tests/test_javascript_analyzer.py index beca0021..195cef3e 100644 --- a/tests/test_javascript_analyzer.py +++ b/tests/test_javascript_analyzer.py @@ -221,6 +221,50 @@ def test_source_analyzer_supported_types_includes_js(self): sa = SourceAnalyzer() self.assertIn(".js", sa.supported_types()) + def test_source_analyzer_create_hierarchy(self): + """SourceAnalyzer.create_hierarchy() should process JS files correctly. + + Uses a lightweight mock Graph to verify the production code path + without requiring a database connection. + """ + class MockGraph: + def __init__(self): + self._next_id = 1 + self.entities = {} + self.edges = [] + + def add_file(self, file): + file.id = self._next_id + self._next_id += 1 + + def add_entity(self, label, name, doc, path, src_start, src_end, props): + eid = self._next_id + self._next_id += 1 + self.entities[eid] = {"label": label, "name": name, "doc": doc} + return eid + + def connect_entities(self, rel, src, dest, props=None): + self.edges.append((rel, src, dest)) + + sa = SourceAnalyzer() + source = self.sample_path.read_bytes() + tree = self.analyzer.parser.parse(source) + file = File(self.sample_path, tree) + graph = MockGraph() + graph.add_file(file) + sa.create_hierarchy(file, self.analyzer, graph) + + entity_names = [e["name"] for e in graph.entities.values()] + self.assertIn("Shape", entity_names) + self.assertIn("Circle", entity_names) + self.assertIn("calculateTotal", entity_names) + self.assertIn("area", entity_names) + self.assertIn("constructor", entity_names) + + # Verify DEFINES edges were created (file → entities) + defines_edges = [e for e in graph.edges if e[0] == "DEFINES"] + self.assertTrue(len(defines_edges) > 0, "Should create DEFINES edges") + if __name__ == "__main__": unittest.main() From 5780d2b0997dd39ee16a9c7b06adbe4c00da4628 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 22 Mar 2026 17:16:07 +0200 Subject: [PATCH 5/5] fix(analyzers): guard against KeyError for ignored files in second_pass Files skipped by first_pass() via the ignore list are not added to self.files, but second_pass() iterated the same files list and dereferenced self.files[file_path] unconditionally. This would raise KeyError for any ignored file. Add a membership check before accessing self.files and reorder the guards so both the membership check and NullLanguageServer skip happen before dereferencing. Addresses review comment: coderabbitai suggested adding a guard to skip files not in self.files before indexing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/source_analyzer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index da62a93b..9631e603 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -149,11 +149,13 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server(): files_len = len(self.files) for i, file_path in enumerate(files): - file = self.files[file_path] - logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}') + if file_path not in self.files: + continue # Skip symbol resolution when no real LSP is available if isinstance(lsps.get(file_path.suffix), NullLanguageServer): continue + file = self.files[file_path] + logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}') for _, entity in file.entities.items(): entity.resolved_symbol(lambda key, symbol, fp=file_path: analyzers[fp.suffix].resolve_symbol(self.files, lsps[fp.suffix], fp, path, key, symbol)) for key, symbols in entity.symbols.items():