From fafd0667672c5de38ddf65ed4c809b6d977d6fcd Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Tue, 10 Mar 2026 10:37:16 +0200 Subject: [PATCH 1/7] Add Python import tracking to code graph Migrated from FalkorDB/code-graph-backend PR #97. Original issue: FalkorDB/code-graph-backend#61 Resolves #535 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CI_OPTIMIZATION.md | 160 ++++++++++++++++++++++ api/analyzers/analyzer.py | 29 ++++ api/analyzers/java/analyzer.py | 16 +++ api/analyzers/python/analyzer.py | 92 +++++++++++++ api/analyzers/source_analyzer.py | 13 ++ api/entities/file.py | 20 +++ test-project/a.c | 11 ++ test-project/b.py___ | 73 ++++++++++ test-project/c.java | 26 ++++ tests/source_files/py_imports/module_a.py | 12 ++ tests/source_files/py_imports/module_b.py | 11 ++ tests/test_py_imports.py | 67 +++++++++ 12 files changed, 530 insertions(+) create mode 100644 CI_OPTIMIZATION.md create mode 100644 test-project/a.c create mode 100644 test-project/b.py___ create mode 100644 test-project/c.java create mode 100644 tests/source_files/py_imports/module_a.py create mode 100644 tests/source_files/py_imports/module_b.py create mode 100644 tests/test_py_imports.py diff --git a/CI_OPTIMIZATION.md b/CI_OPTIMIZATION.md new file mode 100644 index 00000000..3b19eb55 --- /dev/null +++ b/CI_OPTIMIZATION.md @@ -0,0 +1,160 @@ +# CI Pipeline Optimization Analysis (Staging Branch) + +## Current Workflows on Staging + +The staging branch has 3 workflow files (identical to main): + +| Workflow | File | Trigger | ~Duration | +|---|---|---|---| +| **Build** | `nextjs.yml` | All PRs + push to main | **~1 min** | +| **Playwright Tests** | `playwright.yml` | PRs + push to main/staging | **~10 min** (x2 shards) | +| **Release image** | `release-image.yml` | Tags + main push | release-only | + +Additionally, **CodeQL** runs on staging pushes. + +## Playwright Tests — The Bottleneck + +This is the critical path. It runs 2 shards in parallel, each taking ~10 min. Measured from recent staging runs: + +| Step | Shard 1 | Shard 2 | % of total | +|---|---|---|---| +| **Seed test data into FalkorDB** | **223s** | **220s** | **37%** | +| **Run Playwright tests** | 264s | 262s | 44% | +| **Install Playwright browsers** | 48s | 51s | 8% | +| Install backend deps (`pip install`) | 28s | 31s | 5% | +| Build frontend | 12s | 12s | 2% | +| Install frontend deps (`npm ci`) | 8s | 8s | 1% | +| Container init + setup | ~15s | ~15s | 3% | + +**Total per shard: ~600s (10 min). Total billable: ~20 min.** + +## Build Workflow — Wasted Work + +The Build workflow (~64s total) installs backend dependencies but does nothing with them: + +| Step | Duration | +|---|---| +| Install frontend deps | 7s | +| Build frontend | 14s | +| Lint frontend | <1s | +| **Install backend deps (`pip install`)** | **35s** | + +The backend install accounts for **55% of the Build workflow** and serves no purpose. + +--- + +## Optimization Recommendations + +### 1. Cache or pre-seed FalkorDB test data (saves **~3.5 min/shard = ~7 min total**) + +`seed_test_data.py` clones 2 GitHub repos (GraphRAG-SDK, Flask) and runs full source analysis every run. This is the single biggest time sink at **37% of Playwright runtime**. + +**Options:** +- **Best**: Export the seeded graph as an RDB dump, commit it as a test fixture, and restore with `redis-cli`. Eliminates the 220s step entirely. +- **Good**: Cache the cloned repos + analysis output with `actions/cache` keyed on the seed script hash + repo commit SHAs. +- **Minimum**: Cache just the git clones to skip network time. + +### 2. Cache Playwright browsers (saves **~50s/shard = ~1.5 min total**) + +Browsers are installed from scratch every run (`npx playwright install --with-deps`). Add: + +```yaml +- name: Cache Playwright browsers + id: playwright-cache + uses: actions/cache@v4 + with: + path: ~/.cache/ms-playwright + key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }} + +- name: Install Playwright Browsers + if: steps.playwright-cache.outputs.cache-hit != 'true' + run: npx playwright install --with-deps chromium + +- name: Install Playwright system deps + if: steps.playwright-cache.outputs.cache-hit == 'true' + run: npx playwright install-deps chromium +``` + +### 3. Switch `pip install` to `uv` (saves **~15-20s/shard**) + +Both workflows use slow `pip install`. `uv sync` is 3-5x faster: + +```yaml +- name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "latest" + +- name: Install dependencies + run: uv sync +``` + +### 4. Remove unused backend install from Build workflow (saves **~35s**) + +`nextjs.yml` installs backend deps but runs no backend tests or lint. Either: +- **Remove** the `Setup Python` and `Install backend dependencies` steps entirely +- **Or** add backend unit tests / pylint to justify the install + +### 5. Add concurrency groups (saves **queued minutes**) + +The Build workflow has no concurrency group. Rapid pushes queue redundant runs: + +```yaml +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true +``` + +The Playwright workflow also lacks a concurrency group. + +### 6. Add npm cache (saves **~3-5s/shard**) + +Neither workflow caches npm. Add to `setup-node`: + +```yaml +- uses: actions/setup-node@v4 + with: + node-version: 24 + cache: 'npm' + cache-dependency-path: | + package-lock.json + app/package-lock.json +``` + +### 7. Docker build caching for releases (saves **~2-5 min** on releases) + +No layer caching on the Docker build. Add: + +```yaml +- uses: docker/build-push-action@v5 + with: + context: . + file: ./Dockerfile + push: true + tags: ${{ env.TAGS }} + cache-from: type=gha + cache-to: type=gha,mode=max +``` + +### 8. Deduplicate npm installs in Playwright workflow + +The Playwright workflow runs `npm ci` twice — once for frontend (`./app`) and once for root (Playwright). These could be consolidated or at least cached. + +--- + +## Summary + +| # | Optimization | Time saved | Effort | +|---|---|---|---| +| 1 | Cache/pre-seed FalkorDB data | **~7 min** | Medium | +| 2 | Cache Playwright browsers | **~1.5 min** | Low | +| 3 | Switch to `uv` from `pip` | **~40s** | Low | +| 4 | Remove unused backend install from Build | **~35s** | Trivial | +| 5 | Add concurrency groups | Variable | Trivial | +| 6 | Add npm cache | ~10s | Trivial | +| 7 | Docker layer caching | ~2-5 min (releases) | Low | +| 8 | Deduplicate npm installs | ~5s | Low | + +**Total potential savings: ~9-10 min per CI run**, bringing Playwright from ~10 min/shard down to ~4-5 min/shard (dominated by the actual test execution). + +The single biggest win is **pre-seeding FalkorDB data** — it alone accounts for 37% of the Playwright workflow runtime. diff --git a/api/analyzers/analyzer.py b/api/analyzers/analyzer.py index 33ca5a2b..8e3e855e 100644 --- a/api/analyzers/analyzer.py +++ b/api/analyzers/analyzer.py @@ -149,3 +149,32 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ pass + @abstractmethod + def add_file_imports(self, file: File) -> None: + """ + Add import statements to the file. + + Args: + file (File): The file to add imports to. + """ + + pass + + @abstractmethod + def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]: + """ + Resolve an import statement to entities. + + Args: + files (dict[Path, File]): All files in the project. + lsp (SyncLanguageServer): The language server. + file_path (Path): The path to the file containing the import. + path (Path): The path to the project root. + import_node (Node): The import statement node. + + Returns: + list[Entity]: List of resolved entities. + """ + + pass + diff --git a/api/analyzers/java/analyzer.py b/api/analyzers/java/analyzer.py index 5269d698..194ab1b3 100644 --- a/api/analyzers/java/analyzer.py +++ b/api/analyzers/java/analyzer.py @@ -127,3 +127,19 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ return self.resolve_method(files, lsp, file_path, path, symbol) else: raise ValueError(f"Unknown key {key}") + + def add_file_imports(self, file: File) -> None: + """ + Extract and add import statements from the file. + Java imports are not yet implemented. + """ + # TODO: Implement Java import tracking + pass + + def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]: + """ + Resolve an import statement to the entities it imports. + Java imports are not yet implemented. + """ + # TODO: Implement Java import resolution + return [] diff --git a/api/analyzers/python/analyzer.py b/api/analyzers/python/analyzer.py index 7a991202..2a96b442 100644 --- a/api/analyzers/python/analyzer.py +++ b/api/analyzers/python/analyzer.py @@ -122,3 +122,95 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ return self.resolve_method(files, lsp, file_path, path, symbol) else: raise ValueError(f"Unknown key {key}") + + def add_file_imports(self, file: File) -> None: + """ + Extract and add import statements from the file. + + Supports: + - import module + - import module as alias + - from module import name + - from module import name1, name2 + - from module import name as alias + """ + try: + import warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # Query for both import types + import_query = self.language.query(""" + (import_statement) @import + (import_from_statement) @import_from + """) + + captures = import_query.captures(file.tree.root_node) + + # Add all import statement nodes to the file + if 'import' in captures: + for import_node in captures['import']: + file.add_import(import_node) + + if 'import_from' in captures: + for import_node in captures['import_from']: + file.add_import(import_node) + except Exception as e: + logger.debug(f"Failed to extract imports from {file.path}: {e}") + + def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]: + """ + Resolve an import statement to the entities it imports. + """ + res = [] + + try: + if import_node.type == 'import_statement': + # Handle "import module" or "import module as alias" + # Find all dotted_name and aliased_import nodes + for child in import_node.children: + if child.type == 'dotted_name': + # Try to resolve the module/name + identifier = child.children[0] if child.child_count > 0 else child + resolved = self.resolve_type(files, lsp, file_path, path, identifier) + res.extend(resolved) + elif child.type == 'aliased_import': + # Get the actual name from aliased import (before 'as') + if child.child_count > 0: + actual_name = child.children[0] + if actual_name.type == 'dotted_name' and actual_name.child_count > 0: + identifier = actual_name.children[0] + else: + identifier = actual_name + resolved = self.resolve_type(files, lsp, file_path, path, identifier) + res.extend(resolved) + + elif import_node.type == 'import_from_statement': + # Handle "from module import name1, name2" + # Find the 'import' keyword to know where imported names start + import_keyword_found = False + for child in import_node.children: + if child.type == 'import': + import_keyword_found = True + continue + + # After 'import' keyword, dotted_name nodes are the imported names + if import_keyword_found and child.type == 'dotted_name': + # Try to resolve the imported name + identifier = child.children[0] if child.child_count > 0 else child + resolved = self.resolve_type(files, lsp, file_path, path, identifier) + res.extend(resolved) + elif import_keyword_found and child.type == 'aliased_import': + # Handle "from module import name as alias" + if child.child_count > 0: + actual_name = child.children[0] + if actual_name.type == 'dotted_name' and actual_name.child_count > 0: + identifier = actual_name.children[0] + else: + identifier = actual_name + resolved = self.resolve_type(files, lsp, file_path, path, identifier) + res.extend(resolved) + + except Exception as e: + logger.debug(f"Failed to resolve import: {e}") + + return res diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 9046abcf..6396c199 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -119,6 +119,10 @@ def first_pass(self, path: Path, files: list[Path], ignore: list[str], graph: Gr # Walk thought the AST graph.add_file(file) self.create_hierarchy(file, analyzer, graph) + + # Extract import statements + if not analyzer.is_dependency(str(file_path)): + analyzer.add_file_imports(file) def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: """ @@ -162,6 +166,8 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: continue file = self.files[file_path] logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}') + + # Resolve entity symbols for _, entity in file.entities.items(): entity.resolved_symbol(lambda key, symbol, fp=file_path: analyzers[fp.suffix].resolve_symbol(self.files, lsps[fp.suffix], fp, path, key, symbol)) for key, resolved_set in entity.resolved_symbols.items(): @@ -178,6 +184,13 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: graph.connect_entities("RETURNS", entity.id, resolved.id) elif key == "parameters": graph.connect_entities("PARAMETERS", entity.id, resolved.id) + + # Resolve file imports + for import_node in file.imports: + resolved_entities = analyzers[file_path.suffix].resolve_import(self.files, lsps[file_path.suffix], file_path, path, import_node) + for resolved_entity in resolved_entities: + file.add_resolved_import(resolved_entity) + graph.connect_entities("IMPORTS", file.id, resolved_entity.id) def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None: self.first_pass(path, files, [], graph) diff --git a/api/entities/file.py b/api/entities/file.py index c59e2b6a..a8937349 100644 --- a/api/entities/file.py +++ b/api/entities/file.py @@ -21,10 +21,30 @@ def __init__(self, path: Path, tree: Tree) -> None: self.path = path self.tree = tree self.entities: dict[Node, Entity] = {} + self.imports: list[Node] = [] + self.resolved_imports: set[Entity] = set() def add_entity(self, entity: Entity): entity.parent = self self.entities[entity.node] = entity + + def add_import(self, import_node: Node): + """ + Add an import statement node to track. + + Args: + import_node (Node): The import statement node. + """ + self.imports.append(import_node) + + def add_resolved_import(self, resolved_entity: Entity): + """ + Add a resolved import entity. + + Args: + resolved_entity (Entity): The resolved entity that is imported. + """ + self.resolved_imports.add(resolved_entity) def __str__(self) -> str: return f"path: {self.path}" diff --git a/test-project/a.c b/test-project/a.c new file mode 100644 index 00000000..788c3f4b --- /dev/null +++ b/test-project/a.c @@ -0,0 +1,11 @@ +#include +#include "/src/ff.h" + + +/* Create an empty intset. */ +intset* intsetNew(void) { + intset *is = zmalloc(sizeof(intset)); + is->encoding = intrev32ifbe(INTSET_ENC_INT16); + is->length = 0; + return is; +} \ No newline at end of file diff --git a/test-project/b.py___ b/test-project/b.py___ new file mode 100644 index 00000000..0efe117b --- /dev/null +++ b/test-project/b.py___ @@ -0,0 +1,73 @@ +class Employee: + def __init__(self, name, employee_id, base_salary): + self.name = name + self.employee_id = employee_id + self.base_salary = base_salary + + def get_salary(self): + return self.base_salary + + def display_info(self): + return f"Employee: {self.name} (ID: {self.employee_id})" + + def apply_raise(self, percentage): + self.base_salary += (self.base_salary * percentage / 100) + return f"New salary after {percentage}% raise: ${self.base_salary:,.2f}" + +class Developer(Employee): + def __init__(self, name, employee_id, base_salary, programming_languages): + # Call parent class's __init__ method + super().__init__(name, employee_id, base_salary) + self.programming_languages = programming_languages + + def add_language(self, language): + self.programming_languages.append(language) + return f"Added {language} to skill set" + + # Override display_info method + def display_info(self): + basic_info = super().display_info() + return f"{basic_info}\nRole: Developer\nSkills: {', '.join(self.programming_languages)}" + +class Manager(Employee): + def __init__(self, name, employee_id, base_salary, team_members=None): + super().__init__(name, employee_id, base_salary) + self.team_members = team_members if team_members else [] + + def add_team_member(self, employee): + if isinstance(employee, Employee): + self.team_members.append(employee) + return f"Added {employee.name} to team" + return "Invalid team member" + + def get_salary(self): + # Managers get 10% bonus on top of base salary + return self.base_salary * 1.1 + + def display_info(self): + basic_info = super().display_info() + team_names = [member.name for member in self.team_members] + return f"{basic_info}\nRole: Manager\nTeam Members: {', '.join(team_names)}" + +# Example usage +if __name__ == "__main__": + # Create a developer + dev = Developer("Alice Smith", "DEV001", 75000, ["Python", "JavaScript"]) + print(dev.display_info()) + print(dev.add_language("Java")) + print(dev.apply_raise(10)) + + # Create another developer + dev2 = Developer("Bob Johnson", "DEV002", 70000, ["Python", "C++"]) + + # Create a manager + manager = Manager("Carol Williams", "MGR001", 100000) + print("\n" + manager.display_info()) + + # Add team members to manager + print(manager.add_team_member(dev)) + print(manager.add_team_member(dev2)) + + # Display updated manager info + print("\n" + manager.display_info()) + print(f"Manager's salary with bonus: ${manager.get_salary():,.2f}") \ No newline at end of file diff --git a/test-project/c.java b/test-project/c.java new file mode 100644 index 00000000..57e855cc --- /dev/null +++ b/test-project/c.java @@ -0,0 +1,26 @@ +package test-project; + +public class c { + + private int a; + + public static void main(String[] args) { + System.out.println("Hello, World!"); + } + + public static void print() { + System.out.println("Hello, World!"); + } + + public int getA() { + return a; + } + + public void setA(int a) { + this.a = a; + } + + public void inc() { + setA(getA() + 1); + } +} diff --git a/tests/source_files/py_imports/module_a.py b/tests/source_files/py_imports/module_a.py new file mode 100644 index 00000000..b6323048 --- /dev/null +++ b/tests/source_files/py_imports/module_a.py @@ -0,0 +1,12 @@ +"""Module A with a class definition.""" + +class ClassA: + """A simple class in module A.""" + + def method_a(self): + """A method in ClassA.""" + return "Method A" + +def function_a(): + """A function in module A.""" + return "Function A" diff --git a/tests/source_files/py_imports/module_b.py b/tests/source_files/py_imports/module_b.py new file mode 100644 index 00000000..c0c1c307 --- /dev/null +++ b/tests/source_files/py_imports/module_b.py @@ -0,0 +1,11 @@ +"""Module B that imports from module A.""" + +from module_a import ClassA, function_a + +class ClassB(ClassA): + """A class that extends ClassA.""" + + def method_b(self): + """A method in ClassB.""" + result = function_a() + return f"Method B: {result}" diff --git a/tests/test_py_imports.py b/tests/test_py_imports.py new file mode 100644 index 00000000..8e866037 --- /dev/null +++ b/tests/test_py_imports.py @@ -0,0 +1,67 @@ +import os +import unittest +from pathlib import Path + +from api import SourceAnalyzer, File, Graph + + +class Test_PY_Imports(unittest.TestCase): + def test_import_tracking(self): + """Test that Python imports are tracked correctly.""" + # Get test file path + current_dir = os.path.dirname(os.path.abspath(__file__)) + test_path = os.path.join(current_dir, 'source_files', 'py_imports') + + # Create graph and analyze + g = Graph("py_imports_test") + analyzer = SourceAnalyzer() + + try: + analyzer.analyze_local_folder(test_path, g) + + # Verify files were created + module_a = g.get_file('', 'module_a.py', '.py') + self.assertIsNotNone(module_a, "module_a.py should be in the graph") + + module_b = g.get_file('', 'module_b.py', '.py') + self.assertIsNotNone(module_b, "module_b.py should be in the graph") + + # Verify classes were created + class_a = g.get_class_by_name('ClassA') + self.assertIsNotNone(class_a, "ClassA should be in the graph") + + class_b = g.get_class_by_name('ClassB') + self.assertIsNotNone(class_b, "ClassB should be in the graph") + + # Verify function was created + func_a = g.get_function_by_name('function_a') + self.assertIsNotNone(func_a, "function_a should be in the graph") + + # Test: module_b should have IMPORTS relationship to ClassA + # Query to check if module_b imports ClassA + query = """ + MATCH (f:File {name: 'module_b.py'})-[:IMPORTS]->(c:Class {name: 'ClassA'}) + RETURN c + """ + result = g._query(query, {}) + self.assertGreater(len(result.result_set), 0, + "module_b.py should import ClassA") + + # Test: module_b should have IMPORTS relationship to function_a + query = """ + MATCH (f:File {name: 'module_b.py'})-[:IMPORTS]->(fn:Function {name: 'function_a'}) + RETURN fn + """ + result = g._query(query, {}) + self.assertGreater(len(result.result_set), 0, + "module_b.py should import function_a") + + print("✓ Import tracking test passed") + + finally: + # Cleanup: delete the test graph + g.delete() + + +if __name__ == '__main__': + unittest.main() From 7f951318b14a4b6d0c1309d223bd6b8b2415f948 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sat, 21 Mar 2026 13:10:42 +0200 Subject: [PATCH 2/7] fix: address review feedback for Python import tracking - Remove unused Path and File imports from test_py_imports.py - Add explicit Entity and File imports to Java analyzer - Fix resolve_type to also resolve function_definition (not just classes) - Add _resolve_import_name helper to try both type and method resolution - Fix invalid Java package name in test-project/c.java (hyphens not allowed) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/java/analyzer.py | 2 ++ api/analyzers/python/analyzer.py | 27 ++++++++++++++++----------- test-project/c.java | 2 +- tests/test_py_imports.py | 3 +-- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/api/analyzers/java/analyzer.py b/api/analyzers/java/analyzer.py index 194ab1b3..77ed63c9 100644 --- a/api/analyzers/java/analyzer.py +++ b/api/analyzers/java/analyzer.py @@ -2,6 +2,8 @@ from pathlib import Path import subprocess from ...entities import * +from ...entities.entity import Entity +from ...entities.file import File from typing import Optional from ..analyzer import AbstractAnalyzer diff --git a/api/analyzers/python/analyzer.py b/api/analyzers/python/analyzer.py index 2a96b442..b7b0c535 100644 --- a/api/analyzers/python/analyzer.py +++ b/api/analyzers/python/analyzer.py @@ -96,9 +96,11 @@ def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_pa if node.type == 'attribute': node = node.child_by_field_name('attribute') for file, resolved_node in self.resolve(files, lsp, file_path, path, node): - type_dec = self.find_parent(resolved_node, ['class_definition']) - if type_dec in file.entities: - res.append(file.entities[type_dec]) + decl = resolved_node + if decl.type not in ['class_definition', 'function_definition']: + decl = self.find_parent(resolved_node, ['class_definition', 'function_definition']) + if decl in file.entities: + res.append(file.entities[decl]) return res def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]: @@ -157,6 +159,13 @@ def add_file_imports(self, file: File) -> None: except Exception as e: logger.debug(f"Failed to extract imports from {file.path}: {e}") + def _resolve_import_name(self, files, lsp, file_path, path, identifier): + """Try to resolve an imported name as both a type and a function.""" + resolved = self.resolve_type(files, lsp, file_path, path, identifier) + if not resolved: + resolved = self.resolve_method(files, lsp, file_path, path, identifier) + return resolved + def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]: """ Resolve an import statement to the entities it imports. @@ -171,8 +180,7 @@ def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ if child.type == 'dotted_name': # Try to resolve the module/name identifier = child.children[0] if child.child_count > 0 else child - resolved = self.resolve_type(files, lsp, file_path, path, identifier) - res.extend(resolved) + res.extend(self._resolve_import_name(files, lsp, file_path, path, identifier)) elif child.type == 'aliased_import': # Get the actual name from aliased import (before 'as') if child.child_count > 0: @@ -181,8 +189,7 @@ def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ identifier = actual_name.children[0] else: identifier = actual_name - resolved = self.resolve_type(files, lsp, file_path, path, identifier) - res.extend(resolved) + res.extend(self._resolve_import_name(files, lsp, file_path, path, identifier)) elif import_node.type == 'import_from_statement': # Handle "from module import name1, name2" @@ -197,8 +204,7 @@ def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ if import_keyword_found and child.type == 'dotted_name': # Try to resolve the imported name identifier = child.children[0] if child.child_count > 0 else child - resolved = self.resolve_type(files, lsp, file_path, path, identifier) - res.extend(resolved) + res.extend(self._resolve_import_name(files, lsp, file_path, path, identifier)) elif import_keyword_found and child.type == 'aliased_import': # Handle "from module import name as alias" if child.child_count > 0: @@ -207,8 +213,7 @@ def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ identifier = actual_name.children[0] else: identifier = actual_name - resolved = self.resolve_type(files, lsp, file_path, path, identifier) - res.extend(resolved) + res.extend(self._resolve_import_name(files, lsp, file_path, path, identifier)) except Exception as e: logger.debug(f"Failed to resolve import: {e}") diff --git a/test-project/c.java b/test-project/c.java index 57e855cc..a2cec443 100644 --- a/test-project/c.java +++ b/test-project/c.java @@ -1,4 +1,4 @@ -package test-project; +package test_project; public class c { diff --git a/tests/test_py_imports.py b/tests/test_py_imports.py index 8e866037..e0622203 100644 --- a/tests/test_py_imports.py +++ b/tests/test_py_imports.py @@ -1,8 +1,7 @@ import os import unittest -from pathlib import Path -from api import SourceAnalyzer, File, Graph +from api import SourceAnalyzer, Graph class Test_PY_Imports(unittest.TestCase): From b795e65e0453fc827f6e1000dd48ee4a749899dc Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sat, 21 Mar 2026 13:32:16 +0200 Subject: [PATCH 3/7] fix: add stub add_file_imports/resolve_import to CSharpAnalyzer CSharpAnalyzer was missing implementations for the new abstract methods added by this PR, causing TypeError on import. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/csharp/analyzer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/api/analyzers/csharp/analyzer.py b/api/analyzers/csharp/analyzer.py index 74c3906e..30932d1b 100644 --- a/api/analyzers/csharp/analyzer.py +++ b/api/analyzers/csharp/analyzer.py @@ -136,3 +136,11 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ return self.resolve_method(files, lsp, file_path, path, symbol) else: raise ValueError(f"Unknown key {key}") + + def add_file_imports(self, file: File) -> None: + # C# import tracking not yet implemented + pass + + def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node) -> list[Entity]: + # C# import resolution not yet implemented + return [] From f1b3757a595860476f827b8b22357f4a93f91c6a Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sat, 21 Mar 2026 23:28:45 +0200 Subject: [PATCH 4/7] =?UTF-8?q?fix:=20address=20review=20=E2=80=94=20fix?= =?UTF-8?q?=20signatures,=20use=20=5Fcaptures,=20remove=20stale=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix CSharpAnalyzer.resolve_import signature to match abstract (add Node type) - Switch add_file_imports to use self._captures() instead of query.captures() - Remove stale CI_OPTIMIZATION.md and test-project/b.py___ - Remove debug print from test Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CI_OPTIMIZATION.md | 160 ------------------------------- api/analyzers/csharp/analyzer.py | 2 +- api/analyzers/python/analyzer.py | 10 +- test-project/b.py___ | 73 -------------- tests/test_py_imports.py | 2 - 5 files changed, 3 insertions(+), 244 deletions(-) delete mode 100644 CI_OPTIMIZATION.md delete mode 100644 test-project/b.py___ diff --git a/CI_OPTIMIZATION.md b/CI_OPTIMIZATION.md deleted file mode 100644 index 3b19eb55..00000000 --- a/CI_OPTIMIZATION.md +++ /dev/null @@ -1,160 +0,0 @@ -# CI Pipeline Optimization Analysis (Staging Branch) - -## Current Workflows on Staging - -The staging branch has 3 workflow files (identical to main): - -| Workflow | File | Trigger | ~Duration | -|---|---|---|---| -| **Build** | `nextjs.yml` | All PRs + push to main | **~1 min** | -| **Playwright Tests** | `playwright.yml` | PRs + push to main/staging | **~10 min** (x2 shards) | -| **Release image** | `release-image.yml` | Tags + main push | release-only | - -Additionally, **CodeQL** runs on staging pushes. - -## Playwright Tests — The Bottleneck - -This is the critical path. It runs 2 shards in parallel, each taking ~10 min. Measured from recent staging runs: - -| Step | Shard 1 | Shard 2 | % of total | -|---|---|---|---| -| **Seed test data into FalkorDB** | **223s** | **220s** | **37%** | -| **Run Playwright tests** | 264s | 262s | 44% | -| **Install Playwright browsers** | 48s | 51s | 8% | -| Install backend deps (`pip install`) | 28s | 31s | 5% | -| Build frontend | 12s | 12s | 2% | -| Install frontend deps (`npm ci`) | 8s | 8s | 1% | -| Container init + setup | ~15s | ~15s | 3% | - -**Total per shard: ~600s (10 min). Total billable: ~20 min.** - -## Build Workflow — Wasted Work - -The Build workflow (~64s total) installs backend dependencies but does nothing with them: - -| Step | Duration | -|---|---| -| Install frontend deps | 7s | -| Build frontend | 14s | -| Lint frontend | <1s | -| **Install backend deps (`pip install`)** | **35s** | - -The backend install accounts for **55% of the Build workflow** and serves no purpose. - ---- - -## Optimization Recommendations - -### 1. Cache or pre-seed FalkorDB test data (saves **~3.5 min/shard = ~7 min total**) - -`seed_test_data.py` clones 2 GitHub repos (GraphRAG-SDK, Flask) and runs full source analysis every run. This is the single biggest time sink at **37% of Playwright runtime**. - -**Options:** -- **Best**: Export the seeded graph as an RDB dump, commit it as a test fixture, and restore with `redis-cli`. Eliminates the 220s step entirely. -- **Good**: Cache the cloned repos + analysis output with `actions/cache` keyed on the seed script hash + repo commit SHAs. -- **Minimum**: Cache just the git clones to skip network time. - -### 2. Cache Playwright browsers (saves **~50s/shard = ~1.5 min total**) - -Browsers are installed from scratch every run (`npx playwright install --with-deps`). Add: - -```yaml -- name: Cache Playwright browsers - id: playwright-cache - uses: actions/cache@v4 - with: - path: ~/.cache/ms-playwright - key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }} - -- name: Install Playwright Browsers - if: steps.playwright-cache.outputs.cache-hit != 'true' - run: npx playwright install --with-deps chromium - -- name: Install Playwright system deps - if: steps.playwright-cache.outputs.cache-hit == 'true' - run: npx playwright install-deps chromium -``` - -### 3. Switch `pip install` to `uv` (saves **~15-20s/shard**) - -Both workflows use slow `pip install`. `uv sync` is 3-5x faster: - -```yaml -- name: Install uv - uses: astral-sh/setup-uv@v5 - with: - version: "latest" - -- name: Install dependencies - run: uv sync -``` - -### 4. Remove unused backend install from Build workflow (saves **~35s**) - -`nextjs.yml` installs backend deps but runs no backend tests or lint. Either: -- **Remove** the `Setup Python` and `Install backend dependencies` steps entirely -- **Or** add backend unit tests / pylint to justify the install - -### 5. Add concurrency groups (saves **queued minutes**) - -The Build workflow has no concurrency group. Rapid pushes queue redundant runs: - -```yaml -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -``` - -The Playwright workflow also lacks a concurrency group. - -### 6. Add npm cache (saves **~3-5s/shard**) - -Neither workflow caches npm. Add to `setup-node`: - -```yaml -- uses: actions/setup-node@v4 - with: - node-version: 24 - cache: 'npm' - cache-dependency-path: | - package-lock.json - app/package-lock.json -``` - -### 7. Docker build caching for releases (saves **~2-5 min** on releases) - -No layer caching on the Docker build. Add: - -```yaml -- uses: docker/build-push-action@v5 - with: - context: . - file: ./Dockerfile - push: true - tags: ${{ env.TAGS }} - cache-from: type=gha - cache-to: type=gha,mode=max -``` - -### 8. Deduplicate npm installs in Playwright workflow - -The Playwright workflow runs `npm ci` twice — once for frontend (`./app`) and once for root (Playwright). These could be consolidated or at least cached. - ---- - -## Summary - -| # | Optimization | Time saved | Effort | -|---|---|---|---| -| 1 | Cache/pre-seed FalkorDB data | **~7 min** | Medium | -| 2 | Cache Playwright browsers | **~1.5 min** | Low | -| 3 | Switch to `uv` from `pip` | **~40s** | Low | -| 4 | Remove unused backend install from Build | **~35s** | Trivial | -| 5 | Add concurrency groups | Variable | Trivial | -| 6 | Add npm cache | ~10s | Trivial | -| 7 | Docker layer caching | ~2-5 min (releases) | Low | -| 8 | Deduplicate npm installs | ~5s | Low | - -**Total potential savings: ~9-10 min per CI run**, bringing Playwright from ~10 min/shard down to ~4-5 min/shard (dominated by the actual test execution). - -The single biggest win is **pre-seeding FalkorDB data** — it alone accounts for 37% of the Playwright workflow runtime. diff --git a/api/analyzers/csharp/analyzer.py b/api/analyzers/csharp/analyzer.py index 30932d1b..aa51034c 100644 --- a/api/analyzers/csharp/analyzer.py +++ b/api/analyzers/csharp/analyzer.py @@ -141,6 +141,6 @@ def add_file_imports(self, file: File) -> None: # C# import tracking not yet implemented pass - def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node) -> list[Entity]: + def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]: # C# import resolution not yet implemented return [] diff --git a/api/analyzers/python/analyzer.py b/api/analyzers/python/analyzer.py index b7b0c535..6083ef3c 100644 --- a/api/analyzers/python/analyzer.py +++ b/api/analyzers/python/analyzer.py @@ -137,16 +137,10 @@ def add_file_imports(self, file: File) -> None: - from module import name as alias """ try: - import warnings - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - # Query for both import types - import_query = self.language.query(""" + captures = self._captures(""" (import_statement) @import (import_from_statement) @import_from - """) - - captures = import_query.captures(file.tree.root_node) + """, file.tree.root_node) # Add all import statement nodes to the file if 'import' in captures: diff --git a/test-project/b.py___ b/test-project/b.py___ deleted file mode 100644 index 0efe117b..00000000 --- a/test-project/b.py___ +++ /dev/null @@ -1,73 +0,0 @@ -class Employee: - def __init__(self, name, employee_id, base_salary): - self.name = name - self.employee_id = employee_id - self.base_salary = base_salary - - def get_salary(self): - return self.base_salary - - def display_info(self): - return f"Employee: {self.name} (ID: {self.employee_id})" - - def apply_raise(self, percentage): - self.base_salary += (self.base_salary * percentage / 100) - return f"New salary after {percentage}% raise: ${self.base_salary:,.2f}" - -class Developer(Employee): - def __init__(self, name, employee_id, base_salary, programming_languages): - # Call parent class's __init__ method - super().__init__(name, employee_id, base_salary) - self.programming_languages = programming_languages - - def add_language(self, language): - self.programming_languages.append(language) - return f"Added {language} to skill set" - - # Override display_info method - def display_info(self): - basic_info = super().display_info() - return f"{basic_info}\nRole: Developer\nSkills: {', '.join(self.programming_languages)}" - -class Manager(Employee): - def __init__(self, name, employee_id, base_salary, team_members=None): - super().__init__(name, employee_id, base_salary) - self.team_members = team_members if team_members else [] - - def add_team_member(self, employee): - if isinstance(employee, Employee): - self.team_members.append(employee) - return f"Added {employee.name} to team" - return "Invalid team member" - - def get_salary(self): - # Managers get 10% bonus on top of base salary - return self.base_salary * 1.1 - - def display_info(self): - basic_info = super().display_info() - team_names = [member.name for member in self.team_members] - return f"{basic_info}\nRole: Manager\nTeam Members: {', '.join(team_names)}" - -# Example usage -if __name__ == "__main__": - # Create a developer - dev = Developer("Alice Smith", "DEV001", 75000, ["Python", "JavaScript"]) - print(dev.display_info()) - print(dev.add_language("Java")) - print(dev.apply_raise(10)) - - # Create another developer - dev2 = Developer("Bob Johnson", "DEV002", 70000, ["Python", "C++"]) - - # Create a manager - manager = Manager("Carol Williams", "MGR001", 100000) - print("\n" + manager.display_info()) - - # Add team members to manager - print(manager.add_team_member(dev)) - print(manager.add_team_member(dev2)) - - # Display updated manager info - print("\n" + manager.display_info()) - print(f"Manager's salary with bonus: ${manager.get_salary():,.2f}") \ No newline at end of file diff --git a/tests/test_py_imports.py b/tests/test_py_imports.py index e0622203..587da8f2 100644 --- a/tests/test_py_imports.py +++ b/tests/test_py_imports.py @@ -55,8 +55,6 @@ def test_import_tracking(self): self.assertGreater(len(result.result_set), 0, "module_b.py should import function_a") - print("✓ Import tracking test passed") - finally: # Cleanup: delete the test graph g.delete() From ad639f9565ac84bc639eefaa36fe2a3e5762ba1f Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 22 Mar 2026 17:17:42 +0200 Subject: [PATCH 5/7] fix(analyzers): add explicit Entity/File imports in Python analyzer Adds explicit imports for Entity and File alongside the star import, matching the pattern used in the Java analyzer. Fixes F405 ruff warnings. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/python/analyzer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/analyzers/python/analyzer.py b/api/analyzers/python/analyzer.py index 6083ef3c..074dbd39 100644 --- a/api/analyzers/python/analyzer.py +++ b/api/analyzers/python/analyzer.py @@ -5,6 +5,8 @@ import tomllib from ...entities import * +from ...entities.entity import Entity +from ...entities.file import File from typing import Optional from ..analyzer import AbstractAnalyzer From 4343b594334ee4ea9cc678abf3a213183612cafe Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 22 Mar 2026 18:11:02 +0200 Subject: [PATCH 6/7] fix(analyzers): add import stub methods to JavaScript and Kotlin analyzers Adds stub add_file_imports() and resolve_import() implementations to JavaScriptAnalyzer and KotlinAnalyzer to satisfy the abstract interface added in the import tracking feature. Without these stubs, the analyzers cannot be instantiated, breaking the seed_test_data step in CI. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/javascript/analyzer.py | 8 ++++++++ api/analyzers/kotlin/analyzer.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/api/analyzers/javascript/analyzer.py b/api/analyzers/javascript/analyzer.py index abc2879f..becbb7f6 100644 --- a/api/analyzers/javascript/analyzer.py +++ b/api/analyzers/javascript/analyzer.py @@ -158,6 +158,14 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ res.append(file.entities[method_dec]) return res + def add_file_imports(self, file: File) -> None: + """JavaScript import tracking not yet implemented.""" + pass + + def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]: + """JavaScript import resolution not yet implemented.""" + return [] + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: """Dispatch symbol resolution based on the symbol category. diff --git a/api/analyzers/kotlin/analyzer.py b/api/analyzers/kotlin/analyzer.py index 3758c302..ea720abe 100644 --- a/api/analyzers/kotlin/analyzer.py +++ b/api/analyzers/kotlin/analyzer.py @@ -148,6 +148,14 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ break return res + def add_file_imports(self, file: File) -> None: + """Kotlin import tracking not yet implemented.""" + pass + + def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]: + """Kotlin import resolution not yet implemented.""" + return [] + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: if key in ["implement_interface", "base_class", "parameters", "return_type"]: return self.resolve_type(files, lsp, file_path, path, symbol) From b035a86556fde81e4464fdff67966d21af13eff5 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 22 Mar 2026 23:06:42 +0200 Subject: [PATCH 7/7] fix(test): use relative include path in test-project/a.c Change absolute include path /src/ff.h to relative src/ff.h for portability. Addresses review comment: absolute include path may cause portability issues. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- test-project/a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-project/a.c b/test-project/a.c index 788c3f4b..bdde24d5 100644 --- a/test-project/a.c +++ b/test-project/a.c @@ -1,5 +1,5 @@ #include -#include "/src/ff.h" +#include "src/ff.h" /* Create an empty intset. */