From fcbadb0153a4478905d12848776a8595b6c3dc49 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:14:13 -0600 Subject: [PATCH 1/3] chore: release v3.8.0 --- CHANGELOG.md | 30 ++++++ README.md | 23 +++-- crates/codegraph-core/Cargo.toml | 2 +- docs/roadmap/BACKLOG.md | 2 +- docs/roadmap/ROADMAP.md | 16 ++- package-lock.json | 169 ++++++++++++++++++++++++++++++- package.json | 2 +- 7 files changed, 228 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea220857..0b854c8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,36 @@ All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines. +## [3.8.0](https://github.com/optave/ops-codegraph-tool/compare/v3.7.0...v3.8.0) (2026-04-01) + +**34 languages and a fully native build pipeline.** This release completes Phase 7 (Expanded Language Support) by shipping the final 11 languages — F#, Gleam, Clojure, Julia, R, Erlang, Solidity, Objective-C, CUDA, Groovy, and Verilog — bringing codegraph from 23 to 34 supported languages. On the performance side, the entire build pipeline now runs natively in Rust: graph algorithms (BFS, shortest path, Louvain, centrality), import edge building with barrel resolution, and build-glue queries all migrate from JS to napi-rs. A new Rust build orchestration layer coordinates the full native pipeline end-to-end. 
+ +### Features + +* add F#, Gleam, Clojure, Julia, R, Erlang language support ([#722](https://github.com/optave/ops-codegraph-tool/pull/722)) +* add Solidity, Objective-C, CUDA, Groovy, Verilog language support ([#729](https://github.com/optave/ops-codegraph-tool/pull/729)) +* full Rust build orchestration ([#740](https://github.com/optave/ops-codegraph-tool/pull/740)) + +### Bug Fixes + +* **native:** enable bulkInsertNodes native path ([#736](https://github.com/optave/ops-codegraph-tool/pull/736)) +* **native:** enable bulkInsertNodes native path — null-visibility serialisation ([#737](https://github.com/optave/ops-codegraph-tool/pull/737)) +* **native:** prevent SQLITE_CORRUPT in incremental pipeline ([#728](https://github.com/optave/ops-codegraph-tool/pull/728)) +* **ocaml:** use LANGUAGE_OCAML_INTERFACE grammar for .mli files ([#730](https://github.com/optave/ops-codegraph-tool/pull/730)) +* address unresolved review feedback from batch4 language extractors ([#731](https://github.com/optave/ops-codegraph-tool/pull/731)) +* **bench:** report partial native results when incremental rebuild fails ([#741](https://github.com/optave/ops-codegraph-tool/pull/741)) + +### Performance + +* migrate graph algorithms (BFS, shortest path, Louvain, centrality) to Rust ([#732](https://github.com/optave/ops-codegraph-tool/pull/732)) +* migrate import edge building + barrel resolution to Rust ([#738](https://github.com/optave/ops-codegraph-tool/pull/738)) +* **native:** expose standalone complexity/CFG/dataflow analysis via napi-rs ([#733](https://github.com/optave/ops-codegraph-tool/pull/733)) +* native Rust build-glue queries (detect-changes, finalize, incremental) ([#735](https://github.com/optave/ops-codegraph-tool/pull/735)) + +### Refactors + +* **native:** remove call kind from AST node extraction ([#734](https://github.com/optave/ops-codegraph-tool/pull/734)) + ## [3.7.0](https://github.com/optave/ops-codegraph-tool/compare/v3.6.0...v3.7.0) (2026-03-30) **Six more 
languages and a CFG stability fix.** Codegraph now supports Elixir, Lua, Dart, Zig, Haskell, and OCaml — bringing the total to 23 languages with dual-engine extractors. A WAL conflict in the native CFG bulk-insert path is also fixed, preventing database corruption when JS and native connections overlap during control-flow graph writes. diff --git a/README.md b/README.md index 5295b745..bac7cc8d 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ No config files, no Docker, no JVM, no API keys, no accounts. Point your agent a | Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [axon](https://github.com/harshkedia177/axon) | [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | |---|:---:|:---:|:---:|:---:|:---:|:---:| -| Languages | **23** | ~12 | **32** | ~10 | 3 | 13 | +| Languages | **34** | ~12 | **32** | ~10 | 3 | 13 | | MCP server | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | | Dataflow + CFG + AST querying | **Yes** | **Yes** | **Yes**¹ | **Yes** | — | — | | Hybrid search (BM25 + semantic) | **Yes** | — | — | — | **Yes** | **Yes** | @@ -104,9 +104,9 @@ No config files, no Docker, no JVM, no API keys, no accounts. Point your agent a | **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes | | **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds — agents work with current data | | **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — enriched with historically coupled files from git co-change analysis. 
Ships with a GitHub Actions workflow | -| **🌐** | **Multi-language, one graph** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + C + C++ + Kotlin + Swift + Scala + Bash + HCL + Elixir + Lua + Dart + Zig + Haskell + OCaml in a single graph — agents don't need per-language tools | +| **🌐** | **Multi-language, one graph** | 34 languages in a single graph — JS/TS, Python, Go, Rust, Java, C#, PHP, Ruby, C/C++, Kotlin, Swift, Scala, Bash, HCL, Elixir, Lua, Dart, Zig, Haskell, OCaml, F#, Gleam, Clojure, Julia, R, Erlang, Solidity, Objective-C, CUDA, Groovy, Verilog — agents don't need per-language tools | | **🧠** | **Hybrid search** | BM25 keyword + semantic embeddings fused via RRF — `hybrid` (default), `semantic`, or `keyword` mode; multi-query via `"auth; token; JWT"` | -| **🔬** | **Dataflow + CFG** | Track how data flows through functions (`flows_to`, `returns`, `mutates`) and visualize intraprocedural control flow graphs for all 23 languages | +| **🔬** | **Dataflow + CFG** | Track how data flows through functions (`flows_to`, `returns`, `mutates`) and visualize intraprocedural control flow graphs for all 34 languages | | **🔓** | **Fully local, zero cost** | No API keys, no accounts, no network calls. 
Optionally bring your own LLM provider — your code only goes where you choose | --- @@ -200,7 +200,7 @@ cd codegraph && npm install && npm link | 📋 | **Composite audit** | Single `audit` command combining explain + impact + health metrics per function — one call instead of 3-4 | | 🚦 | **Triage queue** | `triage` merges connectivity, hotspots, roles, and complexity into a ranked audit priority queue | -| 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges — all 23 languages, included by default, skip with `--no-dataflow` | -| 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 23 languages — `cfg` command with text/DOT/Mermaid output, included by default, skip with `--no-cfg` | +| 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges — all 34 languages, included by default, skip with `--no-dataflow` | +| 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 34 languages — `cfg` command with text/DOT/Mermaid output, included by default, skip with `--no-cfg` | | 🔎 | **AST node querying** | Stored queryable AST nodes (calls, `new`, string, regex, throw, await) — `ast` command with SQL GLOB pattern matching | | 🧬 | **Expanded node/edge types** | `parameter`, `property`, `constant` node kinds with `parent_id` for sub-declaration queries; `contains`, `parameter_of`, `receiver` edge kinds | | 📊 | **Exports analysis** | `exports ` shows all exported symbols with per-symbol consumers, re-export detection, and counts | @@ -320,7 +320,7 @@ codegraph ast -k call # Filter by kind: call, new, string, regex codegraph ast -k throw --file src/ # Combine kind and file filters ``` -> **Note:** Dataflow and CFG are included by default for all 23 languages. Use `--no-dataflow` / `--no-cfg` for faster builds. +> **Note:** Dataflow and CFG are included by default for all 34 languages. Use `--no-dataflow` / `--no-cfg` for faster builds. 
### Audit, Triage & Batch @@ -494,6 +494,17 @@ codegraph registry remove # Unregister | ![Zig](https://img.shields.io/badge/-Zig-F7A41D?style=flat-square&logo=zig&logoColor=white) | `.zig` | ✓ | ✓ | ✓ | — | — | ✓ | | ![Haskell](https://img.shields.io/badge/-Haskell-5D4F85?style=flat-square&logo=haskell&logoColor=white) | `.hs` | ✓ | ✓ | ✓ | — | — | ✓ | | ![OCaml](https://img.shields.io/badge/-OCaml-EC6813?style=flat-square&logo=ocaml&logoColor=white) | `.ml`, `.mli` | ✓ | ✓ | ✓ | — | — | ✓ | +| ![F#](https://img.shields.io/badge/-F%23-378BBA?style=flat-square&logo=fsharp&logoColor=white) | `.fs`, `.fsx`, `.fsi` | ✓ | ✓ | ✓ | — | — | ✓ | +| ![Gleam](https://img.shields.io/badge/-Gleam-FFAFF3?style=flat-square&logoColor=black) | `.gleam` | ✓ | ✓ | ✓ | — | — | ✓ | +| ![Clojure](https://img.shields.io/badge/-Clojure-5881D8?style=flat-square&logo=clojure&logoColor=white) | `.clj`, `.cljs`, `.cljc` | ✓ | ✓ | ✓ | — | — | ✓ | +| ![Julia](https://img.shields.io/badge/-Julia-9558B2?style=flat-square&logo=julia&logoColor=white) | `.jl` | ✓ | ✓ | ✓ | — | — | ✓ | +| ![R](https://img.shields.io/badge/-R-276DC3?style=flat-square&logo=r&logoColor=white) | `.r`, `.R` | ✓ | ✓ | ✓ | — | — | ✓ | +| ![Erlang](https://img.shields.io/badge/-Erlang-A90533?style=flat-square&logo=erlang&logoColor=white) | `.erl`, `.hrl` | ✓ | ✓ | ✓ | — | — | ✓ | +| ![Solidity](https://img.shields.io/badge/-Solidity-363636?style=flat-square&logo=solidity&logoColor=white) | `.sol` | ✓ | ✓ | ✓ | ✓ | — | ✓ | +| ![Objective-C](https://img.shields.io/badge/-Objective--C-438EFF?style=flat-square&logoColor=white) | `.m` | ✓ | ✓ | ✓ | ✓ | — | ✓ | +| ![CUDA](https://img.shields.io/badge/-CUDA-76B900?style=flat-square&logo=nvidia&logoColor=white) | `.cu`, `.cuh` | ✓ | ✓ | ✓ | ✓ | — | ✓ | +| ![Groovy](https://img.shields.io/badge/-Groovy-4298B8?style=flat-square&logo=apachegroovy&logoColor=white) | `.groovy`, `.gvy` | ✓ | ✓ | ✓ | ✓ | — | ✓ | +| 
![Verilog](https://img.shields.io/badge/-Verilog-848484?style=flat-square&logoColor=white) | `.v`, `.sv` | ✓ | ✓ | ✓ | — | — | ✓ | | ![Terraform](https://img.shields.io/badge/-Terraform-844FBA?style=flat-square&logo=terraform&logoColor=white) | `.tf`, `.hcl` | ✓ | —³ | —³ | —³ | —³ | —³ | > ¹ **Heritage** = `extends`, `implements`, `include`/`extend` (Ruby), trait `impl` (Rust), receiver methods (Go). @@ -798,7 +809,7 @@ See **[ROADMAP.md](docs/roadmap/ROADMAP.md)** for the full development roadmap a 6. ~~**Resolution Accuracy**~~ — **Complete** (v3.3.1) — type inference, receiver type tracking, dead role sub-categories, resolution benchmarks, `package.json` exports, monorepo workspace resolution 7. ~~**TypeScript Migration**~~ — **Complete** (v3.4.0) — all 271 source files migrated from JS to TS, zero `.js` remaining 8. ~~**Native Analysis Acceleration**~~ — **Complete** (v3.5.0) — all build phases in Rust/rusqlite, sub-100ms incremental rebuilds, better-sqlite3 lazy-loaded as fallback only -9. **Expanded Language Support** — **In Progress** (v3.7.0) — Batch 1 shipped (C, C++, Kotlin, Swift, Scala, Bash), Batch 2 shipped (Elixir, Lua, Dart, Zig, Haskell, OCaml); 11 remaining in 2 batches (23 → 34) +9. ~~**Expanded Language Support**~~ — **Complete** (v3.8.0) — 23 new languages in 4 batches (11 → 34), dual-engine WASM + Rust support for all 10. **Analysis Depth** — TypeScript-native resolution, inter-procedural type propagation, field-based points-to analysis 11. **Runtime & Extensibility** — event-driven pipeline, plugin system, query caching, pagination 12. 
**Quality, Security & Technical Debt** — supply-chain security (SBOM, SLSA), CI coverage gates, timer cleanup, tech debt kill list diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index 65cba920..f436006c 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codegraph-core" -version = "3.7.0" +version = "3.8.0" edition = "2021" license = "Apache-2.0" diff --git a/docs/roadmap/BACKLOG.md b/docs/roadmap/BACKLOG.md index b1aa3a5d..5e4845e9 100644 --- a/docs/roadmap/BACKLOG.md +++ b/docs/roadmap/BACKLOG.md @@ -1,6 +1,6 @@ # Codegraph Feature Backlog -**Last updated:** 2026-03-30 +**Last updated:** 2026-04-01 **Source:** Features derived from [COMPETITIVE_ANALYSIS.md](../../generated/competitive/COMPETITIVE_ANALYSIS.md) and internal roadmap discussions. --- diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index b1ed7bfa..b50f625b 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -1,6 +1,6 @@ # Codegraph Roadmap -> **Current version:** 3.7.0 | **Status:** Active development | **Updated:** 2026-03-30 +> **Current version:** 3.8.0 | **Status:** Active development | **Updated:** 2026-04-01 Codegraph is a strong local-first code graph CLI. This roadmap describes planned improvements across fourteen phases -- closing gaps with commercial code intelligence platforms while preserving codegraph's core strengths: fully local, open source, zero cloud dependency by default. @@ -20,7 +20,7 @@ Codegraph is a strong local-first code graph CLI. 
This roadmap describes planned | [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **Complete** (v3.3.1) | | [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **Complete** (v3.4.0) | | [**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Rust extraction for AST/CFG/dataflow/complexity; batch SQLite inserts; incremental rebuilds; native DB write pipeline; full rusqlite migration so native engine never touches better-sqlite3 | **Complete** (v3.5.0) | -| [**7**](#phase-7--expanded-language-support) | Expanded Language Support | Parser abstraction layer, 23 new languages in 4 batches (11 → 34), dual-engine support — Batch 1 (6 languages) shipped in v3.6.0, Batch 2 (6 languages) shipped in v3.7.0; 11 remaining in 2 batches (23 → 34) | **In Progress** (v3.7.0) | +| [**7**](#phase-7--expanded-language-support) | Expanded Language Support | Parser abstraction layer, 23 new languages in 4 batches (11 → 34), dual-engine support — all 4 batches shipped across v3.6.0–v3.8.0 | **Complete** (v3.8.0) | | [**8**](#phase-8--analysis-depth) | Analysis Depth | TypeScript-native resolution, inter-procedural type propagation, field-based points-to analysis, enhanced dynamic dispatch, barrel file resolution, precision/recall CI gates | Planned | | [**9**](#phase-9--runtime--extensibility) | Runtime & Extensibility | Event-driven pipeline, unified engine strategy, subgraph export filtering, transitive confidence, query caching, configuration profiles, pagination, plugin system | Planned | | [**10**](#phase-10--quality-security--technical-debt) | Quality, Security & Technical Debt | Supply-chain security, test quality gates, architectural debt 
cleanup | Planned | @@ -1292,7 +1292,9 @@ Structure building is unchanged — at 22ms it's already fast. --- -## Phase 7 -- Expanded Language Support +## Phase 7 -- Expanded Language Support ✅ + +> **Status:** Complete -- shipped across v3.6.0 → v3.8.0 **Goal:** Support every major programming language that has a mature tree-sitter grammar available in both WASM (npm) and Rust (crates.io). This takes codegraph from 11 to 34 languages, covering every actively-used language where dependency and call-graph analysis is meaningful. @@ -1345,10 +1347,12 @@ Actively maintained grammars with both npm and Rust packages available. | Haskell | `.hs` | `tree-sitter-haskell` | Official | 1.0M crate downloads | | OCaml | `.ml`, `.mli` | `tree-sitter-ocaml` | Official | ML family, mature grammar | -### 7.4 -- Batch 3: Functional & BEAM +### 7.4 -- Batch 3: Functional & BEAM ✅ Languages with solid tree-sitter grammars and active communities. +- ✅ All 6 languages shipped in v3.8.0 ([#722](https://github.com/optave/ops-codegraph-tool/pull/722)) + | Language | Extensions | Grammar | Org | Notes | |----------|-----------|---------|-----|-------| | F# | `.fs`, `.fsx`, `.fsi` | `tree-sitter-fsharp` | `ionide/` | .NET functional, Ionide community | @@ -1358,7 +1362,9 @@ Languages with solid tree-sitter grammars and active communities. 
| R | `.r`, `.R` | `tree-sitter-r` | `r-lib/` | Statistical computing, 135K crate downloads; WASM built from repo | | Erlang | `.erl`, `.hrl` | `tree-sitter-erlang` | `WhatsApp/` | BEAM VM; WASM built from repo | -### 7.5 -- Batch 4: Specialized +### 7.5 -- Batch 4: Specialized ✅ + +- ✅ All 5 languages shipped in v3.8.0 ([#729](https://github.com/optave/ops-codegraph-tool/pull/729)) | Language | Extensions | Grammar | Org | Notes | |----------|-----------|---------|-----|-------| diff --git a/package-lock.json b/package-lock.json index c842c831..d975d38e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@optave/codegraph", - "version": "3.7.0", + "version": "3.8.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@optave/codegraph", - "version": "3.7.0", + "version": "3.8.0", "license": "Apache-2.0", "dependencies": { "better-sqlite3": "^12.6.2", @@ -35,25 +35,30 @@ "tree-sitter-cli": "^0.26.5", "tree-sitter-clojure": "github:sogaiu/tree-sitter-clojure", "tree-sitter-cpp": "^0.23.4", + "tree-sitter-cuda": "^0.21.1", "tree-sitter-dart": "^1.0.0", "tree-sitter-elixir": "^0.3.5", "tree-sitter-erlang": "github:WhatsApp/tree-sitter-erlang#semver:*", "tree-sitter-fsharp": "^0.1.0", "tree-sitter-gleam": "github:gleam-lang/tree-sitter-gleam", "tree-sitter-go": "^0.25.0", + "tree-sitter-groovy": "^0.1.2", "tree-sitter-haskell": "^0.23.1", "tree-sitter-java": "^0.23.5", "tree-sitter-javascript": "^0.25.0", "tree-sitter-julia": "^0.23.1", "tree-sitter-kotlin": "^0.3.8", + "tree-sitter-objc": "^3.0.2", "tree-sitter-ocaml": "^0.24.2", "tree-sitter-php": "^0.24.2", "tree-sitter-python": "^0.25.0", "tree-sitter-ruby": "^0.23.1", "tree-sitter-rust": "^0.24.0", "tree-sitter-scala": "^0.24.0", + "tree-sitter-solidity": "^1.2.13", "tree-sitter-swift": "^0.7.1", "tree-sitter-typescript": "^0.23.2", + "tree-sitter-verilog": "^1.0.0", "typescript": "^6.0.2", "vitest": "^4.0.18" }, @@ -1314,6 +1319,9 @@ "cpu": [ "arm64" ], + "libc": 
[ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1327,6 +1335,9 @@ "cpu": [ "x64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1340,6 +1351,9 @@ "cpu": [ "x64" ], + "libc": [ + "musl" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -7201,6 +7215,28 @@ } } }, + "node_modules/tree-sitter-cuda": { + "version": "0.21.1", + "resolved": "https://registry.npmjs.org/tree-sitter-cuda/-/tree-sitter-cuda-0.21.1.tgz", + "integrity": "sha512-V8zI22cfiQyt0Q3v7KPkM4KVMPXqIgHrcbYCYvfFByLcM7PM6+mpzhRWGvUP4QgmwXkWHdcJoJcJC+ef6V9Z6A==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.5.0", + "node-gyp-build": "^4.8.4", + "tree-sitter-c": "0.24.1", + "tree-sitter-cpp": "0.23.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.4" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, "node_modules/tree-sitter-dart": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/tree-sitter-dart/-/tree-sitter-dart-1.0.0.tgz", @@ -7308,6 +7344,47 @@ } } }, + "node_modules/tree-sitter-groovy": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/tree-sitter-groovy/-/tree-sitter-groovy-0.1.2.tgz", + "integrity": "sha512-4dDUP3XKMwKfDCkm50EmUGPHblyPw0oXTv2ce0VrVYv8erxntsM3WTThC+vZgYT2yh+rYbbDVLsjvfGnHJQ8aw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.3", + "tree-sitter-java": "0.23.4" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-groovy/node_modules/tree-sitter-java": { + "version": "0.23.4", + "resolved": "https://registry.npmjs.org/tree-sitter-java/-/tree-sitter-java-0.23.4.tgz", + "integrity": "sha512-WmqZPzvaHpAcAdJBjwMFwusL+ahp2Liv6T0ASWU7sxGZGceSdP5MpW+2DwLNOiWld39C1WR+9qk99hk4qHK5vw==", + 
"dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, "node_modules/tree-sitter-haskell": { "version": "0.23.1", "resolved": "https://registry.npmjs.org/tree-sitter-haskell/-/tree-sitter-haskell-0.23.1.tgz", @@ -7415,6 +7492,47 @@ "dev": true, "license": "MIT" }, + "node_modules/tree-sitter-objc": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/tree-sitter-objc/-/tree-sitter-objc-3.0.2.tgz", + "integrity": "sha512-Hs0ohmx1u5M+0K7efoW+dv/corhBsfjftfIYLtp7dSGeJ+Zj4c33tDIboBYLs6qijRlz6wtHFxa0YX+FibLulA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.0", + "node-gyp-build": "^4.8.4", + "tree-sitter-c": "^0.23.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-objc/node_modules/tree-sitter-c": { + "version": "0.23.6", + "resolved": "https://registry.npmjs.org/tree-sitter-c/-/tree-sitter-c-0.23.6.tgz", + "integrity": "sha512-0dxXKznVyUA0s6PjNolJNs2yF87O5aL538A/eR6njA5oqX3C3vH4vnx3QdOKwuUdpKEcFdHuiDpRKLLCA/tjvQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, "node_modules/tree-sitter-ocaml": { "version": "0.24.2", "resolved": "https://registry.npmjs.org/tree-sitter-ocaml/-/tree-sitter-ocaml-0.24.2.tgz", @@ -7535,6 +7653,27 @@ } } }, + "node_modules/tree-sitter-solidity": { + "version": "1.2.13", + "resolved": "https://registry.npmjs.org/tree-sitter-solidity/-/tree-sitter-solidity-1.2.13.tgz", + "integrity": 
"sha512-nO2AbcAuz2Qba8JnPNe/3FVjRRvGY3ApxSJ8UPIzfynJm4PYCMbBoXxxbprvMgjCbGYR/ZrHGIPKzXV7zBa+lQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2", + "yarn": "^1.22.21" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + "tree_sitter": { + "optional": true + } + } + }, "node_modules/tree-sitter-swift": { "version": "0.7.1", "resolved": "https://registry.npmjs.org/tree-sitter-swift/-/tree-sitter-swift-0.7.1.tgz", @@ -7612,6 +7751,17 @@ } } }, + "node_modules/tree-sitter-verilog": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/tree-sitter-verilog/-/tree-sitter-verilog-1.0.0.tgz", + "integrity": "sha512-SSGUwA+mQ1Jxn/V2ROLj3+leO/68f+7MxWzoz5kOaJ3qzKAveSWjxOATGmiFMLy4DJ+/0pDXFnapwMDih2Cx6Q==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "nan": "^2.15.0" + } + }, "node_modules/trim-newlines": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/trim-newlines/-/trim-newlines-3.0.1.tgz", @@ -8064,6 +8214,21 @@ "node": ">=12" } }, + "node_modules/yarn": { + "version": "1.22.22", + "resolved": "https://registry.npmjs.org/yarn/-/yarn-1.22.22.tgz", + "integrity": "sha512-prL3kGtyG7o9Z9Sv8IPfBNrWTDmXB4Qbes8A9rEzt6wkJV8mUvoirjU0Mp3GGAU06Y0XQyA3/2/RQFVuK7MTfg==", + "dev": true, + "hasInstallScript": true, + "license": "BSD-2-Clause", + "bin": { + "yarn": "bin/yarn.js", + "yarnpkg": "bin/yarn.js" + }, + "engines": { + "node": ">=4.0.0" + } + }, "node_modules/zod": { "version": "4.3.6", "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", diff --git a/package.json b/package.json index 7bd6a877..2005e89b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@optave/codegraph", - "version": "3.7.0", + "version": "3.8.0", "description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them", "type": "module", "main": 
"dist/index.js", From 61b04e91b8fff211dc087ae7224d472f5dafbd1a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:14:49 -0600 Subject: [PATCH 2/3] perf: forward langId hint to native standalone analysis functions Pass the JS-resolved langId as an explicit Option<String> to the three native standalone analysis functions (analyzeComplexity, buildCfgAnalysis, extractDataflowAnalysis). Rust now uses the hint as the primary language, falling back to extension detection only when None. This ensures files whose language is inferred by content rather than extension (e.g. .vue files tagged as "javascript", extensionless shebang files) use the native Rust path instead of silently falling back to WASM. Closes #739 --- crates/codegraph-core/src/analysis.rs | 25 ++++++++++++++++++------- crates/codegraph-core/src/lib.rs | 20 +++++++++++++------- src/ast-analysis/engine.ts | 6 +++--- src/types.ts | 18 +++++++++++++++--- 4 files changed, 49 insertions(+), 20 deletions(-) diff --git a/crates/codegraph-core/src/analysis.rs b/crates/codegraph-core/src/analysis.rs index db3daa27..a6541f6f 100644 --- a/crates/codegraph-core/src/analysis.rs +++ b/crates/codegraph-core/src/analysis.rs @@ -44,8 +44,18 @@ fn collect_function_nodes<'a>( } /// Parse source code and return a tree + language kind, or None if unsupported. -fn parse_source(source: &str, file_path: &str) -> Option<(tree_sitter::Tree, LanguageKind)> { - let lang = LanguageKind::from_extension(file_path)?; +/// When `lang_id` is provided, it is used as the primary language hint (supports +/// files whose language is inferred by content rather than extension, e.g. `.vue` +/// files tagged as `"javascript"` or extension-less files with a shebang). +/// Falls back to extension detection when `lang_id` is `None`. 
+fn parse_source( + source: &str, + file_path: &str, + lang_id: Option<&str>, +) -> Option<(tree_sitter::Tree, LanguageKind)> { + let lang = lang_id + .and_then(LanguageKind::from_lang_id) + .or_else(|| LanguageKind::from_extension(file_path))?; let mut parser = Parser::new(); parser.set_language(&lang.tree_sitter_language()).ok()?; let tree = parser.parse(source.as_bytes(), None)?; @@ -57,8 +67,9 @@ fn parse_source(source: &str, file_path: &str) -> Option<(tree_sitter::Tree, Lan pub fn analyze_complexity_standalone( source: &str, file_path: &str, + lang_id: Option<&str>, ) -> Vec { - let (tree, lang) = match parse_source(source, file_path) { + let (tree, lang) = match parse_source(source, file_path, lang_id) { Some(v) => v, None => return Vec::new(), }; @@ -91,8 +102,8 @@ pub fn analyze_complexity_standalone( /// Build control-flow graphs for all functions in the given source. /// Returns per-function results with name, line, and CFG data. -pub fn build_cfg_standalone(source: &str, file_path: &str) -> Vec { - let (tree, lang) = match parse_source(source, file_path) { +pub fn build_cfg_standalone(source: &str, file_path: &str, lang_id: Option<&str>) -> Vec { + let (tree, lang) = match parse_source(source, file_path, lang_id) { Some(v) => v, None => return Vec::new(), }; @@ -130,7 +141,7 @@ pub fn build_cfg_standalone(source: &str, file_path: &str) -> Vec Option { - let (tree, lang) = parse_source(source, file_path)?; +pub fn extract_dataflow_standalone(source: &str, file_path: &str, lang_id: Option<&str>) -> Option { + let (tree, lang) = parse_source(source, file_path, lang_id)?; extract_dataflow(&tree, source.as_bytes(), lang.lang_id_str()) } diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index cb8aab59..1b16b029 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -129,30 +129,36 @@ pub fn engine_version() -> String { /// Analyze complexity metrics for all functions in the given source. 
/// Returns per-function results (name, line, endLine, complexity metrics). -/// Language is detected from the file extension or treated as a lang_id. +/// When `lang_id` is provided, it takes priority over extension-based detection. #[napi] pub fn analyze_complexity( source: String, file_path: String, + lang_id: Option<String>, ) -> Vec { - analysis::analyze_complexity_standalone(&source, &file_path) + analysis::analyze_complexity_standalone(&source, &file_path, lang_id.as_deref()) } /// Build control-flow graphs for all functions in the given source. /// Returns per-function results (name, line, endLine, CFG blocks + edges). -/// Language is detected from the file extension or treated as a lang_id. +/// When `lang_id` is provided, it takes priority over extension-based detection. #[napi] -pub fn build_cfg_analysis(source: String, file_path: String) -> Vec { - analysis::build_cfg_standalone(&source, &file_path) +pub fn build_cfg_analysis( + source: String, + file_path: String, + lang_id: Option<String>, +) -> Vec { + analysis::build_cfg_standalone(&source, &file_path, lang_id.as_deref()) } /// Extract dataflow analysis for the given source. /// Returns file-level dataflow (parameters, returns, assignments, arg flows, mutations). -/// Language is detected from the file extension or treated as a lang_id. +/// When `lang_id` is provided, it takes priority over extension-based detection.
#[napi] pub fn extract_dataflow_analysis( source: String, file_path: String, + lang_id: Option<String>, ) -> Option { - analysis::extract_dataflow_standalone(&source, &file_path) + analysis::extract_dataflow_standalone(&source, &file_path, lang_id.as_deref()) } diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index d9efa418..ac2df0e1 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -151,7 +151,7 @@ function runNativeAnalysis( // Complexity if (needsComplexity && native.analyzeComplexity) { try { - const results = native.analyzeComplexity(source, absPath); + const results = native.analyzeComplexity(source, absPath, langId); storeNativeComplexityResults(results, defs); } catch (err: unknown) { debug(`native analyzeComplexity failed for ${relPath}: ${(err as Error).message}`); @@ -161,7 +161,7 @@ function runNativeAnalysis( // CFG if (needsCfg && native.buildCfgAnalysis) { try { - const results = native.buildCfgAnalysis(source, absPath); + const results = native.buildCfgAnalysis(source, absPath, langId); storeNativeCfgResults(results, defs); } catch (err: unknown) { debug(`native buildCfgAnalysis failed for ${relPath}: ${(err as Error).message}`); @@ -171,7 +171,7 @@ function runNativeAnalysis( // Dataflow if (needsDataflow && native.extractDataflowAnalysis) { try { - const result = native.extractDataflowAnalysis(source, absPath); + const result = native.extractDataflowAnalysis(source, absPath, langId); if (result) symbols.dataflow = result; } catch (err: unknown) { debug(`native extractDataflowAnalysis failed for ${relPath}: ${(err as Error).message}`); diff --git a/src/types.ts b/src/types.ts index b6e9031e..ea1da069 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1874,9 +1874,21 @@ export interface NativeAddon { rootDir: string, ): unknown[]; engineVersion(): string; - analyzeComplexity(source: string, filePath: string): NativeFunctionComplexityResult[]; - buildCfgAnalysis(source: string, filePath: string):
NativeFunctionCfgResult[]; - extractDataflowAnalysis(source: string, filePath: string): DataflowResult | null; + analyzeComplexity( + source: string, + filePath: string, + langId?: string | null, + ): NativeFunctionComplexityResult[]; + buildCfgAnalysis( + source: string, + filePath: string, + langId?: string | null, + ): NativeFunctionCfgResult[]; + extractDataflowAnalysis( + source: string, + filePath: string, + langId?: string | null, + ): DataflowResult | null; ParseTreeCache: new () => NativeParseTreeCache; NativeDatabase: { openReadWrite(dbPath: string): NativeDatabase; From 9a6d5e639e51acad0684212590bc766f7f3c4102 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:33:49 -0600 Subject: [PATCH 3/3] fix: use langId fallback in extension-set guards for content-inferred files (#743) The needsComplexity/needsCfg/needsDataflow guards gated solely on file-extension sets, so content-inferred files (.vue tagged as "javascript", extensionless shebang files) could never reach the native or WASM analysis paths despite having a valid langId. Add langId-based fallback checks to all guard sites: the native analysis dispatcher, the WASM pre-parse check, the per-file CFG visitor setup, and the per-file dataflow visitor setup. 
--- src/ast-analysis/engine.ts | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index ac2df0e1..7852a51f 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -127,15 +127,17 @@ function runNativeAnalysis( const defs = symbols.definitions || []; + const langSupportsComplexity = COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId); + const langSupportsCfg = CFG_EXTENSIONS.has(ext) || CFG_RULES.has(langId); + const langSupportsDataflow = DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(langId); + const needsComplexity = - doComplexity && - COMPLEXITY_EXTENSIONS.has(ext) && - defs.some((d) => hasFuncBody(d) && !d.complexity); + doComplexity && langSupportsComplexity && defs.some((d) => hasFuncBody(d) && !d.complexity); const needsCfg = doCfg && - CFG_EXTENSIONS.has(ext) && + langSupportsCfg && defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks)); - const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); + const needsDataflow = doDataflow && !symbols.dataflow && langSupportsDataflow; if (!needsComplexity && !needsCfg && !needsDataflow) continue; @@ -305,16 +307,21 @@ async function ensureWasmTreesIfNeeded( !d.name.includes('.'); // AST: need tree when native didn't provide non-call astNodes - const needsAst = doAst && !Array.isArray(symbols.astNodes) && WALK_EXTENSIONS.has(ext); + const lid = symbols._langId || ''; + const needsAst = + doAst && + !Array.isArray(symbols.astNodes) && + (WALK_EXTENSIONS.has(ext) || AST_TYPE_MAPS.has(lid)); const needsComplexity = doComplexity && - COMPLEXITY_EXTENSIONS.has(ext) && + (COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(lid)) && defs.some((d) => hasFuncBody(d) && !d.complexity); const needsCfg = doCfg && - CFG_EXTENSIONS.has(ext) && + (CFG_EXTENSIONS.has(ext) || CFG_RULES.has(lid)) && defs.some((d) => hasFuncBody(d) && 
d.cfg !== null && !Array.isArray(d.cfg?.blocks)); - const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); + const needsDataflow = + doDataflow && !symbols.dataflow && (DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(lid)); if (needsAst || needsComplexity || needsCfg || needsDataflow) { needsWasmTrees = true; @@ -396,9 +403,9 @@ function setupComplexityVisitorForFile( } /** Set up CFG visitor if any definitions need WASM CFG analysis. */ -function setupCfgVisitorForFile(defs: Definition[], langId: string, ext: string): Visitor | null { +function setupCfgVisitorForFile(defs: Definition[], langId: string): Visitor | null { const cfgRulesForLang = CFG_RULES.get(langId); - if (!cfgRulesForLang || !CFG_EXTENSIONS.has(ext)) return null; + if (!cfgRulesForLang) return null; const needsWasmCfg = defs.some( (d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks), @@ -432,12 +439,12 @@ function setupVisitors( opts.complexity !== false ? setupComplexityVisitorForFile(defs, langId, walkerOpts) : null; if (complexityVisitor) visitors.push(complexityVisitor); - const cfgVisitor = opts.cfg !== false ? setupCfgVisitorForFile(defs, langId, ext) : null; + const cfgVisitor = opts.cfg !== false ? setupCfgVisitorForFile(defs, langId) : null; if (cfgVisitor) visitors.push(cfgVisitor); let dataflowVisitor: Visitor | null = null; const dfRules = DATAFLOW_RULES.get(langId); - if (opts.dataflow !== false && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { + if (opts.dataflow !== false && dfRules && !symbols.dataflow) { dataflowVisitor = createDataflowVisitor(dfRules); visitors.push(dataflowVisitor); }