diff --git a/crates/codegraph-core/src/analysis.rs b/crates/codegraph-core/src/analysis.rs index db3daa27..a6541f6f 100644 --- a/crates/codegraph-core/src/analysis.rs +++ b/crates/codegraph-core/src/analysis.rs @@ -44,8 +44,18 @@ fn collect_function_nodes<'a>( } /// Parse source code and return a tree + language kind, or None if unsupported. -fn parse_source(source: &str, file_path: &str) -> Option<(tree_sitter::Tree, LanguageKind)> { - let lang = LanguageKind::from_extension(file_path)?; +/// When `lang_id` is provided, it is used as the primary language hint (supports +/// files whose language is inferred by content rather than extension, e.g. `.vue` +/// files tagged as `"javascript"` or extension-less files with a shebang). +/// Falls back to extension detection when `lang_id` is `None`. +fn parse_source( + source: &str, + file_path: &str, + lang_id: Option<&str>, +) -> Option<(tree_sitter::Tree, LanguageKind)> { + let lang = lang_id + .and_then(LanguageKind::from_lang_id) + .or_else(|| LanguageKind::from_extension(file_path))?; let mut parser = Parser::new(); parser.set_language(&lang.tree_sitter_language()).ok()?; let tree = parser.parse(source.as_bytes(), None)?; @@ -57,8 +67,9 @@ fn parse_source(source: &str, file_path: &str) -> Option<(tree_sitter::Tree, Lan pub fn analyze_complexity_standalone( source: &str, file_path: &str, + lang_id: Option<&str>, ) -> Vec { - let (tree, lang) = match parse_source(source, file_path) { + let (tree, lang) = match parse_source(source, file_path, lang_id) { Some(v) => v, None => return Vec::new(), }; @@ -91,8 +102,8 @@ pub fn analyze_complexity_standalone( /// Build control-flow graphs for all functions in the given source. /// Returns per-function results with name, line, and CFG data. -pub fn build_cfg_standalone(source: &str, file_path: &str) -> Vec { - let (tree, lang) = match parse_source(source, file_path) { +pub fn build_cfg_standalone(source: &str, file_path: &str, lang_id: Option<&str>) -> Vec { + let (tree, lang) = match parse_source(source, file_path, lang_id) { Some(v) => v, None => return Vec::new(), }; @@ -130,7 +141,7 @@ pub fn build_cfg_standalone(source: &str, file_path: &str) -> Vec Option { - let (tree, lang) = parse_source(source, file_path)?; +pub fn extract_dataflow_standalone(source: &str, file_path: &str, lang_id: Option<&str>) -> Option { + let (tree, lang) = parse_source(source, file_path, lang_id)?; extract_dataflow(&tree, source.as_bytes(), lang.lang_id_str()) } diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index cb8aab59..1b16b029 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -129,30 +129,36 @@ pub fn engine_version() -> String { /// Analyze complexity metrics for all functions in the given source. /// Returns per-function results (name, line, endLine, complexity metrics). -/// Language is detected from the file extension or treated as a lang_id. +/// When `lang_id` is provided, it takes priority over extension-based detection. #[napi] pub fn analyze_complexity( source: String, file_path: String, + lang_id: Option, ) -> Vec { - analysis::analyze_complexity_standalone(&source, &file_path) + analysis::analyze_complexity_standalone(&source, &file_path, lang_id.as_deref()) } /// Build control-flow graphs for all functions in the given source. /// Returns per-function results (name, line, endLine, CFG blocks + edges). -/// Language is detected from the file extension or treated as a lang_id. +/// When `lang_id` is provided, it takes priority over extension-based detection. #[napi] -pub fn build_cfg_analysis(source: String, file_path: String) -> Vec { - analysis::build_cfg_standalone(&source, &file_path) +pub fn build_cfg_analysis( + source: String, + file_path: String, + lang_id: Option, +) -> Vec { + analysis::build_cfg_standalone(&source, &file_path, lang_id.as_deref()) } /// Extract dataflow analysis for the given source. /// Returns file-level dataflow (parameters, returns, assignments, arg flows, mutations). -/// Language is detected from the file extension or treated as a lang_id. +/// When `lang_id` is provided, it takes priority over extension-based detection. #[napi] pub fn extract_dataflow_analysis( source: String, file_path: String, + lang_id: Option, ) -> Option { - analysis::extract_dataflow_standalone(&source, &file_path) + analysis::extract_dataflow_standalone(&source, &file_path, lang_id.as_deref()) } diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index d9efa418..7852a51f 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -127,15 +127,17 @@ function runNativeAnalysis( const defs = symbols.definitions || []; + const langSupportsComplexity = COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId); + const langSupportsCfg = CFG_EXTENSIONS.has(ext) || CFG_RULES.has(langId); + const langSupportsDataflow = DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(langId); + const needsComplexity = - doComplexity && - COMPLEXITY_EXTENSIONS.has(ext) && - defs.some((d) => hasFuncBody(d) && !d.complexity); + doComplexity && langSupportsComplexity && defs.some((d) => hasFuncBody(d) && !d.complexity); const needsCfg = doCfg && - CFG_EXTENSIONS.has(ext) && + langSupportsCfg && defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks)); - const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); + const needsDataflow = doDataflow && !symbols.dataflow && langSupportsDataflow; if (!needsComplexity && !needsCfg && !needsDataflow) continue; @@ -151,7 +153,7 @@ function runNativeAnalysis( // Complexity if (needsComplexity && native.analyzeComplexity) { try { - const results = native.analyzeComplexity(source, absPath); + const results = native.analyzeComplexity(source, absPath, langId); storeNativeComplexityResults(results, defs); } catch (err: unknown) { debug(`native analyzeComplexity failed for ${relPath}: ${(err as Error).message}`); @@ -161,7 +163,7 @@ function runNativeAnalysis( // CFG if (needsCfg && native.buildCfgAnalysis) { try { - const results = native.buildCfgAnalysis(source, absPath); + const results = native.buildCfgAnalysis(source, absPath, langId); storeNativeCfgResults(results, defs); } catch (err: unknown) { debug(`native buildCfgAnalysis failed for ${relPath}: ${(err as Error).message}`); @@ -171,7 +173,7 @@ function runNativeAnalysis( // Dataflow if (needsDataflow && native.extractDataflowAnalysis) { try { - const result = native.extractDataflowAnalysis(source, absPath); + const result = native.extractDataflowAnalysis(source, absPath, langId); if (result) symbols.dataflow = result; } catch (err: unknown) { debug(`native extractDataflowAnalysis failed for ${relPath}: ${(err as Error).message}`); @@ -305,16 +307,21 @@ async function ensureWasmTreesIfNeeded( !d.name.includes('.'); // AST: need tree when native didn't provide non-call astNodes - const needsAst = doAst && !Array.isArray(symbols.astNodes) && WALK_EXTENSIONS.has(ext); + const lid = symbols._langId || ''; + const needsAst = + doAst && + !Array.isArray(symbols.astNodes) && + (WALK_EXTENSIONS.has(ext) || AST_TYPE_MAPS.has(lid)); const needsComplexity = doComplexity && - COMPLEXITY_EXTENSIONS.has(ext) && + (COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(lid)) && defs.some((d) => hasFuncBody(d) && !d.complexity); const needsCfg = doCfg && - CFG_EXTENSIONS.has(ext) && + (CFG_EXTENSIONS.has(ext) || CFG_RULES.has(lid)) && defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks)); - const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); + const needsDataflow = + doDataflow && !symbols.dataflow && (DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(lid)); if (needsAst || needsComplexity || needsCfg || needsDataflow) { needsWasmTrees = true; @@ -396,9 +403,9 @@ function setupComplexityVisitorForFile( } /** Set up CFG visitor if any definitions need WASM CFG analysis. */ -function setupCfgVisitorForFile(defs: Definition[], langId: string, ext: string): Visitor | null { +function setupCfgVisitorForFile(defs: Definition[], langId: string): Visitor | null { const cfgRulesForLang = CFG_RULES.get(langId); - if (!cfgRulesForLang || !CFG_EXTENSIONS.has(ext)) return null; + if (!cfgRulesForLang) return null; const needsWasmCfg = defs.some( (d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks), @@ -432,12 +439,12 @@ function setupVisitors( opts.complexity !== false ? setupComplexityVisitorForFile(defs, langId, walkerOpts) : null; if (complexityVisitor) visitors.push(complexityVisitor); - const cfgVisitor = opts.cfg !== false ? setupCfgVisitorForFile(defs, langId, ext) : null; + const cfgVisitor = opts.cfg !== false ? setupCfgVisitorForFile(defs, langId) : null; if (cfgVisitor) visitors.push(cfgVisitor); let dataflowVisitor: Visitor | null = null; const dfRules = DATAFLOW_RULES.get(langId); - if (opts.dataflow !== false && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { + if (opts.dataflow !== false && dfRules && !symbols.dataflow) { dataflowVisitor = createDataflowVisitor(dfRules); visitors.push(dataflowVisitor); } diff --git a/src/types.ts b/src/types.ts index b6e9031e..ea1da069 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1874,9 +1874,21 @@ export interface NativeAddon { rootDir: string, ): unknown[]; engineVersion(): string; - analyzeComplexity(source: string, filePath: string): NativeFunctionComplexityResult[]; - buildCfgAnalysis(source: string, filePath: string): NativeFunctionCfgResult[]; - extractDataflowAnalysis(source: string, filePath: string): DataflowResult | null; + analyzeComplexity( + source: string, + filePath: string, + langId?: string | null, + ): NativeFunctionComplexityResult[]; + buildCfgAnalysis( + source: string, + filePath: string, + langId?: string | null, + ): NativeFunctionCfgResult[]; + extractDataflowAnalysis( + source: string, + filePath: string, + langId?: string | null, + ): DataflowResult | null; ParseTreeCache: new () => NativeParseTreeCache; NativeDatabase: { openReadWrite(dbPath: string): NativeDatabase;