diff --git a/package.json b/package.json index c7fd3226..7bd6a877 100644 --- a/package.json +++ b/package.json @@ -174,6 +174,11 @@ "tree-sitter-ruby": "^0.23.1", "tree-sitter-rust": "^0.24.0", "tree-sitter-scala": "^0.24.0", + "tree-sitter-solidity": "^1.2.13", + "tree-sitter-objc": "^3.0.2", + "tree-sitter-cuda": "^0.21.1", + "tree-sitter-groovy": "^0.1.2", + "tree-sitter-verilog": "^1.0.0", "tree-sitter-swift": "^0.7.1", "tree-sitter-typescript": "^0.23.2", "typescript": "^6.0.2", diff --git a/scripts/build-wasm.ts b/scripts/build-wasm.ts index 1f6d8867..4d979e3a 100644 --- a/scripts/build-wasm.ts +++ b/scripts/build-wasm.ts @@ -52,6 +52,11 @@ const grammars = [ { name: 'tree-sitter-julia', pkg: 'tree-sitter-julia', sub: null }, { name: 'tree-sitter-r', pkg: '@eagleoutice/tree-sitter-r', sub: null }, { name: 'tree-sitter-erlang', pkg: 'tree-sitter-erlang', sub: null }, + { name: 'tree-sitter-solidity', pkg: 'tree-sitter-solidity', sub: null }, + { name: 'tree-sitter-objc', pkg: 'tree-sitter-objc', sub: null }, + { name: 'tree-sitter-cuda', pkg: 'tree-sitter-cuda', sub: null }, + { name: 'tree-sitter-groovy', pkg: 'tree-sitter-groovy', sub: null }, + { name: 'tree-sitter-verilog', pkg: 'tree-sitter-verilog', sub: null }, ]; let failed = 0; diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 2dcaf8e1..32e8631d 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -21,18 +21,21 @@ export { extractCppSymbols, extractCSharpSymbols, extractCSymbols, + extractCudaSymbols, extractDartSymbols, extractElixirSymbols, extractErlangSymbols, extractFSharpSymbols, extractGleamSymbols, extractGoSymbols, + extractGroovySymbols, extractHaskellSymbols, extractHCLSymbols, extractJavaSymbols, extractJuliaSymbols, extractKotlinSymbols, extractLuaSymbols, + extractObjCSymbols, extractOCamlSymbols, extractPHPSymbols, extractPythonSymbols, @@ -40,8 +43,10 @@ export { extractRubySymbols, extractRustSymbols, extractScalaSymbols, + extractSoliditySymbols, 
extractSwiftSymbols, extractSymbols, + extractVerilogSymbols, extractZigSymbols, } from '../extractors/index.js'; @@ -51,18 +56,21 @@ import { extractCppSymbols, extractCSharpSymbols, extractCSymbols, + extractCudaSymbols, extractDartSymbols, extractElixirSymbols, extractErlangSymbols, extractFSharpSymbols, extractGleamSymbols, extractGoSymbols, + extractGroovySymbols, extractHaskellSymbols, extractHCLSymbols, extractJavaSymbols, extractJuliaSymbols, extractKotlinSymbols, extractLuaSymbols, + extractObjCSymbols, extractOCamlSymbols, extractPHPSymbols, extractPythonSymbols, @@ -70,8 +78,10 @@ import { extractRubySymbols, extractRustSymbols, extractScalaSymbols, + extractSoliditySymbols, extractSwiftSymbols, extractSymbols, + extractVerilogSymbols, extractZigSymbols, } from '../extractors/index.js'; @@ -588,6 +598,41 @@ export const LANGUAGE_REGISTRY: LanguageRegistryEntry[] = [ extractor: extractErlangSymbols, required: false, }, + { + id: 'solidity', + extensions: ['.sol'], + grammarFile: 'tree-sitter-solidity.wasm', + extractor: extractSoliditySymbols, + required: false, + }, + { + id: 'objc', + extensions: ['.m'], + grammarFile: 'tree-sitter-objc.wasm', + extractor: extractObjCSymbols, + required: false, + }, + { + id: 'cuda', + extensions: ['.cu', '.cuh'], + grammarFile: 'tree-sitter-cuda.wasm', + extractor: extractCudaSymbols, + required: false, + }, + { + id: 'groovy', + extensions: ['.groovy', '.gvy'], + grammarFile: 'tree-sitter-groovy.wasm', + extractor: extractGroovySymbols, + required: false, + }, + { + id: 'verilog', + extensions: ['.v', '.sv'], + grammarFile: 'tree-sitter-verilog.wasm', + extractor: extractVerilogSymbols, + required: false, + }, ]; const _extToLang: Map = new Map(); @@ -820,7 +865,7 @@ export function getActiveEngine(opts: ParseEngineOpts = {}): { */ export function createParseTreeCache(): any { const native = loadNative(); - if (!native || !native.ParseTreeCache) return null; + if (!native?.ParseTreeCache) return null; return new 
native.ParseTreeCache();
 }
diff --git a/src/extractors/cuda.ts b/src/extractors/cuda.ts
new file mode 100644
index 00000000..63657f7f
--- /dev/null
+++ b/src/extractors/cuda.ts
@@ -0,0 +1,314 @@
+import type {
+  Call,
+  ExtractorOutput,
+  SubDeclaration,
+  TreeSitterNode,
+  TreeSitterTree,
+} from '../types.js';
+import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js';
+
+/**
+ * Collect definitions, calls, imports and class relations from a parsed
+ * CUDA source tree.
+ *
+ * CUDA is a C++ superset. The tree-sitter-cuda grammar extends C++ with the
+ * __global__, __device__, __host__, __shared__ qualifiers and the kernel
+ * launch syntax (<<<...>>>), so the handlers follow the C++ patterns and add
+ * CUDA-specific qualifier detection.
+ *
+ * @param tree - Parsed tree; the walk starts at its root node.
+ * @param _filePath - Not used by this extractor.
+ * @returns The extractor output filled in by the walk.
+ */
+export function extractCudaSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput {
+  const output: ExtractorOutput = {
+    definitions: [],
+    calls: [],
+    imports: [],
+    classes: [],
+    exports: [],
+    typeMap: new Map(),
+  };
+
+  walkCudaNode(tree.rootNode, output);
+  return output;
+}
+
+/** Qualifier spellings reported as decorators on function definitions. */
+const CUDA_QUALIFIERS = new Set([
+  '__global__',
+  '__device__',
+  '__host__',
+  '__shared__',
+  '__constant__',
+]);
+
+/** Handler to run for each node type that produces extractor output. */
+const CUDA_NODE_HANDLERS = new Map<string, (node: TreeSitterNode, ctx: ExtractorOutput) => void>([
+  ['function_definition', handleCudaFunctionDef],
+  ['class_specifier', handleCudaClassSpecifier],
+  ['struct_specifier', handleCudaStructSpecifier],
+  ['enum_specifier', handleCudaEnumSpecifier],
+  ['namespace_definition', handleCudaNamespaceDef],
+  ['type_definition', handleCudaTypedef],
+  ['preproc_include', handleCudaInclude],
+  ['call_expression', handleCudaCallExpression],
+]);
+
+/**
+ * Visit `node` and then every descendant in document order, running the
+ * handler registered for each node's type (if any) before its children.
+ */
+function walkCudaNode(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  CUDA_NODE_HANDLERS.get(node.type)?.(node, ctx);
+
+  for (let idx = 0; idx < node.childCount; idx++) {
+    const next = node.child(idx);
+    if (next) walkCudaNode(next, ctx);
+  }
+}
+
+// ── Handlers 
───────────────────────────────────────────────────────────────
+
+/**
+ * Record a `function_definition` as a `function`, or as a `method` named
+ * `Owner.name` when findCudaParentClass finds an enclosing class/struct.
+ * Parameters become children; CUDA qualifiers become decorators.
+ */
+function handleCudaFunctionDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const outer = node.childForFieldName('declarator');
+  if (!outer) return;
+
+  // Use the declarator itself when it already is the function declarator,
+  // otherwise look for one via findChild.
+  const fnDecl =
+    outer.type === 'function_declarator' ? outer : findChild(outer, 'function_declarator');
+  if (!fnDecl) return;
+
+  const ident = fnDecl.childForFieldName('declarator');
+  if (!ident) return;
+  const shortName = ident.text;
+
+  const owner = findCudaParentClass(node);
+  const paramDecls = extractCudaParameters(fnDecl.childForFieldName('parameters'));
+  const qualifiers = extractCudaQualifiers(node);
+
+  ctx.definitions.push({
+    name: owner ? `${owner}.${shortName}` : shortName,
+    kind: owner ? 'method' : 'function',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+    children: paramDecls.length > 0 ? paramDecls : undefined,
+    // Visibility is only looked up for members.
+    visibility: owner ? extractModifierVisibility(node) : undefined,
+    decorators: qualifiers.length > 0 ? qualifiers : undefined,
+  });
+}
+
+function handleCudaClassSpecifier(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name');
+  if (!nameNode) return;
+  const children = extractCudaClassFields(node);
+  ctx.definitions.push({
+    name: nameNode.text,
+    kind: 'class',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+    children: children.length > 0 ? 
children : undefined,
+  });
+
+  const baseClause = findChild(node, 'base_class_clause');
+  if (baseClause) {
+    for (let i = 0; i < baseClause.childCount; i++) {
+      const child = baseClause.child(i);
+      if (child && (child.type === 'type_identifier' || child.type === 'qualified_identifier')) {
+        ctx.classes.push({
+          name: nameNode.text,
+          extends: child.text,
+          line: node.startPosition.row + 1,
+        });
+      }
+    }
+  }
+}
+
+/**
+ * Record a named `struct_specifier` as a `struct` definition with its field
+ * declarations as children. Structs without a name node are ignored.
+ */
+function handleCudaStructSpecifier(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const ident = node.childForFieldName('name');
+  if (ident) {
+    const fieldDecls = extractCudaClassFields(node);
+    ctx.definitions.push({
+      name: ident.text,
+      kind: 'struct',
+      line: node.startPosition.row + 1,
+      endLine: nodeEndLine(node),
+      children: fieldDecls.length > 0 ? fieldDecls : undefined,
+    });
+  }
+}
+
+function handleCudaEnumSpecifier(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name');
+  if (!nameNode) return;
+  const children = extractCudaEnumEntries(node);
+  ctx.definitions.push({
+    name: nameNode.text,
+    kind: 'enum',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+    children: children.length > 0 ? 
children : undefined,
+  });
+}
+
+/** Record a named `namespace_definition`; namespaces without a name node are skipped. */
+function handleCudaNamespaceDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name');
+  if (!nameNode) return;
+  ctx.definitions.push({
+    name: nameNode.text,
+    kind: 'namespace',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+  });
+}
+
+/**
+ * Record the alias introduced by a `type_definition` as a `type` definition.
+ *
+ * Children are scanned from last to first. An identifier-like child is the
+ * alias itself. A child whose type ends in `_declarator` is searched
+ * depth-first for the first identifier-like node, so that
+ * `typedef int *IntPtr;` or `typedef int (*fn)(int);` is recorded under the
+ * alias rather than under the base type `int`.
+ *
+ * NOTE(review): the `_declarator` suffix follows the tree-sitter-c naming
+ * (pointer/array/function/parenthesized declarators) — confirm against the
+ * tree-sitter-cuda grammar in use.
+ */
+function handleCudaTypedef(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const isNameNode = (n: TreeSitterNode): boolean =>
+    n.type === 'type_identifier' || n.type === 'identifier' || n.type === 'primitive_type';
+
+  // Pre-order search: inside a wrapped declarator the alias comes before any
+  // parameter list, so the first identifier-like node is the alias.
+  const firstNameIn = (n: TreeSitterNode): string | undefined => {
+    if (isNameNode(n)) return n.text;
+    for (let i = 0; i < n.childCount; i++) {
+      const child = n.child(i);
+      const found = child ? firstNameIn(child) : undefined;
+      if (found) return found;
+    }
+    return undefined;
+  };
+
+  let name: string | undefined;
+  for (let i = node.childCount - 1; i >= 0; i--) {
+    const child = node.child(i);
+    if (!child) continue;
+    if (isNameNode(child)) {
+      name = child.text;
+      break;
+    }
+    if (child.type.endsWith('_declarator')) {
+      name = firstNameIn(child);
+      if (name) break;
+    }
+  }
+  if (!name) return;
+  ctx.definitions.push({
+    name,
+    kind: 'type',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+  });
+}
+
+function handleCudaInclude(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const pathNode = node.childForFieldName('path');
+  if (!pathNode) return;
+  const raw = pathNode.text;
+  const source = raw.replace(/^["<]|[">]$/g, '');
+  const lastName = source.split('/').pop() ?? 
source;
+  ctx.imports.push({
+    source,
+    names: [lastName],
+    line: node.startPosition.row + 1,
+    cInclude: true,
+  });
+}
+
+/**
+ * Record a `call_expression`. For `a.b(...)`-style callees (a
+ * `field_expression`) the field is the call name and the argument is the
+ * receiver; otherwise the callee text is used as the name.
+ */
+function handleCudaCallExpression(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const funcNode = node.childForFieldName('function');
+  if (!funcNode) return;
+  const call: Call = { name: '', line: node.startPosition.row + 1 };
+  if (funcNode.type === 'field_expression') {
+    const field = funcNode.childForFieldName('field');
+    const argument = funcNode.childForFieldName('argument');
+    if (field) call.name = field.text;
+    if (argument) call.receiver = argument.text;
+  } else {
+    call.name = funcNode.text;
+  }
+  if (call.name) ctx.calls.push(call);
+}
+
+// ── Helpers ────────────────────────────────────────────────────────────────
+
+/**
+ * Return the CUDA qualifiers found among the direct children of `node`, in
+ * source order.
+ *
+ * A child counts when its full text is one of CUDA_QUALIFIERS, whatever its
+ * node type. Each matching child contributes exactly one entry; the earlier
+ * version re-tested storage-class / attribute children and pushed them a
+ * second time, producing duplicate decorators.
+ */
+function extractCudaQualifiers(node: TreeSitterNode): string[] {
+  const qualifiers: string[] = [];
+  for (let i = 0; i < node.childCount; i++) {
+    const child = node.child(i);
+    if (child && CUDA_QUALIFIERS.has(child.text)) {
+      qualifiers.push(child.text);
+    }
+  }
+  return qualifiers;
+}
+
+function findCudaParentClass(node: TreeSitterNode): string | null {
+  let current = node.parent;
+  while (current) {
+    if (current.type === 'field_declaration_list') {
+      const classNode = current.parent;
+      if (
+        classNode &&
+        (classNode.type === 'class_specifier' || classNode.type === 'struct_specifier')
+      ) {
+        const nameNode = classNode.childForFieldName('name');
+        return nameNode ? 
nameNode.text : null;
+      }
+    }
+    current = current.parent;
+  }
+  return null;
+}
+
+/**
+ * Turn the `parameter_declaration` children of a parameter list into
+ * `parameter` sub-declarations. Parameters without a declarator are skipped.
+ *
+ * @param paramListNode - The parameter list node, or null when absent.
+ * @returns One entry per named parameter, in source order.
+ */
+function extractCudaParameters(paramListNode: TreeSitterNode | null): SubDeclaration[] {
+  if (!paramListNode) return [];
+
+  const collected: SubDeclaration[] = [];
+  for (let idx = 0; idx < paramListNode.childCount; idx++) {
+    const entry = paramListNode.child(idx);
+    if (!entry || entry.type !== 'parameter_declaration') continue;
+
+    const decl = entry.childForFieldName('declarator');
+    if (!decl) continue;
+
+    // A bare identifier is the name; otherwise prefer an identifier child
+    // and fall back to the declarator's full text.
+    let label = decl.text;
+    if (decl.type !== 'identifier') {
+      label = findChild(decl, 'identifier')?.text ?? decl.text;
+    }
+    collected.push({ name: label, kind: 'parameter', line: entry.startPosition.row + 1 });
+  }
+  return collected;
+}
+
+function extractCudaClassFields(classNode: TreeSitterNode): SubDeclaration[] {
+  const fields: SubDeclaration[] = [];
+  const body =
+    classNode.childForFieldName('body') || findChild(classNode, 'field_declaration_list');
+  if (!body) return fields;
+  for (let i = 0; i < body.childCount; i++) {
+    const member = body.child(i);
+    if (!member || member.type !== 'field_declaration') continue;
+    const nameNode = member.childForFieldName('declarator');
+    if (nameNode) {
+      const name =
+        nameNode.type === 'identifier'
+          ? nameNode.text
+          : (findChild(nameNode, 'identifier')?.text ?? 
nameNode.text);
+      fields.push({
+        name,
+        kind: 'property',
+        line: member.startPosition.row + 1,
+        visibility: extractModifierVisibility(member),
+      });
+    }
+  }
+  return fields;
+}
+
+/**
+ * List the named `enumerator` children of an enum's `enumerator_list` as
+ * `constant` sub-declarations. Returns an empty list when there is no list.
+ */
+function extractCudaEnumEntries(enumNode: TreeSitterNode): SubDeclaration[] {
+  const list = findChild(enumNode, 'enumerator_list');
+  if (!list) return [];
+
+  const constants: SubDeclaration[] = [];
+  for (let idx = 0; idx < list.childCount; idx++) {
+    const item = list.child(idx);
+    if (!item || item.type !== 'enumerator') continue;
+    const label = item.childForFieldName('name');
+    if (!label) continue;
+    constants.push({ name: label.text, kind: 'constant', line: item.startPosition.row + 1 });
+  }
+  return constants;
+}
diff --git a/src/extractors/groovy.ts b/src/extractors/groovy.ts
new file mode 100644
index 00000000..bfb62be1
--- /dev/null
+++ b/src/extractors/groovy.ts
@@ -0,0 +1,332 @@
+import type {
+  Call,
+  ExtractorOutput,
+  SubDeclaration,
+  TreeSitterNode,
+  TreeSitterTree,
+} from '../types.js';
+import {
+  extractModifierVisibility,
+  findChild,
+  findParentNode,
+  lastPathSegment,
+  nodeEndLine,
+} from './helpers.js';
+
+/**
+ * Extract symbols from Groovy files.
+ *
+ * Groovy is a JVM language with Java-like class/interface/enum structures
+ * plus closures, traits, and dynamic typing. The tree-sitter-groovy grammar
+ * models classes, methods, imports, and call expressions similarly to Java. 
+ */
+export function extractGroovySymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput {
+  const output: ExtractorOutput = {
+    definitions: [],
+    calls: [],
+    imports: [],
+    classes: [],
+    exports: [],
+    typeMap: new Map(),
+  };
+
+  walkGroovyNode(tree.rootNode, output);
+  return output;
+}
+
+/**
+ * Handler to run for each node type that produces extractor output. Several
+ * spellings of the same construct map to one handler.
+ */
+const GROOVY_NODE_HANDLERS = new Map<string, (node: TreeSitterNode, ctx: ExtractorOutput) => void>(
+  [
+    ['class_definition', handleGroovyClassDecl],
+    ['class_declaration', handleGroovyClassDecl],
+    ['interface_definition', handleGroovyInterfaceDecl],
+    ['interface_declaration', handleGroovyInterfaceDecl],
+    ['enum_definition', handleGroovyEnumDecl],
+    ['enum_declaration', handleGroovyEnumDecl],
+    ['method_definition', handleGroovyMethodDecl],
+    ['method_declaration', handleGroovyMethodDecl],
+    ['constructor_definition', handleGroovyConstructorDecl],
+    ['constructor_declaration', handleGroovyConstructorDecl],
+    ['function_definition', handleGroovyFunctionDecl],
+    ['function_declaration', handleGroovyFunctionDecl],
+    ['import_statement', handleGroovyImport],
+    ['import_declaration', handleGroovyImport],
+    ['method_call', handleGroovyCallExpr],
+    ['method_invocation', handleGroovyCallExpr],
+    ['call_expression', handleGroovyCallExpr],
+    ['function_call', handleGroovyCallExpr],
+    ['object_creation_expression', handleGroovyObjectCreation],
+  ],
+);
+
+/**
+ * Visit `node` and then every descendant in document order, running the
+ * handler registered for each node's type (if any) before its children.
+ */
+function walkGroovyNode(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  GROOVY_NODE_HANDLERS.get(node.type)?.(node, ctx);
+
+  for (let idx = 0; idx < node.childCount; idx++) {
+    const next = node.child(idx);
+    if (next) walkGroovyNode(next, ctx);
+  }
+}
+
+// ── Handlers ───────────────────────────────────────────────────────────────
+
+/** Node types passed to findParentNode when qualifying member names. */
+const GROOVY_PARENT_TYPES = [
+  'class_definition',
+  'class_declaration',
+  'enum_definition',
+  'enum_declaration',
+  'interface_definition',
+  'interface_declaration',
+] as const;
+
+function handleGroovyClassDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name');
+  if (!nameNode) return;
+  const name = nameNode.text;
+
+  const members = 
extractGroovyClassMembers(node);
+  ctx.definitions.push({
+    name,
+    kind: 'class',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+    children: members.length > 0 ? members : undefined,
+    visibility: extractModifierVisibility(node),
+  });
+
+  // Superclass
+  const superclass = node.childForFieldName('superclass');
+  if (superclass) {
+    const superName =
+      superclass.type === 'generic_type' ? superclass.child(0)?.text : superclass.text;
+    if (superName) {
+      ctx.classes.push({ name, extends: superName, line: node.startPosition.row + 1 });
+    }
+  }
+
+  // Interfaces
+  const interfaces = node.childForFieldName('interfaces');
+  if (interfaces) {
+    for (let i = 0; i < interfaces.childCount; i++) {
+      const iface = interfaces.child(i);
+      if (
+        iface &&
+        (iface.type === 'type_identifier' ||
+          iface.type === 'identifier' ||
+          iface.type === 'generic_type')
+      ) {
+        const ifaceName = iface.type === 'generic_type' ? iface.child(0)?.text : iface.text;
+        if (ifaceName) {
+          ctx.classes.push({ name, implements: ifaceName, line: node.startPosition.row + 1 });
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Record an interface declaration as an `interface` definition with its
+ * modifier visibility. Nodes without a `name` field are ignored.
+ */
+function handleGroovyInterfaceDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const ident = node.childForFieldName('name');
+  if (ident) {
+    ctx.definitions.push({
+      name: ident.text,
+      kind: 'interface',
+      line: node.startPosition.row + 1,
+      endLine: nodeEndLine(node),
+      visibility: extractModifierVisibility(node),
+    });
+  }
+}
+
+function handleGroovyEnumDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name');
+  if (!nameNode) return;
+
+  const members: SubDeclaration[] = [];
+  const body = node.childForFieldName('body') || findChild(node, 'enum_body');
+  if (body) {
+    for (let i = 0; i < body.childCount; i++) {
+      const child = body.child(i);
+      if (!child) continue;
+      if (child.type === 'enum_constant' || child.type === 'identifier') {
+        const constName = child.childForFieldName('name') || child;
+        
members.push({ name: constName.text, kind: 'constant', line: child.startPosition.row + 1 });
+      }
+    }
+  }
+
+  ctx.definitions.push({
+    name: nameNode.text,
+    kind: 'enum',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+    children: members.length > 0 ? members : undefined,
+  });
+}
+
+/**
+ * Shared body of the method and constructor handlers: both record a
+ * `method` definition whose name is prefixed with whatever findParentNode
+ * returns for the enclosing class/enum/interface, when it returns something.
+ */
+function pushGroovyMember(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const ident = node.childForFieldName('name');
+  if (!ident) return;
+
+  const owner = findParentNode(node, GROOVY_PARENT_TYPES);
+  const qualified = owner ? `${owner}.${ident.text}` : ident.text;
+  const paramDecls = extractGroovyParams(node);
+
+  ctx.definitions.push({
+    name: qualified,
+    kind: 'method',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+    children: paramDecls.length > 0 ? paramDecls : undefined,
+    visibility: extractModifierVisibility(node),
+  });
+}
+
+/** Record a method declaration as a `method` definition. */
+function handleGroovyMethodDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  pushGroovyMember(node, ctx);
+}
+
+/** Record a constructor declaration; constructors are reported as `method` too. */
+function handleGroovyConstructorDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  pushGroovyMember(node, ctx);
+}
+
+function handleGroovyFunctionDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name');
+  if (!nameNode) return;
+
+  const params = extractGroovyParams(node);
+  ctx.definitions.push({
+    name: nameNode.text,
+    kind: 'function',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+    children: params.length > 0 ? 
params : undefined,
+  });
+}
+
+/**
+ * Record an import (`import foo.bar.Baz` or `import foo.bar.*`).
+ *
+ * Only the first path-like child is used: its text is the source and the
+ * segment after the last `.` is the imported name.
+ */
+function handleGroovyImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const pathTypes = ['dotted_identifier', 'scoped_identifier', 'identifier', 'qualified_name'];
+
+  for (let idx = 0; idx < node.childCount; idx++) {
+    const part = node.child(idx);
+    if (!part || !pathTypes.includes(part.type)) continue;
+
+    const importPath = part.text;
+    const importedName = lastPathSegment(importPath, '.');
+    ctx.imports.push({
+      source: importPath,
+      names: [importedName],
+      line: node.startPosition.row + 1,
+      javaImport: true,
+    });
+    return;
+  }
+}
+
+/**
+ * Record a call. Three node shapes are understood:
+ *  - a `function`/`method` field holding a member access: the member is the
+ *    name, the accessed object the receiver;
+ *  - a `function`/`method` field holding anything else: its text is the name;
+ *  - no such field: the `name` field is the name and `object` the receiver.
+ * Nothing is recorded when no name could be determined.
+ */
+function handleGroovyCallExpr(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  let calleeName = '';
+  let receiverText: string | undefined;
+
+  const callee = node.childForFieldName('function') || node.childForFieldName('method');
+  if (!callee) {
+    const nameNode = node.childForFieldName('name');
+    const target = node.childForFieldName('object');
+    if (nameNode) {
+      calleeName = nameNode.text;
+      if (target) receiverText = target.text;
+    }
+  } else if (callee.type === 'field_expression' || callee.type === 'member_access') {
+    const member = callee.childForFieldName('field') || callee.childForFieldName('property');
+    const target = callee.childForFieldName('argument') || callee.childForFieldName('object');
+    if (member) calleeName = member.text;
+    if (target) receiverText = target.text;
+  } else {
+    calleeName = callee.text;
+  }
+
+  if (!calleeName) return;
+  const call: Call = { name: calleeName, line: node.startPosition.row + 1 };
+  if (receiverText !== undefined) call.receiver = receiverText;
+  ctx.calls.push(call);
+}
+
+function handleGroovyObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const typeNode = node.childForFieldName('type');
+  if (!typeNode) return;
+  const typeName = typeNode.type === 'generic_type' ? 
typeNode.child(0)?.text : typeNode.text;
+  if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 });
+}
+
+// ── Helpers ────────────────────────────────────────────────────────────────
+
+/**
+ * List the named parameters of a method/constructor/function node as
+ * `parameter` sub-declarations. The list is taken from the `parameters`
+ * field, or from a `formal_parameters` child when the field is absent.
+ */
+function extractGroovyParams(funcNode: TreeSitterNode): SubDeclaration[] {
+  const list =
+    funcNode.childForFieldName('parameters') || findChild(funcNode, 'formal_parameters');
+  if (!list) return [];
+
+  const collected: SubDeclaration[] = [];
+  for (let idx = 0; idx < list.childCount; idx++) {
+    const entry = list.child(idx);
+    if (!entry) continue;
+    if (entry.type !== 'formal_parameter' && entry.type !== 'parameter') continue;
+
+    const ident = entry.childForFieldName('name');
+    if (!ident) continue;
+    collected.push({ name: ident.text, kind: 'parameter', line: entry.startPosition.row + 1 });
+  }
+  return collected;
+}
+
+/**
+ * List the fields declared directly in a class body as `property`
+ * sub-declarations: one entry per named `variable_declarator`, carrying the
+ * line and modifier visibility of its `field_declaration`.
+ */
+function extractGroovyClassMembers(classNode: TreeSitterNode): SubDeclaration[] {
+  const classBody = classNode.childForFieldName('body') || findChild(classNode, 'class_body');
+  if (!classBody) return [];
+
+  const props: SubDeclaration[] = [];
+  for (let idx = 0; idx < classBody.childCount; idx++) {
+    const decl = classBody.child(idx);
+    if (!decl || decl.type !== 'field_declaration') continue;
+
+    for (let k = 0; k < decl.childCount; k++) {
+      const variable = decl.child(k);
+      if (variable?.type !== 'variable_declarator') continue;
+
+      const ident = variable.childForFieldName('name');
+      if (!ident) continue;
+      props.push({
+        name: ident.text,
+        kind: 'property',
+        line: decl.startPosition.row + 1,
+        visibility: extractModifierVisibility(decl),
+      });
+    }
+  }
+  return props;
+}
diff --git a/src/extractors/index.ts b/src/extractors/index.ts
index 537994aa..9d3ee70d 100644
--- a/src/extractors/index.ts
+++ b/src/extractors/index.ts
@@ -3,12 +3,14 @@ export { extractCSymbols } from './c.js';
 export { extractClojureSymbols } from './clojure.js';
 export { extractCppSymbols } from './cpp.js';
 export { extractCSharpSymbols } from 
'./csharp.js'; +export { extractCudaSymbols } from './cuda.js'; export { extractDartSymbols } from './dart.js'; export { extractElixirSymbols } from './elixir.js'; export { extractErlangSymbols } from './erlang.js'; export { extractFSharpSymbols } from './fsharp.js'; export { extractGleamSymbols } from './gleam.js'; export { extractGoSymbols } from './go.js'; +export { extractGroovySymbols } from './groovy.js'; export { extractHaskellSymbols } from './haskell.js'; export { extractHCLSymbols } from './hcl.js'; export { extractJavaSymbols } from './java.js'; @@ -16,6 +18,7 @@ export { extractSymbols } from './javascript.js'; export { extractJuliaSymbols } from './julia.js'; export { extractKotlinSymbols } from './kotlin.js'; export { extractLuaSymbols } from './lua.js'; +export { extractObjCSymbols } from './objc.js'; export { extractOCamlSymbols } from './ocaml.js'; export { extractPHPSymbols } from './php.js'; export { extractPythonSymbols } from './python.js'; @@ -23,5 +26,7 @@ export { extractRSymbols } from './r.js'; export { extractRubySymbols } from './ruby.js'; export { extractRustSymbols } from './rust.js'; export { extractScalaSymbols } from './scala.js'; +export { extractSoliditySymbols } from './solidity.js'; export { extractSwiftSymbols } from './swift.js'; +export { extractVerilogSymbols } from './verilog.js'; export { extractZigSymbols } from './zig.js'; diff --git a/src/extractors/objc.ts b/src/extractors/objc.ts new file mode 100644 index 00000000..d9f56754 --- /dev/null +++ b/src/extractors/objc.ts @@ -0,0 +1,431 @@ +import type { + Call, + ExtractorOutput, + SubDeclaration, + TreeSitterNode, + TreeSitterTree, +} from '../types.js'; +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Objective-C files. + * + * The tree-sitter-objc grammar extends C with @interface, @implementation, + * @protocol, method declarations, #import, and message expressions. 
+ */
+export function extractObjCSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput {
+  const output: ExtractorOutput = {
+    definitions: [],
+    calls: [],
+    imports: [],
+    classes: [],
+    exports: [],
+    typeMap: new Map(),
+  };
+
+  walkObjCNode(tree.rootNode, output);
+  return output;
+}
+
+/** Handler to run for each node type that produces extractor output. */
+const OBJC_NODE_HANDLERS = new Map<string, (node: TreeSitterNode, ctx: ExtractorOutput) => void>([
+  ['class_interface', handleClassInterface],
+  ['class_implementation', handleClassImplementation],
+  ['protocol_declaration', handleProtocolDecl],
+  ['category_interface', handleCategoryInterface],
+  ['category_implementation', handleCategoryImplementation],
+  ['method_declaration', handleMethodDecl],
+  ['method_definition', handleMethodDecl],
+  ['function_definition', handleFunctionDef],
+  ['preproc_include', handleImport],
+  ['preproc_import', handleImport],
+  ['import_declaration', handleAtImport],
+  ['struct_specifier', handleStructSpecifier],
+  ['enum_specifier', handleEnumSpecifier],
+  ['type_definition', handleTypedef],
+  ['call_expression', handleCCallExpr],
+  ['message_expression', handleMessageExpr],
+]);
+
+/**
+ * Visit `node` and then every descendant in document order, running the
+ * handler registered for each node's type (if any) before its children.
+ */
+function walkObjCNode(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  OBJC_NODE_HANDLERS.get(node.type)?.(node, ctx);
+
+  for (let idx = 0; idx < node.childCount; idx++) {
+    const next = node.child(idx);
+    if (next) walkObjCNode(next, ctx);
+  }
+}
+
+// ── ObjC class/protocol handlers ──────────────────────────────────────────
+
+function handleClassInterface(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name') || findObjCDeclName(node);
+  if (!nameNode) return;
+  const name = nameNode.text;
+
+  const members = collectClassMembers(node);
+  ctx.definitions.push({
+    name,
+    kind: 'class',
+    line: node.startPosition.row + 1,
+    
endLine: nodeEndLine(node),
+    children: members.length > 0 ? members : undefined,
+  });
+
+  // Superclass
+  const superclass = node.childForFieldName('superclass');
+  if (superclass) {
+    ctx.classes.push({ name, extends: superclass.text, line: node.startPosition.row + 1 });
+  }
+
+  // Protocols
+  const protocols = findChild(node, 'protocol_qualifiers');
+  if (protocols) {
+    for (let i = 0; i < protocols.childCount; i++) {
+      const proto = protocols.child(i);
+      if (proto && proto.type === 'identifier') {
+        ctx.classes.push({ name, implements: proto.text, line: node.startPosition.row + 1 });
+      }
+    }
+  }
+}
+
+/**
+ * Push a definition of the given kind for a declaration whose name is its
+ * `name` field or, failing that, its first `identifier` child. Nothing is
+ * recorded when neither exists.
+ */
+function pushObjCNamedDecl(
+  node: TreeSitterNode,
+  ctx: ExtractorOutput,
+  kind: 'class' | 'interface',
+): void {
+  const ident = node.childForFieldName('name') || findObjCDeclName(node);
+  if (!ident) return;
+
+  ctx.definitions.push({
+    name: ident.text,
+    kind,
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+  });
+}
+
+/** Record an `@implementation` block as a `class` definition. */
+function handleClassImplementation(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  pushObjCNamedDecl(node, ctx, 'class');
+}
+
+/** Record a `@protocol` declaration as an `interface` definition. */
+function handleProtocolDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  pushObjCNamedDecl(node, ctx, 'interface');
+}
+
+function handleCategoryInterface(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name') || findObjCDeclName(node);
+  if (!nameNode) return;
+  const category = node.childForFieldName('category');
+  const catName = category ? 
`${nameNode.text}(${category.text})` : nameNode.text;
+
+  ctx.definitions.push({
+    name: catName,
+    kind: 'class',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+  });
+}
+
+/**
+ * Record a category implementation as a `class` definition named
+ * `Class(Category)`, or just `Class` when no `category` field is present.
+ */
+function handleCategoryImplementation(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const classIdent = node.childForFieldName('name') || findObjCDeclName(node);
+  if (!classIdent) return;
+
+  const categoryIdent = node.childForFieldName('category');
+  let label = classIdent.text;
+  if (categoryIdent) {
+    label = `${classIdent.text}(${categoryIdent.text})`;
+  }
+
+  ctx.definitions.push({
+    name: label,
+    kind: 'class',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+  });
+}
+
+// ── Method / function handlers ────────────────────────────────────────────
+
+/**
+ * Record a method declaration or definition as a `method` named by its
+ * selector, prefixed with the enclosing class/protocol/category name when
+ * one is found. Methods whose selector cannot be built are skipped.
+ */
+function handleMethodDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const sel = buildSelector(node);
+  if (!sel) return;
+
+  const owner = findObjCParentClass(node);
+  const paramDecls = extractMethodParams(node);
+
+  ctx.definitions.push({
+    name: owner ? `${owner}.${sel}` : sel,
+    kind: 'method',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+    children: paramDecls.length > 0 ? paramDecls : undefined,
+  });
+}
+
+function handleFunctionDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const declarator = node.childForFieldName('declarator');
+  if (!declarator) return;
+  const funcDeclarator =
+    declarator.type === 'function_declarator'
+      ? declarator
+      : findChild(declarator, 'function_declarator');
+  if (!funcDeclarator) return;
+  const nameNode = funcDeclarator.childForFieldName('declarator');
+  if (!nameNode) return;
+
+  const params = extractCParams(funcDeclarator.childForFieldName('parameters'));
+  ctx.definitions.push({
+    name: nameNode.text,
+    kind: 'function',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+    children: params.length > 0 ? 
params : undefined,
+  });
+}
+
+// ── Import handlers ───────────────────────────────────────────────────────
+
+/**
+ * Record a `#include` / `#import`. Surrounding quotes or angle brackets are
+ * stripped from the path; the last `/`-separated segment is the imported name.
+ */
+function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const pathNode = node.childForFieldName('path');
+  if (!pathNode) return;
+  const raw = pathNode.text;
+  const source = raw.replace(/^["<]|[">]$/g, '');
+  const lastName = source.split('/').pop() ?? source;
+  ctx.imports.push({
+    source,
+    names: [lastName],
+    line: node.startPosition.row + 1,
+    cInclude: true,
+  });
+}
+
+/** Record a module import (`@import Foundation;`) under the module's name. */
+function handleAtImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  // @import Foundation;
+  const moduleNode = node.childForFieldName('module') || findChild(node, 'identifier');
+  if (moduleNode) {
+    ctx.imports.push({
+      source: moduleNode.text,
+      names: [moduleNode.text],
+      line: node.startPosition.row + 1,
+    });
+  }
+}
+
+// ── C-compatible type handlers ────────────────────────────────────────────
+
+/** Record a named `struct_specifier`; anonymous structs are skipped. */
+function handleStructSpecifier(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name');
+  if (!nameNode) return;
+  ctx.definitions.push({
+    name: nameNode.text,
+    kind: 'struct',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+  });
+}
+
+/** Record a named `enum_specifier`; anonymous enums are skipped. */
+function handleEnumSpecifier(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const nameNode = node.childForFieldName('name');
+  if (!nameNode) return;
+  ctx.definitions.push({
+    name: nameNode.text,
+    kind: 'enum',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+  });
+}
+
+/**
+ * Record the alias introduced by a `type_definition` as a `type` definition.
+ *
+ * Children are scanned from last to first. An identifier-like child is the
+ * alias itself. A child whose type ends in `_declarator` is searched
+ * depth-first for the first identifier-like node, so that
+ * `typedef int *IntPtr;` or `typedef int (*fn)(int);` is recorded under the
+ * alias rather than under the base type `int`.
+ *
+ * NOTE(review): the `_declarator` suffix follows the tree-sitter-c naming
+ * (pointer/array/function/parenthesized declarators) — confirm against the
+ * tree-sitter-objc grammar in use.
+ */
+function handleTypedef(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const isNameNode = (n: TreeSitterNode): boolean =>
+    n.type === 'type_identifier' || n.type === 'identifier' || n.type === 'primitive_type';
+
+  // Pre-order search: inside a wrapped declarator the alias comes before any
+  // parameter list, so the first identifier-like node is the alias.
+  const firstNameIn = (n: TreeSitterNode): string | undefined => {
+    if (isNameNode(n)) return n.text;
+    for (let i = 0; i < n.childCount; i++) {
+      const child = n.child(i);
+      const found = child ? firstNameIn(child) : undefined;
+      if (found) return found;
+    }
+    return undefined;
+  };
+
+  let name: string | undefined;
+  for (let i = node.childCount - 1; i >= 0; i--) {
+    const child = node.child(i);
+    if (!child) continue;
+    if (isNameNode(child)) {
+      name = child.text;
+      break;
+    }
+    if (child.type.endsWith('_declarator')) {
+      name = firstNameIn(child);
+      if (name) break;
+    }
+  }
+  if (!name) return;
+  ctx.definitions.push({
+    name,
+    kind: 'type',
+    line: node.startPosition.row + 1,
+    endLine: nodeEndLine(node),
+  });
+}
+
+// ── Call handlers ─────────────────────────────────────────────────────────
+
+/**
+ * Record a C-style `call_expression`. For a `field_expression` callee the
+ * field is the call name and the argument is the receiver; otherwise the
+ * callee text is used as the name.
+ */
+function handleCCallExpr(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  const funcNode = node.childForFieldName('function');
+  if (!funcNode) return;
+  const call: Call = { name: '', line: node.startPosition.row + 1 };
+  if (funcNode.type === 'field_expression') {
+    const field = funcNode.childForFieldName('field');
+    const argument = funcNode.childForFieldName('argument');
+    if (field) call.name = field.text;
+    if (argument) call.receiver = argument.text;
+  } else {
+    call.name = funcNode.text;
+  }
+  if (call.name) ctx.calls.push(call);
+}
+
+/**
+ * Record a message send as a call named by the `selector` field's text, with
+ * the `receiver` field's text as receiver when present. Message expressions
+ * without a `selector` field are skipped.
+ */
+function handleMessageExpr(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  // [receiver selector:arg ...]
+  const receiver = node.childForFieldName('receiver');
+  const selector = node.childForFieldName('selector');
+  if (!selector) return;
+
+  const call: Call = { name: selector.text, line: node.startPosition.row + 1 };
+  if (receiver) call.receiver = receiver.text;
+  ctx.calls.push(call);
+}
+
+// ── Helpers ───────────────────────────────────────────────────────────────
+
+function buildSelector(methodNode: TreeSitterNode): string | null {
+  const selector = methodNode.childForFieldName('selector');
+  if (selector) return selector.text;
+
+  // Build selector from keyword children: initWith:name:
+  const parts: string[] = [];
+  for (let i = 0; i < methodNode.childCount; i++) {
+    const child = methodNode.child(i);
+    if (!child) continue;
+    if (child.type === 'keyword_selector') {
+      for (let j = 0; j < child.childCount; j++) {
+        const kw = child.child(j);
+        if (kw && kw.type === 'keyword_declarator') {
+          const kwName = kw.childForFieldName('keyword');
+          if (kwName) parts.push(kwName.text);
+        }
+      }
+    }
+    if (child.type === 'identifier' && i === 1) {
+      // Simple unary selector
+      return child.text;
+    }
+  }
+  return parts.length > 0 ? 
`${parts.join(':')}:` : null; +} + +function findObjCParentClass(node: TreeSitterNode): string | null { + let current = node.parent; + while (current) { + if ( + current.type === 'class_interface' || + current.type === 'class_implementation' || + current.type === 'protocol_declaration' || + current.type === 'category_interface' || + current.type === 'category_implementation' + ) { + const nameNode = current.childForFieldName('name') || findObjCDeclName(current); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; +} + +/** + * Find the declaration name for ObjC constructs where the grammar does not + * expose the class/protocol name as a named field. The identifier appears + * right after the `@interface` / `@implementation` / `@protocol` keyword. + */ +function findObjCDeclName(node: TreeSitterNode): TreeSitterNode | null { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'identifier') return child; + } + return null; +} + +function collectClassMembers(classNode: TreeSitterNode): SubDeclaration[] { + const members: SubDeclaration[] = []; + for (let i = 0; i < classNode.childCount; i++) { + const child = classNode.child(i); + if (!child) continue; + if (child.type === 'method_declaration' || child.type === 'method_definition') { + const sel = buildSelector(child); + if (sel) { + members.push({ name: sel, kind: 'method', line: child.startPosition.row + 1 }); + } + } + if (child.type === 'property_declaration') { + const propName = child.childForFieldName('name'); + if (propName) { + members.push({ name: propName.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + } + return members; +} + +function extractMethodParams(methodNode: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + for (let i = 0; i < methodNode.childCount; i++) { + const child = methodNode.child(i); + if (!child || child.type !== 'keyword_selector') continue; 
+ for (let j = 0; j < child.childCount; j++) { + const kw = child.child(j); + if (kw && kw.type === 'keyword_declarator') { + const nameNode = kw.childForFieldName('name'); + if (nameNode) { + params.push({ + name: nameNode.text, + kind: 'parameter', + line: nameNode.startPosition.row + 1, + }); + } + } + } + } + return params; +} + +function extractCParams(paramListNode: TreeSitterNode | null): SubDeclaration[] { + const params: SubDeclaration[] = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param || param.type !== 'parameter_declaration') continue; + const nameNode = param.childForFieldName('declarator'); + if (nameNode) { + const name = + nameNode.type === 'identifier' + ? nameNode.text + : (findChild(nameNode, 'identifier')?.text ?? nameNode.text); + params.push({ name, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + return params; +} diff --git a/src/extractors/solidity.ts b/src/extractors/solidity.ts new file mode 100644 index 00000000..4466b1b1 --- /dev/null +++ b/src/extractors/solidity.ts @@ -0,0 +1,368 @@ +import type { + Call, + ExtractorOutput, + SubDeclaration, + TreeSitterNode, + TreeSitterTree, +} from '../types.js'; +import { + extractModifierVisibility, + findChild, + findParentNode, + nodeEndLine, + stripQuotes, +} from './helpers.js'; + +/** + * Extract symbols from Solidity files. + * + * Solidity's tree-sitter grammar covers contracts, interfaces, libraries, + * structs, enums, events, errors, functions, modifiers, and import paths. 
+ */ +export function extractSoliditySymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkSolidityNode(tree.rootNode, ctx); + return ctx; +} + +function walkSolidityNode(node: TreeSitterNode, ctx: ExtractorOutput): void { + switch (node.type) { + case 'contract_declaration': + handleContractDecl(node, ctx, 'class'); + break; + case 'interface_declaration': + handleContractDecl(node, ctx, 'interface'); + break; + case 'library_declaration': + handleContractDecl(node, ctx, 'module'); + break; + case 'struct_declaration': + handleStructDecl(node, ctx); + break; + case 'enum_declaration': + handleEnumDecl(node, ctx); + break; + case 'function_definition': + handleFunctionDef(node, ctx); + break; + case 'modifier_definition': + handleModifierDef(node, ctx); + break; + case 'event_definition': + handleEventDef(node, ctx); + break; + case 'error_declaration': + handleErrorDecl(node, ctx); + break; + case 'state_variable_declaration': + handleStateVarDecl(node, ctx); + break; + case 'import_directive': + handleImportDirective(node, ctx); + break; + case 'call_expression': + case 'function_call': + handleCallExpression(node, ctx); + break; + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkSolidityNode(child, ctx); + } +} + +// ── Handlers ─────────────────────────────────────────────────────────────── + +const SOL_PARENT_TYPES = [ + 'contract_declaration', + 'interface_declaration', + 'library_declaration', +] as const; + +function handleContractDecl( + node: TreeSitterNode, + ctx: ExtractorOutput, + kind: 'class' | 'interface' | 'module', +): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const name = nameNode.text; + + const members: SubDeclaration[] = []; + const body = node.childForFieldName('body') || findChild(node, 
'contract_body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child) continue; + if (child.type === 'function_definition') { + const fnName = child.childForFieldName('name'); + if (fnName) { + members.push({ name: fnName.text, kind: 'method', line: child.startPosition.row + 1 }); + } + } else if (child.type === 'state_variable_declaration') { + const varName = child.childForFieldName('name'); + if (varName) { + members.push({ + name: varName.text, + kind: 'property', + line: child.startPosition.row + 1, + visibility: extractSolVisibility(child), + }); + } + } + } + } + + ctx.definitions.push({ + name, + kind, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: members.length > 0 ? members : undefined, + }); + + // Inheritance + const inheritance = findChild(node, 'inheritance_specifier'); + if (inheritance) { + for (let i = 0; i < inheritance.childCount; i++) { + const child = inheritance.child(i); + if (!child) continue; + if (child.type === 'user_defined_type' || child.type === 'identifier') { + ctx.classes.push({ name, extends: child.text, line: node.startPosition.row + 1 }); + } + } + } +} + +function handleStructDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + const members: SubDeclaration[] = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'struct_member') { + const memberName = child.childForFieldName('name'); + if (memberName) { + members.push({ + name: memberName.text, + kind: 'property', + line: child.startPosition.row + 1, + }); + } + } + } + + const parent = findParentNode(node, SOL_PARENT_TYPES); + const fullName = parent ? 
`${parent}.${nameNode.text}` : nameNode.text; + + ctx.definitions.push({ + name: fullName, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: members.length > 0 ? members : undefined, + }); +} + +function handleEnumDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + const members: SubDeclaration[] = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'enum_value') { + members.push({ name: child.text, kind: 'constant', line: child.startPosition.row + 1 }); + } + } + + const parent = findParentNode(node, SOL_PARENT_TYPES); + const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text; + + ctx.definitions.push({ + name: fullName, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: members.length > 0 ? members : undefined, + }); +} + +function handleFunctionDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parent = findParentNode(node, SOL_PARENT_TYPES); + const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text; + const kind = parent ? 'method' : 'function'; + + const params = extractSolParams(node); + ctx.definitions.push({ + name: fullName, + kind, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: extractSolVisibility(node), + }); +} + +function handleModifierDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parent = findParentNode(node, SOL_PARENT_TYPES); + const fullName = parent ? 
`${parent}.${nameNode.text}` : nameNode.text; + + ctx.definitions.push({ + name: fullName, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + decorators: ['modifier'], + }); +} + +function handleEventDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parent = findParentNode(node, SOL_PARENT_TYPES); + const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text; + + ctx.definitions.push({ + name: fullName, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + decorators: ['event'], + }); +} + +function handleErrorDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parent = findParentNode(node, SOL_PARENT_TYPES); + const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text; + + ctx.definitions.push({ + name: fullName, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + decorators: ['error'], + }); +} + +function handleStateVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parent = findParentNode(node, SOL_PARENT_TYPES); + const fullName = parent ? 
`${parent}.${nameNode.text}` : nameNode.text; + + ctx.definitions.push({ + name: fullName, + kind: 'variable', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility: extractSolVisibility(node), + }); +} + +function handleImportDirective(node: TreeSitterNode, ctx: ExtractorOutput): void { + // import "path"; or import { X } from "path"; or import "path" as Alias; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'string' || child.type === 'string_literal') { + const source = stripQuotes(child.text); + const names: string[] = []; + // Look for imported symbols + for (let j = 0; j < node.childCount; j++) { + const sibling = node.child(j); + if (sibling && sibling.type === 'identifier') names.push(sibling.text); + if (sibling && sibling.type === 'import_declaration') { + const id = findChild(sibling, 'identifier'); + if (id) names.push(id.text); + } + } + ctx.imports.push({ + source, + names: names.length > 0 ? 
names : ['*'], + line: node.startPosition.row + 1, + }); + return; + } + // source_import: handles `import * as X from "path"` + if (child.type === 'source_import' || child.type === 'import_clause') { + const strNode = findChild(child, 'string') || findChild(child, 'string_literal'); + if (strNode) { + ctx.imports.push({ + source: stripQuotes(strNode.text), + names: ['*'], + line: node.startPosition.row + 1, + }); + return; + } + } + } +} + +function handleCallExpression(node: TreeSitterNode, ctx: ExtractorOutput): void { + const funcNode = node.childForFieldName('function') || node.childForFieldName('callee'); + if (!funcNode) return; + + const call: Call = { name: '', line: node.startPosition.row + 1 }; + if (funcNode.type === 'member_expression' || funcNode.type === 'member_access') { + const prop = funcNode.childForFieldName('property') || funcNode.childForFieldName('member'); + const obj = funcNode.childForFieldName('object') || funcNode.childForFieldName('expression'); + if (prop) call.name = prop.text; + if (obj) call.receiver = obj.text; + } else { + call.name = funcNode.text; + } + if (call.name) ctx.calls.push(call); +} + +// ── Helpers ──────────────────────────────────────────────────────────────── + +function extractSolParams(funcNode: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + const paramList = + funcNode.childForFieldName('parameters') || findChild(funcNode, 'parameter_list'); + if (!paramList) return params; + + for (let i = 0; i < paramList.childCount; i++) { + const param = paramList.child(i); + if (!param || param.type !== 'parameter') continue; + const nameNode = param.childForFieldName('name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + return params; +} + +function extractSolVisibility( + node: TreeSitterNode, +): 'public' | 'private' | 'protected' | undefined { + // Solidity visibility is embedded as child keywords or visibility 
nodes + const vis = extractModifierVisibility(node); + if (vis) return vis; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + const t = child.text; + if (t === 'public' || t === 'external') return 'public'; + if (t === 'private') return 'private'; + if (t === 'internal') return 'protected'; + } + return undefined; +} diff --git a/src/extractors/verilog.ts b/src/extractors/verilog.ts new file mode 100644 index 00000000..1b85fec5 --- /dev/null +++ b/src/extractors/verilog.ts @@ -0,0 +1,315 @@ +import type { ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree } from '../types.js'; +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Verilog/SystemVerilog files. + * + * The tree-sitter-verilog grammar covers modules, interfaces, packages, + * tasks, functions, classes, always blocks, and instantiations. + */ +export function extractVerilogSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkVerilogNode(tree.rootNode, ctx); + return ctx; +} + +function walkVerilogNode(node: TreeSitterNode, ctx: ExtractorOutput): void { + switch (node.type) { + case 'module_declaration': + handleModuleDecl(node, ctx); + break; + case 'interface_declaration': + handleInterfaceDecl(node, ctx); + break; + case 'package_declaration': + handlePackageDecl(node, ctx); + break; + case 'class_declaration': + handleClassDecl(node, ctx); + break; + case 'function_declaration': + handleFunctionDecl(node, ctx); + break; + case 'task_declaration': + handleTaskDecl(node, ctx); + break; + case 'module_instantiation': + handleModuleInstantiation(node, ctx); + break; + case 'package_import_declaration': + handlePackageImport(node, ctx); + break; + case 'include_compiler_directive': + handleIncludeDirective(node, ctx); + break; + } + + for (let i = 0; 
i < node.childCount; i++) { + const child = node.child(i); + if (child) walkVerilogNode(child, ctx); + } +} + +// ── Handlers ─────────────────────────────────────────────────────────────── + +function handleModuleDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = findModuleName(node); + if (!nameNode) return; + + const ports = extractPorts(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'module', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: ports.length > 0 ? ports : undefined, + }); +} + +function handleInterfaceDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = findDeclName(node); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handlePackageDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = findDeclName(node); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'module', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleClassDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + + // Superclass via extends + const superclass = node.childForFieldName('superclass'); + if (superclass) { + ctx.classes.push({ + name: nameNode.text, + extends: superclass.text, + line: node.startPosition.row + 1, + }); + } +} + +function handleFunctionDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = findFunctionOrTaskName(node, 'function_identifier'); + if (!nameNode) return; + + const parentModule = findVerilogParent(node); + const fullName = parentModule ? 
`${parentModule}.${nameNode.text}` : nameNode.text; + + ctx.definitions.push({ + name: fullName, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleTaskDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = findFunctionOrTaskName(node, 'task_identifier'); + if (!nameNode) return; + + const parentModule = findVerilogParent(node); + const fullName = parentModule ? `${parentModule}.${nameNode.text}` : nameNode.text; + + ctx.definitions.push({ + name: fullName, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleModuleInstantiation(node: TreeSitterNode, ctx: ExtractorOutput): void { + // Module instantiations are like function calls: `ModuleName instance_name(...);` + const moduleType = node.childForFieldName('type') || node.child(0); + if (!moduleType) return; + + ctx.calls.push({ + name: moduleType.text, + line: node.startPosition.row + 1, + }); +} + +function handlePackageImport(node: TreeSitterNode, ctx: ExtractorOutput): void { + // import pkg::item; or import pkg::*; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'package_import_item') { + const text = child.text; + const parts = text.split('::'); + const pkg = parts[0] ?? text; + const item = parts[1] ?? '*'; + ctx.imports.push({ + source: pkg, + names: [item], + line: node.startPosition.row + 1, + }); + } + } +} + +function handleIncludeDirective(node: TreeSitterNode, ctx: ExtractorOutput): void { + // `include "file.vh" + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'string_literal' || child.type === 'quoted_string')) { + const source = child.text.replace(/^["']|["']$/g, ''); + ctx.imports.push({ + source, + names: [source.split('/').pop() ?? 
source], + line: node.startPosition.row + 1, + cInclude: true, + }); + return; + } + } +} + +// ── Helpers ──────────────────────────────────────────────────────────────── + +function findModuleName(node: TreeSitterNode): TreeSitterNode | null { + // Try field name first, then look for module_header > identifier + const nameNode = node.childForFieldName('name'); + if (nameNode) return nameNode; + + const header = findChild(node, 'module_header'); + if (header) { + const id = findChild(header, 'simple_identifier') || findChild(header, 'identifier'); + if (id) return id; + } + + // Direct child identifier after `module` keyword + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'simple_identifier' || child.type === 'identifier')) return child; + } + return null; +} + +function findDeclName(node: TreeSitterNode): TreeSitterNode | null { + const nameNode = node.childForFieldName('name'); + if (nameNode) return nameNode; + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'simple_identifier' || child.type === 'identifier')) return child; + } + return null; +} + +/** + * Find a function or task name by searching for the dedicated identifier node + * type (e.g. `function_identifier`, `task_identifier`) recursively through + * body declarations. Falls back to `findDeclName` for grammars that use + * plain identifiers. 
+ */ +function findFunctionOrTaskName( + node: TreeSitterNode, + identifierType: string, +): TreeSitterNode | null { + // Try the standard approach first + const simple = findDeclName(node); + if (simple) return simple; + + // Search children (including body declarations) for the dedicated identifier node + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === identifierType) return child; + // Look one level deeper into body declarations + for (let j = 0; j < child.childCount; j++) { + const grandchild = child.child(j); + if (grandchild && grandchild.type === identifierType) return grandchild; + } + } + return null; +} + +function findVerilogParent(node: TreeSitterNode): string | null { + let current = node.parent; + while (current) { + if ( + current.type === 'module_declaration' || + current.type === 'interface_declaration' || + current.type === 'package_declaration' || + current.type === 'class_declaration' + ) { + const name = findDeclName(current) || findModuleName(current); + return name ? 
name.text : null; + } + current = current.parent; + } + return null; +} + +function extractPorts(moduleNode: TreeSitterNode): SubDeclaration[] { + const ports: SubDeclaration[] = []; + + // Look for port declarations in the module header or body + const collectFromNode = (node: TreeSitterNode): void => { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + + if ( + child.type === 'ansi_port_declaration' || + child.type === 'port_declaration' || + child.type === 'input_declaration' || + child.type === 'output_declaration' || + child.type === 'inout_declaration' + ) { + const nameNode = + child.childForFieldName('name') || + findChild(child, 'simple_identifier') || + findChild(child, 'identifier'); + if (nameNode) { + ports.push({ name: nameNode.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + + // Recurse into port list containers + if ( + child.type === 'list_of_port_declarations' || + child.type === 'module_header' || + child.type === 'port_declaration_list' + ) { + collectFromNode(child); + } + } + }; + + collectFromNode(moduleNode); + return ports; +} diff --git a/src/types.ts b/src/types.ts index 0491ec8a..9adbc0ea 100644 --- a/src/types.ts +++ b/src/types.ts @@ -102,7 +102,12 @@ export type LanguageId = | 'clojure' | 'julia' | 'r' - | 'erlang'; + | 'erlang' + | 'solidity' + | 'objc' + | 'cuda' + | 'groovy' + | 'verilog'; /** Engine mode selector. 
*/ export type EngineMode = 'native' | 'wasm' | 'auto'; diff --git a/tests/parsers/cuda.test.ts b/tests/parsers/cuda.test.ts new file mode 100644 index 00000000..9ea910fc --- /dev/null +++ b/tests/parsers/cuda.test.ts @@ -0,0 +1,57 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractCudaSymbols } from '../../src/domain/parser.js'; + +describe('CUDA parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseCuda(code: string) { + const parser = parsers.get('cuda'); + if (!parser) throw new Error('CUDA parser not available'); + const tree = parser.parse(code); + return extractCudaSymbols(tree, 'test.cu'); + } + + it('extracts function definitions', () => { + const symbols = parseCuda(`void hostFunction(int n) { + return; +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'hostFunction', kind: 'function' }), + ); + }); + + it('extracts struct definitions', () => { + const symbols = parseCuda(`struct Vec3 { + float x, y, z; +};`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Vec3', kind: 'struct' }), + ); + }); + + it('extracts class definitions', () => { + const symbols = parseCuda(`class CudaManager { +public: + void init(); +};`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'CudaManager', kind: 'class' }), + ); + }); + + it('extracts #include as imports', () => { + const symbols = parseCuda(`#include `); + expect(symbols.imports).toContainEqual(expect.objectContaining({ source: 'cuda_runtime.h' })); + }); + + it('extracts call expressions', () => { + const symbols = parseCuda(`void foo() { + cudaMalloc(&ptr, size); +}`); + expect(symbols.calls).toContainEqual(expect.objectContaining({ name: 'cudaMalloc' })); + }); +}); diff --git a/tests/parsers/groovy.test.ts b/tests/parsers/groovy.test.ts new file mode 100644 index 00000000..3b03e3d7 --- /dev/null +++ 
b/tests/parsers/groovy.test.ts @@ -0,0 +1,61 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractGroovySymbols } from '../../src/domain/parser.js'; + +describe('Groovy parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseGroovy(code: string) { + const parser = parsers.get('groovy'); + if (!parser) throw new Error('Groovy parser not available'); + const tree = parser.parse(code); + return extractGroovySymbols(tree, 'test.groovy'); + } + + it('extracts class declarations', () => { + const symbols = parseGroovy(`class MyService { + String name + void process() {} +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MyService', kind: 'class' }), + ); + }); + + it('extracts method declarations', () => { + const symbols = parseGroovy(`class Calc { + int add(int a, int b) { + return a + b + } +}`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'method' })); + }); + + it('extracts import statements', () => { + const symbols = parseGroovy(`import groovy.json.JsonSlurper`); + expect(symbols.imports).toContainEqual( + expect.objectContaining({ source: 'groovy.json.JsonSlurper' }), + ); + }); + + it('extracts interface declarations', () => { + const symbols = parseGroovy(`interface Processor { + void process() +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Processor', kind: 'interface' }), + ); + }); + + it('extracts enum declarations', () => { + const symbols = parseGroovy(`enum Color { + RED, GREEN, BLUE +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Color', kind: 'enum' }), + ); + }); +}); diff --git a/tests/parsers/objc.test.ts b/tests/parsers/objc.test.ts new file mode 100644 index 00000000..33c5668e --- /dev/null +++ b/tests/parsers/objc.test.ts @@ -0,0 +1,59 @@ +import { beforeAll, describe, expect, it } from 
'vitest'; +import { createParsers, extractObjCSymbols } from '../../src/domain/parser.js'; + +describe('Objective-C parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseObjC(code: string) { + const parser = parsers.get('objc'); + if (!parser) throw new Error('Objective-C parser not available'); + const tree = parser.parse(code); + return extractObjCSymbols(tree, 'test.m'); + } + + it('extracts class interface declarations', () => { + const symbols = parseObjC(`@interface MyClass : NSObject +- (void)doSomething; +@end`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MyClass', kind: 'class' }), + ); + }); + + it('extracts protocol declarations', () => { + const symbols = parseObjC(`@protocol MyDelegate +- (void)didFinish; +@end`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MyDelegate', kind: 'interface' }), + ); + }); + + it('extracts C function definitions', () => { + const symbols = parseObjC(`void helper(int x) { + return; +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'helper', kind: 'function' }), + ); + }); + + it('extracts #import as imports', () => { + const symbols = parseObjC(`#import `); + expect(symbols.imports).toContainEqual( + expect.objectContaining({ source: 'Foundation/Foundation.h' }), + ); + }); + + it('extracts inheritance', () => { + const symbols = parseObjC(`@interface MyView : UIView +@end`); + expect(symbols.classes).toContainEqual( + expect.objectContaining({ name: 'MyView', extends: 'UIView' }), + ); + }); +}); diff --git a/tests/parsers/solidity.test.ts b/tests/parsers/solidity.test.ts new file mode 100644 index 00000000..ac94b79c --- /dev/null +++ b/tests/parsers/solidity.test.ts @@ -0,0 +1,62 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractSoliditySymbols } from '../../src/domain/parser.js'; + 
+describe('Solidity parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseSol(code: string) { + const parser = parsers.get('solidity'); + if (!parser) throw new Error('Solidity parser not available'); + const tree = parser.parse(code); + return extractSoliditySymbols(tree, 'test.sol'); + } + + it('extracts contract declarations', () => { + const symbols = parseSol(`// SPDX-License-Identifier: MIT +pragma solidity ^0.8.0; + +contract MyToken { + uint256 public totalSupply; +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MyToken', kind: 'class' }), + ); + }); + + it('extracts interface declarations', () => { + const symbols = parseSol(`interface IERC20 { + function transfer(address to, uint256 amount) external returns (bool); +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'IERC20', kind: 'interface' }), + ); + }); + + it('extracts function definitions', () => { + const symbols = parseSol(`contract Token { + function transfer(address to, uint256 amount) public returns (bool) { + return true; + } +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Token.transfer', kind: 'method' }), + ); + }); + + it('extracts import directives', () => { + const symbols = parseSol(`import "./IERC20.sol";`); + expect(symbols.imports).toContainEqual(expect.objectContaining({ source: './IERC20.sol' })); + }); + + it('extracts inheritance', () => { + const symbols = parseSol(`contract MyToken is ERC20 { +}`); + expect(symbols.classes).toContainEqual( + expect.objectContaining({ name: 'MyToken', extends: 'ERC20' }), + ); + }); +}); diff --git a/tests/parsers/verilog.test.ts b/tests/parsers/verilog.test.ts new file mode 100644 index 00000000..7c4894bf --- /dev/null +++ b/tests/parsers/verilog.test.ts @@ -0,0 +1,64 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, 
extractVerilogSymbols } from '../../src/domain/parser.js'; + +describe('Verilog parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseVerilog(code: string) { + const parser = parsers.get('verilog'); + if (!parser) throw new Error('Verilog parser not available'); + const tree = parser.parse(code); + return extractVerilogSymbols(tree, 'test.v'); + } + + it('extracts module declarations', () => { + const symbols = parseVerilog(`module counter( + input clk, + input reset, + output reg [7:0] count +); +endmodule`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'counter', kind: 'module' }), + ); + }); + + it('extracts function declarations', () => { + const symbols = parseVerilog(`module math; + function integer add; + input integer a, b; + add = a + b; + endfunction +endmodule`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'function' })); + }); + + it('extracts task declarations', () => { + const symbols = parseVerilog(`module tb; + task display_msg; + $display("hello"); + endtask +endmodule`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'function' })); + }); + + it('extracts module instantiations as calls', () => { + const symbols = parseVerilog(`module top; + counter u1(.clk(clk), .reset(reset)); +endmodule`); + expect(symbols.calls).toContainEqual(expect.objectContaining({ name: 'counter' })); + }); + + it('extracts package imports', () => { + const symbols = parseVerilog(`module m; + import pkg::item; +endmodule`); + expect(symbols.imports).toContainEqual( + expect.objectContaining({ source: 'pkg', names: ['item'] }), + ); + }); +});