From bf890feeafbc5f3c2d022e3368022e2012e91174 Mon Sep 17 00:00:00 2001 From: Mike Bannister Date: Mon, 16 Mar 2026 18:26:46 -0400 Subject: [PATCH] feat: add TOML tree-sitter language support --- .../docs/core-concepts/plugin-system.mdx | 1 + src/kit/queries/toml/tags.scm | 19 +++++ src/kit/tree_sitter_symbol_extractor.py | 18 +++- tests/test_symbol_extraction_multilang.py | 1 + tests/test_toml_symbols.py | 83 +++++++++++++++++++ tests/test_tree_sitter_languages.py | 1 + 6 files changed, 119 insertions(+), 4 deletions(-) create mode 100644 src/kit/queries/toml/tags.scm create mode 100644 tests/test_toml_symbols.py diff --git a/docs/src/content/docs/core-concepts/plugin-system.mdx b/docs/src/content/docs/core-concepts/plugin-system.mdx index 1d497da9..753375e3 100644 --- a/docs/src/content/docs/core-concepts/plugin-system.mdx +++ b/docs/src/content/docs/core-concepts/plugin-system.mdx @@ -24,6 +24,7 @@ kit comes with built-in support for 12+ programming languages: - **Dart** (`.dart`) - Classes, functions, mixins, enums, extensions - **HCL/Terraform** (`.hcl`, `.tf`) - Resources, variables, modules - **Haskell** (`.hs`) - Module header, functions (including lambda-binds), common type-level declarations +- **TOML** (`.toml`) - Tables, array tables Each language supports comprehensive symbol extraction including: - **Classes and interfaces** with inheritance relationships diff --git a/src/kit/queries/toml/tags.scm b/src/kit/queries/toml/tags.scm new file mode 100644 index 00000000..c3a1e536 --- /dev/null +++ b/src/kit/queries/toml/tags.scm @@ -0,0 +1,19 @@ +;; tags.scm for TOML symbol extraction (tree-sitter-toml) + +; Table headers with bare key: [section] +(table (bare_key) @name) @definition.table + +; Table headers with dotted key: [section.subsection] +(table (dotted_key) @name) @definition.table + +; Table headers with quoted key: ["section.name"] +(table (quoted_key) @name) @definition.table + +; Array table headers with bare key: [[array]] +(table_array_element (bare_key) @name) @definition.table_array + +; Array table headers with dotted key: [[parent.array]] +(table_array_element (dotted_key) @name) @definition.table_array + +; Array table headers with quoted key: [["array.name"]] +(table_array_element (quoted_key) @name) @definition.table_array diff --git a/src/kit/tree_sitter_symbol_extractor.py b/src/kit/tree_sitter_symbol_extractor.py index 273346f5..1f8cfa35 100644 --- a/src/kit/tree_sitter_symbol_extractor.py +++ b/src/kit/tree_sitter_symbol_extractor.py @@ -34,6 +34,7 @@ ".hxx": "cpp", ".zig": "zig", ".cs": "csharp", + ".toml": "toml", } @@ -350,11 +351,18 @@ def reset_plugins(cls) -> None: ".hxx": "cpp", ".zig": "zig", ".cs": "csharp", + ".toml": "toml", } LANGUAGES.clear() LANGUAGES.update(original_languages) cls.LANGUAGES = set(LANGUAGES.keys()) + @staticmethod + def _strip_wrapping_quotes(text: str) -> str: + if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}: + return text[1:-1] + return text + @staticmethod def extract_symbols(ext: str, source_code: str) -> List[Dict[str, Any]]: """Extracts symbols from source code using tree-sitter queries.""" @@ -454,10 +462,12 @@ def extract_symbols(ext: str, source_code: str) -> List[Dict[str, Any]]: if hasattr(actual_name_node, "text") and actual_name_node.text else str(actual_name_node) ) - # HCL: Strip quotes from string literals - if ext == ".tf" and hasattr(actual_name_node, "type") and actual_name_node.type == "string_lit": - if len(symbol_name) >= 2 and symbol_name.startswith('"') and symbol_name.endswith('"'): - symbol_name = symbol_name[1:-1] + node_type = actual_name_node.type if hasattr(actual_name_node, "type") else None + if ( + (ext == ".tf" and node_type == "string_lit") + or (ext == ".toml" and node_type == "quoted_key") + ): + symbol_name = TreeSitterSymbolExtractor._strip_wrapping_quotes(symbol_name) definition_capture = next( ((name, node) for name, node in captures.items() if name.startswith("definition.")), None diff --git a/tests/test_symbol_extraction_multilang.py b/tests/test_symbol_extraction_multilang.py index 77767289..63331ab4 100644 --- a/tests/test_symbol_extraction_multilang.py +++ b/tests/test_symbol_extraction_multilang.py @@ -9,6 +9,7 @@ ".java": "class Bar { void foo() {} }\n", ".rs": "fn foo() {}\nstruct Bar;\n", ".zig": "pub fn foo() void {}\npub const Bar = struct {};\n", + ".toml": "[foo]\nbar = 1\n", } diff --git a/tests/test_toml_symbols.py b/tests/test_toml_symbols.py new file mode 100644 index 00000000..23871fd0 --- /dev/null +++ b/tests/test_toml_symbols.py @@ -0,0 +1,83 @@ +import pytest + +from kit.tree_sitter_symbol_extractor import TreeSitterSymbolExtractor + +TOML_SAMPLE = """\ +[package] +name = "my-app" +version = "1.0.0" + +[dependencies] +serde = "1.0" + +[build.settings] +opt-level = 2 + +[[bin]] +name = "main" +path = "src/main.rs" + +[[test]] +name = "integration" +""" + +TOML_QUOTED_SAMPLE = """\ +["foo.bar"] +value = 1 + +[["bin.name"]] +name = "main" + +['lit'] +value = 2 +""" + + +def test_toml_parser_and_query_available(): + parser = TreeSitterSymbolExtractor.get_parser(".toml") + query = TreeSitterSymbolExtractor.get_query(".toml") + if not parser or not query: + pytest.skip("TOML parser or query not available in this environment") + + tree = parser.parse(TOML_SAMPLE.encode("utf-8")) + assert tree.root_node is not None + + +def test_toml_symbols(): + parser = TreeSitterSymbolExtractor.get_parser(".toml") + query = TreeSitterSymbolExtractor.get_query(".toml") + if not parser or not query: + pytest.skip("TOML parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".toml", TOML_SAMPLE) + names = {s["name"] for s in symbols} + types = {s["type"] for s in symbols} + + assert "package" in names + assert "dependencies" in names + assert "build.settings" in names + assert "bin" in names + assert "test" in names + + assert "table" in types + assert "table_array" in types + + +def test_toml_quoted_table_names_are_normalized(): + parser = TreeSitterSymbolExtractor.get_parser(".toml") + query = TreeSitterSymbolExtractor.get_query(".toml") + if not parser or not query: + pytest.skip("TOML parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".toml", TOML_QUOTED_SAMPLE) + names = {s["name"] for s in symbols} + + assert "foo.bar" in names + assert "bin.name" in names + assert "lit" in names + + +def test_toml_in_supported_languages(): + supported = TreeSitterSymbolExtractor.list_supported_languages() + assert "toml" in supported + assert ".toml" in supported["toml"] diff --git a/tests/test_tree_sitter_languages.py b/tests/test_tree_sitter_languages.py index f08160a9..59657024 100644 --- a/tests/test_tree_sitter_languages.py +++ b/tests/test_tree_sitter_languages.py @@ -12,6 +12,7 @@ "c": b"int foo() { return 42; }\n", "dart": b"int foo() { return 42; }\n", "zig": b"pub fn foo() void { }\n", + "toml": b'[package]\nname = "test"\n', }