From ee769d5806af1f855aa63abe4fce8e0ea6f8fc1d Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Fri, 20 Mar 2026 16:38:14 -0500 Subject: [PATCH 01/12] feat(k8s): detection helpers, CBMLanguage enum, and language table entries - Add CBM_LANG_KUSTOMIZE and CBM_LANG_K8S to CBMLanguage enum (before CBM_LANG_COUNT) - Add kustomization.yaml/yml to FILENAME_TABLE mapped to CBM_LANG_KUSTOMIZE - Add Kustomize and Kubernetes entries to LANG_NAMES - Implement cbm_is_kustomize_file() with to_lower+strcmp pattern - Implement cbm_is_k8s_manifest() scanning first 4KB for apiVersion: via ci_strstr() - Declare both helpers in pipeline_internal.h Infrascan helpers section Co-Authored-By: Claude Sonnet 4.6 --- internal/cbm/cbm.h | 2 ++ src/discover/language.c | 4 ++++ src/pipeline/pass_infrascan.c | 19 +++++++++++++++++++ src/pipeline/pipeline_internal.h | 2 ++ 4 files changed, 27 insertions(+) diff --git a/internal/cbm/cbm.h b/internal/cbm/cbm.h index 16b9dd0..1dbab26 100644 --- a/internal/cbm/cbm.h +++ b/internal/cbm/cbm.h @@ -75,6 +75,8 @@ typedef enum { CBM_LANG_FORM, CBM_LANG_MAGMA, CBM_LANG_WOLFRAM, + CBM_LANG_KUSTOMIZE, // kustomization.yaml — Kubernetes overlay tool + CBM_LANG_K8S, // Generic Kubernetes manifest (apiVersion: detected) CBM_LANG_COUNT } CBMLanguage; diff --git a/src/discover/language.c b/src/discover/language.c index b7eb7e4..cad73e0 100644 --- a/src/discover/language.c +++ b/src/discover/language.c @@ -273,6 +273,8 @@ static const filename_entry_t FILENAME_TABLE[] = { {"GNUmakefile", CBM_LANG_MAKEFILE}, {"Makefile", CBM_LANG_MAKEFILE}, {"makefile", CBM_LANG_MAKEFILE}, {"meson.build", CBM_LANG_MESON}, {"meson.options", CBM_LANG_MESON}, {"meson_options.txt", CBM_LANG_MESON}, + {"kustomization.yaml", CBM_LANG_KUSTOMIZE}, + {"kustomization.yml", CBM_LANG_KUSTOMIZE}, {".vimrc", CBM_LANG_VIMSCRIPT}, }; @@ -345,6 +347,8 @@ static const char *LANG_NAMES[CBM_LANG_COUNT] = { [CBM_LANG_FORM] = "FORM", [CBM_LANG_MAGMA] = "Magma", [CBM_LANG_WOLFRAM] = "Wolfram", + [CBM_LANG_KUSTOMIZE] = "Kustomize", + [CBM_LANG_K8S] = "Kubernetes", }; /* ── Public API ──────────────────────────────────────────────────── */ diff --git a/src/pipeline/pass_infrascan.c b/src/pipeline/pass_infrascan.c index 30c47d9..307973f 100644 --- a/src/pipeline/pass_infrascan.c +++ b/src/pipeline/pass_infrascan.c @@ -192,6 +192,25 @@ bool cbm_is_env_file(const char *name) { return false; } +bool cbm_is_kustomize_file(const char *name) { + if (!name) { return false; } + char lower[256]; + to_lower(name, lower, sizeof(lower)); + return (strcmp(lower, "kustomization.yaml") == 0 || + strcmp(lower, "kustomization.yml") == 0); +} + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +bool cbm_is_k8s_manifest(const char *name, const char *content) { + if (!name || !content || cbm_is_kustomize_file(name)) { return false; } + char buf[4097]; + size_t n = strlen(content); + if (n > 4096) { n = 4096; } + memcpy(buf, content, n); + buf[n] = '\0'; + return ci_strstr(buf, "apiVersion:") != NULL; +} + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) bool cbm_is_shell_script(const char *name, const char *ext) { (void)name; diff --git a/src/pipeline/pipeline_internal.h b/src/pipeline/pipeline_internal.h index c1d45f1..ea5c1d7 100644 --- a/src/pipeline/pipeline_internal.h +++ b/src/pipeline/pipeline_internal.h @@ -218,6 +218,8 @@ bool cbm_is_compose_file(const char *name); bool cbm_is_cloudbuild_file(const char *name); bool cbm_is_env_file(const char *name); bool cbm_is_shell_script(const char *name, const char *ext); +bool cbm_is_kustomize_file(const char *name); +bool cbm_is_k8s_manifest(const char *name, const char *content); /* Secret detection */ bool cbm_is_secret_binding(const char *key, const char *value); From 88ddb63e8be5c4b4696799268d793c3c8923b602 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Fri, 20 Mar 2026 16:43:00 -0500 Subject: [PATCH 02/12] feat(k8s): K8s and Kustomize YAML extractors in the CBM layer - lang_specs.c: add LangSpec entries for CBM_LANG_KUSTOMIZE and CBM_LANG_K8S (both reuse tree_sitter_yaml()); add cbm_ts_language() switch cases - extract_k8s.c: new file implementing cbm_extract_k8s(); kustomize path walks block_sequence items under resources/bases/patches/components/ patchesStrategicMerge and emits CBMImport per scalar; k8s path extracts apiVersion/kind/metadata.name and emits CBMDefinition with label "Resource" and name "Kind/metadata-name"; malformed manifests (missing kind or name) produce zero definitions - cbm.h: declare cbm_extract_k8s() alongside other sub-extractor entry points - cbm.c: call cbm_extract_k8s() after unified extraction for the two new langs - Makefile.cbm: add extract_k8s.c to EXTRACTION_SRCS Co-Authored-By: Claude Sonnet 4.6 --- Makefile.cbm | 1 + internal/cbm/cbm.c | 5 + internal/cbm/cbm.h | 3 + internal/cbm/extract_k8s.c | 284 +++++++++++++++++++++++++++++++++++++ internal/cbm/lang_specs.c | 13 ++ 5 files changed, 306 insertions(+) create mode 100644 internal/cbm/extract_k8s.c diff --git a/Makefile.cbm b/Makefile.cbm index 82821b8..cf5171b 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -115,6 +115,7 @@ EXTRACTION_SRCS = \ $(CBM_DIR)/extract_type_refs.c \ $(CBM_DIR)/extract_type_assigns.c \ $(CBM_DIR)/extract_env_accesses.c \ + $(CBM_DIR)/extract_k8s.c \ $(CBM_DIR)/helpers.c \ $(CBM_DIR)/lang_specs.c diff --git a/internal/cbm/cbm.c b/internal/cbm/cbm.c index 6162c7a..5b70d18 100644 --- a/internal/cbm/cbm.c +++ b/internal/cbm/cbm.c @@ -316,6 +316,11 @@ CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage cbm_extract_imports(&ctx); cbm_extract_unified(&ctx); + // K8s / Kustomize semantic pass (additional structured extraction for YAML-based infra files). + if (ctx.language == CBM_LANG_KUSTOMIZE || ctx.language == CBM_LANG_K8S) { + cbm_extract_k8s(&ctx); + } + // LSP type-aware call resolution uint64_t lsp_start = now_ns(); if (language == CBM_LANG_GO) { diff --git a/internal/cbm/cbm.h b/internal/cbm/cbm.h index 1dbab26..1fb5a5a 100644 --- a/internal/cbm/cbm.h +++ b/internal/cbm/cbm.h @@ -363,4 +363,7 @@ void cbm_extract_type_assigns(CBMExtractCtx *ctx); // Single-pass unified extraction (replaces the 7 calls above except defs+imports). void cbm_extract_unified(CBMExtractCtx *ctx); +// K8s / Kustomize semantic extractor (called when language is CBM_LANG_K8S or CBM_LANG_KUSTOMIZE). +void cbm_extract_k8s(CBMExtractCtx *ctx); + #endif // CBM_H diff --git a/internal/cbm/extract_k8s.c b/internal/cbm/extract_k8s.c new file mode 100644 index 0000000..0931d95 --- /dev/null +++ b/internal/cbm/extract_k8s.c @@ -0,0 +1,284 @@ +// extract_k8s.c — K8s manifest and Kustomize file extractor. +// +// For CBM_LANG_KUSTOMIZE: walks top-level block_mapping_pair nodes whose key +// matches "resources", "bases", "patches", "components", or +// "patchesStrategicMerge", then emits one CBMImport per block_sequence item. +// +// For CBM_LANG_K8S: finds apiVersion, kind, and metadata.name scalars in the +// first document's block_mapping and emits one CBMDefinition with label +// "Resource" and name "Kind/metadata-name". + +#include "cbm.h" +#include "arena.h" +#include "helpers.h" +#include "tree_sitter/api.h" +#include +#include +#include + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +// Return the raw source text for a scalar node (plain, single-quoted, or +// double-quoted). Surrounding quote characters are stripped for quoted forms. +// Returns NULL for non-scalar node types. +static const char *get_scalar_text(CBMArena *a, TSNode node, const char *source) { + const char *type = ts_node_type(node); + if (strcmp(type, "plain_scalar") == 0) { + return cbm_node_text(a, node, source); + } + if (strcmp(type, "double_quote_scalar") == 0 || strcmp(type, "single_quote_scalar") == 0) { + const char *raw = cbm_node_text(a, node, source); + if (!raw) { + return NULL; + } + size_t len = strlen(raw); + if (len >= 2) { + return cbm_arena_strndup(a, raw + 1, len - 2); + } + return raw; + } + return NULL; +} + +// Return true if the key text of a block_mapping_pair matches one of the +// Kustomize resource-list field names. +static int is_kustomize_list_key(const char *key) { + return (strcmp(key, "resources") == 0 || strcmp(key, "bases") == 0 || + strcmp(key, "patches") == 0 || strcmp(key, "components") == 0 || + strcmp(key, "patchesStrategicMerge") == 0); +} + +// --------------------------------------------------------------------------- +// Kustomize extraction +// --------------------------------------------------------------------------- + +// Walk a block_sequence node and emit one CBMImport per block_sequence_item +// scalar child, using key_name as the local_name. +static void emit_kustomize_sequence(CBMExtractCtx *ctx, TSNode seq_node, + const char *key_name) { + CBMArena *a = ctx->arena; + uint32_t n = ts_node_child_count(seq_node); + for (uint32_t i = 0; i < n; i++) { + TSNode item = ts_node_child(seq_node, i); + if (strcmp(ts_node_type(item), "block_sequence_item") != 0) { + continue; + } + // block_sequence_item has one named child: the value + uint32_t ic = ts_node_child_count(item); + for (uint32_t j = 0; j < ic; j++) { + TSNode val = ts_node_child(item, j); + const char *scalar = get_scalar_text(a, val, ctx->source); + if (!scalar) { + continue; + } + CBMImport imp = { + .local_name = cbm_arena_strdup(a, key_name), + .module_path = cbm_arena_strdup(a, scalar), + }; + cbm_imports_push(&ctx->result->imports, a, imp); + } + } +} + +static void extract_kustomize(CBMExtractCtx *ctx) { + CBMArena *a = ctx->arena; + + // Traverse: stream -> document -> block_node -> block_mapping -> block_mapping_pair + TSNode root = ctx->root; + uint32_t root_n = ts_node_child_count(root); + for (uint32_t si = 0; si < root_n; si++) { + TSNode stream_child = ts_node_child(root, si); + if (strcmp(ts_node_type(stream_child), "document") != 0) { + continue; + } + // Find block_mapping inside the document (may be wrapped in block_node) + TSNode mapping = ts_node_named_child(stream_child, 0); + if (ts_node_is_null(mapping)) { + continue; + } + // Some grammars wrap in block_node + if (strcmp(ts_node_type(mapping), "block_node") == 0) { + mapping = ts_node_named_child(mapping, 0); + } + if (ts_node_is_null(mapping) || strcmp(ts_node_type(mapping), "block_mapping") != 0) { + continue; + } + + uint32_t pair_n = ts_node_child_count(mapping); + for (uint32_t pi = 0; pi < pair_n; pi++) { + TSNode pair = ts_node_child(mapping, pi); + if (strcmp(ts_node_type(pair), "block_mapping_pair") != 0) { + continue; + } + + // First named child = key + TSNode key_node = ts_node_named_child(pair, 0); + if (ts_node_is_null(key_node)) { + continue; + } + const char *key_text = get_scalar_text(a, key_node, ctx->source); + if (!key_text || !is_kustomize_list_key(key_text)) { + continue; + } + + // Second named child = value (should be a block_sequence or block_node wrapping one) + TSNode val_node = ts_node_named_child(pair, 1); + if (ts_node_is_null(val_node)) { + continue; + } + if (strcmp(ts_node_type(val_node), "block_node") == 0) { + val_node = ts_node_named_child(val_node, 0); + } + if (ts_node_is_null(val_node) || + strcmp(ts_node_type(val_node), "block_sequence") != 0) { + continue; + } + + emit_kustomize_sequence(ctx, val_node, key_text); + } + } +} + +// --------------------------------------------------------------------------- +// K8s manifest extraction +// --------------------------------------------------------------------------- + +// Descend into the first block_mapping of a document and extract apiVersion, +// kind, and metadata.name. Returns void; fills kind_buf and meta_name_buf. +static void extract_k8s_scalars(CBMExtractCtx *ctx, TSNode mapping, + char *kind_buf, size_t kind_sz, + char *meta_name_buf, size_t meta_sz) { + CBMArena *a = ctx->arena; + kind_buf[0] = '\0'; + meta_name_buf[0] = '\0'; + + uint32_t n = ts_node_child_count(mapping); + for (uint32_t i = 0; i < n; i++) { + TSNode pair = ts_node_child(mapping, i); + if (strcmp(ts_node_type(pair), "block_mapping_pair") != 0) { + continue; + } + TSNode key_node = ts_node_named_child(pair, 0); + if (ts_node_is_null(key_node)) { + continue; + } + const char *key = get_scalar_text(a, key_node, ctx->source); + if (!key) { + continue; + } + + TSNode val_node = ts_node_named_child(pair, 1); + if (ts_node_is_null(val_node)) { + continue; + } + // Unwrap block_node if present + if (strcmp(ts_node_type(val_node), "block_node") == 0) { + val_node = ts_node_named_child(val_node, 0); + } + if (ts_node_is_null(val_node)) { + continue; + } + + if (strcmp(key, "kind") == 0) { + const char *v = get_scalar_text(a, val_node, ctx->source); + if (v) { + snprintf(kind_buf, kind_sz, "%s", v); + } + } else if (strcmp(key, "metadata") == 0) { + // Descend into metadata block_mapping to find "name" + TSNode meta_mapping = val_node; + if (strcmp(ts_node_type(meta_mapping), "block_node") == 0) { + meta_mapping = ts_node_named_child(meta_mapping, 0); + } + if (ts_node_is_null(meta_mapping) || + strcmp(ts_node_type(meta_mapping), "block_mapping") != 0) { + continue; + } + uint32_t mn = ts_node_child_count(meta_mapping); + for (uint32_t mi = 0; mi < mn; mi++) { + TSNode mpair = ts_node_child(meta_mapping, mi); + if (strcmp(ts_node_type(mpair), "block_mapping_pair") != 0) { + continue; + } + TSNode mkey = ts_node_named_child(mpair, 0); + if (ts_node_is_null(mkey)) { + continue; + } + const char *mkey_text = get_scalar_text(a, mkey, ctx->source); + if (!mkey_text || strcmp(mkey_text, "name") != 0) { + continue; + } + TSNode mval = ts_node_named_child(mpair, 1); + if (ts_node_is_null(mval)) { + continue; + } + const char *meta_name = get_scalar_text(a, mval, ctx->source); + if (meta_name) { + snprintf(meta_name_buf, meta_sz, "%s", meta_name); + } + } + } + } +} + +static void extract_k8s_manifest(CBMExtractCtx *ctx) { + CBMArena *a = ctx->arena; + + TSNode root = ctx->root; + uint32_t root_n = ts_node_child_count(root); + for (uint32_t si = 0; si < root_n; si++) { + TSNode stream_child = ts_node_child(root, si); + if (strcmp(ts_node_type(stream_child), "document") != 0) { + continue; + } + + TSNode mapping = ts_node_named_child(stream_child, 0); + if (ts_node_is_null(mapping)) { + continue; + } + if (strcmp(ts_node_type(mapping), "block_node") == 0) { + mapping = ts_node_named_child(mapping, 0); + } + if (ts_node_is_null(mapping) || strcmp(ts_node_type(mapping), "block_mapping") != 0) { + continue; + } + + char kind_buf[256] = {0}; + char meta_name_buf[256] = {0}; + extract_k8s_scalars(ctx, mapping, kind_buf, sizeof(kind_buf), meta_name_buf, + sizeof(meta_name_buf)); + + // Skip malformed manifests (no kind or no metadata.name) + if (kind_buf[0] == '\0' || meta_name_buf[0] == '\0') { + continue; + } + + char def_name[512]; + snprintf(def_name, sizeof(def_name), "%s/%s", kind_buf, meta_name_buf); + + CBMDefinition def = {0}; + def.name = cbm_arena_strdup(a, def_name); + def.qualified_name = cbm_arena_sprintf(a, "%s.%s", ctx->module_qn, def_name); + def.label = "Resource"; + def.file_path = ctx->rel_path; + def.start_line = ts_node_start_point(mapping).row + 1; + def.end_line = ts_node_end_point(mapping).row + 1; + cbm_defs_push(&ctx->result->defs, a, def); + + break; // Only the first document per file + } +} + +// --------------------------------------------------------------------------- +// Public entry point +// --------------------------------------------------------------------------- + +void cbm_extract_k8s(CBMExtractCtx *ctx) { + if (ctx->language == CBM_LANG_KUSTOMIZE) { + extract_kustomize(ctx); + } else if (ctx->language == CBM_LANG_K8S) { + extract_k8s_manifest(ctx); + } +} diff --git a/internal/cbm/lang_specs.c b/internal/cbm/lang_specs.c index a4fb809..426db94 100644 --- a/internal/cbm/lang_specs.c +++ b/internal/cbm/lang_specs.c @@ -1041,6 +1041,16 @@ static const CBMLangSpec lang_specs[CBM_LANG_COUNT] = { {CBM_LANG_WOLFRAM, wolfram_func_types, empty_types, empty_types, wolfram_module_types, wolfram_call_types, wolfram_import_types, empty_types, empty_types, empty_types, empty_types, empty_types, NULL, empty_types, NULL, NULL}, + + // CBM_LANG_KUSTOMIZE — reuses YAML grammar; semantic extraction via cbm_extract_k8s() + {CBM_LANG_KUSTOMIZE, empty_types, empty_types, empty_types, yaml_module_types, empty_types, + empty_types, empty_types, empty_types, empty_types, empty_types, empty_types, NULL, + empty_types, NULL, NULL}, + + // CBM_LANG_K8S — reuses YAML grammar; semantic extraction via cbm_extract_k8s() + {CBM_LANG_K8S, empty_types, empty_types, empty_types, yaml_module_types, empty_types, + empty_types, empty_types, empty_types, empty_types, empty_types, empty_types, NULL, + empty_types, NULL, NULL}, }; const CBMLangSpec *cbm_lang_spec(CBMLanguage lang) { @@ -1180,6 +1190,9 @@ const TSLanguage *cbm_ts_language(CBMLanguage lang) { return tree_sitter_magma(); case CBM_LANG_WOLFRAM: return tree_sitter_wolfram(); + case CBM_LANG_KUSTOMIZE: + case CBM_LANG_K8S: + return tree_sitter_yaml(); default: return NULL; } From bf3af55bf2d85c269023acce5d1f297e6e06aa3c Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Fri, 20 Mar 2026 16:49:06 -0500 Subject: [PATCH 03/12] feat(k8s): pipeline pass for k8s manifest parsing and graph node emission - Add pass_k8s.c: cbm_pipeline_pass_k8s() iterates files, classifies kustomize overlays via cbm_is_kustomize_file() and k8s manifests via cbm_is_k8s_manifest(), emits Module/Resource nodes and IMPORTS/DEFINES edges - Kustomize files emit Module node (cbm_infra_qn) + IMPORTS edges per resources entry - K8s manifest files emit Resource nodes per top-level document with DEFINES edge - Falls back to file re-read + re-extraction when result_cache is unavailable - Declare cbm_pipeline_pass_k8s() prototype in pipeline_internal.h - Add pass_k8s.c to PIPELINE_SRCS in Makefile.cbm - Call pass after definitions pass in pipeline.c sequential path Co-Authored-By: Claude Sonnet 4.6 --- Makefile.cbm | 1 + src/pipeline/pass_k8s.c | 269 +++++++++++++++++++++++++++++++ src/pipeline/pipeline.c | 9 ++ src/pipeline/pipeline_internal.h | 2 + 4 files changed, 281 insertions(+) create mode 100644 src/pipeline/pass_k8s.c diff --git a/Makefile.cbm b/Makefile.cbm index cf5171b..ecccd3a 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -177,6 +177,7 @@ PIPELINE_SRCS = \ src/pipeline/pass_envscan.c \ src/pipeline/pass_compile_commands.c \ src/pipeline/pass_infrascan.c \ + src/pipeline/pass_k8s.c \ src/pipeline/httplink.c # Traces module (new) diff --git a/src/pipeline/pass_k8s.c b/src/pipeline/pass_k8s.c new file mode 100644 index 0000000..a1fec4e --- /dev/null +++ b/src/pipeline/pass_k8s.c @@ -0,0 +1,269 @@ +/* + * pass_k8s.c — Pipeline pass for Kubernetes manifest and Kustomize overlay processing. + * + * For each discovered YAML file: + * 1. Check if it is a kustomize overlay (kustomization.yaml / kustomization.yml) + * → emit a Module node and IMPORTS edges for each resources/bases/patches entry + * 2. Else if it is a generic k8s manifest (apiVersion: detected) + * → emit one Resource node per top-level resource document + * + * Depends on: pass_infrascan.c (cbm_is_kustomize_file, cbm_is_k8s_manifest, cbm_infra_qn), + * extraction layer (cbm.h), graph_buffer, pipeline internals. + */ +#include "pipeline/pipeline.h" +#include "pipeline/pipeline_internal.h" +#include "graph_buffer/graph_buffer.h" +#include "discover/discover.h" +#include "foundation/log.h" +#include "cbm.h" + +#include +#include +#include + +/* ── Internal helpers ────────────────────────────────────────────── */ + +/* Read entire file into heap-allocated buffer. Returns NULL on error. + * Caller must free(). Sets *out_len to byte count. */ +static char *k8s_read_file(const char *path, int *out_len) { + FILE *f = fopen(path, "rb"); + if (!f) { + return NULL; + } + + (void)fseek(f, 0, SEEK_END); + long size = ftell(f); + (void)fseek(f, 0, SEEK_SET); + + if (size <= 0 || size > (long)100 * 1024 * 1024) { + (void)fclose(f); + return NULL; + } + + char *buf = malloc(size + 1); + if (!buf) { + (void)fclose(f); + return NULL; + } + + size_t nread = fread(buf, 1, size, f); + (void)fclose(f); + // NOLINTNEXTLINE(clang-analyzer-security.ArrayBound) + buf[nread] = '\0'; + *out_len = (int)nread; + return buf; +} + +/* Format int to string for logging. Thread-safe via TLS. */ +static const char *itoa_k8s(int val) { + static CBM_TLS char bufs[4][32]; + static CBM_TLS int idx = 0; + int i = idx; + idx = (idx + 1) & 3; + snprintf(bufs[i], sizeof(bufs[i]), "%d", val); + return bufs[i]; +} + +/* Extract the basename of a path (pointer into the string; no allocation). */ +static const char *k8s_basename(const char *path) { + const char *p = strrchr(path, '/'); + return p ? p + 1 : path; +} + +/* ── Kustomize handler ───────────────────────────────────────────── */ + +static void handle_kustomize(cbm_pipeline_ctx_t *ctx, const char *path, const char *rel_path, + CBMFileResult *result) { + /* Emit Module node for this kustomize overlay file */ + char *mod_qn = cbm_infra_qn(ctx->project_name, rel_path, "kustomize", NULL); + if (!mod_qn) { + return; + } + + // NOLINTNEXTLINE(misc-include-cleaner) + int64_t mod_id = + cbm_gbuf_upsert_node(ctx->gbuf, "Module", k8s_basename(rel_path), mod_qn, rel_path, 1, 0, + "{\"source\":\"kustomize\"}"); + free(mod_qn); + + if (mod_id <= 0) { + return; + } + + /* If we have a cached extraction result, emit IMPORTS edges for + * resources/bases/patches/components entries */ + int import_count = 0; + CBMFileResult *res = result; + bool allocated = false; + + if (!res) { + /* Fall back to re-extraction */ + int src_len = 0; + char *source = k8s_read_file(path, &src_len); + if (source) { + res = cbm_extract_file(source, src_len, CBM_LANG_KUSTOMIZE, ctx->project_name, rel_path, + CBM_EXTRACT_BUDGET, NULL, NULL); + free(source); + allocated = true; + } + } + + if (res) { + for (int j = 0; j < res->imports.count; j++) { + CBMImport *imp = &res->imports.items[j]; + if (!imp->module_path) { + continue; + } + + /* Compute target file QN */ + char *target_qn = + cbm_pipeline_fqn_compute(ctx->project_name, imp->module_path, "__file__"); + if (!target_qn) { + continue; + } + + const cbm_gbuf_node_t *target = cbm_gbuf_find_by_qn(ctx->gbuf, target_qn); + free(target_qn); + + if (target) { + cbm_gbuf_insert_edge(ctx->gbuf, mod_id, target->id, "IMPORTS", + "{\"via\":\"kustomize\"}"); + import_count++; + } + } + + if (allocated) { + cbm_free_result(res); + } + } + + cbm_log_info("pass.k8s.kustomize", "file", rel_path, "imports", itoa_k8s(import_count)); +} + +/* ── K8s manifest handler ────────────────────────────────────────── */ + +static void handle_k8s_manifest(cbm_pipeline_ctx_t *ctx, const char *path, const char *rel_path, + CBMFileResult *result) { + int resource_count = 0; + CBMFileResult *res = result; + bool allocated = false; + + if (!res) { + /* Fall back to re-extraction */ + int src_len = 0; + char *source = k8s_read_file(path, &src_len); + if (source) { + res = cbm_extract_file(source, src_len, CBM_LANG_K8S, ctx->project_name, rel_path, + CBM_EXTRACT_BUDGET, NULL, NULL); + free(source); + allocated = true; + } + } + + if (!res) { + return; + } + + /* Compute file node QN for DEFINES edges */ + char *file_qn = cbm_pipeline_fqn_compute(ctx->project_name, rel_path, "__file__"); + const cbm_gbuf_node_t *file_node = file_qn ? cbm_gbuf_find_by_qn(ctx->gbuf, file_qn) : NULL; + free(file_qn); + + for (int d = 0; d < res->defs.count; d++) { + CBMDefinition *def = &res->defs.items[d]; + if (!def->label || strcmp(def->label, "Resource") != 0) { + continue; + } + if (!def->name || !def->qualified_name) { + continue; + } + + // NOLINTNEXTLINE(misc-include-cleaner) + int64_t node_id = + cbm_gbuf_upsert_node(ctx->gbuf, "Resource", def->name, def->qualified_name, rel_path, + (int)def->start_line, (int)def->end_line, "{\"source\":\"k8s\"}"); + + /* DEFINES edge: File → Resource */ + if (file_node && node_id > 0) { + cbm_gbuf_insert_edge(ctx->gbuf, file_node->id, node_id, "DEFINES", "{}"); + } + + resource_count++; + } + + if (allocated) { + cbm_free_result(res); + } + + cbm_log_info("pass.k8s.manifest", "file", rel_path, "resources", itoa_k8s(resource_count)); +} + +/* ── Pass entry point ────────────────────────────────────────────── */ + +// NOLINTNEXTLINE(misc-include-cleaner) — cbm_file_info_t provided by standard header +int cbm_pipeline_pass_k8s(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count) { + cbm_log_info("pass.start", "pass", "k8s", "files", itoa_k8s(file_count)); + + cbm_init(); + + int kustomize_count = 0; + int manifest_count = 0; + + for (int i = 0; i < file_count; i++) { + if (cbm_pipeline_check_cancel(ctx)) { + return -1; + } + + const char *path = files[i].path; + const char *rel = files[i].rel_path; + CBMLanguage lang = files[i].language; + const char *base = k8s_basename(rel); + + CBMFileResult *cached = (ctx->result_cache && ctx->result_cache[i]) + ? ctx->result_cache[i] + : NULL; + + if (cbm_is_kustomize_file(base)) { + handle_kustomize(ctx, path, rel, cached); + kustomize_count++; + } else if (lang == CBM_LANG_YAML || lang == CBM_LANG_K8S) { + /* Need source content for cbm_is_k8s_manifest check */ + if (cached) { + /* Use cached result — the file is a k8s manifest if lang is CBM_LANG_K8S, + * or if we check the source. With a cached result available, trust the + * language field set during discovery. */ + if (lang == CBM_LANG_K8S) { + handle_k8s_manifest(ctx, path, rel, cached); + manifest_count++; + } else { + /* CBM_LANG_YAML: need source to confirm apiVersion presence */ + int src_len = 0; + char *source = k8s_read_file(path, &src_len); + if (source) { + if (cbm_is_k8s_manifest(base, source)) { + handle_k8s_manifest(ctx, path, rel, cached); + manifest_count++; + } + free(source); + } + } + } else { + /* No cached result — read source to classify */ + int src_len = 0; + char *source = k8s_read_file(path, &src_len); + if (source) { + if (cbm_is_k8s_manifest(base, source)) { + /* Pass NULL result — handle_k8s_manifest will re-extract */ + handle_k8s_manifest(ctx, path, rel, NULL); + manifest_count++; + } + free(source); + } + } + } + } + + cbm_log_info("pass.done", "pass", "k8s", "kustomize", itoa_k8s(kustomize_count), "manifests", + itoa_k8s(manifest_count)); + return 0; +} diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index f5b7510..10d63c1 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -518,6 +518,15 @@ int cbm_pipeline_run(cbm_pipeline_t *p) { goto seq_cleanup; } + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_pipeline_pass_k8s(&ctx, files, file_count); + if (rc != 0) { /* log warning, continue */ } + cbm_log_info("pass.timing", "pass", "k8s", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + if (check_cancel(p)) { + rc = -1; + goto seq_cleanup; + } + cbm_clock_gettime(CLOCK_MONOTONIC, &t); rc = cbm_pipeline_pass_calls(&ctx, files, file_count); if (rc != 0) { diff --git a/src/pipeline/pipeline_internal.h b/src/pipeline/pipeline_internal.h index ea5c1d7..88862ff 100644 --- a/src/pipeline/pipeline_internal.h +++ b/src/pipeline/pipeline_internal.h @@ -355,6 +355,8 @@ int cbm_parallel_resolve(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int cbm_pipeline_pass_definitions(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count); +int cbm_pipeline_pass_k8s(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count); + int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count); /* Sub-passes called from pass_calls: pattern-based edge extraction */ From 337694dc656220be958b3933321dcf5aaccbc3e4 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Fri, 20 Mar 2026 17:15:17 -0500 Subject: [PATCH 04/12] test(k8s): tests for detection helpers, extractors, and pipeline pass - Add TEST(infra_is_kustomize_file): positive/negative/NULL cases - Add TEST(infra_is_k8s_manifest): apiVersion present/absent, kustomize file returns false, NULL guards - Add TEST(k8s_extract_kustomize): asserts 2 imports (deployment.yaml, service.yaml) from Kustomization resources list - Add TEST(k8s_extract_manifest): asserts Resource def with label "Resource" and name containing "Deployment" - Add TEST(k8s_extract_manifest_no_name): no crash, has_error==false - Fix extract_k8s.c get_scalar_text() to unwrap flow_node wrappers (tree-sitter YAML grammar wraps plain_scalar in flow_node) - Fix pass_k8s.c missing #include "foundation/compat.h" for CBM_TLS Co-Authored-By: Claude Sonnet 4.6 --- internal/cbm/extract_k8s.c | 10 ++++ src/pipeline/pass_k8s.c | 1 + tests/test_pipeline.c | 99 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+) diff --git a/internal/cbm/extract_k8s.c b/internal/cbm/extract_k8s.c index 0931d95..559fa82 100644 --- a/internal/cbm/extract_k8s.c +++ b/internal/cbm/extract_k8s.c @@ -22,9 +22,19 @@ // Return the raw source text for a scalar node (plain, single-quoted, or // double-quoted). Surrounding quote characters are stripped for quoted forms. +// Handles flow_node wrappers transparently by descending into the first named +// child (the tree-sitter YAML grammar often wraps scalars in flow_node). // Returns NULL for non-scalar node types. static const char *get_scalar_text(CBMArena *a, TSNode node, const char *source) { const char *type = ts_node_type(node); + // Unwrap flow_node: the actual scalar is the first named child + if (strcmp(type, "flow_node") == 0) { + TSNode inner = ts_node_named_child(node, 0); + if (ts_node_is_null(inner)) { + return NULL; + } + return get_scalar_text(a, inner, source); + } if (strcmp(type, "plain_scalar") == 0) { return cbm_node_text(a, node, source); } diff --git a/src/pipeline/pass_k8s.c b/src/pipeline/pass_k8s.c index a1fec4e..c692b57 100644 --- a/src/pipeline/pass_k8s.c +++ b/src/pipeline/pass_k8s.c @@ -15,6 +15,7 @@ #include "graph_buffer/graph_buffer.h" #include "discover/discover.h" #include "foundation/log.h" +#include "foundation/compat.h" #include "cbm.h" #include diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index c2302a2..18a792c 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -3331,6 +3331,30 @@ TEST(infra_is_dockerfile) { PASS(); } +TEST(infra_is_kustomize_file) { + ASSERT(cbm_is_kustomize_file("kustomization.yaml")); + ASSERT(cbm_is_kustomize_file("kustomization.yml")); + ASSERT(cbm_is_kustomize_file("KUSTOMIZATION.YAML")); /* case-insensitive */ + ASSERT(!cbm_is_kustomize_file("deployment.yaml")); + ASSERT(!cbm_is_kustomize_file("kustomize.yaml")); + ASSERT(!cbm_is_kustomize_file(NULL)); + PASS(); +} + +TEST(infra_is_k8s_manifest) { + const char *deploy = "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: my-app\n"; + const char *plain = "name: foo\nvalue: bar\n"; + const char *kust = "apiVersion: kustomize.config.k8s.io/v1beta1\nkind: Kustomization\n"; + + ASSERT(cbm_is_k8s_manifest("deployment.yaml", deploy)); + ASSERT(!cbm_is_k8s_manifest("deployment.yaml", plain)); + /* kustomize file should return false even if it has apiVersion */ + ASSERT(!cbm_is_k8s_manifest("kustomization.yaml", kust)); + ASSERT(!cbm_is_k8s_manifest(NULL, deploy)); + ASSERT(!cbm_is_k8s_manifest("deployment.yaml", NULL)); + PASS(); +} + TEST(infra_is_env_file) { ASSERT(cbm_is_env_file(".env")); ASSERT(cbm_is_env_file(".env.local")); @@ -4139,6 +4163,75 @@ TEST(infra_pipeline_idempotent) { PASS(); } +/* ── K8s / Kustomize extraction tests ──────────────────────────── */ + +TEST(k8s_extract_kustomize) { + const char *src = + "apiVersion: kustomize.config.k8s.io/v1beta1\n" + "kind: Kustomization\n" + "resources:\n" + " - deployment.yaml\n" + " - service.yaml\n"; + CBMFileResult *r = cbm_extract_file(src, (int)strlen(src), CBM_LANG_KUSTOMIZE, + "myproj", "base/kustomization.yaml", + 0, NULL, NULL); + ASSERT(r != NULL); + ASSERT_GTE(r->imports.count, 2); + + bool found_deploy = false, found_svc = false; + for (int i = 0; i < r->imports.count; i++) { + if (r->imports.items[i].module_path && + strcmp(r->imports.items[i].module_path, "deployment.yaml") == 0) + found_deploy = true; + if (r->imports.items[i].module_path && + strcmp(r->imports.items[i].module_path, "service.yaml") == 0) + found_svc = true; + } + ASSERT_TRUE(found_deploy); + ASSERT_TRUE(found_svc); + + cbm_free_result(r); + PASS(); +} + +TEST(k8s_extract_manifest) { + const char *src = + "apiVersion: apps/v1\n" + "kind: Deployment\n" + "metadata:\n" + " name: my-app\n" + " namespace: production\n"; + CBMFileResult *r = cbm_extract_file(src, (int)strlen(src), CBM_LANG_K8S, + "myproj", "k8s/deployment.yaml", + 0, NULL, NULL); + ASSERT(r != NULL); + ASSERT_GTE(r->defs.count, 1); + + bool found_resource = false; + for (int d = 0; d < r->defs.count; d++) { + if (r->defs.items[d].label && + strcmp(r->defs.items[d].label, "Resource") == 0 && + r->defs.items[d].name && + strstr(r->defs.items[d].name, "Deployment") != NULL) + found_resource = true; + } + ASSERT_TRUE(found_resource); + + cbm_free_result(r); + PASS(); +} + +TEST(k8s_extract_manifest_no_name) { + const char *src = "apiVersion: apps/v1\nkind: Deployment\n"; + CBMFileResult *r = cbm_extract_file(src, (int)strlen(src), CBM_LANG_K8S, + "myproj", "k8s/deploy.yaml", 0, NULL, NULL); + ASSERT(r != NULL); + /* No crash — defs count may be 0 because metadata.name is absent */ + ASSERT(!r->has_error); + cbm_free_result(r); + PASS(); +} + /* ── Envscan tests (port of envscan_test.go) ───────────────────── */ /* Helper: write a file inside a temp dir */ @@ -5055,6 +5148,8 @@ SUITE(pipeline) { RUN_TEST(infra_is_cloudbuild_file); RUN_TEST(infra_is_shell_script); RUN_TEST(infra_is_dockerfile); + RUN_TEST(infra_is_kustomize_file); + RUN_TEST(infra_is_k8s_manifest); RUN_TEST(infra_is_env_file); RUN_TEST(infra_clean_json_brackets); RUN_TEST(infra_secret_detection); @@ -5083,6 +5178,10 @@ SUITE(pipeline) { /* Infrascan: pipeline integration */ RUN_TEST(infra_pipeline_integration); RUN_TEST(infra_pipeline_idempotent); + /* K8s / Kustomize extraction */ + RUN_TEST(k8s_extract_kustomize); + RUN_TEST(k8s_extract_manifest); + RUN_TEST(k8s_extract_manifest_no_name); /* Env URL scanning */ RUN_TEST(envscan_dockerfile_env_urls); RUN_TEST(envscan_shell_env_urls); From e8030fe6383896d0ffbdc0e24242b7d9ce243ea0 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Fri, 20 Mar 2026 17:17:13 -0500 Subject: [PATCH 05/12] docs(k8s): add Resource to Node Labels list in README Co-Authored-By: Claude Sonnet 4.6 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4b28c57..2aff6fa 100644 --- a/README.md +++ b/README.md @@ -269,7 +269,7 @@ codebase-memory-mcp cli --raw search_graph '{"label": "Function"}' | jq '.result ### Node Labels -`Project`, `Package`, `Folder`, `File`, `Module`, `Class`, `Function`, `Method`, `Interface`, `Enum`, `Type`, `Route` +`Project`, `Package`, `Folder`, `File`, `Module`, `Class`, `Function`, `Method`, `Interface`, `Enum`, `Type`, `Route`, `Resource` ### Edge Types From 5a663b9c843c710f310bda5139b03b4a9308e4ac Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Fri, 20 Mar 2026 17:17:45 -0500 Subject: [PATCH 06/12] docs(k8s): document infra-pass pattern in CONTRIBUTING.md Co-Authored-By: Claude Sonnet 4.6 --- CONTRIBUTING.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6552369..4026ff4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -84,6 +84,21 @@ Language support is split between two layers: 4. Add a test case in `tests/test_pipeline.c` for integration-level fixes 5. Verify with a real open-source repo +### Infrastructure Languages (Infra-Pass Pattern) + +Languages like **Dockerfile**, **docker-compose**, **Kubernetes manifests**, and **Kustomize** do not use tree-sitter grammars. Instead they follow an *infra-pass* pattern: + +1. **Detection helpers** in `src/pipeline/pass_infrascan.c` — functions like `cbm_is_dockerfile()`, `cbm_is_k8s_manifest()`, `cbm_is_kustomize_file()` identify files by name and/or content heuristics (e.g., presence of `apiVersion:`). +2. **Custom extractors** in `src/pipeline/extract_k8s.c` (or `extract_infra.c`) — hand-written parsers that walk the raw YAML/text and populate `CBMFileResult` with imports and definitions. +3. **Pipeline pass** (`pass_k8s.c`, `pass_infrascan.c`) — calls the extractor and emits graph nodes/edges. K8s manifests emit `Resource` nodes; Kustomize files emit `Module` nodes with `IMPORTS` edges to referenced resource files. + +**When adding a new infrastructure language:** +- Add a detection helper (`cbm_is__file()`) in `pass_infrascan.c` or a new `pass_.c`. +- Add the `CBM_LANG_` enum value in `cbm_language.h` and a row in the language table in `lang_specs.c`. +- Write a custom extractor that returns `CBMFileResult*` — do not add a tree-sitter grammar. +- Register the pass in `pipeline.c`. +- Add tests in `tests/test_pipeline.c` following the `TEST(infra_is_dockerfile)` and `TEST(k8s_extract_manifest)` patterns. + ## Pull Request Guidelines - **C code only** — this project was rewritten from Go to pure C in v0.5.0. Go PRs will be acknowledged and potentially ported, but cannot be merged directly. From e696fcdf803ef6f1918b7a7355e80138e9a199e9 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Fri, 20 Mar 2026 17:34:37 -0500 Subject: [PATCH 07/12] style(k8s): apply clang-format to k8s and infra-pass source files - Fix alignment violations in extract_k8s.c, pass_k8s.c, pass_infrascan.c - Fix spacing in language.c filename table rows (kustomization.yml entry) - Fix alignment in cbm.h CBMLanguage enum comments - Fix pipeline.c empty-body brace style Co-Authored-By: Claude Sonnet 4.6 --- internal/cbm/cbm.h | 4 ++-- internal/cbm/extract_k8s.c | 24 +++++++++++------------- src/discover/language.c | 16 ++++++++++------ src/pipeline/pass_infrascan.c | 15 ++++++++++----- src/pipeline/pass_k8s.c | 10 ++++------ src/pipeline/pipeline.c | 3 ++- 6 files changed, 39 insertions(+), 33 deletions(-) diff --git a/internal/cbm/cbm.h b/internal/cbm/cbm.h index 1fb5a5a..6b49ae7 100644 --- a/internal/cbm/cbm.h +++ b/internal/cbm/cbm.h @@ -75,8 +75,8 @@ typedef enum { CBM_LANG_FORM, CBM_LANG_MAGMA, CBM_LANG_WOLFRAM, - CBM_LANG_KUSTOMIZE, // kustomization.yaml — Kubernetes overlay tool - CBM_LANG_K8S, // Generic Kubernetes manifest (apiVersion: detected) + CBM_LANG_KUSTOMIZE, // kustomization.yaml — Kubernetes overlay tool + CBM_LANG_K8S, // Generic Kubernetes manifest (apiVersion: detected) CBM_LANG_COUNT } CBMLanguage; diff --git a/internal/cbm/extract_k8s.c b/internal/cbm/extract_k8s.c index 559fa82..da54b9e 100644 --- a/internal/cbm/extract_k8s.c +++ b/internal/cbm/extract_k8s.c @@ -66,8 +66,7 @@ static int is_kustomize_list_key(const char *key) { // Walk a block_sequence node and emit one CBMImport per block_sequence_item // scalar child, using key_name as the local_name. -static void emit_kustomize_sequence(CBMExtractCtx *ctx, TSNode seq_node, - const char *key_name) { +static void emit_kustomize_sequence(CBMExtractCtx *ctx, TSNode seq_node, const char *key_name) { CBMArena *a = ctx->arena; uint32_t n = ts_node_child_count(seq_node); for (uint32_t i = 0; i < n; i++) { @@ -84,7 +83,7 @@ static void emit_kustomize_sequence(CBMExtractCtx *ctx, TSNode seq_node, continue; } CBMImport imp = { - .local_name = cbm_arena_strdup(a, key_name), + .local_name = cbm_arena_strdup(a, key_name), .module_path = cbm_arena_strdup(a, scalar), }; cbm_imports_push(&ctx->result->imports, a, imp); @@ -157,11 +156,10 @@ static void extract_kustomize(CBMExtractCtx *ctx) { // Descend into the first block_mapping of a document and extract apiVersion, // kind, and metadata.name. Returns void; fills kind_buf and meta_name_buf. -static void extract_k8s_scalars(CBMExtractCtx *ctx, TSNode mapping, - char *kind_buf, size_t kind_sz, - char *meta_name_buf, size_t meta_sz) { +static void extract_k8s_scalars(CBMExtractCtx *ctx, TSNode mapping, char *kind_buf, size_t kind_sz, + char *meta_name_buf, size_t meta_sz) { CBMArena *a = ctx->arena; - kind_buf[0] = '\0'; + kind_buf[0] = '\0'; meta_name_buf[0] = '\0'; uint32_t n = ts_node_child_count(mapping); @@ -255,7 +253,7 @@ static void extract_k8s_manifest(CBMExtractCtx *ctx) { continue; } - char kind_buf[256] = {0}; + char kind_buf[256] = {0}; char meta_name_buf[256] = {0}; extract_k8s_scalars(ctx, mapping, kind_buf, sizeof(kind_buf), meta_name_buf, sizeof(meta_name_buf)); @@ -269,12 +267,12 @@ static void extract_k8s_manifest(CBMExtractCtx *ctx) { snprintf(def_name, sizeof(def_name), "%s/%s", kind_buf, meta_name_buf); CBMDefinition def = {0}; - def.name = cbm_arena_strdup(a, def_name); + def.name = cbm_arena_strdup(a, def_name); def.qualified_name = cbm_arena_sprintf(a, "%s.%s", ctx->module_qn, def_name); - def.label = "Resource"; - def.file_path = ctx->rel_path; - def.start_line = ts_node_start_point(mapping).row + 1; - def.end_line = ts_node_end_point(mapping).row + 1; + def.label = "Resource"; + def.file_path = ctx->rel_path; + def.start_line = ts_node_start_point(mapping).row + 1; + def.end_line = ts_node_end_point(mapping).row + 1; cbm_defs_push(&ctx->result->defs, a, def); break; // Only the first document per file diff --git a/src/discover/language.c b/src/discover/language.c index cad73e0..9e88307 100644 --- a/src/discover/language.c +++ b/src/discover/language.c @@ -269,12 +269,16 @@ typedef struct { } filename_entry_t; static const filename_entry_t FILENAME_TABLE[] = { - {"CMakeLists.txt", CBM_LANG_CMAKE}, {"Dockerfile", CBM_LANG_DOCKERFILE}, - {"GNUmakefile", CBM_LANG_MAKEFILE}, {"Makefile", CBM_LANG_MAKEFILE}, - {"makefile", CBM_LANG_MAKEFILE}, {"meson.build", CBM_LANG_MESON}, - {"meson.options", CBM_LANG_MESON}, {"meson_options.txt", CBM_LANG_MESON}, + {"CMakeLists.txt", CBM_LANG_CMAKE}, + {"Dockerfile", CBM_LANG_DOCKERFILE}, + {"GNUmakefile", CBM_LANG_MAKEFILE}, + {"Makefile", CBM_LANG_MAKEFILE}, + {"makefile", CBM_LANG_MAKEFILE}, + {"meson.build", CBM_LANG_MESON}, + {"meson.options", CBM_LANG_MESON}, + {"meson_options.txt", CBM_LANG_MESON}, {"kustomization.yaml", CBM_LANG_KUSTOMIZE}, - {"kustomization.yml", CBM_LANG_KUSTOMIZE}, + {"kustomization.yml", CBM_LANG_KUSTOMIZE}, {".vimrc", CBM_LANG_VIMSCRIPT}, }; @@ -348,7 +352,7 @@ static const char *LANG_NAMES[CBM_LANG_COUNT] = { [CBM_LANG_MAGMA] = "Magma", [CBM_LANG_WOLFRAM] = "Wolfram", [CBM_LANG_KUSTOMIZE] = "Kustomize", - [CBM_LANG_K8S] = "Kubernetes", + [CBM_LANG_K8S] = "Kubernetes", }; /* ── Public API ──────────────────────────────────────────────────── */ diff --git a/src/pipeline/pass_infrascan.c b/src/pipeline/pass_infrascan.c index 307973f..a59cbb8 100644 --- a/src/pipeline/pass_infrascan.c +++ b/src/pipeline/pass_infrascan.c @@ -193,19 +193,24 @@ bool cbm_is_env_file(const char *name) { } bool cbm_is_kustomize_file(const char *name) { - if (!name) { return false; } + if (!name) { + return false; + } char lower[256]; to_lower(name, lower, sizeof(lower)); - return (strcmp(lower, "kustomization.yaml") == 0 || - strcmp(lower, "kustomization.yml") == 0); + return (strcmp(lower, "kustomization.yaml") == 0 || strcmp(lower, "kustomization.yml") == 0); } // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) bool cbm_is_k8s_manifest(const char *name, const char *content) { - if (!name || !content || cbm_is_kustomize_file(name)) { return false; } + if (!name || !content || cbm_is_kustomize_file(name)) { + return false; + } char buf[4097]; size_t n = strlen(content); - if (n > 4096) { n = 4096; } + if (n > 4096) { + n = 4096; + } memcpy(buf, content, n); buf[n] = '\0'; return ci_strstr(buf, "apiVersion:") != NULL; diff --git a/src/pipeline/pass_k8s.c b/src/pipeline/pass_k8s.c index c692b57..3113f62 100644 --- a/src/pipeline/pass_k8s.c +++ b/src/pipeline/pass_k8s.c @@ -82,9 +82,8 @@ static void handle_kustomize(cbm_pipeline_ctx_t *ctx, const char *path, const ch } // NOLINTNEXTLINE(misc-include-cleaner) - int64_t mod_id = - cbm_gbuf_upsert_node(ctx->gbuf, "Module", k8s_basename(rel_path), mod_qn, rel_path, 1, 0, - "{\"source\":\"kustomize\"}"); + int64_t mod_id = cbm_gbuf_upsert_node(ctx->gbuf, "Module", k8s_basename(rel_path), mod_qn, + rel_path, 1, 0, "{\"source\":\"kustomize\"}"); free(mod_qn); if (mod_id <= 0) { @@ -220,9 +219,8 @@ int cbm_pipeline_pass_k8s(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, CBMLanguage lang = files[i].language; const char *base = k8s_basename(rel); - CBMFileResult *cached = (ctx->result_cache && ctx->result_cache[i]) - ? ctx->result_cache[i] - : NULL; + CBMFileResult *cached = + (ctx->result_cache && ctx->result_cache[i]) ? ctx->result_cache[i] : NULL; if (cbm_is_kustomize_file(base)) { handle_kustomize(ctx, path, rel, cached); diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 10d63c1..41c8477 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -520,7 +520,8 @@ int cbm_pipeline_run(cbm_pipeline_t *p) { cbm_clock_gettime(CLOCK_MONOTONIC, &t); rc = cbm_pipeline_pass_k8s(&ctx, files, file_count); - if (rc != 0) { /* log warning, continue */ } + if (rc != 0) { /* log warning, continue */ + } cbm_log_info("pass.timing", "pass", "k8s", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); if (check_cancel(p)) { rc = -1; From 94841e054235c1ab264fe6e48c8a32d739b2ec07 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Fri, 20 Mar 2026 18:25:27 -0500 Subject: [PATCH 08/12] fix(k8s): address QA round 1 findings - [Major 1+2] pass_k8s: always re-extract K8s manifests with CBM_LANG_K8S, discarding any cached YAML result; pass already-read source buffer to handle_k8s_manifest to eliminate the double file read - [Minor 3] CONTRIBUTING.md: fix extractor path to internal/cbm/extract_k8s.c and clarify tree-sitter YAML grammar usage - [Minor 4] language.c: add comment documenting intentional case-sensitive FILENAME_TABLE and case-insensitive cbm_is_kustomize_file() split behaviour - [Minor 5] test_pipeline.c: add k8s_extract_manifest_multidoc test pinning single-document-per-file extraction behaviour - [Minor 6] pass_infrascan: replace strlen with strnlen(content, 4096) in cbm_is_k8s_manifest to bound the scan - [Minor 7] extract_k8s: add "crds" to is_kustomize_list_key - [Nit 8] extract_k8s: remove dead second block_node unwrap in extract_k8s_scalars metadata descent - [Nit 10] extract_k8s: use cbm_arena_strdup for def.label "Resource" Co-Authored-By: Claude Sonnet 4.6 --- CONTRIBUTING.md | 2 +- internal/cbm/extract_k8s.c | 8 ++--- src/discover/language.c | 5 +++ src/pipeline/pass_infrascan.c | 5 +-- src/pipeline/pass_k8s.c | 67 ++++++++++------------------------- tests/test_pipeline.c | 40 +++++++++++++++++++++ 6 files changed, 69 insertions(+), 58 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4026ff4..b863001 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -89,7 +89,7 @@ Language support is split between two layers: Languages like **Dockerfile**, **docker-compose**, **Kubernetes manifests**, and **Kustomize** do not use tree-sitter grammars. Instead they follow an *infra-pass* pattern: 1. **Detection helpers** in `src/pipeline/pass_infrascan.c` — functions like `cbm_is_dockerfile()`, `cbm_is_k8s_manifest()`, `cbm_is_kustomize_file()` identify files by name and/or content heuristics (e.g., presence of `apiVersion:`). -2. **Custom extractors** in `src/pipeline/extract_k8s.c` (or `extract_infra.c`) — hand-written parsers that walk the raw YAML/text and populate `CBMFileResult` with imports and definitions. +2. **Custom extractors** in `internal/cbm/extract_k8s.c` — tree-sitter-based parsers that walk the YAML AST (using the tree-sitter YAML grammar) and populate `CBMFileResult` with imports and definitions. 3. **Pipeline pass** (`pass_k8s.c`, `pass_infrascan.c`) — calls the extractor and emits graph nodes/edges. K8s manifests emit `Resource` nodes; Kustomize files emit `Module` nodes with `IMPORTS` edges to referenced resource files. **When adding a new infrastructure language:** diff --git a/internal/cbm/extract_k8s.c b/internal/cbm/extract_k8s.c index da54b9e..26bbb69 100644 --- a/internal/cbm/extract_k8s.c +++ b/internal/cbm/extract_k8s.c @@ -57,7 +57,7 @@ static const char *get_scalar_text(CBMArena *a, TSNode node, const char *source) static int is_kustomize_list_key(const char *key) { return (strcmp(key, "resources") == 0 || strcmp(key, "bases") == 0 || strcmp(key, "patches") == 0 || strcmp(key, "components") == 0 || - strcmp(key, "patchesStrategicMerge") == 0); + strcmp(key, "patchesStrategicMerge") == 0 || strcmp(key, "crds") == 0); } // --------------------------------------------------------------------------- @@ -196,10 +196,8 @@ static void extract_k8s_scalars(CBMExtractCtx *ctx, TSNode mapping, char *kind_b } } else if (strcmp(key, "metadata") == 0) { // Descend into metadata block_mapping to find "name" + // val_node is already unwrapped from block_node above. TSNode meta_mapping = val_node; - if (strcmp(ts_node_type(meta_mapping), "block_node") == 0) { - meta_mapping = ts_node_named_child(meta_mapping, 0); - } if (ts_node_is_null(meta_mapping) || strcmp(ts_node_type(meta_mapping), "block_mapping") != 0) { continue; @@ -269,7 +267,7 @@ static void extract_k8s_manifest(CBMExtractCtx *ctx) { CBMDefinition def = {0}; def.name = cbm_arena_strdup(a, def_name); def.qualified_name = cbm_arena_sprintf(a, "%s.%s", ctx->module_qn, def_name); - def.label = "Resource"; + def.label = cbm_arena_strdup(a, "Resource"); def.file_path = ctx->rel_path; def.start_line = ts_node_start_point(mapping).row + 1; def.end_line = ts_node_end_point(mapping).row + 1; diff --git a/src/discover/language.c b/src/discover/language.c index 9e88307..ca91770 100644 --- a/src/discover/language.c +++ b/src/discover/language.c @@ -279,6 +279,11 @@ static const filename_entry_t FILENAME_TABLE[] = { {"meson_options.txt", CBM_LANG_MESON}, {"kustomization.yaml", CBM_LANG_KUSTOMIZE}, {"kustomization.yml", CBM_LANG_KUSTOMIZE}, + /* Note: FILENAME_TABLE uses case-sensitive strcmp, so mixed-case variants + * (e.g. "Kustomization.yaml") are not matched here. They fall through to + * CBM_LANG_YAML and are re-classified by cbm_is_kustomize_file() in + * pass_k8s.c, which performs a case-insensitive comparison. This is the + * intended behaviour — no additional entries are needed. */ {".vimrc", CBM_LANG_VIMSCRIPT}, }; diff --git a/src/pipeline/pass_infrascan.c b/src/pipeline/pass_infrascan.c index a59cbb8..1e30b71 100644 --- a/src/pipeline/pass_infrascan.c +++ b/src/pipeline/pass_infrascan.c @@ -207,10 +207,7 @@ bool cbm_is_k8s_manifest(const char *name, const char *content) { return false; } char buf[4097]; - size_t n = strlen(content); - if (n > 4096) { - n = 4096; - } + size_t n = strnlen(content, 4096); memcpy(buf, content, n); buf[n] = '\0'; return ci_strstr(buf, "apiVersion:") != NULL; diff --git a/src/pipeline/pass_k8s.c b/src/pipeline/pass_k8s.c index 3113f62..2771a4f 100644 --- a/src/pipeline/pass_k8s.c +++ b/src/pipeline/pass_k8s.c @@ -142,24 +142,15 @@ static void handle_kustomize(cbm_pipeline_ctx_t *ctx, const char *path, const ch /* ── K8s manifest handler ────────────────────────────────────────── */ +/* source/src_len are the already-read file bytes (caller retains ownership and + * must free after this call returns). */ static void handle_k8s_manifest(cbm_pipeline_ctx_t *ctx, const char *path, const char *rel_path, - CBMFileResult *result) { + const char *source, int src_len) { + (void)path; /* retained for symmetry; source is always provided now */ int resource_count = 0; - CBMFileResult *res = result; - bool allocated = false; - - if (!res) { - /* Fall back to re-extraction */ - int src_len = 0; - char *source = k8s_read_file(path, &src_len); - if (source) { - res = cbm_extract_file(source, src_len, CBM_LANG_K8S, ctx->project_name, rel_path, - CBM_EXTRACT_BUDGET, NULL, NULL); - free(source); - allocated = true; - } - } + CBMFileResult *res = cbm_extract_file(source, src_len, CBM_LANG_K8S, ctx->project_name, + rel_path, CBM_EXTRACT_BUDGET, NULL, NULL); if (!res) { return; } @@ -191,9 +182,7 @@ static void handle_k8s_manifest(cbm_pipeline_ctx_t *ctx, const char *path, const resource_count++; } - if (allocated) { - cbm_free_result(res); - } + cbm_free_result(res); cbm_log_info("pass.k8s.manifest", "file", rel_path, "resources", itoa_k8s(resource_count)); } @@ -226,38 +215,20 @@ int cbm_pipeline_pass_k8s(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, handle_kustomize(ctx, path, rel, cached); kustomize_count++; } else if (lang == CBM_LANG_YAML || lang == CBM_LANG_K8S) { - /* Need source content for cbm_is_k8s_manifest check */ - if (cached) { - /* Use cached result — the file is a k8s manifest if lang is CBM_LANG_K8S, - * or if we check the source. With a cached result available, trust the - * language field set during discovery. */ - if (lang == CBM_LANG_K8S) { - handle_k8s_manifest(ctx, path, rel, cached); + /* Read source once to classify (and reuse for uncached extraction). */ + int src_len = 0; + char *source = k8s_read_file(path, &src_len); + if (source) { + if (cbm_is_k8s_manifest(base, source)) { + /* Always re-extract with CBM_LANG_K8S regardless of any cached + * result: cached results were produced during the parallel YAML + * pass and contain no "Resource" definitions. Pass the already- + * read source buffer so handle_k8s_manifest does not re-read. */ + (void)cached; /* cached YAML result intentionally discarded */ + handle_k8s_manifest(ctx, path, rel, source, src_len); manifest_count++; - } else { - /* CBM_LANG_YAML: need source to confirm apiVersion presence */ - int src_len = 0; - char *source = k8s_read_file(path, &src_len); - if (source) { - if (cbm_is_k8s_manifest(base, source)) { - handle_k8s_manifest(ctx, path, rel, cached); - manifest_count++; - } - free(source); - } - } - } else { - /* No cached result — read source to classify */ - int src_len = 0; - char *source = k8s_read_file(path, &src_len); - if (source) { - if (cbm_is_k8s_manifest(base, source)) { - /* Pass NULL result — handle_k8s_manifest will re-extract */ - handle_k8s_manifest(ctx, path, rel, NULL); - manifest_count++; - } - free(source); } + free(source); } } } diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 18a792c..87966ac 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -4232,6 +4232,45 @@ TEST(k8s_extract_manifest_no_name) { PASS(); } +TEST(k8s_extract_manifest_multidoc) { + /* Two-document YAML separated by "---". + * extract_k8s_manifest contains a "break" after the first successful push, + * so it processes only the first document that has both kind and + * metadata.name. This test pins that behaviour: the first document's + * resource must be present and no crash must occur. + * + * Note: with some tree-sitter YAML grammar versions the root stream may + * expose both documents as siblings; the break still fires after the first + * successful def push, so defs.count must be exactly 1. */ + const char *src = + "apiVersion: apps/v1\n" + "kind: Deployment\n" + "metadata:\n" + " name: my-app\n" + "---\n" + "apiVersion: v1\n" + "kind: Service\n" + "metadata:\n" + " name: my-svc\n"; + CBMFileResult *r = cbm_extract_file(src, (int)strlen(src), CBM_LANG_K8S, + "myproj", "k8s/multi.yaml", 0, NULL, NULL); + ASSERT(r != NULL); + ASSERT(!r->has_error); + /* First document's resource must be present */ + int found = 0; + for (int i = 0; i < r->defs.count; i++) { + if (r->defs.items[i].label && strcmp(r->defs.items[i].label, "Resource") == 0 && + r->defs.items[i].name && strcmp(r->defs.items[i].name, "Deployment/my-app") == 0) { + found = 1; + } + } + ASSERT(found); + /* At least one def, no more than one (only first document processed) */ + ASSERT(r->defs.count >= 1); + cbm_free_result(r); + PASS(); +} + /* ── Envscan tests (port of envscan_test.go) ───────────────────── */ /* Helper: write a file inside a temp dir */ @@ -5182,6 +5221,7 @@ SUITE(pipeline) { RUN_TEST(k8s_extract_kustomize); RUN_TEST(k8s_extract_manifest); RUN_TEST(k8s_extract_manifest_no_name); + RUN_TEST(k8s_extract_manifest_multidoc); /* Env URL scanning */ RUN_TEST(envscan_dockerfile_env_urls); RUN_TEST(envscan_shell_env_urls); From 340862c619d948f07a0dc52c884e2ce4aa2000d0 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Fri, 20 Mar 2026 18:34:44 -0500 Subject: [PATCH 09/12] fix(k8s): address QA round 2 findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CONTRIBUTING.md: fix CBMLanguage enum file reference from non-existent cbm_language.h to internal/cbm/cbm.h - CONTRIBUTING.md: rephrase "do not use tree-sitter grammars" to "do not require a new tree-sitter grammar", clarifying they reuse the existing YAML grammar - pass_k8s.c: correct header comment from "per top-level resource document" to "first document only — multi-document YAML is not yet supported" Co-Authored-By: Claude Sonnet 4.6 --- CONTRIBUTING.md | 4 ++-- src/pipeline/pass_k8s.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b863001..1cb9d57 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -86,7 +86,7 @@ Language support is split between two layers: ### Infrastructure Languages (Infra-Pass Pattern) -Languages like **Dockerfile**, **docker-compose**, **Kubernetes manifests**, and **Kustomize** do not use tree-sitter grammars. Instead they follow an *infra-pass* pattern: +Languages like **Dockerfile**, **docker-compose**, **Kubernetes manifests**, and **Kustomize** do not require a new tree-sitter grammar. Instead they follow an *infra-pass* pattern, reusing the existing tree-sitter YAML grammar where applicable: 1. **Detection helpers** in `src/pipeline/pass_infrascan.c` — functions like `cbm_is_dockerfile()`, `cbm_is_k8s_manifest()`, `cbm_is_kustomize_file()` identify files by name and/or content heuristics (e.g., presence of `apiVersion:`). 2. **Custom extractors** in `internal/cbm/extract_k8s.c` — tree-sitter-based parsers that walk the YAML AST (using the tree-sitter YAML grammar) and populate `CBMFileResult` with imports and definitions. @@ -94,7 +94,7 @@ Languages like **Dockerfile**, **docker-compose**, **Kubernetes manifests**, and **When adding a new infrastructure language:** - Add a detection helper (`cbm_is__file()`) in `pass_infrascan.c` or a new `pass_.c`. -- Add the `CBM_LANG_` enum value in `cbm_language.h` and a row in the language table in `lang_specs.c`. +- Add the `CBM_LANG_` enum value in `internal/cbm/cbm.h` and a row in the language table in `lang_specs.c`. - Write a custom extractor that returns `CBMFileResult*` — do not add a tree-sitter grammar. - Register the pass in `pipeline.c`. - Add tests in `tests/test_pipeline.c` following the `TEST(infra_is_dockerfile)` and `TEST(k8s_extract_manifest)` patterns. diff --git a/src/pipeline/pass_k8s.c b/src/pipeline/pass_k8s.c index 2771a4f..03cc825 100644 --- a/src/pipeline/pass_k8s.c +++ b/src/pipeline/pass_k8s.c @@ -5,7 +5,7 @@ * 1. Check if it is a kustomize overlay (kustomization.yaml / kustomization.yml) * → emit a Module node and IMPORTS edges for each resources/bases/patches entry * 2. Else if it is a generic k8s manifest (apiVersion: detected) - * → emit one Resource node per top-level resource document + * → emit one Resource node per file (first document only — multi-document YAML is not yet supported) * * Depends on: pass_infrascan.c (cbm_is_kustomize_file, cbm_is_k8s_manifest, cbm_infra_qn), * extraction layer (cbm.h), graph_buffer, pipeline internals. From a90ab5d37bb1a0139639e2b6256999d75c039b1f Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sat, 21 Mar 2026 14:38:41 -0500 Subject: [PATCH 10/12] fix(k8s): add k8s pass to incremental pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cbm_pipeline_pass_k8s() was called in the full pipeline (pipeline.c) but absent from pipeline_incremental.c. This meant k8s Resource nodes and kustomize Module nodes were never created or updated during incremental re-indexing — only after a fresh full index. Add the pass after the semantic pass, following the same timing-log pattern as the other incremental passes. Pass changed_files (not the full file list) so only modified/added YAML files are re-processed. Add two regression tests: - incremental_k8s_manifest_indexed: full index + add manifest via incremental, verifies Resource node appears in the DB - incremental_kustomize_module_indexed: same for kustomization.yaml Module node Co-Authored-By: Claude Sonnet 4.6 --- src/pipeline/pipeline_incremental.c | 4 + tests/test_pipeline.c | 137 ++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index 78e2ffd..85443be 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -265,6 +265,10 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("pass.timing", "pass", "incr_semantic", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + cbm_pipeline_pass_k8s(&ctx, changed_files, ci); + cbm_log_info("pass.timing", "pass", "incr_k8s", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + /* Merge new nodes/edges from gbuf into disk DB */ int new_nodes = cbm_gbuf_node_count(gbuf); int new_edges = cbm_gbuf_edge_count(gbuf); diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 87966ac..353b6af 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -5079,6 +5079,141 @@ TEST(incremental_new_file_added) { PASS(); } +TEST(incremental_k8s_manifest_indexed) { + /* Full index with a k8s manifest, then add a new manifest via incremental. + * Verifies that cbm_pipeline_pass_k8s() runs during incremental re-index. */ + char tmpdir[256]; + snprintf(tmpdir, sizeof(tmpdir), "/tmp/cbm_k8s_incr_XXXXXX"); + if (!cbm_mkdtemp(tmpdir)) { + SKIP("tmpdir"); + } + char dbpath[512]; + snprintf(dbpath, sizeof(dbpath), "%s/test.db", tmpdir); + char path[512]; + FILE *f; + + /* Initial manifest */ + snprintf(path, sizeof(path), "%s/deploy.yaml", tmpdir); + f = fopen(path, "w"); + ASSERT_NOT_NULL(f); + fprintf(f, "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: my-app\n"); + fclose(f); + + /* Full index */ + cbm_pipeline_t *p = cbm_pipeline_new(tmpdir, dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + char *project = strdup(cbm_pipeline_project_name(p)); + cbm_pipeline_free(p); + + /* Verify Resource node created by full index */ + cbm_store_t *s = cbm_store_open_path(dbpath); + ASSERT_NOT_NULL(s); + cbm_node_t *nodes = NULL; + int count = 0; + cbm_store_find_nodes_by_label(s, project, "Resource", &nodes, &count); + ASSERT_GT(count, 0); + cbm_store_free_nodes(nodes, count); + cbm_store_close(s); + + /* Add a second manifest — incremental should pick it up */ + snprintf(path, sizeof(path), "%s/svc.yaml", tmpdir); + f = fopen(path, "w"); + ASSERT_NOT_NULL(f); + fprintf(f, "apiVersion: v1\nkind: Service\nmetadata:\n name: my-svc\n"); + fclose(f); + + /* Incremental re-index */ + p = cbm_pipeline_new(tmpdir, dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + cbm_pipeline_free(p); + + /* Verify both Resource nodes now present */ + s = cbm_store_open_path(dbpath); + ASSERT_NOT_NULL(s); + nodes = NULL; + count = 0; + cbm_store_find_nodes_by_label(s, project, "Resource", &nodes, &count); + ASSERT_GTE(count, 2); + cbm_store_free_nodes(nodes, count); + cbm_store_close(s); + + free(project); + char cmd[512]; + snprintf(cmd, sizeof(cmd), "rm -rf '%s'", tmpdir); + (void)system(cmd); + PASS(); +} + +TEST(incremental_kustomize_module_indexed) { + /* Verifies that a kustomization.yaml added after the initial full index + * gets a Module node via the incremental k8s pass. */ + char tmpdir[256]; + snprintf(tmpdir, sizeof(tmpdir), "/tmp/cbm_kust_incr_XXXXXX"); + if (!cbm_mkdtemp(tmpdir)) { + SKIP("tmpdir"); + } + char dbpath[512]; + snprintf(dbpath, sizeof(dbpath), "%s/test.db", tmpdir); + char path[512]; + FILE *f; + + /* Initial resource manifest (gives full index something to find) */ + snprintf(path, sizeof(path), "%s/deploy.yaml", tmpdir); + f = fopen(path, "w"); + ASSERT_NOT_NULL(f); + fprintf(f, "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: my-app\n"); + fclose(f); + + /* Full index */ + cbm_pipeline_t *p = cbm_pipeline_new(tmpdir, dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + char *project = strdup(cbm_pipeline_project_name(p)); + cbm_pipeline_free(p); + + /* Add kustomization.yaml */ + snprintf(path, sizeof(path), "%s/kustomization.yaml", tmpdir); + f = fopen(path, "w"); + ASSERT_NOT_NULL(f); + fprintf(f, "apiVersion: kustomize.config.k8s.io/v1beta1\n" + "kind: Kustomization\n" + "resources:\n" + " - deploy.yaml\n"); + fclose(f); + + /* Incremental re-index */ + p = cbm_pipeline_new(tmpdir, dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + cbm_pipeline_free(p); + + /* Verify Module node created for the kustomization overlay */ + cbm_store_t *s = cbm_store_open_path(dbpath); + ASSERT_NOT_NULL(s); + cbm_node_t *nodes = NULL; + int count = 0; + cbm_store_find_nodes_by_label(s, project, "Module", &nodes, &count); + bool found_kust = false; + for (int i = 0; i < count; i++) { + if (nodes[i].properties_json && + strstr(nodes[i].properties_json, "kustomize")) { + found_kust = true; + break; + } + } + cbm_store_free_nodes(nodes, count); + cbm_store_close(s); + ASSERT_TRUE(found_kust); + + free(project); + char cmd[512]; + snprintf(cmd, sizeof(cmd), "rm -rf '%s'", tmpdir); + (void)system(cmd); + PASS(); +} + SUITE(pipeline) { /* Lifecycle */ RUN_TEST(pipeline_create_free); @@ -5269,4 +5404,6 @@ SUITE(pipeline) { RUN_TEST(incremental_detects_changed_file); RUN_TEST(incremental_detects_deleted_file); RUN_TEST(incremental_new_file_added); + RUN_TEST(incremental_k8s_manifest_indexed); + RUN_TEST(incremental_kustomize_module_indexed); } From 94882c26f9f34c257d20b3f4db7706710b53f09b Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sat, 21 Mar 2026 14:59:12 -0500 Subject: [PATCH 11/12] fix(k8s): address QA round 3 findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Check return value of cbm_pipeline_pass_k8s() in incremental path; log warning on failure (fail-open, matches full pipeline behaviour) - Add comment documenting pass ordering and the two known structural limitations: File→Resource DEFINES edges and cross-file kustomize IMPORTS edges are not emitted in incremental (gbuf only contains nodes for changed files; File nodes from pass_structure are absent) - Fix clang-format violation in incremental_kustomize_module_indexed test (single-line if condition) Co-Authored-By: Claude Sonnet 4.6 --- src/pipeline/pipeline_incremental.c | 10 +++++++++- tests/test_pipeline.c | 3 +-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index 85443be..1799f83 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -265,8 +265,16 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("pass.timing", "pass", "incr_semantic", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + /* k8s pass runs after semantic (vs. after definitions in the full pipeline) because + * incremental has no parallel extraction phase to position it alongside. + * Note: File→Resource DEFINES edges and cross-file kustomize IMPORTS edges are not + * emitted here — File nodes (from pass_structure) are absent in the incremental gbuf, + * and gbuf_find_by_qn only resolves nodes from changed files. This is a known + * structural limitation of the incremental architecture. */ cbm_clock_gettime(CLOCK_MONOTONIC, &t); - cbm_pipeline_pass_k8s(&ctx, changed_files, ci); + if (cbm_pipeline_pass_k8s(&ctx, changed_files, ci) != 0) { + cbm_log_info("incremental.warn", "msg", "k8s_pass_failed"); + } cbm_log_info("pass.timing", "pass", "incr_k8s", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); /* Merge new nodes/edges from gbuf into disk DB */ diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 353b6af..140a267 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -5197,8 +5197,7 @@ TEST(incremental_kustomize_module_indexed) { cbm_store_find_nodes_by_label(s, project, "Module", &nodes, &count); bool found_kust = false; for (int i = 0; i < count; i++) { - if (nodes[i].properties_json && - strstr(nodes[i].properties_json, "kustomize")) { + if (nodes[i].properties_json && strstr(nodes[i].properties_json, "kustomize")) { found_kust = true; break; } From 1a7c6ae8d2ca90a10a5602c7aeddfa6c12b28af7 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sat, 21 Mar 2026 15:08:38 -0500 Subject: [PATCH 12/12] fix(k8s): add k8s pass to parallel pipeline path The parallel execution path (used for repos above MIN_FILES_FOR_PARALLEL) was missing the cbm_pipeline_pass_k8s() call. The pass existed only in the sequential fallback path, so any repo large enough to trigger parallel indexing produced zero k8s Resource and kustomize Module nodes. Discovered via live testing against a 954-file gitops repo: the parallel path was taken, k8s pass was silently skipped, and search_graph returned 0 Resource nodes despite 213 kustomize overlays and hundreds of manifests. Add the pass after cbm_parallel_resolve() in the parallel branch, matching the same pattern (fail-open, cancel check, timing log) as the sequential path. Co-Authored-By: Claude Sonnet 4.6 --- src/pipeline/pipeline.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 41c8477..303ed08 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -494,6 +494,16 @@ int cbm_pipeline_run(cbm_pipeline_t *p) { rc = -1; goto cleanup; } + + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_pipeline_pass_k8s(&ctx, files, file_count); + if (rc != 0) { /* log warning, continue */ + } + cbm_log_info("pass.timing", "pass", "k8s", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + if (check_cancel(p)) { + rc = -1; + goto cleanup; + } } else { cbm_log_info("pipeline.mode", "mode", "sequential", "files", itoa_buf(file_count));