From 3d9ee78d075fa80cb5aa55328dd7c6af9e31172a Mon Sep 17 00:00:00 2001 From: Siva Date: Thu, 9 Apr 2026 19:21:44 +0530 Subject: [PATCH 1/3] fix: add allowlist validation for RAG pipeline names --- .../internal/database/rag_service_config.go | 10 ++++- .../database/rag_service_config_test.go | 43 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/server/internal/database/rag_service_config.go b/server/internal/database/rag_service_config.go index 5c86eee9..c2546ef9 100644 --- a/server/internal/database/rag_service_config.go +++ b/server/internal/database/rag_service_config.go @@ -4,11 +4,17 @@ import ( "bytes" "encoding/json" "fmt" + "regexp" "slices" "sort" "strings" ) +// ragPipelineNamePattern restricts pipeline names to lowercase alphanumeric +// characters, hyphens, and underscores. This keeps key filenames +// ({name}_embedding.key / {name}_rag.key) safe and auditable. +var ragPipelineNamePattern = regexp.MustCompile(`^[a-z0-9_-]+$`) + // RAGPipelineLLMConfig represents LLM configuration for an embedding or RAG step. type RAGPipelineLLMConfig struct { Provider string `json:"provider"` @@ -126,9 +132,11 @@ func validateRAGPipeline(p RAGPipeline, i int, seenNames map[string]bool) []erro var errs []error prefix := fmt.Sprintf("pipelines[%d]", i) - // name (required, unique) + // name (required, allowlist, unique) if p.Name == "" { errs = append(errs, fmt.Errorf("%s.name is required", prefix)) + } else if !ragPipelineNamePattern.MatchString(p.Name) { + errs = append(errs, fmt.Errorf("%s.name %q is invalid: must match ^[a-z0-9_-]+$", prefix, p.Name)) } else if seenNames[p.Name] { errs = append(errs, fmt.Errorf("pipelines contains duplicate name %q", p.Name)) } else { diff --git a/server/internal/database/rag_service_config_test.go b/server/internal/database/rag_service_config_test.go index 953f41c7..4f4477b0 100644 --- a/server/internal/database/rag_service_config_test.go +++ b/server/internal/database/rag_service_config_test.go @@ -384,6 +384,49 @@ func TestParseRAGServiceConfig_MissingRAGLLM(t *testing.T) { assert.Contains(t, errs[0].Error(), "rag_llm.provider") } +func TestParseRAGServiceConfig_PipelineNameAllowlist(t *testing.T) { + validNames := []string{ + "default", + "my-pipeline", + "my_pipeline", + "pipeline-1", + "a", + "abc123", + "a-b_c-1", + } + for _, name := range validNames { + t.Run("valid/"+name, func(t *testing.T) { + config := minimalRAGConfig() + config["pipelines"].([]any)[0].(map[string]any)["name"] = name + _, errs := database.ParseRAGServiceConfig(config, false) + assert.Empty(t, errs, "name %q should be valid", name) + }) + } + + invalidNames := []string{ + "My Pipeline", // uppercase + space + "pipeline name", // space + "pipeline/name", // slash + "../etc/passwd", // path traversal + "UPPER", // uppercase + "pipešŸ”„line", // unicode emoji + "pipeline.name", // dot + "", // empty (covered separately, but included for completeness) + } + for _, name := range invalidNames { + if name == "" { + continue // empty name is a separate "required" error + } + t.Run("invalid/"+name, func(t *testing.T) { + config := minimalRAGConfig() + config["pipelines"].([]any)[0].(map[string]any)["name"] = name + _, errs := database.ParseRAGServiceConfig(config, false) + require.NotEmpty(t, errs, "name %q should be invalid", name) + assert.Contains(t, errs[0].Error(), "must match ^[a-z0-9_-]+$") + }) + } +} + func TestParseRAGServiceConfig_MultiplePipelines(t *testing.T) { config := map[string]any{ "pipelines": []any{ From fe33b5a5694cd392dd28f2485438a9779ce39c13 Mon Sep 17 00:00:00 2001 From: Siva Date: Thu, 9 Apr 2026 22:18:49 +0530 Subject: [PATCH 2/3] addressing AI review comments --- server/internal/database/rag_service_config.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/server/internal/database/rag_service_config.go b/server/internal/database/rag_service_config.go index c2546ef9..68ad8769 100644 --- a/server/internal/database/rag_service_config.go +++ b/server/internal/database/rag_service_config.go @@ -10,10 +10,15 @@ import ( "strings" ) +// ragPipelineNamePatternText is the allowlist pattern for RAG pipeline names. +// It is kept as a const so that the compiled regexp and the error message both +// reference the same literal and cannot drift apart. +const ragPipelineNamePatternText = `^[a-z0-9_-]+$` + // ragPipelineNamePattern restricts pipeline names to lowercase alphanumeric // characters, hyphens, and underscores. This keeps key filenames // ({name}_embedding.key / {name}_rag.key) safe and auditable. -var ragPipelineNamePattern = regexp.MustCompile(`^[a-z0-9_-]+$`) +var ragPipelineNamePattern = regexp.MustCompile(ragPipelineNamePatternText) // RAGPipelineLLMConfig represents LLM configuration for an embedding or RAG step. type RAGPipelineLLMConfig struct { @@ -136,7 +141,7 @@ func validateRAGPipeline(p RAGPipeline, i int, seenNames map[string]bool) []erro if p.Name == "" { errs = append(errs, fmt.Errorf("%s.name is required", prefix)) } else if !ragPipelineNamePattern.MatchString(p.Name) { - errs = append(errs, fmt.Errorf("%s.name %q is invalid: must match ^[a-z0-9_-]+$", prefix, p.Name)) + errs = append(errs, fmt.Errorf("%s.name %q is invalid: must match %s", prefix, p.Name, ragPipelineNamePatternText)) } else if seenNames[p.Name] { errs = append(errs, fmt.Errorf("pipelines contains duplicate name %q", p.Name)) } else { From 90d25b95cf1f94dc8ee26558d9356b873b8ca8b2 Mon Sep 17 00:00:00 2001 From: Siva Date: Sun, 12 Apr 2026 19:03:50 +0530 Subject: [PATCH 3/3] addressing review comments --- server/internal/database/rag_service_config.go | 7 ++++--- server/internal/database/rag_service_config_test.go | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/server/internal/database/rag_service_config.go b/server/internal/database/rag_service_config.go index 68ad8769..a51feca7 100644 --- a/server/internal/database/rag_service_config.go +++ b/server/internal/database/rag_service_config.go @@ -13,11 +13,12 @@ import ( // ragPipelineNamePatternText is the allowlist pattern for RAG pipeline names. // It is kept as a const so that the compiled regexp and the error message both // reference the same literal and cannot drift apart. -const ragPipelineNamePatternText = `^[a-z0-9_-]+$` +const ragPipelineNamePatternText = `^[a-z0-9_][a-z0-9_-]*$` // ragPipelineNamePattern restricts pipeline names to lowercase alphanumeric -// characters, hyphens, and underscores. This keeps key filenames -// ({name}_embedding.key / {name}_rag.key) safe and auditable. +// characters, hyphens, and underscores. The first character must not be a +// hyphen so that names are safe as filename components and cannot be +// misinterpreted as CLI flags if ever passed to a command. var ragPipelineNamePattern = regexp.MustCompile(ragPipelineNamePatternText) // RAGPipelineLLMConfig represents LLM configuration for an embedding or RAG step. diff --git a/server/internal/database/rag_service_config_test.go b/server/internal/database/rag_service_config_test.go index 4f4477b0..b9f3f7ca 100644 --- a/server/internal/database/rag_service_config_test.go +++ b/server/internal/database/rag_service_config_test.go @@ -411,6 +411,7 @@ func TestParseRAGServiceConfig_PipelineNameAllowlist(t *testing.T) { "UPPER", // uppercase "pipešŸ”„line", // unicode emoji "pipeline.name", // dot + "-pipeline", // leading hyphen (could be misread as a CLI flag) "", // empty (covered separately, but included for completeness) } for _, name := range invalidNames { @@ -422,7 +423,7 @@ func TestParseRAGServiceConfig_PipelineNameAllowlist(t *testing.T) { config["pipelines"].([]any)[0].(map[string]any)["name"] = name _, errs := database.ParseRAGServiceConfig(config, false) require.NotEmpty(t, errs, "name %q should be invalid", name) - assert.Contains(t, errs[0].Error(), "must match ^[a-z0-9_-]+$") + assert.Contains(t, errs[0].Error(), "must match ^[a-z0-9_][a-z0-9_-]*$") }) } }