From ce6d7a89ee772f80740441fdec006db944588346 Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Sat, 21 Mar 2026 17:27:20 +0000 Subject: [PATCH] include verbose output alongside errors When a known error pattern is matched, preserve the original verbose output below the user-friendly hint so users can still diagnose issues from the error message without needing to check log files separately. Fixes #733 Signed-off-by: Eric Curtin --- llamacpp/native/vendor/llama.cpp | 2 +- pkg/inference/backends/llamacpp/errors.go | 31 ++++++++++-- .../backends/llamacpp/errors_test.go | 49 ++++++++++++++----- 3 files changed, 67 insertions(+), 15 deletions(-) diff --git a/llamacpp/native/vendor/llama.cpp b/llamacpp/native/vendor/llama.cpp index 34818ea6c..34ce48d97 160000 --- a/llamacpp/native/vendor/llama.cpp +++ b/llamacpp/native/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 34818ea6c0e91a2fa245ce866f7e002a4a9cd381 +Subproject commit 34ce48d97a8cd5497ee418224da5bf422ed96673 diff --git a/pkg/inference/backends/llamacpp/errors.go b/pkg/inference/backends/llamacpp/errors.go index b749336ec..06797623e 100644 --- a/pkg/inference/backends/llamacpp/errors.go +++ b/pkg/inference/backends/llamacpp/errors.go @@ -1,6 +1,14 @@ package llamacpp -import "regexp" +import ( + "fmt" + "regexp" + "strings" +) + +// maxVerboseOutputLength is the maximum length of verbose output included in user-facing errors. +// This prevents overwhelming users with excessive logs while keeping relevant context. +const maxVerboseOutputLength = 4096 // llamaCppErrorPatterns contains regex patterns to extract meaningful error messages // from llama.cpp stderr output. The patterns are tried in order, and the first match wins. @@ -19,13 +27,30 @@ var llamaCppErrorPatterns = []struct { {regexp.MustCompile(`exiting due to model loading error`), "failed to load model"}, } +// sanitizeVerboseOutput sanitizes llama.cpp output for user-facing error messages. 
+// It trims surrounding whitespace and truncates excessively long output +// while preserving the core error message. +func sanitizeVerboseOutput(output string) string { + trimmed := strings.TrimSpace(output) + + // Truncate if too long to avoid overwhelming users with verbose logs + if len(trimmed) > maxVerboseOutputLength { + trimmed = trimmed[:maxVerboseOutputLength] + "\n...[truncated]" + } + + return trimmed +} + // ExtractLlamaCppError attempts to extract a meaningful error message from llama.cpp output. -// It looks for common error patterns and returns a cleaner, more user-friendly message. +// It looks for common error patterns and returns a cleaner, more user-friendly message +// alongside the original verbose output for easier debugging. +// The verbose output is trimmed of surrounding whitespace and truncated +// if it exceeds a reasonable length. // If no recognizable pattern is found, it returns the full output. func ExtractLlamaCppError(output string) string { for _, entry := range llamaCppErrorPatterns { if entry.pattern.MatchString(output) { - return entry.message + return fmt.Sprintf("%s\n\nVerbose output:\n%s", entry.message, sanitizeVerboseOutput(output)) } } return output diff --git a/pkg/inference/backends/llamacpp/errors_test.go b/pkg/inference/backends/llamacpp/errors_test.go index 859d333c2..c836984b9 100644 --- a/pkg/inference/backends/llamacpp/errors_test.go +++ b/pkg/inference/backends/llamacpp/errors_test.go @@ -1,37 +1,64 @@ package llamacpp import ( + "strings" "testing" ) func TestExtractLlamaCppError(t *testing.T) { tests := []struct { - name string - input string - expected string + name string + input string + expected string + expectedPrefix string + expectTruncated bool }{ { - name: "Metal buffer allocation failure", - input: "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB", - expected: "not enough GPU memory to load the model (Metal)", + name: 
"Metal buffer allocation failure", + input: "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB", + expected: "not enough GPU memory to load the model (Metal)\n\nVerbose output:\n" + + "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB", }, { - name: "cudaMalloc OOM", - input: "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 12.50 MiB on device 1: cudaMalloc failed: out of memory", - expected: "not enough GPU memory to load the model (CUDA)", + name: "cudaMalloc OOM", + input: "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 12.50 MiB on device 1: cudaMalloc failed: out of memory", + expected: "not enough GPU memory to load the model (CUDA)\n\nVerbose output:\n" + + "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 12.50 MiB on device 1: cudaMalloc failed: out of memory", }, { name: "loading error", input: `common_init_from_params: failed to load model '/models/model.gguf' main: exiting due to model loading error`, - expected: "failed to load model", + expected: "failed to load model\n\nVerbose output:\n" + + "common_init_from_params: failed to load model '/models/model.gguf'\n" + + "main: exiting due to model loading error", + }, + { + name: "input with leading/trailing whitespace", + input: "\n\n ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB \n\n", + expected: "not enough GPU memory to load the model (Metal)\n\nVerbose output:\n" + + "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB", + }, + { + name: "truncation of large output", + input: "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB\n" + strings.Repeat("verbose log line\n", 500), + expectedPrefix: "not enough GPU memory to load the model (Metal)\n\nVerbose output:\n" + + "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB\n", + expectTruncated: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result 
:= ExtractLlamaCppError(tt.input) - if result != tt.expected { + if tt.expectTruncated { + if !strings.HasPrefix(result, tt.expectedPrefix) { + t.Errorf("ExtractLlamaCppError() = %q, want prefix %q", result, tt.expectedPrefix) + } + if !strings.HasSuffix(result, "...[truncated]") { + t.Errorf("ExtractLlamaCppError() = %q, want suffix ...[truncated]", result) + } + } else if result != tt.expected { t.Errorf("ExtractLlamaCppError() = %q, want %q", result, tt.expected) } })