diff --git a/core/config/meta/build.go b/core/config/meta/build.go new file mode 100644 index 000000000000..b45d7c569fcb --- /dev/null +++ b/core/config/meta/build.go @@ -0,0 +1,141 @@ +package meta + +import ( + "reflect" + "sort" + "sync" +) + +var ( + cachedMetadata *ConfigMetadata + cacheMu sync.RWMutex +) + +// BuildConfigMetadata reflects on the given struct type (ModelConfig), +// merges the enrichment registry, and returns the full ConfigMetadata. +// The result is cached in memory after the first call. +func BuildConfigMetadata(modelConfigType reflect.Type) *ConfigMetadata { + cacheMu.RLock() + if cachedMetadata != nil { + cacheMu.RUnlock() + return cachedMetadata + } + cacheMu.RUnlock() + + cacheMu.Lock() + defer cacheMu.Unlock() + + // Double-check after acquiring write lock + if cachedMetadata != nil { + return cachedMetadata + } + + cachedMetadata = buildConfigMetadataUncached(modelConfigType, DefaultRegistry()) + return cachedMetadata +} + +// buildConfigMetadataUncached does the actual work without caching. +// Exported via lowercase for testability through BuildForTest. 
+func buildConfigMetadataUncached(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata { + fields := WalkModelConfig(modelConfigType) + + // Apply registry overrides + for i := range fields { + override, ok := registry[fields[i].Path] + if !ok { + continue + } + applyOverride(&fields[i], override) + } + + // Sort fields by section order then by field order + sectionOrder := make(map[string]int) + for _, s := range DefaultSections() { + sectionOrder[s.ID] = s.Order + } + + sort.SliceStable(fields, func(i, j int) bool { + si := sectionOrder[fields[i].Section] + sj := sectionOrder[fields[j].Section] + if si != sj { + return si < sj + } + return fields[i].Order < fields[j].Order + }) + + // Collect sections that actually have fields + usedSections := make(map[string]bool) + for _, f := range fields { + usedSections[f.Section] = true + } + + var sections []Section + for _, s := range DefaultSections() { + if usedSections[s.ID] { + sections = append(sections, s) + } + } + + return &ConfigMetadata{ + Sections: sections, + Fields: fields, + } +} + +// applyOverride merges non-zero override values into the field. 
+func applyOverride(f *FieldMeta, o FieldMetaOverride) { + if o.Section != "" { + f.Section = o.Section + } + if o.Label != "" { + f.Label = o.Label + } + if o.Description != "" { + f.Description = o.Description + } + if o.Component != "" { + f.Component = o.Component + } + if o.Placeholder != "" { + f.Placeholder = o.Placeholder + } + if o.Default != nil { + f.Default = o.Default + } + if o.Min != nil { + f.Min = o.Min + } + if o.Max != nil { + f.Max = o.Max + } + if o.Step != nil { + f.Step = o.Step + } + if o.Options != nil { + f.Options = o.Options + } + if o.AutocompleteProvider != "" { + f.AutocompleteProvider = o.AutocompleteProvider + } + if o.VRAMImpact { + f.VRAMImpact = true + } + if o.Advanced { + f.Advanced = true + } + if o.Order != 0 { + f.Order = o.Order + } +} + +// BuildForTest builds metadata without caching, for use in tests. +func BuildForTest(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata { + return buildConfigMetadataUncached(modelConfigType, registry) +} + +// ResetCache clears the cached metadata (useful for testing). 
+func ResetCache() { + cacheMu.Lock() + defer cacheMu.Unlock() + cachedMetadata = nil +} diff --git a/core/config/meta/build_test.go b/core/config/meta/build_test.go new file mode 100644 index 000000000000..aa9acb889a8c --- /dev/null +++ b/core/config/meta/build_test.go @@ -0,0 +1,211 @@ +package meta_test + +import ( + "reflect" + "testing" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" +) + +func TestBuildConfigMetadata(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + if len(md.Sections) == 0 { + t.Fatal("expected sections, got 0") + } + if len(md.Fields) == 0 { + t.Fatal("expected fields, got 0") + } + + // Verify sections are ordered + for i := 1; i < len(md.Sections); i++ { + if md.Sections[i].Order < md.Sections[i-1].Order { + t.Errorf("sections not ordered: %s (order=%d) before %s (order=%d)", + md.Sections[i-1].ID, md.Sections[i-1].Order, + md.Sections[i].ID, md.Sections[i].Order) + } + } +} + +func TestRegistryOverrides(t *testing.T) { + registry := map[string]meta.FieldMetaOverride{ + "name": { + Label: "My Custom Label", + Description: "Custom description", + Component: "textarea", + Order: 999, + }, + } + + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), registry) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + f, ok := byPath["name"] + if !ok { + t.Fatal("field 'name' not found") + } + if f.Label != "My Custom Label" { + t.Errorf("expected label 'My Custom Label', got %q", f.Label) + } + if f.Description != "Custom description" { + t.Errorf("expected description 'Custom description', got %q", f.Description) + } + if f.Component != "textarea" { + t.Errorf("expected component 'textarea', got %q", f.Component) + } + if f.Order != 999 { + t.Errorf("expected order 999, got %d", f.Order) + } +} + +func TestUnregisteredFieldsGetDefaults(t *testing.T) { + // Use empty 
registry - all fields should still get auto-generated metadata + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), map[string]meta.FieldMetaOverride{}) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // context_size should still exist with auto-generated label + f, ok := byPath["context_size"] + if !ok { + t.Fatal("field 'context_size' not found") + } + if f.Label == "" { + t.Error("expected auto-generated label, got empty") + } + if f.UIType != "int" { + t.Errorf("expected UIType 'int', got %q", f.UIType) + } + if f.Component == "" { + t.Error("expected auto-generated component, got empty") + } +} + +func TestDefaultRegistryOverridesApply(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Verify enriched fields got their overrides + tests := []struct { + path string + label string + description string + vramImpact bool + }{ + {"context_size", "Context Size", "Maximum context window in tokens", true}, + {"gpu_layers", "GPU Layers", "Number of layers to offload to GPU (-1 = all)", true}, + {"backend", "Backend", "The inference backend to use (e.g. 
llama-cpp, vllm, diffusers)", false}, + {"parameters.temperature", "Temperature", "Sampling temperature (higher = more creative, lower = more deterministic)", false}, + {"template.chat", "Chat Template", "Go template for chat completion requests", false}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Label != tt.label { + t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label) + } + if f.Description != tt.description { + t.Errorf("field %q: expected description %q, got %q", tt.path, tt.description, f.Description) + } + if f.VRAMImpact != tt.vramImpact { + t.Errorf("field %q: expected vramImpact=%v, got %v", tt.path, tt.vramImpact, f.VRAMImpact) + } + } +} + +func TestStaticOptionsFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Fields with static options should have Options populated and no AutocompleteProvider + staticFields := []string{"quantization", "cache_type_k", "cache_type_v", "diffusers.pipeline_type", "diffusers.scheduler_type"} + for _, path := range staticFields { + f, ok := byPath[path] + if !ok { + t.Errorf("field %q not found", path) + continue + } + if len(f.Options) == 0 { + t.Errorf("field %q: expected Options to be populated", path) + } + if f.AutocompleteProvider != "" { + t.Errorf("field %q: expected no AutocompleteProvider, got %q", path, f.AutocompleteProvider) + } + } +} + +func TestDynamicProviderFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Fields with dynamic providers should have AutocompleteProvider and no Options + dynamicFields := map[string]string{ + 
"backend": meta.ProviderBackends, + "pipeline.llm": meta.ProviderModelsChat, + "pipeline.tts": meta.ProviderModelsTTS, + "pipeline.transcription": meta.ProviderModelsTranscript, + "pipeline.vad": meta.ProviderModelsVAD, + } + for path, expectedProvider := range dynamicFields { + f, ok := byPath[path] + if !ok { + t.Errorf("field %q not found", path) + continue + } + if f.AutocompleteProvider != expectedProvider { + t.Errorf("field %q: expected AutocompleteProvider %q, got %q", path, expectedProvider, f.AutocompleteProvider) + } + if len(f.Options) != 0 { + t.Errorf("field %q: expected no Options, got %d", path, len(f.Options)) + } + } +} + +func TestVRAMImpactFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + var vramFields []string + for _, f := range md.Fields { + if f.VRAMImpact { + vramFields = append(vramFields, f.Path) + } + } + + if len(vramFields) == 0 { + t.Error("expected some VRAM impact fields, got 0") + } + + // context_size and gpu_layers should be marked + expected := map[string]bool{"context_size": true, "gpu_layers": true} + for _, path := range vramFields { + if expected[path] { + delete(expected, path) + } + } + for path := range expected { + t.Errorf("expected VRAM impact field %q not found", path) + } +} diff --git a/core/config/meta/constants.go b/core/config/meta/constants.go new file mode 100644 index 000000000000..24e24015fb49 --- /dev/null +++ b/core/config/meta/constants.go @@ -0,0 +1,63 @@ +package meta + +// Dynamic autocomplete provider constants (runtime lookup required). +const ( + ProviderBackends = "backends" + ProviderModels = "models" + ProviderModelsChat = "models:chat" + ProviderModelsTTS = "models:tts" + ProviderModelsTranscript = "models:transcript" + ProviderModelsVAD = "models:vad" +) + +// Static option lists embedded directly in field metadata. 
// QuantizationOptions lists the selectable quantization methods offered by
// the registry for the "quantization" select field.
var QuantizationOptions = []FieldOption{
	{Value: "q4_0", Label: "Q4_0"},
	{Value: "q4_1", Label: "Q4_1"},
	{Value: "q5_0", Label: "Q5_0"},
	{Value: "q5_1", Label: "Q5_1"},
	{Value: "q8_0", Label: "Q8_0"},
	{Value: "q2_K", Label: "Q2_K"},
	{Value: "q3_K_S", Label: "Q3_K_S"},
	{Value: "q3_K_M", Label: "Q3_K_M"},
	{Value: "q3_K_L", Label: "Q3_K_L"},
	{Value: "q4_K_S", Label: "Q4_K_S"},
	{Value: "q4_K_M", Label: "Q4_K_M"},
	{Value: "q5_K_S", Label: "Q5_K_S"},
	{Value: "q5_K_M", Label: "Q5_K_M"},
	{Value: "q6_K", Label: "Q6_K"},
}

// CacheTypeOptions lists the selectable KV-cache quantization types used by
// the "cache_type_k" and "cache_type_v" select fields.
var CacheTypeOptions = []FieldOption{
	{Value: "f16", Label: "F16"},
	{Value: "f32", Label: "F32"},
	{Value: "q8_0", Label: "Q8_0"},
	{Value: "q4_0", Label: "Q4_0"},
	{Value: "q4_1", Label: "Q4_1"},
	{Value: "q5_0", Label: "Q5_0"},
	{Value: "q5_1", Label: "Q5_1"},
}

// DiffusersPipelineOptions lists the selectable pipeline classes for the
// "diffusers.pipeline_type" select field.
var DiffusersPipelineOptions = []FieldOption{
	{Value: "StableDiffusionPipeline", Label: "StableDiffusionPipeline"},
	{Value: "StableDiffusionImg2ImgPipeline", Label: "StableDiffusionImg2ImgPipeline"},
	{Value: "StableDiffusionXLPipeline", Label: "StableDiffusionXLPipeline"},
	{Value: "StableDiffusionXLImg2ImgPipeline", Label: "StableDiffusionXLImg2ImgPipeline"},
	{Value: "StableDiffusionDepth2ImgPipeline", Label: "StableDiffusionDepth2ImgPipeline"},
	{Value: "DiffusionPipeline", Label: "DiffusionPipeline"},
	{Value: "StableVideoDiffusionPipeline", Label: "StableVideoDiffusionPipeline"},
}

// DiffusersSchedulerOptions lists the selectable noise schedulers for the
// "diffusers.scheduler_type" select field.
var DiffusersSchedulerOptions = []FieldOption{
	{Value: "ddim", Label: "DDIM"},
	{Value: "ddpm", Label: "DDPM"},
	{Value: "pndm", Label: "PNDM"},
	{Value: "lms", Label: "LMS"},
	{Value: "euler", Label: "Euler"},
	{Value: "euler_a", Label: "Euler A"},
	{Value: "dpm_multistep", Label: "DPM Multistep"},
	{Value: "dpm_singlestep", Label: "DPM Singlestep"},
	{Value: "heun", Label: "Heun"},
	{Value: "unipc", Label: "UniPC"},
}
000000000000..ef1d0b4b07ad --- /dev/null +++ b/core/config/meta/reflect.go @@ -0,0 +1,259 @@ +package meta + +import ( + "reflect" + "strings" + "unicode" +) + +// WalkModelConfig uses reflection to discover all exported, YAML-tagged fields +// in the given struct type (expected to be config.ModelConfig) and returns a +// slice of FieldMeta with sensible defaults derived from the type information. +func WalkModelConfig(t reflect.Type) []FieldMeta { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + var fields []FieldMeta + walkStruct(t, "", "", &fields) + return fields +} + +// walkStruct recursively walks a struct type, collecting FieldMeta entries. +// prefix is the dot-path prefix for nested structs (e.g. "function.grammar."). +// parentYAMLPrefix is used for inline embedding with prefix (e.g. "parameters."). +func walkStruct(t reflect.Type, prefix, parentYAMLPrefix string, out *[]FieldMeta) { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + if t.Kind() != reflect.Struct { + return + } + + for i := range t.NumField() { + sf := t.Field(i) + + // Skip unexported fields + if !sf.IsExported() { + continue + } + + yamlTag := sf.Tag.Get("yaml") + if yamlTag == "-" { + continue + } + + yamlKey, opts := parseTag(yamlTag) + + // Handle inline embedding (e.g. 
LLMConfig `yaml:",inline"`) + if opts.contains("inline") { + ft := sf.Type + if ft.Kind() == reflect.Pointer { + ft = ft.Elem() + } + if ft.Kind() == reflect.Struct { + walkStruct(ft, prefix, parentYAMLPrefix, out) + } + continue + } + + // If no yaml key and it's an embedded struct without inline, skip unknown pattern + if yamlKey == "" { + ft := sf.Type + if ft.Kind() == reflect.Pointer { + ft = ft.Elem() + } + // Anonymous struct without yaml tag - treat as inline + if sf.Anonymous && ft.Kind() == reflect.Struct { + walkStruct(ft, prefix, parentYAMLPrefix, out) + continue + } + // Named field without yaml tag - skip + continue + } + + ft := sf.Type + isPtr := ft.Kind() == reflect.Pointer + if isPtr { + ft = ft.Elem() + } + + // Named nested struct (not a special type) -> recurse with prefix + if ft.Kind() == reflect.Struct && !isSpecialType(ft) { + nestedPrefix := prefix + yamlKey + "." + walkStruct(ft, nestedPrefix, "", out) + continue + } + + // Leaf field + path := prefix + yamlKey + goType := sf.Type.String() + uiType, component := inferUIType(sf.Type) + section := inferSection(prefix) + label := labelFromKey(yamlKey) + + *out = append(*out, FieldMeta{ + Path: path, + YAMLKey: yamlKey, + GoType: goType, + UIType: uiType, + Pointer: isPtr, + Section: section, + Label: label, + Component: component, + Order: len(*out), + }) + } +} + +// isSpecialType returns true for struct types that should be treated as leaf +// values rather than recursed into (e.g. custom JSON marshalers). +func isSpecialType(t reflect.Type) bool { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + name := t.Name() + // LogprobsValue, URI types are leaf values despite being structs + switch name { + case "LogprobsValue", "URI": + return true + } + return false +} + +// inferUIType maps a Go reflect.Type to a UI type string and default component. 
+func inferUIType(t reflect.Type) (uiType, component string) { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + + switch t.Kind() { + case reflect.Bool: + return "bool", "toggle" + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return "int", "number" + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + return "int", "number" + case reflect.Float32, reflect.Float64: + return "float", "number" + case reflect.String: + return "string", "input" + case reflect.Slice: + elem := t.Elem() + if elem.Kind() == reflect.String { + return "[]string", "string-list" + } + if elem.Kind() == reflect.Pointer { + elem = elem.Elem() + } + if elem.Kind() == reflect.Struct { + return "[]object", "json-editor" + } + return "[]any", "json-editor" + case reflect.Map: + return "map", "map-editor" + case reflect.Struct: + // Special types treated as leaves + if isSpecialType(t) { + return "bool", "toggle" // LogprobsValue + } + return "object", "json-editor" + default: + return "any", "input" + } +} + +// inferSection determines the config section from the dot-path prefix. +func inferSection(prefix string) string { + if prefix == "" { + return "general" + } + // Remove trailing dot + p := strings.TrimSuffix(prefix, ".") + + // Use the top-level prefix to determine section + parts := strings.SplitN(p, ".", 2) + top := parts[0] + + switch top { + case "parameters": + return "parameters" + case "template": + return "templates" + case "function": + return "functions" + case "reasoning": + return "reasoning" + case "diffusers": + return "diffusers" + case "tts": + return "tts" + case "pipeline": + return "pipeline" + case "grpc": + return "grpc" + case "agent": + return "agent" + case "mcp": + return "mcp" + case "feature_flags": + return "other" + case "limit_mm_per_prompt": + return "llm" + default: + return "other" + } +} + +// labelFromKey converts a yaml key like "context_size" to "Context Size". 
+func labelFromKey(key string) string { + parts := strings.Split(key, "_") + for i, p := range parts { + if len(p) > 0 { + runes := []rune(p) + runes[0] = unicode.ToUpper(runes[0]) + parts[i] = string(runes) + } + } + return strings.Join(parts, " ") +} + +// tagOptions is a set of comma-separated yaml tag options. +type tagOptions string + +func (o tagOptions) contains(optName string) bool { + s := string(o) + for s != "" { + var name string + if name, s, _ = strings.Cut(s, ","); name == optName { + return true + } + } + return false +} + +// parseTag splits a yaml struct tag into the key name and options. +func parseTag(tag string) (string, tagOptions) { + if tag == "" { + return "", "" + } + before, after, found := strings.Cut(tag, ",") + if found { + return before, tagOptions(after) + } + return tag, "" +} + +// SectionForPath returns the section ID for a given dot-path. +// Exported so tests and the registry can use it. +func SectionForPath(path string) string { + before, _, found := strings.Cut(path, ".") + if !found { + return "general" + } + return inferSection(before + ".") +} + +// GoTypeName returns a human-readable Go type string for display. 
+func GoTypeName(t reflect.Type) string { + return t.String() +} diff --git a/core/config/meta/reflect_test.go b/core/config/meta/reflect_test.go new file mode 100644 index 000000000000..408bb2a1ecc3 --- /dev/null +++ b/core/config/meta/reflect_test.go @@ -0,0 +1,208 @@ +package meta_test + +import ( + "reflect" + "testing" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" +) + +func TestWalkModelConfig(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + if len(fields) == 0 { + t.Fatal("expected fields from ModelConfig, got 0") + } + + // Build a lookup by path + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + // Verify some top-level fields exist + for _, path := range []string{"name", "backend", "cuda", "step"} { + if _, ok := byPath[path]; !ok { + t.Errorf("expected field %q not found", path) + } + } + + // Verify inline LLMConfig fields appear at top level (no prefix) + for _, path := range []string{"context_size", "gpu_layers", "threads", "mmap"} { + if _, ok := byPath[path]; !ok { + t.Errorf("expected inline LLMConfig field %q not found", path) + } + } + + // Verify nested struct fields have correct prefix + for _, path := range []string{ + "template.chat", + "template.completion", + "template.use_tokenizer_template", + "function.grammar.parallel_calls", + "function.grammar.mixed_mode", + "diffusers.pipeline_type", + "diffusers.cuda", + "pipeline.llm", + "pipeline.tts", + "reasoning.disable", + "agent.max_iterations", + "grpc.attempts", + } { + if _, ok := byPath[path]; !ok { + t.Errorf("expected nested field %q not found", path) + } + } + + // Verify PredictionOptions fields have parameters. 
prefix + for _, path := range []string{ + "parameters.temperature", + "parameters.top_p", + "parameters.top_k", + "parameters.max_tokens", + "parameters.seed", + } { + if _, ok := byPath[path]; !ok { + t.Errorf("expected parameters field %q not found", path) + } + } + + // Verify TTSConfig fields have tts. prefix + if _, ok := byPath["tts.voice"]; !ok { + t.Error("expected tts.voice field not found") + } +} + +func TestSkipsYAMLDashFields(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + // modelConfigFile has yaml:"-" tag, should be skipped + for _, f := range fields { + if f.Path == "modelConfigFile" || f.Path == "modelTemplate" { + t.Errorf("field %q should have been skipped (yaml:\"-\")", f.Path) + } + } +} + +func TestTypeMapping(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + uiType string + pointer bool + }{ + {"name", "string", false}, + {"cuda", "bool", false}, + {"context_size", "int", true}, + {"gpu_layers", "int", true}, + {"threads", "int", true}, + {"f16", "bool", true}, + {"mmap", "bool", true}, + {"stopwords", "[]string", false}, + {"roles", "map", false}, + {"parameters.temperature", "float", true}, + {"parameters.top_k", "int", true}, + {"function.grammar.parallel_calls", "bool", false}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.UIType != tt.uiType { + t.Errorf("field %q: expected UIType %q, got %q", tt.path, tt.uiType, f.UIType) + } + if f.Pointer != tt.pointer { + t.Errorf("field %q: expected Pointer=%v, got %v", tt.path, tt.pointer, f.Pointer) + } + } +} + +func TestSectionAssignment(t *testing.T) { + fields := 
meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + section string + }{ + {"name", "general"}, + {"backend", "general"}, + {"context_size", "general"}, // inline LLMConfig -> no prefix -> general + {"parameters.temperature", "parameters"}, + {"template.chat", "templates"}, + {"function.grammar.parallel_calls", "functions"}, + {"diffusers.cuda", "diffusers"}, + {"pipeline.llm", "pipeline"}, + {"reasoning.disable", "reasoning"}, + {"agent.max_iterations", "agent"}, + {"grpc.attempts", "grpc"}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Section != tt.section { + t.Errorf("field %q: expected section %q, got %q", tt.path, tt.section, f.Section) + } + } +} + +func TestLabelGeneration(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + label string + }{ + {"context_size", "Context Size"}, + {"gpu_layers", "Gpu Layers"}, + {"name", "Name"}, + {"cuda", "Cuda"}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Label != tt.label { + t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label) + } + } +} + +func TestFieldCount(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + // We expect a large number of fields (100+) given the config complexity + if len(fields) < 80 { + t.Errorf("expected at least 80 fields, got %d", len(fields)) + } + t.Logf("Total fields discovered: %d", len(fields)) +} diff --git a/core/config/meta/registry.go b/core/config/meta/registry.go new file mode 100644 index 
000000000000..bebba468dc2d --- /dev/null +++ b/core/config/meta/registry.go @@ -0,0 +1,314 @@ +package meta + +// DefaultRegistry returns enrichment overrides for the ~30 most commonly used +// config fields. Fields not listed here still appear with auto-generated +// labels and type-inferred components. +func DefaultRegistry() map[string]FieldMetaOverride { + f64 := func(v float64) *float64 { return &v } + + return map[string]FieldMetaOverride{ + // --- General --- + "name": { + Section: "general", + Label: "Model Name", + Description: "Unique identifier for this model configuration", + Component: "input", + Order: 0, + }, + "backend": { + Section: "general", + Label: "Backend", + Description: "The inference backend to use (e.g. llama-cpp, vllm, diffusers)", + Component: "select", + AutocompleteProvider: ProviderBackends, + Order: 1, + }, + "description": { + Section: "general", + Label: "Description", + Description: "Human-readable description of what this model does", + Component: "textarea", + Order: 2, + }, + "usage": { + Section: "general", + Label: "Usage", + Description: "Usage instructions or notes", + Component: "textarea", + Advanced: true, + Order: 3, + }, + "cuda": { + Section: "general", + Label: "CUDA", + Description: "Explicitly enable CUDA acceleration", + Order: 5, + }, + "known_usecases": { + Section: "general", + Label: "Known Use Cases", + Description: "Capabilities this model supports (e.g. 
FLAG_CHAT, FLAG_COMPLETION)", + Component: "string-list", + Order: 6, + }, + + // --- LLM --- + "context_size": { + Section: "llm", + Label: "Context Size", + Description: "Maximum context window in tokens", + Component: "number", + VRAMImpact: true, + Order: 10, + }, + "gpu_layers": { + Section: "llm", + Label: "GPU Layers", + Description: "Number of layers to offload to GPU (-1 = all)", + Component: "number", + Min: f64(-1), + VRAMImpact: true, + Order: 11, + }, + "threads": { + Section: "llm", + Label: "Threads", + Description: "Number of CPU threads for inference", + Component: "number", + Min: f64(1), + Order: 12, + }, + "f16": { + Section: "llm", + Label: "F16", + Description: "Use 16-bit floating point for key/value cache", + Order: 13, + }, + "mmap": { + Section: "llm", + Label: "Memory Map", + Description: "Use memory-mapped files for model loading", + Order: 14, + }, + "mmlock": { + Section: "llm", + Label: "Memory Lock", + Description: "Lock model memory to prevent swapping", + Advanced: true, + Order: 15, + }, + "low_vram": { + Section: "llm", + Label: "Low VRAM", + Description: "Optimize for systems with limited GPU memory", + VRAMImpact: true, + Order: 16, + }, + "embeddings": { + Section: "llm", + Label: "Embeddings", + Description: "Enable embedding generation mode", + Order: 17, + }, + "quantization": { + Section: "llm", + Label: "Quantization", + Description: "Quantization method (e.g. q4_0, q5_1, q8_0)", + Component: "select", + Options: QuantizationOptions, + Advanced: true, + Order: 20, + }, + "flash_attention": { + Section: "llm", + Label: "Flash Attention", + Description: "Enable flash attention for faster inference", + Component: "input", + Advanced: true, + Order: 21, + }, + "cache_type_k": { + Section: "llm", + Label: "KV Cache Type (K)", + Description: "Quantization type for key cache (e.g. 
f16, q8_0, q4_0)", + Component: "select", + Options: CacheTypeOptions, + VRAMImpact: true, + Advanced: true, + Order: 22, + }, + "cache_type_v": { + Section: "llm", + Label: "KV Cache Type (V)", + Description: "Quantization type for value cache", + Component: "select", + Options: CacheTypeOptions, + VRAMImpact: true, + Advanced: true, + Order: 23, + }, + + // --- Parameters --- + "parameters.temperature": { + Section: "parameters", + Label: "Temperature", + Description: "Sampling temperature (higher = more creative, lower = more deterministic)", + Component: "slider", + Min: f64(0), + Max: f64(2), + Step: f64(0.05), + Order: 30, + }, + "parameters.top_p": { + Section: "parameters", + Label: "Top P", + Description: "Nucleus sampling threshold", + Component: "slider", + Min: f64(0), + Max: f64(1), + Step: f64(0.01), + Order: 31, + }, + "parameters.top_k": { + Section: "parameters", + Label: "Top K", + Description: "Top-K sampling: consider only the K most likely tokens", + Component: "number", + Min: f64(0), + Order: 32, + }, + "parameters.max_tokens": { + Section: "parameters", + Label: "Max Tokens", + Description: "Maximum number of tokens to generate (0 = unlimited)", + Component: "number", + Min: f64(0), + Order: 33, + }, + "parameters.repeat_penalty": { + Section: "parameters", + Label: "Repeat Penalty", + Description: "Penalize repeated tokens (1.0 = no penalty)", + Component: "number", + Min: f64(0), + Advanced: true, + Order: 34, + }, + "parameters.seed": { + Section: "parameters", + Label: "Seed", + Description: "Random seed (-1 = random)", + Component: "number", + Advanced: true, + Order: 35, + }, + + // --- Templates --- + "template.chat": { + Section: "templates", + Label: "Chat Template", + Description: "Go template for chat completion requests", + Component: "code-editor", + Order: 40, + }, + "template.chat_message": { + Section: "templates", + Label: "Chat Message Template", + Description: "Go template for individual chat messages", + Component: 
"code-editor", + Order: 41, + }, + "template.completion": { + Section: "templates", + Label: "Completion Template", + Description: "Go template for completion requests", + Component: "code-editor", + Order: 42, + }, + "template.use_tokenizer_template": { + Section: "templates", + Label: "Use Tokenizer Template", + Description: "Use the chat template from the model's tokenizer config", + Order: 43, + }, + + // --- Pipeline --- + "pipeline.llm": { + Section: "pipeline", + Label: "LLM Model", + Description: "Model to use for LLM inference in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsChat, + Order: 60, + }, + "pipeline.tts": { + Section: "pipeline", + Label: "TTS Model", + Description: "Model to use for text-to-speech in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsTTS, + Order: 61, + }, + "pipeline.transcription": { + Section: "pipeline", + Label: "Transcription Model", + Description: "Model to use for speech-to-text in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsTranscript, + Order: 62, + }, + "pipeline.vad": { + Section: "pipeline", + Label: "VAD Model", + Description: "Model to use for voice activity detection in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsVAD, + Order: 63, + }, + + // --- Functions --- + "function.grammar.parallel_calls": { + Section: "functions", + Label: "Parallel Calls", + Description: "Allow the LLM to return multiple function calls in one response", + Order: 70, + }, + "function.grammar.mixed_mode": { + Section: "functions", + Label: "Mixed Mode", + Description: "Allow the LLM to return both text and function calls", + Order: 71, + }, + "function.grammar.disable": { + Section: "functions", + Label: "Disable Grammar", + Description: "Disable grammar-constrained generation for function calls", + Advanced: true, + Order: 72, + }, + + // --- Diffusers --- + "diffusers.pipeline_type": { + Section: 
"diffusers", + Label: "Pipeline Type", + Description: "Diffusers pipeline type (e.g. StableDiffusionPipeline)", + Component: "select", + Options: DiffusersPipelineOptions, + Order: 80, + }, + "diffusers.scheduler_type": { + Section: "diffusers", + Label: "Scheduler Type", + Description: "Noise scheduler type", + Component: "select", + Options: DiffusersSchedulerOptions, + Order: 81, + }, + "diffusers.cuda": { + Section: "diffusers", + Label: "CUDA", + Description: "Enable CUDA for diffusers", + Order: 82, + }, + } +} diff --git a/core/config/meta/types.go b/core/config/meta/types.go new file mode 100644 index 000000000000..dcd21fb55806 --- /dev/null +++ b/core/config/meta/types.go @@ -0,0 +1,83 @@ +package meta + +// FieldMeta describes a single configuration field for UI rendering and agent discovery. +type FieldMeta struct { + Path string `json:"path"` // dot-path: "context_size", "function.grammar.parallel_calls" + YAMLKey string `json:"yaml_key"` // leaf yaml key + GoType string `json:"go_type"` // "*int", "string", "[]string" + UIType string `json:"ui_type"` // "string", "int", "float", "bool", "[]string", "map", "object" + Pointer bool `json:"pointer,omitempty"` // true = nil means "not set" + Section string `json:"section"` // "general", "llm", "templates", etc. + Label string `json:"label"` // human-readable label + Description string `json:"description,omitempty"` // help text + Component string `json:"component"` // "input", "number", "toggle", "select", "slider", etc. + Placeholder string `json:"placeholder,omitempty"` + Default any `json:"default,omitempty"` + Min *float64 `json:"min,omitempty"` + Max *float64 `json:"max,omitempty"` + Step *float64 `json:"step,omitempty"` + Options []FieldOption `json:"options,omitempty"` + + AutocompleteProvider string `json:"autocomplete_provider,omitempty"` // "backends", "models:chat", etc. 
+ VRAMImpact bool `json:"vram_impact,omitempty"` + Advanced bool `json:"advanced,omitempty"` + Order int `json:"order"` +} + +// FieldOption represents a choice in a select/enum field. +type FieldOption struct { + Value string `json:"value"` + Label string `json:"label"` +} + +// Section groups related fields in the UI. +type Section struct { + ID string `json:"id"` + Label string `json:"label"` + Icon string `json:"icon,omitempty"` + Order int `json:"order"` +} + +// ConfigMetadata is the top-level response for the metadata API. +type ConfigMetadata struct { + Sections []Section `json:"sections"` + Fields []FieldMeta `json:"fields"` +} + +// FieldMetaOverride holds registry overrides that are merged on top of +// the reflection-discovered defaults. Only non-zero fields override. +type FieldMetaOverride struct { + Section string + Label string + Description string + Component string + Placeholder string + Default any + Min *float64 + Max *float64 + Step *float64 + Options []FieldOption + AutocompleteProvider string + VRAMImpact bool + Advanced bool + Order int +} + +// DefaultSections defines the well-known config sections in display order. 
+func DefaultSections() []Section { + return []Section{ + {ID: "general", Label: "General", Icon: "settings", Order: 0}, + {ID: "llm", Label: "LLM", Icon: "cpu", Order: 10}, + {ID: "parameters", Label: "Parameters", Icon: "sliders", Order: 20}, + {ID: "templates", Label: "Templates", Icon: "file-text", Order: 30}, + {ID: "functions", Label: "Functions / Tools", Icon: "tool", Order: 40}, + {ID: "reasoning", Label: "Reasoning", Icon: "brain", Order: 45}, + {ID: "diffusers", Label: "Diffusers", Icon: "image", Order: 50}, + {ID: "tts", Label: "TTS", Icon: "volume-2", Order: 55}, + {ID: "pipeline", Label: "Pipeline", Icon: "git-merge", Order: 60}, + {ID: "grpc", Label: "gRPC", Icon: "server", Order: 65}, + {ID: "agent", Label: "Agent", Icon: "bot", Order: 70}, + {ID: "mcp", Label: "MCP", Icon: "plug", Order: 75}, + {ID: "other", Label: "Other", Icon: "more-horizontal", Order: 100}, + } +} diff --git a/core/http/endpoints/localai/api_skills.go b/core/http/endpoints/localai/api_skills.go new file mode 100644 index 000000000000..abf3292f6fb7 --- /dev/null +++ b/core/http/endpoints/localai/api_skills.go @@ -0,0 +1,320 @@ +package localai + +import ( + "net/http" + + "github.com/labstack/echo/v4" +) + +// APISkill describes a task-focused guide for agents interacting with the LocalAI API. +type APISkill struct { + Name string `json:"name"` + Description string `json:"description"` + Category string `json:"category"` + Endpoints []APISkillEndpoint `json:"endpoints"` + Guide string `json:"guide,omitempty"` +} + +// APISkillEndpoint describes a single endpoint within a skill. 
+type APISkillEndpoint struct { + Method string `json:"method"` + Path string `json:"path"` + Desc string `json:"description"` +} + +var apiSkills = []APISkill{ + { + Name: "config-management", + Description: "Discover, read, and modify model configuration fields with VRAM estimation", + Category: "configuration", + Endpoints: []APISkillEndpoint{ + {Method: "GET", Path: "/api/models/config-metadata", Desc: "List all config fields with types, sections, and options"}, + {Method: "GET", Path: "/api/models/config-metadata/autocomplete/:provider", Desc: "Get dynamic values for a field (backends, models)"}, + {Method: "GET", Path: "/api/models/config-json/:name", Desc: "Read a model's full config as JSON"}, + {Method: "PATCH", Path: "/api/models/config-json/:name", Desc: "Partially update a model config via JSON merge"}, + {Method: "POST", Path: "/api/models/vram-estimate", Desc: "Estimate VRAM usage for a model"}, + }, + Guide: `# Config Management + +## Discover fields +` + "```" + ` +GET /api/models/config-metadata +` + "```" + ` +Returns all ~170 config fields with type info, UI hints, and options. +Fields with static options (quantization, cache_type_k, etc.) include an "options" array. +Fields with dynamic values (backend, pipeline.llm, etc.) have an "autocomplete_provider". + +## Read a model config +` + "```" + ` +GET /api/models/config-json/my-model +` + "```" + ` + +## Update specific fields +` + "```" + ` +PATCH /api/models/config-json/my-model +Content-Type: application/json + +{"context_size": 4096, "gpu_layers": -1} +` + "```" + ` +Performs a deep merge: nested objects are merged recursively, scalars are overwritten. 
+ +## Get dynamic autocomplete values +` + "```" + ` +GET /api/models/config-metadata/autocomplete/backends +GET /api/models/config-metadata/autocomplete/models:chat +` + "```" + ` +Providers: backends, models, models:chat, models:tts, models:transcript, models:vad + +## Estimate VRAM +` + "```" + ` +POST /api/models/vram-estimate +{"model": "my-model", "context_size": 8192, "gpu_layers": -1} +` + "```" + ` +Returns size_bytes, vram_bytes with display strings. If context_size is omitted and +not set in config, defaults to 8192 and includes model_max_context from GGUF metadata. +`, + }, + { + Name: "model-management", + Description: "Browse the gallery, install, delete, and import models", + Category: "models", + Endpoints: []APISkillEndpoint{ + {Method: "GET", Path: "/models/available", Desc: "List models available in configured galleries"}, + {Method: "POST", Path: "/models/apply", Desc: "Install a model from the gallery"}, + {Method: "POST", Path: "/models/delete/:name", Desc: "Delete an installed model"}, + {Method: "POST", Path: "/models/import", Desc: "Import a custom model config"}, + {Method: "POST", Path: "/models/edit/:name", Desc: "Replace a model's full config"}, + {Method: "POST", Path: "/models/reload", Desc: "Reload all model configurations from disk"}, + {Method: "GET", Path: "/v1/models", Desc: "List installed models (OpenAI-compatible)"}, + }, + Guide: `# Model Management + +## List gallery models +` + "```" + ` +GET /models/available +` + "```" + ` +Returns models from all configured galleries with name, description, URLs, and tags. + +## Install a model +` + "```" + ` +POST /models/apply +{"id": "gallery-name@model-name"} +` + "```" + ` +Returns a job UUID. Poll status with GET /models/jobs/:uuid. 
+ +## Delete a model +` + "```" + ` +POST /models/delete/my-model +` + "```" + ` + +## Import a custom model +` + "```" + ` +POST /models/import +Content-Type: application/json + +{"name": "my-model", "backend": "llama-cpp", "model": "path/to/model.gguf"} +` + "```" + ` + +## List installed models +` + "```" + ` +GET /v1/models +` + "```" + ` +OpenAI-compatible model listing. +`, + }, + { + Name: "chat-inference", + Description: "OpenAI-compatible chat completions, text completions, and embeddings", + Category: "inference", + Endpoints: []APISkillEndpoint{ + {Method: "POST", Path: "/v1/chat/completions", Desc: "Chat completion (streaming or non-streaming)"}, + {Method: "POST", Path: "/v1/completions", Desc: "Text completion"}, + {Method: "POST", Path: "/v1/embeddings", Desc: "Generate embeddings"}, + }, + Guide: `# Chat Inference + +## Chat completion +` + "```" + ` +POST /v1/chat/completions +{ + "model": "my-model", + "messages": [{"role": "user", "content": "Hello"}], + "temperature": 0.7, + "stream": false +} +` + "```" + ` +Set "stream": true for SSE streaming. Supports tool/function calling when the +model config has function templates configured. + +## Text completion +` + "```" + ` +POST /v1/completions +{"model": "my-model", "prompt": "Once upon a time", "max_tokens": 100} +` + "```" + ` + +## Embeddings +` + "```" + ` +POST /v1/embeddings +{"model": "my-embedding-model", "input": "text to embed"} +` + "```" + ` +Returns a vector in the standard OpenAI embeddings format. 
+`, + }, + { + Name: "audio", + Description: "Text-to-speech, voice activity detection, and transcription", + Category: "audio", + Endpoints: []APISkillEndpoint{ + {Method: "POST", Path: "/tts", Desc: "Generate speech from text"}, + {Method: "POST", Path: "/v1/audio/transcriptions", Desc: "Transcribe audio to text"}, + {Method: "POST", Path: "/vad", Desc: "Detect voice activity in audio"}, + }, + Guide: `# Audio + +## Text-to-speech +` + "```" + ` +POST /tts +{"model": "my-tts-model", "input": "Hello world", "voice": "default"} +` + "```" + ` +Returns audio data (WAV format by default). + +## Transcription +` + "```" + ` +POST /v1/audio/transcriptions +Content-Type: multipart/form-data + +file=@audio.wav +model=my-whisper-model +` + "```" + ` +OpenAI-compatible whisper transcription endpoint. + +## Voice Activity Detection +` + "```" + ` +POST /vad +Content-Type: multipart/form-data + +file=@audio.wav +model=my-vad-model +` + "```" + ` +Returns segments with start/end timestamps where speech is detected. +`, + }, + { + Name: "monitoring", + Description: "System metrics, backend status, and request tracing", + Category: "operations", + Endpoints: []APISkillEndpoint{ + {Method: "GET", Path: "/metrics", Desc: "Prometheus metrics"}, + {Method: "GET", Path: "/backend/monitor", Desc: "Running backend status"}, + {Method: "POST", Path: "/backend/shutdown", Desc: "Shut down a running backend"}, + {Method: "GET", Path: "/system", Desc: "System information (CPU, memory, GPU)"}, + {Method: "GET", Path: "/version", Desc: "LocalAI version"}, + {Method: "GET", Path: "/api/traces", Desc: "Recent request traces"}, + }, + Guide: `# Monitoring + +## Prometheus metrics +` + "```" + ` +GET /metrics +` + "```" + ` + +## Backend monitor +` + "```" + ` +GET /backend/monitor +` + "```" + ` +Returns status of running inference backends (memory usage, loaded models). 
+ +## Shut down a backend +` + "```" + ` +POST /backend/shutdown +{"model": "my-model"} +` + "```" + ` + +## System info +` + "```" + ` +GET /system +` + "```" + ` +Returns CPU, memory, GPU information and loaded models. + +## Request traces +` + "```" + ` +GET /api/traces +` + "```" + ` +Returns recent request traces with timing, model used, and token counts. +Useful for debugging latency and throughput issues. +`, + }, + { + Name: "mcp", + Description: "Model Context Protocol — tool-augmented chat with MCP servers", + Category: "mcp", + Endpoints: []APISkillEndpoint{ + {Method: "POST", Path: "/v1/mcp/chat/completions", Desc: "Chat with MCP tool execution"}, + {Method: "GET", Path: "/v1/mcp/servers/:model", Desc: "List MCP servers for a model"}, + {Method: "GET", Path: "/v1/mcp/prompts/:model", Desc: "List MCP prompts for a model"}, + {Method: "GET", Path: "/v1/mcp/resources/:model", Desc: "List MCP resources for a model"}, + }, + Guide: `# MCP (Model Context Protocol) + +## Chat with MCP tools +` + "```" + ` +POST /v1/mcp/chat/completions +{ + "model": "my-model", + "messages": [{"role": "user", "content": "Search for X"}], + "stream": true +} +` + "```" + ` +The model's config must define MCP servers. The endpoint handles tool execution +automatically, returning intermediate tool results in the stream. + +## List MCP servers +` + "```" + ` +GET /v1/mcp/servers/my-model +` + "```" + ` + +## List MCP prompts +` + "```" + ` +GET /v1/mcp/prompts/my-model +` + "```" + ` + +## List MCP resources +` + "```" + ` +GET /v1/mcp/resources/my-model +` + "```" + ` +`, + }, +} + +// ListAPISkillsEndpoint returns all skills without the Guide field (compact list). 
+func ListAPISkillsEndpoint() echo.HandlerFunc { + return func(c echo.Context) error { + compact := make([]APISkill, len(apiSkills)) + for i, s := range apiSkills { + compact[i] = APISkill{ + Name: s.Name, + Description: s.Description, + Category: s.Category, + Endpoints: s.Endpoints, + } + } + return c.JSON(http.StatusOK, compact) + } +} + +// GetAPISkillEndpoint returns a single skill by name, including the Guide. +func GetAPISkillEndpoint() echo.HandlerFunc { + byName := make(map[string]*APISkill, len(apiSkills)) + for i := range apiSkills { + byName[apiSkills[i].Name] = &apiSkills[i] + } + + return func(c echo.Context) error { + name := c.Param("name") + skill, ok := byName[name] + if !ok { + return c.JSON(http.StatusNotFound, map[string]any{"error": "skill not found: " + name}) + } + return c.JSON(http.StatusOK, skill) + } +} diff --git a/core/http/endpoints/localai/config_meta.go b/core/http/endpoints/localai/config_meta.go new file mode 100644 index 000000000000..c8910af6227b --- /dev/null +++ b/core/http/endpoints/localai/config_meta.go @@ -0,0 +1,186 @@ +package localai + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "reflect" + "sort" + "strings" + + "dario.cat/mergo" + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/services" + "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/utils" + "github.com/mudler/xlog" + "gopkg.in/yaml.v3" +) + +// ConfigMetadataEndpoint returns field metadata for all config fields. +func ConfigMetadataEndpoint() echo.HandlerFunc { + return func(c echo.Context) error { + md := meta.BuildConfigMetadata(reflect.TypeOf(config.ModelConfig{})) + return c.JSON(http.StatusOK, md) + } +} + +// AutocompleteEndpoint handles dynamic autocomplete lookups for config fields. 
+// Static option lists (quantizations, cache types, diffusers pipelines/schedulers) +// are embedded directly in the field metadata Options; only truly dynamic values +// that require runtime lookup are served here. +func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + provider := c.Param("provider") + var values []string + + switch { + case provider == meta.ProviderBackends: + installedBackends, err := gallery.ListSystemBackends(appConfig.SystemState) + if err == nil { + for name := range installedBackends { + values = append(values, name) + } + } + sort.Strings(values) + + case provider == meta.ProviderModels: + modelConfigs := cl.GetAllModelsConfigs() + for _, cfg := range modelConfigs { + values = append(values, cfg.Name) + } + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) + values = append(values, modelsWithoutConfig...) 
+ sort.Strings(values) + + case strings.HasPrefix(provider, "models:"): + capability := strings.TrimPrefix(provider, "models:") + var filterFn config.ModelConfigFilterFn + switch capability { + case "chat": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_CHAT) + case "tts": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_TTS) + case "vad": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_VAD) + case "transcript": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT) + default: + filterFn = config.NoFilterFn + } + filteredConfigs := cl.GetModelConfigsByFilter(filterFn) + for _, cfg := range filteredConfigs { + values = append(values, cfg.Name) + } + sort.Strings(values) + + default: + return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown provider: " + provider}) + } + + return c.JSON(http.StatusOK, map[string]any{"values": values}) + } +} + +// PatchConfigEndpoint handles PATCH requests to partially update a model config +// using nested JSON merge. 
+func PatchConfigEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("name") + if decoded, err := url.PathUnescape(modelName); err == nil { + modelName = decoded + } + if modelName == "" { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"}) + } + + modelConfig, exists := cl.GetModelConfig(modelName) + if !exists { + return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"}) + } + + // Read patch body + patchBody, err := io.ReadAll(c.Request().Body) + if err != nil || len(patchBody) == 0 { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "request body is empty or unreadable"}) + } + + // Validate patch body is valid JSON + var patchMap map[string]any + if err := json.Unmarshal(patchBody, &patchMap); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid JSON: " + err.Error()}) + } + + // Marshal existing config to JSON + existingJSON, err := json.Marshal(modelConfig) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal existing config"}) + } + + // Deep-merge patch into existing + var existingMap map[string]any + if err := json.Unmarshal(existingJSON, &existingMap); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to parse existing config"}) + } + + if err := mergo.Merge(&existingMap, patchMap, mergo.WithOverride); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to merge configs: " + err.Error()}) + } + + // Marshal merged config back to JSON + mergedJSON, err := json.Marshal(existingMap) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal merged config"}) + } + + // Unmarshal to ModelConfig for validation + var updatedConfig 
config.ModelConfig + if err := json.Unmarshal(mergedJSON, &updatedConfig); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "merged config is invalid: " + err.Error()}) + } + + // Validate + if valid, err := updatedConfig.Validate(); !valid { + errMsg := "validation failed" + if err != nil { + errMsg = err.Error() + } + return c.JSON(http.StatusBadRequest, map[string]any{"error": errMsg}) + } + + // Write as YAML to disk + configPath := modelConfig.GetModelConfigFile() + if err := utils.VerifyPath(configPath, appConfig.SystemState.Model.ModelsPath); err != nil { + return c.JSON(http.StatusForbidden, map[string]any{"error": "config path not trusted: " + err.Error()}) + } + + yamlData, err := yaml.Marshal(updatedConfig) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal YAML"}) + } + + if err := os.WriteFile(configPath, yamlData, 0644); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to write config file"}) + } + + // Reload configs + if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to reload configs: " + err.Error()}) + } + + if err := cl.Preload(appConfig.SystemState.Model.ModelsPath); err != nil { + xlog.Warn("Failed to preload after PATCH", "error", err) + } + + return c.JSON(http.StatusOK, map[string]any{ + "success": true, + "message": fmt.Sprintf("Model '%s' updated successfully", modelName), + }) + } +} diff --git a/core/http/endpoints/localai/vram.go b/core/http/endpoints/localai/vram.go new file mode 100644 index 000000000000..8bbea3ef406a --- /dev/null +++ b/core/http/endpoints/localai/vram.go @@ -0,0 +1,121 @@ +package localai + +import ( + "context" + "fmt" + "net/http" + "path" + "strings" + "time" + + "github.com/labstack/echo/v4" + 
"github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/vram" +) + +type vramEstimateRequest struct { + Model string `json:"model"` + ContextSize uint32 `json:"context_size,omitempty"` + GPULayers int `json:"gpu_layers,omitempty"` + KVQuantBits int `json:"kv_quant_bits,omitempty"` +} + +type vramEstimateResponse struct { + vram.EstimateResult + ContextNote string `json:"context_note,omitempty"` + ModelMaxContext uint64 `json:"model_max_context,omitempty"` +} + +// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an +// installed model configuration. For uninstalled models (gallery URLs), use +// the gallery-level estimates in /api/models instead. +func VRAMEstimateEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true} + + return func(c echo.Context) error { + var req vramEstimateRequest + if err := c.Bind(&req); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid request body"}) + } + + if req.Model == "" { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"}) + } + + modelConfig, exists := cl.GetModelConfig(req.Model) + if !exists { + return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"}) + } + + // Build file inputs from the model's download files + var files []vram.FileInput + var firstGGUF string + for _, f := range modelConfig.DownloadFiles { + ext := strings.ToLower(path.Ext(path.Base(f.Filename))) + if weightExts[ext] { + files = append(files, vram.FileInput{URI: string(f.URI), Size: 0}) + if firstGGUF == "" && ext == ".gguf" { + firstGGUF = string(f.URI) + } + } + } + + // Also include the main model file if it looks like a weight file + if modelConfig.Model != "" { + ext := strings.ToLower(path.Ext(path.Base(modelConfig.Model))) + if weightExts[ext] { + files = append(files, 
vram.FileInput{URI: modelConfig.Model, Size: 0}) + if firstGGUF == "" && ext == ".gguf" { + firstGGUF = modelConfig.Model + } + } + } + + if len(files) == 0 { + return c.JSON(http.StatusOK, map[string]any{ + "message": "no weight files found for estimation", + }) + } + + contextDefaulted := false + opts := vram.EstimateOptions{ + ContextLength: req.ContextSize, + GPULayers: req.GPULayers, + KVQuantBits: req.KVQuantBits, + } + if opts.ContextLength == 0 { + if modelConfig.ContextSize != nil { + opts.ContextLength = uint32(*modelConfig.ContextSize) + } else { + opts.ContextLength = 8192 + contextDefaulted = true + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + result, err := vram.Estimate(ctx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader()) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()}) + } + + resp := vramEstimateResponse{EstimateResult: result} + + // When context was defaulted to 8192, read the GGUF metadata to report + // the model's trained maximum context length so callers know the estimate + // may be conservative. + if contextDefaulted && firstGGUF != "" { + ggufMeta, err := vram.DefaultCachedGGUFReader().ReadMetadata(ctx, firstGGUF) + if err == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 { + resp.ModelMaxContext = ggufMeta.MaximumContextLength + resp.ContextNote = fmt.Sprintf( + "Estimate used default context_size=8192. 
The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.", + ggufMeta.MaximumContextLength, + ) + } + } + + return c.JSON(http.StatusOK, resp) + } +} diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 85134a6a3c7a..fc91d34717a0 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -132,6 +132,113 @@ func RegisterLocalAIRoutes(router *echo.Echo, }{Version: internal.PrintableVersion()}) }) + // Agent discovery endpoint + router.GET("/.well-known/localai.json", func(c echo.Context) error { + return c.JSON(200, map[string]any{ + "version": internal.PrintableVersion(), + // Flat endpoint list for backwards compatibility + "endpoints": map[string]any{ + "models": "/v1/models", + "chat_completions": "/v1/chat/completions", + "completions": "/v1/completions", + "embeddings": "/v1/embeddings", + "config_metadata": "/api/models/config-metadata", + "config_json": "/api/models/config-json/:name", + "config_patch": "/api/models/config-json/:name", + "autocomplete": "/api/models/config-metadata/autocomplete/:provider", + "vram_estimate": "/api/models/vram-estimate", + "tts": "/tts", + "transcription": "/v1/audio/transcriptions", + "image_generation": "/v1/images/generations", + "swagger": "/swagger/index.html", + "skills": "/api/skills", + }, + // Categorized endpoint groups for structured discovery + "endpoint_groups": map[string]any{ + "openai_compatible": map[string]string{ + "models": "/v1/models", + "chat_completions": "/v1/chat/completions", + "completions": "/v1/completions", + "embeddings": "/v1/embeddings", + "transcription": "/v1/audio/transcriptions", + "image_generation": "/v1/images/generations", + }, + "config_management": map[string]string{ + "config_metadata": "/api/models/config-metadata", + "config_json": "/api/models/config-json/:name", + "config_patch": "/api/models/config-json/:name", + "autocomplete": "/api/models/config-metadata/autocomplete/:provider", + 
"vram_estimate": "/api/models/vram-estimate", + }, + "model_management": map[string]string{ + "list_gallery": "/models/available", + "install": "/models/apply", + "delete": "/models/delete/:name", + "edit": "/models/edit/:name", + "import": "/models/import", + "reload": "/models/reload", + }, + "ai_functions": map[string]string{ + "tts": "/tts", + "vad": "/vad", + "video": "/video", + "detection": "/v1/detection", + "tokenize": "/v1/tokenize", + }, + "monitoring": map[string]string{ + "metrics": "/metrics", + "backend_monitor": "/backend/monitor", + "backend_shutdown": "/backend/shutdown", + "system": "/system", + "version": "/version", + "traces": "/api/traces", + }, + "mcp": map[string]string{ + "chat_completions": "/v1/mcp/chat/completions", + "servers": "/v1/mcp/servers/:model", + "prompts": "/v1/mcp/prompts/:model", + "resources": "/v1/mcp/resources/:model", + }, + "p2p": map[string]string{ + "nodes": "/api/p2p", + "token": "/api/p2p/token", + }, + "agents": map[string]string{ + "tasks": "/api/agent/tasks", + "jobs": "/api/agent/jobs", + "execute": "/api/agent/jobs/execute", + }, + "settings": map[string]string{ + "get": "/api/settings", + "update": "/api/settings", + }, + "stores": map[string]string{ + "set": "/stores/set", + "get": "/stores/get", + "find": "/stores/find", + "delete": "/stores/delete", + }, + "docs": map[string]string{ + "swagger": "/swagger/index.html", + "skills": "/api/skills", + }, + }, + "capabilities": map[string]bool{ + "config_metadata": true, + "config_patch": true, + "vram_estimate": true, + "mcp": !appConfig.DisableMCP, + "agents": appConfig.AgentPool.Enabled, + "p2p": appConfig.P2PToken != "", + "tracing": true, + }, + }) + }) + + // API skills for agent discovery (no auth — agents should discover these without credentials) + router.GET("/api/skills", localai.ListAPISkillsEndpoint()) + router.GET("/api/skills/:name", localai.GetAPISkillEndpoint()) + router.GET("/api/features", func(c echo.Context) error { return c.JSON(200, 
map[string]bool{ "agents": appConfig.AgentPool.Enabled, diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 81d9b4275ef0..ad539c7ee1f0 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -701,6 +701,18 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model return c.JSON(http.StatusOK, modelConfig) }, adminMiddleware) + // Config metadata API - returns field metadata for all ~170 config fields + app.GET("/api/models/config-metadata", localai.ConfigMetadataEndpoint(), adminMiddleware) + + // Autocomplete providers for config fields (dynamic values only) + app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware) + + // PATCH config endpoint - partial update using nested JSON merge + app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware) + + // VRAM estimation endpoint + app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware) + // Get installed model YAML config for the React model editor app.GET("/api/models/edit/:name", func(c echo.Context) error { modelName := c.Param("name") @@ -1307,3 +1319,4 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model }) }, adminMiddleware) } + diff --git a/pkg/vram/gguf_reader.go b/pkg/vram/gguf_reader.go index 631c017f7418..3f731b482d3b 100644 --- a/pkg/vram/gguf_reader.go +++ b/pkg/vram/gguf_reader.go @@ -34,10 +34,11 @@ func (defaultGGUFReader) ReadMetadata(ctx context.Context, uri string) (*GGUFMet func ggufFileToMeta(f *gguf.GGUFFile) *GGUFMeta { arch := f.Architecture() meta := &GGUFMeta{ - BlockCount: uint32(arch.BlockCount), - EmbeddingLength: uint32(arch.EmbeddingLength), - HeadCount: uint32(arch.AttentionHeadCount), - HeadCountKV: uint32(arch.AttentionHeadCountKV), + BlockCount: uint32(arch.BlockCount), + EmbeddingLength: uint32(arch.EmbeddingLength), + 
HeadCount: uint32(arch.AttentionHeadCount), + HeadCountKV: uint32(arch.AttentionHeadCountKV), + MaximumContextLength: arch.MaximumContextLength, } if meta.HeadCountKV == 0 { meta.HeadCountKV = meta.HeadCount diff --git a/pkg/vram/types.go b/pkg/vram/types.go index cda76aff6378..abb9ad06c301 100644 --- a/pkg/vram/types.go +++ b/pkg/vram/types.go @@ -15,10 +15,11 @@ type SizeResolver interface { // GGUFMeta holds parsed GGUF metadata used for VRAM estimation. type GGUFMeta struct { - BlockCount uint32 - EmbeddingLength uint32 - HeadCount uint32 - HeadCountKV uint32 + BlockCount uint32 + EmbeddingLength uint32 + HeadCount uint32 + HeadCountKV uint32 + MaximumContextLength uint64 } // GGUFMetadataReader reads GGUF metadata from a URI (e.g. via HTTP Range).