diff --git a/core/config/meta/build.go b/core/config/meta/build.go new file mode 100644 index 000000000000..b45d7c569fcb --- /dev/null +++ b/core/config/meta/build.go @@ -0,0 +1,141 @@ +package meta + +import ( + "reflect" + "sort" + "sync" +) + +var ( + cachedMetadata *ConfigMetadata + cacheMu sync.RWMutex +) + +// BuildConfigMetadata reflects on the given struct type (ModelConfig), +// merges the enrichment registry, and returns the full ConfigMetadata. +// The result is cached in memory after the first call. +func BuildConfigMetadata(modelConfigType reflect.Type) *ConfigMetadata { + cacheMu.RLock() + if cachedMetadata != nil { + cacheMu.RUnlock() + return cachedMetadata + } + cacheMu.RUnlock() + + cacheMu.Lock() + defer cacheMu.Unlock() + + // Double-check after acquiring write lock + if cachedMetadata != nil { + return cachedMetadata + } + + cachedMetadata = buildConfigMetadataUncached(modelConfigType, DefaultRegistry()) + return cachedMetadata +} + +// buildConfigMetadataUncached does the actual work without caching. +// Exported via lowercase for testability through BuildForTest. 
+func buildConfigMetadataUncached(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata { + fields := WalkModelConfig(modelConfigType) + + // Apply registry overrides + for i := range fields { + override, ok := registry[fields[i].Path] + if !ok { + continue + } + applyOverride(&fields[i], override) + } + + // Sort fields by section order then by field order + sectionOrder := make(map[string]int) + for _, s := range DefaultSections() { + sectionOrder[s.ID] = s.Order + } + + sort.SliceStable(fields, func(i, j int) bool { + si := sectionOrder[fields[i].Section] + sj := sectionOrder[fields[j].Section] + if si != sj { + return si < sj + } + return fields[i].Order < fields[j].Order + }) + + // Collect sections that actually have fields + usedSections := make(map[string]bool) + for _, f := range fields { + usedSections[f.Section] = true + } + + var sections []Section + for _, s := range DefaultSections() { + if usedSections[s.ID] { + sections = append(sections, s) + } + } + + return &ConfigMetadata{ + Sections: sections, + Fields: fields, + } +} + +// applyOverride merges non-zero override values into the field. 
+func applyOverride(f *FieldMeta, o FieldMetaOverride) { + if o.Section != "" { + f.Section = o.Section + } + if o.Label != "" { + f.Label = o.Label + } + if o.Description != "" { + f.Description = o.Description + } + if o.Component != "" { + f.Component = o.Component + } + if o.Placeholder != "" { + f.Placeholder = o.Placeholder + } + if o.Default != nil { + f.Default = o.Default + } + if o.Min != nil { + f.Min = o.Min + } + if o.Max != nil { + f.Max = o.Max + } + if o.Step != nil { + f.Step = o.Step + } + if o.Options != nil { + f.Options = o.Options + } + if o.AutocompleteProvider != "" { + f.AutocompleteProvider = o.AutocompleteProvider + } + if o.VRAMImpact { + f.VRAMImpact = true + } + if o.Advanced { + f.Advanced = true + } + if o.Order != 0 { + f.Order = o.Order + } +} + +// BuildForTest builds metadata without caching, for use in tests. +func BuildForTest(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata { + return buildConfigMetadataUncached(modelConfigType, registry) +} + +// ResetCache clears the cached metadata (useful for testing). 
+func ResetCache() { + cacheMu.Lock() + defer cacheMu.Unlock() + cachedMetadata = nil +} diff --git a/core/config/meta/build_test.go b/core/config/meta/build_test.go new file mode 100644 index 000000000000..aa9acb889a8c --- /dev/null +++ b/core/config/meta/build_test.go @@ -0,0 +1,211 @@ +package meta_test + +import ( + "reflect" + "testing" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" +) + +func TestBuildConfigMetadata(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + if len(md.Sections) == 0 { + t.Fatal("expected sections, got 0") + } + if len(md.Fields) == 0 { + t.Fatal("expected fields, got 0") + } + + // Verify sections are ordered + for i := 1; i < len(md.Sections); i++ { + if md.Sections[i].Order < md.Sections[i-1].Order { + t.Errorf("sections not ordered: %s (order=%d) before %s (order=%d)", + md.Sections[i-1].ID, md.Sections[i-1].Order, + md.Sections[i].ID, md.Sections[i].Order) + } + } +} + +func TestRegistryOverrides(t *testing.T) { + registry := map[string]meta.FieldMetaOverride{ + "name": { + Label: "My Custom Label", + Description: "Custom description", + Component: "textarea", + Order: 999, + }, + } + + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), registry) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + f, ok := byPath["name"] + if !ok { + t.Fatal("field 'name' not found") + } + if f.Label != "My Custom Label" { + t.Errorf("expected label 'My Custom Label', got %q", f.Label) + } + if f.Description != "Custom description" { + t.Errorf("expected description 'Custom description', got %q", f.Description) + } + if f.Component != "textarea" { + t.Errorf("expected component 'textarea', got %q", f.Component) + } + if f.Order != 999 { + t.Errorf("expected order 999, got %d", f.Order) + } +} + +func TestUnregisteredFieldsGetDefaults(t *testing.T) { + // Use empty 
registry - all fields should still get auto-generated metadata + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), map[string]meta.FieldMetaOverride{}) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // context_size should still exist with auto-generated label + f, ok := byPath["context_size"] + if !ok { + t.Fatal("field 'context_size' not found") + } + if f.Label == "" { + t.Error("expected auto-generated label, got empty") + } + if f.UIType != "int" { + t.Errorf("expected UIType 'int', got %q", f.UIType) + } + if f.Component == "" { + t.Error("expected auto-generated component, got empty") + } +} + +func TestDefaultRegistryOverridesApply(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Verify enriched fields got their overrides + tests := []struct { + path string + label string + description string + vramImpact bool + }{ + {"context_size", "Context Size", "Maximum context window in tokens", true}, + {"gpu_layers", "GPU Layers", "Number of layers to offload to GPU (-1 = all)", true}, + {"backend", "Backend", "The inference backend to use (e.g. 
llama-cpp, vllm, diffusers)", false}, + {"parameters.temperature", "Temperature", "Sampling temperature (higher = more creative, lower = more deterministic)", false}, + {"template.chat", "Chat Template", "Go template for chat completion requests", false}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Label != tt.label { + t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label) + } + if f.Description != tt.description { + t.Errorf("field %q: expected description %q, got %q", tt.path, tt.description, f.Description) + } + if f.VRAMImpact != tt.vramImpact { + t.Errorf("field %q: expected vramImpact=%v, got %v", tt.path, tt.vramImpact, f.VRAMImpact) + } + } +} + +func TestStaticOptionsFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Fields with static options should have Options populated and no AutocompleteProvider + staticFields := []string{"quantization", "cache_type_k", "cache_type_v", "diffusers.pipeline_type", "diffusers.scheduler_type"} + for _, path := range staticFields { + f, ok := byPath[path] + if !ok { + t.Errorf("field %q not found", path) + continue + } + if len(f.Options) == 0 { + t.Errorf("field %q: expected Options to be populated", path) + } + if f.AutocompleteProvider != "" { + t.Errorf("field %q: expected no AutocompleteProvider, got %q", path, f.AutocompleteProvider) + } + } +} + +func TestDynamicProviderFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Fields with dynamic providers should have AutocompleteProvider and no Options + dynamicFields := map[string]string{ + 
"backend": meta.ProviderBackends, + "pipeline.llm": meta.ProviderModelsChat, + "pipeline.tts": meta.ProviderModelsTTS, + "pipeline.transcription": meta.ProviderModelsTranscript, + "pipeline.vad": meta.ProviderModelsVAD, + } + for path, expectedProvider := range dynamicFields { + f, ok := byPath[path] + if !ok { + t.Errorf("field %q not found", path) + continue + } + if f.AutocompleteProvider != expectedProvider { + t.Errorf("field %q: expected AutocompleteProvider %q, got %q", path, expectedProvider, f.AutocompleteProvider) + } + if len(f.Options) != 0 { + t.Errorf("field %q: expected no Options, got %d", path, len(f.Options)) + } + } +} + +func TestVRAMImpactFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + var vramFields []string + for _, f := range md.Fields { + if f.VRAMImpact { + vramFields = append(vramFields, f.Path) + } + } + + if len(vramFields) == 0 { + t.Error("expected some VRAM impact fields, got 0") + } + + // context_size and gpu_layers should be marked + expected := map[string]bool{"context_size": true, "gpu_layers": true} + for _, path := range vramFields { + if expected[path] { + delete(expected, path) + } + } + for path := range expected { + t.Errorf("expected VRAM impact field %q not found", path) + } +} diff --git a/core/config/meta/constants.go b/core/config/meta/constants.go new file mode 100644 index 000000000000..24e24015fb49 --- /dev/null +++ b/core/config/meta/constants.go @@ -0,0 +1,63 @@ +package meta + +// Dynamic autocomplete provider constants (runtime lookup required). +const ( + ProviderBackends = "backends" + ProviderModels = "models" + ProviderModelsChat = "models:chat" + ProviderModelsTTS = "models:tts" + ProviderModelsTranscript = "models:transcript" + ProviderModelsVAD = "models:vad" +) + +// Static option lists embedded directly in field metadata. 
// QuantizationOptions lists the selectable quantization methods offered by
// the registry for the "quantization" select field.
var QuantizationOptions = []FieldOption{
	{Value: "q4_0", Label: "Q4_0"},
	{Value: "q4_1", Label: "Q4_1"},
	{Value: "q5_0", Label: "Q5_0"},
	{Value: "q5_1", Label: "Q5_1"},
	{Value: "q8_0", Label: "Q8_0"},
	{Value: "q2_K", Label: "Q2_K"},
	{Value: "q3_K_S", Label: "Q3_K_S"},
	{Value: "q3_K_M", Label: "Q3_K_M"},
	{Value: "q3_K_L", Label: "Q3_K_L"},
	{Value: "q4_K_S", Label: "Q4_K_S"},
	{Value: "q4_K_M", Label: "Q4_K_M"},
	{Value: "q5_K_S", Label: "Q5_K_S"},
	{Value: "q5_K_M", Label: "Q5_K_M"},
	{Value: "q6_K", Label: "Q6_K"},
}

// CacheTypeOptions lists the selectable KV-cache quantization types used by
// the "cache_type_k" and "cache_type_v" select fields.
var CacheTypeOptions = []FieldOption{
	{Value: "f16", Label: "F16"},
	{Value: "f32", Label: "F32"},
	{Value: "q8_0", Label: "Q8_0"},
	{Value: "q4_0", Label: "Q4_0"},
	{Value: "q4_1", Label: "Q4_1"},
	{Value: "q5_0", Label: "Q5_0"},
	{Value: "q5_1", Label: "Q5_1"},
}

// DiffusersPipelineOptions lists the selectable pipeline classes for the
// "diffusers.pipeline_type" select field.
var DiffusersPipelineOptions = []FieldOption{
	{Value: "StableDiffusionPipeline", Label: "StableDiffusionPipeline"},
	{Value: "StableDiffusionImg2ImgPipeline", Label: "StableDiffusionImg2ImgPipeline"},
	{Value: "StableDiffusionXLPipeline", Label: "StableDiffusionXLPipeline"},
	{Value: "StableDiffusionXLImg2ImgPipeline", Label: "StableDiffusionXLImg2ImgPipeline"},
	{Value: "StableDiffusionDepth2ImgPipeline", Label: "StableDiffusionDepth2ImgPipeline"},
	{Value: "DiffusionPipeline", Label: "DiffusionPipeline"},
	{Value: "StableVideoDiffusionPipeline", Label: "StableVideoDiffusionPipeline"},
}

// DiffusersSchedulerOptions lists the selectable noise schedulers for the
// "diffusers.scheduler_type" select field.
var DiffusersSchedulerOptions = []FieldOption{
	{Value: "ddim", Label: "DDIM"},
	{Value: "ddpm", Label: "DDPM"},
	{Value: "pndm", Label: "PNDM"},
	{Value: "lms", Label: "LMS"},
	{Value: "euler", Label: "Euler"},
	{Value: "euler_a", Label: "Euler A"},
	{Value: "dpm_multistep", Label: "DPM Multistep"},
	{Value: "dpm_singlestep", Label: "DPM Singlestep"},
	{Value: "heun", Label: "Heun"},
	{Value: "unipc", Label: "UniPC"},
}
000000000000..ef1d0b4b07ad --- /dev/null +++ b/core/config/meta/reflect.go @@ -0,0 +1,259 @@ +package meta + +import ( + "reflect" + "strings" + "unicode" +) + +// WalkModelConfig uses reflection to discover all exported, YAML-tagged fields +// in the given struct type (expected to be config.ModelConfig) and returns a +// slice of FieldMeta with sensible defaults derived from the type information. +func WalkModelConfig(t reflect.Type) []FieldMeta { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + var fields []FieldMeta + walkStruct(t, "", "", &fields) + return fields +} + +// walkStruct recursively walks a struct type, collecting FieldMeta entries. +// prefix is the dot-path prefix for nested structs (e.g. "function.grammar."). +// parentYAMLPrefix is used for inline embedding with prefix (e.g. "parameters."). +func walkStruct(t reflect.Type, prefix, parentYAMLPrefix string, out *[]FieldMeta) { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + if t.Kind() != reflect.Struct { + return + } + + for i := range t.NumField() { + sf := t.Field(i) + + // Skip unexported fields + if !sf.IsExported() { + continue + } + + yamlTag := sf.Tag.Get("yaml") + if yamlTag == "-" { + continue + } + + yamlKey, opts := parseTag(yamlTag) + + // Handle inline embedding (e.g. 
LLMConfig `yaml:",inline"`) + if opts.contains("inline") { + ft := sf.Type + if ft.Kind() == reflect.Pointer { + ft = ft.Elem() + } + if ft.Kind() == reflect.Struct { + walkStruct(ft, prefix, parentYAMLPrefix, out) + } + continue + } + + // If no yaml key and it's an embedded struct without inline, skip unknown pattern + if yamlKey == "" { + ft := sf.Type + if ft.Kind() == reflect.Pointer { + ft = ft.Elem() + } + // Anonymous struct without yaml tag - treat as inline + if sf.Anonymous && ft.Kind() == reflect.Struct { + walkStruct(ft, prefix, parentYAMLPrefix, out) + continue + } + // Named field without yaml tag - skip + continue + } + + ft := sf.Type + isPtr := ft.Kind() == reflect.Pointer + if isPtr { + ft = ft.Elem() + } + + // Named nested struct (not a special type) -> recurse with prefix + if ft.Kind() == reflect.Struct && !isSpecialType(ft) { + nestedPrefix := prefix + yamlKey + "." + walkStruct(ft, nestedPrefix, "", out) + continue + } + + // Leaf field + path := prefix + yamlKey + goType := sf.Type.String() + uiType, component := inferUIType(sf.Type) + section := inferSection(prefix) + label := labelFromKey(yamlKey) + + *out = append(*out, FieldMeta{ + Path: path, + YAMLKey: yamlKey, + GoType: goType, + UIType: uiType, + Pointer: isPtr, + Section: section, + Label: label, + Component: component, + Order: len(*out), + }) + } +} + +// isSpecialType returns true for struct types that should be treated as leaf +// values rather than recursed into (e.g. custom JSON marshalers). +func isSpecialType(t reflect.Type) bool { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + name := t.Name() + // LogprobsValue, URI types are leaf values despite being structs + switch name { + case "LogprobsValue", "URI": + return true + } + return false +} + +// inferUIType maps a Go reflect.Type to a UI type string and default component. 
+func inferUIType(t reflect.Type) (uiType, component string) { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + + switch t.Kind() { + case reflect.Bool: + return "bool", "toggle" + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return "int", "number" + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + return "int", "number" + case reflect.Float32, reflect.Float64: + return "float", "number" + case reflect.String: + return "string", "input" + case reflect.Slice: + elem := t.Elem() + if elem.Kind() == reflect.String { + return "[]string", "string-list" + } + if elem.Kind() == reflect.Pointer { + elem = elem.Elem() + } + if elem.Kind() == reflect.Struct { + return "[]object", "json-editor" + } + return "[]any", "json-editor" + case reflect.Map: + return "map", "map-editor" + case reflect.Struct: + // Special types treated as leaves + if isSpecialType(t) { + return "bool", "toggle" // LogprobsValue + } + return "object", "json-editor" + default: + return "any", "input" + } +} + +// inferSection determines the config section from the dot-path prefix. +func inferSection(prefix string) string { + if prefix == "" { + return "general" + } + // Remove trailing dot + p := strings.TrimSuffix(prefix, ".") + + // Use the top-level prefix to determine section + parts := strings.SplitN(p, ".", 2) + top := parts[0] + + switch top { + case "parameters": + return "parameters" + case "template": + return "templates" + case "function": + return "functions" + case "reasoning": + return "reasoning" + case "diffusers": + return "diffusers" + case "tts": + return "tts" + case "pipeline": + return "pipeline" + case "grpc": + return "grpc" + case "agent": + return "agent" + case "mcp": + return "mcp" + case "feature_flags": + return "other" + case "limit_mm_per_prompt": + return "llm" + default: + return "other" + } +} + +// labelFromKey converts a yaml key like "context_size" to "Context Size". 
+func labelFromKey(key string) string { + parts := strings.Split(key, "_") + for i, p := range parts { + if len(p) > 0 { + runes := []rune(p) + runes[0] = unicode.ToUpper(runes[0]) + parts[i] = string(runes) + } + } + return strings.Join(parts, " ") +} + +// tagOptions is a set of comma-separated yaml tag options. +type tagOptions string + +func (o tagOptions) contains(optName string) bool { + s := string(o) + for s != "" { + var name string + if name, s, _ = strings.Cut(s, ","); name == optName { + return true + } + } + return false +} + +// parseTag splits a yaml struct tag into the key name and options. +func parseTag(tag string) (string, tagOptions) { + if tag == "" { + return "", "" + } + before, after, found := strings.Cut(tag, ",") + if found { + return before, tagOptions(after) + } + return tag, "" +} + +// SectionForPath returns the section ID for a given dot-path. +// Exported so tests and the registry can use it. +func SectionForPath(path string) string { + before, _, found := strings.Cut(path, ".") + if !found { + return "general" + } + return inferSection(before + ".") +} + +// GoTypeName returns a human-readable Go type string for display. 
+func GoTypeName(t reflect.Type) string { + return t.String() +} diff --git a/core/config/meta/reflect_test.go b/core/config/meta/reflect_test.go new file mode 100644 index 000000000000..408bb2a1ecc3 --- /dev/null +++ b/core/config/meta/reflect_test.go @@ -0,0 +1,208 @@ +package meta_test + +import ( + "reflect" + "testing" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" +) + +func TestWalkModelConfig(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + if len(fields) == 0 { + t.Fatal("expected fields from ModelConfig, got 0") + } + + // Build a lookup by path + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + // Verify some top-level fields exist + for _, path := range []string{"name", "backend", "cuda", "step"} { + if _, ok := byPath[path]; !ok { + t.Errorf("expected field %q not found", path) + } + } + + // Verify inline LLMConfig fields appear at top level (no prefix) + for _, path := range []string{"context_size", "gpu_layers", "threads", "mmap"} { + if _, ok := byPath[path]; !ok { + t.Errorf("expected inline LLMConfig field %q not found", path) + } + } + + // Verify nested struct fields have correct prefix + for _, path := range []string{ + "template.chat", + "template.completion", + "template.use_tokenizer_template", + "function.grammar.parallel_calls", + "function.grammar.mixed_mode", + "diffusers.pipeline_type", + "diffusers.cuda", + "pipeline.llm", + "pipeline.tts", + "reasoning.disable", + "agent.max_iterations", + "grpc.attempts", + } { + if _, ok := byPath[path]; !ok { + t.Errorf("expected nested field %q not found", path) + } + } + + // Verify PredictionOptions fields have parameters. 
prefix + for _, path := range []string{ + "parameters.temperature", + "parameters.top_p", + "parameters.top_k", + "parameters.max_tokens", + "parameters.seed", + } { + if _, ok := byPath[path]; !ok { + t.Errorf("expected parameters field %q not found", path) + } + } + + // Verify TTSConfig fields have tts. prefix + if _, ok := byPath["tts.voice"]; !ok { + t.Error("expected tts.voice field not found") + } +} + +func TestSkipsYAMLDashFields(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + // modelConfigFile has yaml:"-" tag, should be skipped + for _, f := range fields { + if f.Path == "modelConfigFile" || f.Path == "modelTemplate" { + t.Errorf("field %q should have been skipped (yaml:\"-\")", f.Path) + } + } +} + +func TestTypeMapping(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + uiType string + pointer bool + }{ + {"name", "string", false}, + {"cuda", "bool", false}, + {"context_size", "int", true}, + {"gpu_layers", "int", true}, + {"threads", "int", true}, + {"f16", "bool", true}, + {"mmap", "bool", true}, + {"stopwords", "[]string", false}, + {"roles", "map", false}, + {"parameters.temperature", "float", true}, + {"parameters.top_k", "int", true}, + {"function.grammar.parallel_calls", "bool", false}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.UIType != tt.uiType { + t.Errorf("field %q: expected UIType %q, got %q", tt.path, tt.uiType, f.UIType) + } + if f.Pointer != tt.pointer { + t.Errorf("field %q: expected Pointer=%v, got %v", tt.path, tt.pointer, f.Pointer) + } + } +} + +func TestSectionAssignment(t *testing.T) { + fields := 
meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + section string + }{ + {"name", "general"}, + {"backend", "general"}, + {"context_size", "general"}, // inline LLMConfig -> no prefix -> general + {"parameters.temperature", "parameters"}, + {"template.chat", "templates"}, + {"function.grammar.parallel_calls", "functions"}, + {"diffusers.cuda", "diffusers"}, + {"pipeline.llm", "pipeline"}, + {"reasoning.disable", "reasoning"}, + {"agent.max_iterations", "agent"}, + {"grpc.attempts", "grpc"}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Section != tt.section { + t.Errorf("field %q: expected section %q, got %q", tt.path, tt.section, f.Section) + } + } +} + +func TestLabelGeneration(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + label string + }{ + {"context_size", "Context Size"}, + {"gpu_layers", "Gpu Layers"}, + {"name", "Name"}, + {"cuda", "Cuda"}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Label != tt.label { + t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label) + } + } +} + +func TestFieldCount(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + // We expect a large number of fields (100+) given the config complexity + if len(fields) < 80 { + t.Errorf("expected at least 80 fields, got %d", len(fields)) + } + t.Logf("Total fields discovered: %d", len(fields)) +} diff --git a/core/config/meta/registry.go b/core/config/meta/registry.go new file mode 100644 index 
000000000000..bebba468dc2d --- /dev/null +++ b/core/config/meta/registry.go @@ -0,0 +1,314 @@ +package meta + +// DefaultRegistry returns enrichment overrides for the ~30 most commonly used +// config fields. Fields not listed here still appear with auto-generated +// labels and type-inferred components. +func DefaultRegistry() map[string]FieldMetaOverride { + f64 := func(v float64) *float64 { return &v } + + return map[string]FieldMetaOverride{ + // --- General --- + "name": { + Section: "general", + Label: "Model Name", + Description: "Unique identifier for this model configuration", + Component: "input", + Order: 0, + }, + "backend": { + Section: "general", + Label: "Backend", + Description: "The inference backend to use (e.g. llama-cpp, vllm, diffusers)", + Component: "select", + AutocompleteProvider: ProviderBackends, + Order: 1, + }, + "description": { + Section: "general", + Label: "Description", + Description: "Human-readable description of what this model does", + Component: "textarea", + Order: 2, + }, + "usage": { + Section: "general", + Label: "Usage", + Description: "Usage instructions or notes", + Component: "textarea", + Advanced: true, + Order: 3, + }, + "cuda": { + Section: "general", + Label: "CUDA", + Description: "Explicitly enable CUDA acceleration", + Order: 5, + }, + "known_usecases": { + Section: "general", + Label: "Known Use Cases", + Description: "Capabilities this model supports (e.g. 
FLAG_CHAT, FLAG_COMPLETION)", + Component: "string-list", + Order: 6, + }, + + // --- LLM --- + "context_size": { + Section: "llm", + Label: "Context Size", + Description: "Maximum context window in tokens", + Component: "number", + VRAMImpact: true, + Order: 10, + }, + "gpu_layers": { + Section: "llm", + Label: "GPU Layers", + Description: "Number of layers to offload to GPU (-1 = all)", + Component: "number", + Min: f64(-1), + VRAMImpact: true, + Order: 11, + }, + "threads": { + Section: "llm", + Label: "Threads", + Description: "Number of CPU threads for inference", + Component: "number", + Min: f64(1), + Order: 12, + }, + "f16": { + Section: "llm", + Label: "F16", + Description: "Use 16-bit floating point for key/value cache", + Order: 13, + }, + "mmap": { + Section: "llm", + Label: "Memory Map", + Description: "Use memory-mapped files for model loading", + Order: 14, + }, + "mmlock": { + Section: "llm", + Label: "Memory Lock", + Description: "Lock model memory to prevent swapping", + Advanced: true, + Order: 15, + }, + "low_vram": { + Section: "llm", + Label: "Low VRAM", + Description: "Optimize for systems with limited GPU memory", + VRAMImpact: true, + Order: 16, + }, + "embeddings": { + Section: "llm", + Label: "Embeddings", + Description: "Enable embedding generation mode", + Order: 17, + }, + "quantization": { + Section: "llm", + Label: "Quantization", + Description: "Quantization method (e.g. q4_0, q5_1, q8_0)", + Component: "select", + Options: QuantizationOptions, + Advanced: true, + Order: 20, + }, + "flash_attention": { + Section: "llm", + Label: "Flash Attention", + Description: "Enable flash attention for faster inference", + Component: "input", + Advanced: true, + Order: 21, + }, + "cache_type_k": { + Section: "llm", + Label: "KV Cache Type (K)", + Description: "Quantization type for key cache (e.g. 
f16, q8_0, q4_0)", + Component: "select", + Options: CacheTypeOptions, + VRAMImpact: true, + Advanced: true, + Order: 22, + }, + "cache_type_v": { + Section: "llm", + Label: "KV Cache Type (V)", + Description: "Quantization type for value cache", + Component: "select", + Options: CacheTypeOptions, + VRAMImpact: true, + Advanced: true, + Order: 23, + }, + + // --- Parameters --- + "parameters.temperature": { + Section: "parameters", + Label: "Temperature", + Description: "Sampling temperature (higher = more creative, lower = more deterministic)", + Component: "slider", + Min: f64(0), + Max: f64(2), + Step: f64(0.05), + Order: 30, + }, + "parameters.top_p": { + Section: "parameters", + Label: "Top P", + Description: "Nucleus sampling threshold", + Component: "slider", + Min: f64(0), + Max: f64(1), + Step: f64(0.01), + Order: 31, + }, + "parameters.top_k": { + Section: "parameters", + Label: "Top K", + Description: "Top-K sampling: consider only the K most likely tokens", + Component: "number", + Min: f64(0), + Order: 32, + }, + "parameters.max_tokens": { + Section: "parameters", + Label: "Max Tokens", + Description: "Maximum number of tokens to generate (0 = unlimited)", + Component: "number", + Min: f64(0), + Order: 33, + }, + "parameters.repeat_penalty": { + Section: "parameters", + Label: "Repeat Penalty", + Description: "Penalize repeated tokens (1.0 = no penalty)", + Component: "number", + Min: f64(0), + Advanced: true, + Order: 34, + }, + "parameters.seed": { + Section: "parameters", + Label: "Seed", + Description: "Random seed (-1 = random)", + Component: "number", + Advanced: true, + Order: 35, + }, + + // --- Templates --- + "template.chat": { + Section: "templates", + Label: "Chat Template", + Description: "Go template for chat completion requests", + Component: "code-editor", + Order: 40, + }, + "template.chat_message": { + Section: "templates", + Label: "Chat Message Template", + Description: "Go template for individual chat messages", + Component: 
"code-editor", + Order: 41, + }, + "template.completion": { + Section: "templates", + Label: "Completion Template", + Description: "Go template for completion requests", + Component: "code-editor", + Order: 42, + }, + "template.use_tokenizer_template": { + Section: "templates", + Label: "Use Tokenizer Template", + Description: "Use the chat template from the model's tokenizer config", + Order: 43, + }, + + // --- Pipeline --- + "pipeline.llm": { + Section: "pipeline", + Label: "LLM Model", + Description: "Model to use for LLM inference in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsChat, + Order: 60, + }, + "pipeline.tts": { + Section: "pipeline", + Label: "TTS Model", + Description: "Model to use for text-to-speech in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsTTS, + Order: 61, + }, + "pipeline.transcription": { + Section: "pipeline", + Label: "Transcription Model", + Description: "Model to use for speech-to-text in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsTranscript, + Order: 62, + }, + "pipeline.vad": { + Section: "pipeline", + Label: "VAD Model", + Description: "Model to use for voice activity detection in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsVAD, + Order: 63, + }, + + // --- Functions --- + "function.grammar.parallel_calls": { + Section: "functions", + Label: "Parallel Calls", + Description: "Allow the LLM to return multiple function calls in one response", + Order: 70, + }, + "function.grammar.mixed_mode": { + Section: "functions", + Label: "Mixed Mode", + Description: "Allow the LLM to return both text and function calls", + Order: 71, + }, + "function.grammar.disable": { + Section: "functions", + Label: "Disable Grammar", + Description: "Disable grammar-constrained generation for function calls", + Advanced: true, + Order: 72, + }, + + // --- Diffusers --- + "diffusers.pipeline_type": { + Section: 
"diffusers", + Label: "Pipeline Type", + Description: "Diffusers pipeline type (e.g. StableDiffusionPipeline)", + Component: "select", + Options: DiffusersPipelineOptions, + Order: 80, + }, + "diffusers.scheduler_type": { + Section: "diffusers", + Label: "Scheduler Type", + Description: "Noise scheduler type", + Component: "select", + Options: DiffusersSchedulerOptions, + Order: 81, + }, + "diffusers.cuda": { + Section: "diffusers", + Label: "CUDA", + Description: "Enable CUDA for diffusers", + Order: 82, + }, + } +} diff --git a/core/config/meta/types.go b/core/config/meta/types.go new file mode 100644 index 000000000000..dcd21fb55806 --- /dev/null +++ b/core/config/meta/types.go @@ -0,0 +1,83 @@ +package meta + +// FieldMeta describes a single configuration field for UI rendering and agent discovery. +type FieldMeta struct { + Path string `json:"path"` // dot-path: "context_size", "function.grammar.parallel_calls" + YAMLKey string `json:"yaml_key"` // leaf yaml key + GoType string `json:"go_type"` // "*int", "string", "[]string" + UIType string `json:"ui_type"` // "string", "int", "float", "bool", "[]string", "map", "object" + Pointer bool `json:"pointer,omitempty"` // true = nil means "not set" + Section string `json:"section"` // "general", "llm", "templates", etc. + Label string `json:"label"` // human-readable label + Description string `json:"description,omitempty"` // help text + Component string `json:"component"` // "input", "number", "toggle", "select", "slider", etc. + Placeholder string `json:"placeholder,omitempty"` + Default any `json:"default,omitempty"` + Min *float64 `json:"min,omitempty"` + Max *float64 `json:"max,omitempty"` + Step *float64 `json:"step,omitempty"` + Options []FieldOption `json:"options,omitempty"` + + AutocompleteProvider string `json:"autocomplete_provider,omitempty"` // "backends", "models:chat", etc. 
+ VRAMImpact bool `json:"vram_impact,omitempty"` + Advanced bool `json:"advanced,omitempty"` + Order int `json:"order"` +} + +// FieldOption represents a choice in a select/enum field. +type FieldOption struct { + Value string `json:"value"` + Label string `json:"label"` +} + +// Section groups related fields in the UI. +type Section struct { + ID string `json:"id"` + Label string `json:"label"` + Icon string `json:"icon,omitempty"` + Order int `json:"order"` +} + +// ConfigMetadata is the top-level response for the metadata API. +type ConfigMetadata struct { + Sections []Section `json:"sections"` + Fields []FieldMeta `json:"fields"` +} + +// FieldMetaOverride holds registry overrides that are merged on top of +// the reflection-discovered defaults. Only non-zero fields override. +type FieldMetaOverride struct { + Section string + Label string + Description string + Component string + Placeholder string + Default any + Min *float64 + Max *float64 + Step *float64 + Options []FieldOption + AutocompleteProvider string + VRAMImpact bool + Advanced bool + Order int +} + +// DefaultSections defines the well-known config sections in display order. 
+func DefaultSections() []Section { + return []Section{ + {ID: "general", Label: "General", Icon: "settings", Order: 0}, + {ID: "llm", Label: "LLM", Icon: "cpu", Order: 10}, + {ID: "parameters", Label: "Parameters", Icon: "sliders", Order: 20}, + {ID: "templates", Label: "Templates", Icon: "file-text", Order: 30}, + {ID: "functions", Label: "Functions / Tools", Icon: "tool", Order: 40}, + {ID: "reasoning", Label: "Reasoning", Icon: "brain", Order: 45}, + {ID: "diffusers", Label: "Diffusers", Icon: "image", Order: 50}, + {ID: "tts", Label: "TTS", Icon: "volume-2", Order: 55}, + {ID: "pipeline", Label: "Pipeline", Icon: "git-merge", Order: 60}, + {ID: "grpc", Label: "gRPC", Icon: "server", Order: 65}, + {ID: "agent", Label: "Agent", Icon: "bot", Order: 70}, + {ID: "mcp", Label: "MCP", Icon: "plug", Order: 75}, + {ID: "other", Label: "Other", Icon: "more-horizontal", Order: 100}, + } +} diff --git a/core/http/endpoints/localai/api_skills.go b/core/http/endpoints/localai/api_skills.go new file mode 100644 index 000000000000..abf3292f6fb7 --- /dev/null +++ b/core/http/endpoints/localai/api_skills.go @@ -0,0 +1,320 @@ +package localai + +import ( + "net/http" + + "github.com/labstack/echo/v4" +) + +// APISkill describes a task-focused guide for agents interacting with the LocalAI API. +type APISkill struct { + Name string `json:"name"` + Description string `json:"description"` + Category string `json:"category"` + Endpoints []APISkillEndpoint `json:"endpoints"` + Guide string `json:"guide,omitempty"` +} + +// APISkillEndpoint describes a single endpoint within a skill. 
+type APISkillEndpoint struct { + Method string `json:"method"` + Path string `json:"path"` + Desc string `json:"description"` +} + +var apiSkills = []APISkill{ + { + Name: "config-management", + Description: "Discover, read, and modify model configuration fields with VRAM estimation", + Category: "configuration", + Endpoints: []APISkillEndpoint{ + {Method: "GET", Path: "/api/models/config-metadata", Desc: "List all config fields with types, sections, and options"}, + {Method: "GET", Path: "/api/models/config-metadata/autocomplete/:provider", Desc: "Get dynamic values for a field (backends, models)"}, + {Method: "GET", Path: "/api/models/config-json/:name", Desc: "Read a model's full config as JSON"}, + {Method: "PATCH", Path: "/api/models/config-json/:name", Desc: "Partially update a model config via JSON merge"}, + {Method: "POST", Path: "/api/models/vram-estimate", Desc: "Estimate VRAM usage for a model"}, + }, + Guide: `# Config Management + +## Discover fields +` + "```" + ` +GET /api/models/config-metadata +` + "```" + ` +Returns all ~170 config fields with type info, UI hints, and options. +Fields with static options (quantization, cache_type_k, etc.) include an "options" array. +Fields with dynamic values (backend, pipeline.llm, etc.) have an "autocomplete_provider". + +## Read a model config +` + "```" + ` +GET /api/models/config-json/my-model +` + "```" + ` + +## Update specific fields +` + "```" + ` +PATCH /api/models/config-json/my-model +Content-Type: application/json + +{"context_size": 4096, "gpu_layers": -1} +` + "```" + ` +Performs a deep merge: nested objects are merged recursively, scalars are overwritten. 
+ +## Get dynamic autocomplete values +` + "```" + ` +GET /api/models/config-metadata/autocomplete/backends +GET /api/models/config-metadata/autocomplete/models:chat +` + "```" + ` +Providers: backends, models, models:chat, models:tts, models:transcript, models:vad + +## Estimate VRAM +` + "```" + ` +POST /api/models/vram-estimate +{"model": "my-model", "context_size": 8192, "gpu_layers": -1} +` + "```" + ` +Returns size_bytes, vram_bytes with display strings. If context_size is omitted and +not set in config, defaults to 8192 and includes model_max_context from GGUF metadata. +`, + }, + { + Name: "model-management", + Description: "Browse the gallery, install, delete, and import models", + Category: "models", + Endpoints: []APISkillEndpoint{ + {Method: "GET", Path: "/models/available", Desc: "List models available in configured galleries"}, + {Method: "POST", Path: "/models/apply", Desc: "Install a model from the gallery"}, + {Method: "POST", Path: "/models/delete/:name", Desc: "Delete an installed model"}, + {Method: "POST", Path: "/models/import", Desc: "Import a custom model config"}, + {Method: "POST", Path: "/models/edit/:name", Desc: "Replace a model's full config"}, + {Method: "POST", Path: "/models/reload", Desc: "Reload all model configurations from disk"}, + {Method: "GET", Path: "/v1/models", Desc: "List installed models (OpenAI-compatible)"}, + }, + Guide: `# Model Management + +## List gallery models +` + "```" + ` +GET /models/available +` + "```" + ` +Returns models from all configured galleries with name, description, URLs, and tags. + +## Install a model +` + "```" + ` +POST /models/apply +{"id": "gallery-name@model-name"} +` + "```" + ` +Returns a job UUID. Poll status with GET /models/jobs/:uuid. 
+ +## Delete a model +` + "```" + ` +POST /models/delete/my-model +` + "```" + ` + +## Import a custom model +` + "```" + ` +POST /models/import +Content-Type: application/json + +{"name": "my-model", "backend": "llama-cpp", "model": "path/to/model.gguf"} +` + "```" + ` + +## List installed models +` + "```" + ` +GET /v1/models +` + "```" + ` +OpenAI-compatible model listing. +`, + }, + { + Name: "chat-inference", + Description: "OpenAI-compatible chat completions, text completions, and embeddings", + Category: "inference", + Endpoints: []APISkillEndpoint{ + {Method: "POST", Path: "/v1/chat/completions", Desc: "Chat completion (streaming or non-streaming)"}, + {Method: "POST", Path: "/v1/completions", Desc: "Text completion"}, + {Method: "POST", Path: "/v1/embeddings", Desc: "Generate embeddings"}, + }, + Guide: `# Chat Inference + +## Chat completion +` + "```" + ` +POST /v1/chat/completions +{ + "model": "my-model", + "messages": [{"role": "user", "content": "Hello"}], + "temperature": 0.7, + "stream": false +} +` + "```" + ` +Set "stream": true for SSE streaming. Supports tool/function calling when the +model config has function templates configured. + +## Text completion +` + "```" + ` +POST /v1/completions +{"model": "my-model", "prompt": "Once upon a time", "max_tokens": 100} +` + "```" + ` + +## Embeddings +` + "```" + ` +POST /v1/embeddings +{"model": "my-embedding-model", "input": "text to embed"} +` + "```" + ` +Returns a vector in the standard OpenAI embeddings format. 
+`, + }, + { + Name: "audio", + Description: "Text-to-speech, voice activity detection, and transcription", + Category: "audio", + Endpoints: []APISkillEndpoint{ + {Method: "POST", Path: "/tts", Desc: "Generate speech from text"}, + {Method: "POST", Path: "/v1/audio/transcriptions", Desc: "Transcribe audio to text"}, + {Method: "POST", Path: "/vad", Desc: "Detect voice activity in audio"}, + }, + Guide: `# Audio + +## Text-to-speech +` + "```" + ` +POST /tts +{"model": "my-tts-model", "input": "Hello world", "voice": "default"} +` + "```" + ` +Returns audio data (WAV format by default). + +## Transcription +` + "```" + ` +POST /v1/audio/transcriptions +Content-Type: multipart/form-data + +file=@audio.wav +model=my-whisper-model +` + "```" + ` +OpenAI-compatible whisper transcription endpoint. + +## Voice Activity Detection +` + "```" + ` +POST /vad +Content-Type: multipart/form-data + +file=@audio.wav +model=my-vad-model +` + "```" + ` +Returns segments with start/end timestamps where speech is detected. +`, + }, + { + Name: "monitoring", + Description: "System metrics, backend status, and request tracing", + Category: "operations", + Endpoints: []APISkillEndpoint{ + {Method: "GET", Path: "/metrics", Desc: "Prometheus metrics"}, + {Method: "GET", Path: "/backend/monitor", Desc: "Running backend status"}, + {Method: "POST", Path: "/backend/shutdown", Desc: "Shut down a running backend"}, + {Method: "GET", Path: "/system", Desc: "System information (CPU, memory, GPU)"}, + {Method: "GET", Path: "/version", Desc: "LocalAI version"}, + {Method: "GET", Path: "/api/traces", Desc: "Recent request traces"}, + }, + Guide: `# Monitoring + +## Prometheus metrics +` + "```" + ` +GET /metrics +` + "```" + ` + +## Backend monitor +` + "```" + ` +GET /backend/monitor +` + "```" + ` +Returns status of running inference backends (memory usage, loaded models). 
+ +## Shut down a backend +` + "```" + ` +POST /backend/shutdown +{"model": "my-model"} +` + "```" + ` + +## System info +` + "```" + ` +GET /system +` + "```" + ` +Returns CPU, memory, GPU information and loaded models. + +## Request traces +` + "```" + ` +GET /api/traces +` + "```" + ` +Returns recent request traces with timing, model used, and token counts. +Useful for debugging latency and throughput issues. +`, + }, + { + Name: "mcp", + Description: "Model Context Protocol — tool-augmented chat with MCP servers", + Category: "mcp", + Endpoints: []APISkillEndpoint{ + {Method: "POST", Path: "/v1/mcp/chat/completions", Desc: "Chat with MCP tool execution"}, + {Method: "GET", Path: "/v1/mcp/servers/:model", Desc: "List MCP servers for a model"}, + {Method: "GET", Path: "/v1/mcp/prompts/:model", Desc: "List MCP prompts for a model"}, + {Method: "GET", Path: "/v1/mcp/resources/:model", Desc: "List MCP resources for a model"}, + }, + Guide: `# MCP (Model Context Protocol) + +## Chat with MCP tools +` + "```" + ` +POST /v1/mcp/chat/completions +{ + "model": "my-model", + "messages": [{"role": "user", "content": "Search for X"}], + "stream": true +} +` + "```" + ` +The model's config must define MCP servers. The endpoint handles tool execution +automatically, returning intermediate tool results in the stream. + +## List MCP servers +` + "```" + ` +GET /v1/mcp/servers/my-model +` + "```" + ` + +## List MCP prompts +` + "```" + ` +GET /v1/mcp/prompts/my-model +` + "```" + ` + +## List MCP resources +` + "```" + ` +GET /v1/mcp/resources/my-model +` + "```" + ` +`, + }, +} + +// ListAPISkillsEndpoint returns all skills without the Guide field (compact list). 
+func ListAPISkillsEndpoint() echo.HandlerFunc { + return func(c echo.Context) error { + compact := make([]APISkill, len(apiSkills)) + for i, s := range apiSkills { + compact[i] = APISkill{ + Name: s.Name, + Description: s.Description, + Category: s.Category, + Endpoints: s.Endpoints, + } + } + return c.JSON(http.StatusOK, compact) + } +} + +// GetAPISkillEndpoint returns a single skill by name, including the Guide. +func GetAPISkillEndpoint() echo.HandlerFunc { + byName := make(map[string]*APISkill, len(apiSkills)) + for i := range apiSkills { + byName[apiSkills[i].Name] = &apiSkills[i] + } + + return func(c echo.Context) error { + name := c.Param("name") + skill, ok := byName[name] + if !ok { + return c.JSON(http.StatusNotFound, map[string]any{"error": "skill not found: " + name}) + } + return c.JSON(http.StatusOK, skill) + } +} diff --git a/core/http/endpoints/localai/config_meta.go b/core/http/endpoints/localai/config_meta.go new file mode 100644 index 000000000000..c8910af6227b --- /dev/null +++ b/core/http/endpoints/localai/config_meta.go @@ -0,0 +1,186 @@ +package localai + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "reflect" + "sort" + "strings" + + "dario.cat/mergo" + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/services" + "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/utils" + "github.com/mudler/xlog" + "gopkg.in/yaml.v3" +) + +// ConfigMetadataEndpoint returns field metadata for all config fields. +func ConfigMetadataEndpoint() echo.HandlerFunc { + return func(c echo.Context) error { + md := meta.BuildConfigMetadata(reflect.TypeOf(config.ModelConfig{})) + return c.JSON(http.StatusOK, md) + } +} + +// AutocompleteEndpoint handles dynamic autocomplete lookups for config fields. 
+// Static option lists (quantizations, cache types, diffusers pipelines/schedulers) +// are embedded directly in the field metadata Options; only truly dynamic values +// that require runtime lookup are served here. +func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + provider := c.Param("provider") + var values []string + + switch { + case provider == meta.ProviderBackends: + installedBackends, err := gallery.ListSystemBackends(appConfig.SystemState) + if err == nil { + for name := range installedBackends { + values = append(values, name) + } + } + sort.Strings(values) + + case provider == meta.ProviderModels: + modelConfigs := cl.GetAllModelsConfigs() + for _, cfg := range modelConfigs { + values = append(values, cfg.Name) + } + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) + values = append(values, modelsWithoutConfig...) 
+ sort.Strings(values) + + case strings.HasPrefix(provider, "models:"): + capability := strings.TrimPrefix(provider, "models:") + var filterFn config.ModelConfigFilterFn + switch capability { + case "chat": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_CHAT) + case "tts": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_TTS) + case "vad": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_VAD) + case "transcript": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT) + default: + filterFn = config.NoFilterFn + } + filteredConfigs := cl.GetModelConfigsByFilter(filterFn) + for _, cfg := range filteredConfigs { + values = append(values, cfg.Name) + } + sort.Strings(values) + + default: + return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown provider: " + provider}) + } + + return c.JSON(http.StatusOK, map[string]any{"values": values}) + } +} + +// PatchConfigEndpoint handles PATCH requests to partially update a model config +// using nested JSON merge. 
+func PatchConfigEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("name") + if decoded, err := url.PathUnescape(modelName); err == nil { + modelName = decoded + } + if modelName == "" { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"}) + } + + modelConfig, exists := cl.GetModelConfig(modelName) + if !exists { + return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"}) + } + + // Read patch body + patchBody, err := io.ReadAll(c.Request().Body) + if err != nil || len(patchBody) == 0 { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "request body is empty or unreadable"}) + } + + // Validate patch body is valid JSON + var patchMap map[string]any + if err := json.Unmarshal(patchBody, &patchMap); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid JSON: " + err.Error()}) + } + + // Marshal existing config to JSON + existingJSON, err := json.Marshal(modelConfig) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal existing config"}) + } + + // Deep-merge patch into existing + var existingMap map[string]any + if err := json.Unmarshal(existingJSON, &existingMap); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to parse existing config"}) + } + + if err := mergo.Merge(&existingMap, patchMap, mergo.WithOverride); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to merge configs: " + err.Error()}) + } + + // Marshal merged config back to JSON + mergedJSON, err := json.Marshal(existingMap) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal merged config"}) + } + + // Unmarshal to ModelConfig for validation + var updatedConfig 
config.ModelConfig + if err := json.Unmarshal(mergedJSON, &updatedConfig); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "merged config is invalid: " + err.Error()}) + } + + // Validate + if valid, err := updatedConfig.Validate(); !valid { + errMsg := "validation failed" + if err != nil { + errMsg = err.Error() + } + return c.JSON(http.StatusBadRequest, map[string]any{"error": errMsg}) + } + + // Write as YAML to disk + configPath := modelConfig.GetModelConfigFile() + if err := utils.VerifyPath(configPath, appConfig.SystemState.Model.ModelsPath); err != nil { + return c.JSON(http.StatusForbidden, map[string]any{"error": "config path not trusted: " + err.Error()}) + } + + yamlData, err := yaml.Marshal(updatedConfig) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal YAML"}) + } + + if err := os.WriteFile(configPath, yamlData, 0644); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to write config file"}) + } + + // Reload configs + if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to reload configs: " + err.Error()}) + } + + if err := cl.Preload(appConfig.SystemState.Model.ModelsPath); err != nil { + xlog.Warn("Failed to preload after PATCH", "error", err) + } + + return c.JSON(http.StatusOK, map[string]any{ + "success": true, + "message": fmt.Sprintf("Model '%s' updated successfully", modelName), + }) + } +} diff --git a/core/http/endpoints/localai/vram.go b/core/http/endpoints/localai/vram.go new file mode 100644 index 000000000000..8bbea3ef406a --- /dev/null +++ b/core/http/endpoints/localai/vram.go @@ -0,0 +1,121 @@ +package localai + +import ( + "context" + "fmt" + "net/http" + "path" + "strings" + "time" + + "github.com/labstack/echo/v4" + 
"github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/vram" +) + +type vramEstimateRequest struct { + Model string `json:"model"` + ContextSize uint32 `json:"context_size,omitempty"` + GPULayers int `json:"gpu_layers,omitempty"` + KVQuantBits int `json:"kv_quant_bits,omitempty"` +} + +type vramEstimateResponse struct { + vram.EstimateResult + ContextNote string `json:"context_note,omitempty"` + ModelMaxContext uint64 `json:"model_max_context,omitempty"` +} + +// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an +// installed model configuration. For uninstalled models (gallery URLs), use +// the gallery-level estimates in /api/models instead. +func VRAMEstimateEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true} + + return func(c echo.Context) error { + var req vramEstimateRequest + if err := c.Bind(&req); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid request body"}) + } + + if req.Model == "" { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"}) + } + + modelConfig, exists := cl.GetModelConfig(req.Model) + if !exists { + return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"}) + } + + // Build file inputs from the model's download files + var files []vram.FileInput + var firstGGUF string + for _, f := range modelConfig.DownloadFiles { + ext := strings.ToLower(path.Ext(path.Base(f.Filename))) + if weightExts[ext] { + files = append(files, vram.FileInput{URI: string(f.URI), Size: 0}) + if firstGGUF == "" && ext == ".gguf" { + firstGGUF = string(f.URI) + } + } + } + + // Also include the main model file if it looks like a weight file + if modelConfig.Model != "" { + ext := strings.ToLower(path.Ext(path.Base(modelConfig.Model))) + if weightExts[ext] { + files = append(files, 
vram.FileInput{URI: modelConfig.Model, Size: 0}) + if firstGGUF == "" && ext == ".gguf" { + firstGGUF = modelConfig.Model + } + } + } + + if len(files) == 0 { + return c.JSON(http.StatusOK, map[string]any{ + "message": "no weight files found for estimation", + }) + } + + contextDefaulted := false + opts := vram.EstimateOptions{ + ContextLength: req.ContextSize, + GPULayers: req.GPULayers, + KVQuantBits: req.KVQuantBits, + } + if opts.ContextLength == 0 { + if modelConfig.ContextSize != nil { + opts.ContextLength = uint32(*modelConfig.ContextSize) + } else { + opts.ContextLength = 8192 + contextDefaulted = true + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + result, err := vram.Estimate(ctx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader()) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()}) + } + + resp := vramEstimateResponse{EstimateResult: result} + + // When context was defaulted to 8192, read the GGUF metadata to report + // the model's trained maximum context length so callers know the estimate + // may be conservative. + if contextDefaulted && firstGGUF != "" { + ggufMeta, err := vram.DefaultCachedGGUFReader().ReadMetadata(ctx, firstGGUF) + if err == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 { + resp.ModelMaxContext = ggufMeta.MaximumContextLength + resp.ContextNote = fmt.Sprintf( + "Estimate used default context_size=8192. 
The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.", + ggufMeta.MaximumContextLength, + ) + } + } + + return c.JSON(http.StatusOK, resp) + } +} diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 85134a6a3c7a..fc91d34717a0 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -132,6 +132,113 @@ func RegisterLocalAIRoutes(router *echo.Echo, }{Version: internal.PrintableVersion()}) }) + // Agent discovery endpoint + router.GET("/.well-known/localai.json", func(c echo.Context) error { + return c.JSON(200, map[string]any{ + "version": internal.PrintableVersion(), + // Flat endpoint list for backwards compatibility + "endpoints": map[string]any{ + "models": "/v1/models", + "chat_completions": "/v1/chat/completions", + "completions": "/v1/completions", + "embeddings": "/v1/embeddings", + "config_metadata": "/api/models/config-metadata", + "config_json": "/api/models/config-json/:name", + "config_patch": "/api/models/config-json/:name", + "autocomplete": "/api/models/config-metadata/autocomplete/:provider", + "vram_estimate": "/api/models/vram-estimate", + "tts": "/tts", + "transcription": "/v1/audio/transcriptions", + "image_generation": "/v1/images/generations", + "swagger": "/swagger/index.html", + "skills": "/api/skills", + }, + // Categorized endpoint groups for structured discovery + "endpoint_groups": map[string]any{ + "openai_compatible": map[string]string{ + "models": "/v1/models", + "chat_completions": "/v1/chat/completions", + "completions": "/v1/completions", + "embeddings": "/v1/embeddings", + "transcription": "/v1/audio/transcriptions", + "image_generation": "/v1/images/generations", + }, + "config_management": map[string]string{ + "config_metadata": "/api/models/config-metadata", + "config_json": "/api/models/config-json/:name", + "config_patch": "/api/models/config-json/:name", + "autocomplete": "/api/models/config-metadata/autocomplete/:provider", + 
"vram_estimate": "/api/models/vram-estimate", + }, + "model_management": map[string]string{ + "list_gallery": "/models/available", + "install": "/models/apply", + "delete": "/models/delete/:name", + "edit": "/models/edit/:name", + "import": "/models/import", + "reload": "/models/reload", + }, + "ai_functions": map[string]string{ + "tts": "/tts", + "vad": "/vad", + "video": "/video", + "detection": "/v1/detection", + "tokenize": "/v1/tokenize", + }, + "monitoring": map[string]string{ + "metrics": "/metrics", + "backend_monitor": "/backend/monitor", + "backend_shutdown": "/backend/shutdown", + "system": "/system", + "version": "/version", + "traces": "/api/traces", + }, + "mcp": map[string]string{ + "chat_completions": "/v1/mcp/chat/completions", + "servers": "/v1/mcp/servers/:model", + "prompts": "/v1/mcp/prompts/:model", + "resources": "/v1/mcp/resources/:model", + }, + "p2p": map[string]string{ + "nodes": "/api/p2p", + "token": "/api/p2p/token", + }, + "agents": map[string]string{ + "tasks": "/api/agent/tasks", + "jobs": "/api/agent/jobs", + "execute": "/api/agent/jobs/execute", + }, + "settings": map[string]string{ + "get": "/api/settings", + "update": "/api/settings", + }, + "stores": map[string]string{ + "set": "/stores/set", + "get": "/stores/get", + "find": "/stores/find", + "delete": "/stores/delete", + }, + "docs": map[string]string{ + "swagger": "/swagger/index.html", + "skills": "/api/skills", + }, + }, + "capabilities": map[string]bool{ + "config_metadata": true, + "config_patch": true, + "vram_estimate": true, + "mcp": !appConfig.DisableMCP, + "agents": appConfig.AgentPool.Enabled, + "p2p": appConfig.P2PToken != "", + "tracing": true, + }, + }) + }) + + // API skills for agent discovery (no auth — agents should discover these without credentials) + router.GET("/api/skills", localai.ListAPISkillsEndpoint()) + router.GET("/api/skills/:name", localai.GetAPISkillEndpoint()) + router.GET("/api/features", func(c echo.Context) error { return c.JSON(200, 
map[string]bool{ "agents": appConfig.AgentPool.Enabled, diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 81d9b4275ef0..ad539c7ee1f0 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -701,6 +701,18 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model return c.JSON(http.StatusOK, modelConfig) }, adminMiddleware) + // Config metadata API - returns field metadata for all ~170 config fields + app.GET("/api/models/config-metadata", localai.ConfigMetadataEndpoint(), adminMiddleware) + + // Autocomplete providers for config fields (dynamic values only) + app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware) + + // PATCH config endpoint - partial update using nested JSON merge + app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware) + + // VRAM estimation endpoint + app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware) + // Get installed model YAML config for the React model editor app.GET("/api/models/edit/:name", func(c echo.Context) error { modelName := c.Param("name") @@ -1307,3 +1319,4 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model }) }, adminMiddleware) } + diff --git a/pkg/vram/gguf_reader.go b/pkg/vram/gguf_reader.go index 631c017f7418..3f731b482d3b 100644 --- a/pkg/vram/gguf_reader.go +++ b/pkg/vram/gguf_reader.go @@ -34,10 +34,11 @@ func (defaultGGUFReader) ReadMetadata(ctx context.Context, uri string) (*GGUFMet func ggufFileToMeta(f *gguf.GGUFFile) *GGUFMeta { arch := f.Architecture() meta := &GGUFMeta{ - BlockCount: uint32(arch.BlockCount), - EmbeddingLength: uint32(arch.EmbeddingLength), - HeadCount: uint32(arch.AttentionHeadCount), - HeadCountKV: uint32(arch.AttentionHeadCountKV), + BlockCount: uint32(arch.BlockCount), + EmbeddingLength: uint32(arch.EmbeddingLength), + 
HeadCount: uint32(arch.AttentionHeadCount), + HeadCountKV: uint32(arch.AttentionHeadCountKV), + MaximumContextLength: arch.MaximumContextLength, } if meta.HeadCountKV == 0 { meta.HeadCountKV = meta.HeadCount diff --git a/pkg/vram/types.go b/pkg/vram/types.go index cda76aff6378..abb9ad06c301 100644 --- a/pkg/vram/types.go +++ b/pkg/vram/types.go @@ -15,10 +15,11 @@ type SizeResolver interface { // GGUFMeta holds parsed GGUF metadata used for VRAM estimation. type GGUFMeta struct { - BlockCount uint32 - EmbeddingLength uint32 - HeadCount uint32 - HeadCountKV uint32 + BlockCount uint32 + EmbeddingLength uint32 + HeadCount uint32 + HeadCountKV uint32 + MaximumContextLength uint64 } // GGUFMetadataReader reads GGUF metadata from a URI (e.g. via HTTP Range).