sharpninja · sharpninja · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026
diff --git a/.semversioner/next-release/patch-20260315024056229023.json b/.semversioner/next-release/patch-20260315024056229023.json
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "reconfigure vector store size by embedding model"
+}
diff --git a/docs/upstream-sync/upstream-3502c222.md b/docs/upstream-sync/upstream-3502c222.md
@@ -6,6 +6,17 @@
 
 ---
 
-Analysis unavailable: HTTP Error 401: Unauthorized
+Manual review complete.
 
-Manual review of upstream commit `3502c222` is required.
+## Summary
+
+Upstream commit `3502c222` updates Python config validation so that, after probing the configured embedding model, GraphRAG automatically realigns `vector_store.vector_size` and each index schema vector dimension to the actual embedding width.
+
+## Dotnet parity
+
+The .NET codebase does not yet have a direct equivalent of `validate_config.py`, so parity is implemented on the immutable configuration models:
+
+- `GraphRagConfig.SyncVectorStoreDimensions(...)` now realigns vector-store dimensions when the configured embed-text model returns a different embedding width.
+- `VectorStoreConfig.WithVectorSize(...)` and `IndexSchema.WithVectorSize(...)` propagate the updated dimension consistently.
+
+Beyond the vector-size synchronization behavior, this upstream commit contains no further Python changes that require .NET parity work.
diff --git a/dotnet/src/GraphRag.Vectors/IndexSchema.cs b/dotnet/src/GraphRag.Vectors/IndexSchema.cs
@@ -32,4 +32,16 @@ public sealed record IndexSchema
     /// Gets the mapping of field names to their types.
     /// </summary>
     public Dictionary<string, string>? Fields { get; init; }
+
+    /// <summary>
+    /// Creates a copy of this schema whose <see cref="VectorSize"/> is set to the given dimension.
+    /// </summary>
+    /// <param name="vectorSize">The vector dimension to apply; must be greater than zero.</param>
+    /// <returns>A new <see cref="IndexSchema"/> with the updated vector size; this instance is not modified.</returns>
+    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="vectorSize"/> is less than or equal to zero.</exception>
+    public IndexSchema WithVectorSize(int vectorSize)
+    {
+        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(vectorSize);
+        return this with { VectorSize = vectorSize }; // non-destructive copy; only VectorSize differs
+    }
 }
diff --git a/dotnet/src/GraphRag.Vectors/VectorStoreConfig.cs b/dotnet/src/GraphRag.Vectors/VectorStoreConfig.cs
@@ -52,4 +52,21 @@ public sealed record VectorStoreConfig
     /// Gets the index schema configuration.
     /// </summary>
     public IndexSchema? IndexSchema { get; init; }
+
+    /// <summary>
+    /// Creates a copy of this configuration with the given vector size applied to both the store and its index schema.
+    /// </summary>
+    /// <param name="vectorSize">The vector dimension to apply; must be greater than zero.</param>
+    /// <returns>A new <see cref="VectorStoreConfig"/> with the updated vector size; this instance is not modified.</returns>
+    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="vectorSize"/> is less than or equal to zero.</exception>
+    public VectorStoreConfig WithVectorSize(int vectorSize)
+    {
+        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(vectorSize);
+
+        return this with
+        {
+            VectorSize = vectorSize,
+            IndexSchema = IndexSchema?.WithVectorSize(vectorSize), // stays null when no schema is configured
+        };
+    }
 }
diff --git a/dotnet/src/GraphRag/Config/Models/GraphRagConfig.cs b/dotnet/src/GraphRag/Config/Models/GraphRagConfig.cs
@@ -6,6 +6,7 @@
 using GraphRag.Config.Enums;
 using GraphRag.Input;
 using GraphRag.Llm.Config;
+using GraphRag.Llm.Types;
 using GraphRag.Storage;
 using GraphRag.Storage.Tables;
 using GraphRag.Vectors;
@@ -131,4 +132,35 @@ public ModelConfig GetEmbeddingModelConfig(string? modelId = null)
 
         throw new KeyNotFoundException($"Embedding model '{key}' not found in configuration.");
     }
+
+    /// <summary>
+    /// Returns a configuration whose vector store dimensions match the width of the first embedding in a response.
+    /// </summary>
+    /// <param name="embeddingModelId">The identifier of the embedding model that produced the response.</param>
+    /// <param name="response">The embedding response whose first embedding is measured.</param>
+    /// <returns>
+    /// This instance when the model is not the configured embed-text model, when the first embedding has no values,
+    /// or when its width already equals the configured vector size; otherwise a copy with the vector size updated.
+    /// </returns>
+    public GraphRagConfig SyncVectorStoreDimensions(string embeddingModelId, LlmEmbeddingResponse response)
+    {
+        ArgumentNullException.ThrowIfNull(embeddingModelId);
+        ArgumentNullException.ThrowIfNull(response);
+
+        if (!string.Equals(embeddingModelId, EmbedText.EmbeddingModelId, StringComparison.Ordinal))
+        {
+            return this; // only the embed-text model (ordinal, case-sensitive match) drives the vector size
+        }
+
+        var detectedVectorSize = response.FirstEmbedding.Count;
+        if (detectedVectorSize == 0 || detectedVectorSize == VectorStore.VectorSize)
+        {
+            return this; // nothing to measure, or the configuration is already aligned
+        }
+
+        return this with
+        {
+            VectorStore = VectorStore.WithVectorSize(detectedVectorSize),
+        };
+    }
 }
diff --git a/dotnet/tests/GraphRag.Tests.Unit/Config/GraphRagConfigMethodTests.cs b/dotnet/tests/GraphRag.Tests.Unit/Config/GraphRagConfigMethodTests.cs
@@ -6,6 +6,8 @@
 using GraphRag.Config.Errors;
 using GraphRag.Config.Models;
 using GraphRag.Llm.Config;
+using GraphRag.Llm.Types;
+using GraphRag.Vectors;
 
 namespace GraphRag.Tests.Unit.Config;
 
@@ -133,4 +135,57 @@ public void Workflows_CanBeSet()
 
         config.Workflows.Should().BeEquivalentTo(workflows);
     }
+
+    [Fact]
+    public void SyncVectorStoreDimensions_UpdatesVectorStoreAndSchema_ForConfiguredEmbeddingModel()
+    {
+        var config = new GraphRagConfig
+        {
+            EmbedText = new EmbedTextConfig { EmbeddingModelId = "embed-model" },
+            VectorStore = new VectorStoreConfig
+            {
+                Type = "azure_ai_search",
+                VectorSize = 3072,
+                IndexSchema = new IndexSchema { IndexName = "entities", VectorSize = 3072 },
+            },
+        };
+        var response = new LlmEmbeddingResponse([[1.0f, 2.0f, 3.0f]]); // one embedding of width 3
+
+        var result = config.SyncVectorStoreDimensions("embed-model", response);
+
+        result.Should().NotBeSameAs(config);
+        result.VectorStore.VectorSize.Should().Be(3);
+        result.VectorStore.IndexSchema.Should().NotBeNull();
+        result.VectorStore.IndexSchema!.VectorSize.Should().Be(3);
+        config.VectorStore.VectorSize.Should().Be(3072); // the original instance must stay untouched
+        config.VectorStore.IndexSchema!.VectorSize.Should().Be(3072);
+    }
+
+    [Fact]
+    public void SyncVectorStoreDimensions_ReturnsSameConfig_WhenEmbeddingModelDoesNotMatch()
+    {
+        var config = new GraphRagConfig
+        {
+            EmbedText = new EmbedTextConfig { EmbeddingModelId = "embed-model" },
+        };
+        var response = new LlmEmbeddingResponse([[1.0f, 2.0f, 3.0f]]);
+
+        var result = config.SyncVectorStoreDimensions("different-model", response); // ID differs from the configured one
+
+        result.Should().BeSameAs(config); // same reference: no copy is made
+    }
+
+    [Fact]
+    public void SyncVectorStoreDimensions_ReturnsSameConfig_WhenResponseIsEmpty()
+    {
+        var config = new GraphRagConfig
+        {
+            EmbedText = new EmbedTextConfig { EmbeddingModelId = "embed-model" },
+        };
+        var response = new LlmEmbeddingResponse([]); // response carrying no embeddings
+
+        var result = config.SyncVectorStoreDimensions("embed-model", response);
+
+        result.Should().BeSameAs(config); // same reference: no copy is made
+    }
 }
diff --git a/dotnet/tests/GraphRag.Tests.Unit/Vectors/IndexSchemaTests.cs b/dotnet/tests/GraphRag.Tests.Unit/Vectors/IndexSchemaTests.cs
@@ -21,4 +21,16 @@ public void DefaultValues_AreCorrect()
         schema.VectorSize.Should().Be(3072);
         schema.Fields.Should().BeNull();
     }
+
+    [Fact]
+    public void WithVectorSize_ReturnsUpdatedCopy()
+    {
+        var schema = new IndexSchema { IndexName = "test", VectorSize = 3072 };
+
+        var updated = schema.WithVectorSize(1536);
+
+        updated.Should().NotBeSameAs(schema);
+        updated.VectorSize.Should().Be(1536);
+        schema.VectorSize.Should().Be(3072); // the source schema keeps its original size
+    }
 }
diff --git a/dotnet/tests/GraphRag.Tests.Unit/Vectors/VectorStoreConfigTests.cs b/dotnet/tests/GraphRag.Tests.Unit/Vectors/VectorStoreConfigTests.cs
@@ -0,0 +1,33 @@
+// Copyright (c) 2025 Microsoft Corporation.
+// Licensed under the MIT License
+
+using FluentAssertions;
+using GraphRag.Vectors;
+
+namespace GraphRag.Tests.Unit.Vectors;
+
+/// <summary>
+/// Unit tests for the copy helpers on <see cref="VectorStoreConfig"/>.
+/// </summary>
+public class VectorStoreConfigTests
+{
+    [Fact]
+    public void WithVectorSize_ReturnsUpdatedCopy_AndSynchronizesSchema()
+    {
+        var config = new VectorStoreConfig
+        {
+            Type = "azure_ai_search",
+            VectorSize = 3072,
+            IndexSchema = new IndexSchema { IndexName = "entities", VectorSize = 3072 },
+        };
+
+        var updated = config.WithVectorSize(1536);
+
+        updated.Should().NotBeSameAs(config);
+        updated.VectorSize.Should().Be(1536);
+        updated.IndexSchema.Should().NotBeNull();
+        updated.IndexSchema!.VectorSize.Should().Be(1536); // the nested schema is resized with the store
+        config.VectorSize.Should().Be(3072); // the original instance must stay untouched
+        config.IndexSchema!.VectorSize.Should().Be(3072);
+    }
+}
diff --git a/packages/graphrag/graphrag/index/validate_config.py b/packages/graphrag/graphrag/index/validate_config.py
@@ -6,12 +6,16 @@
 import asyncio
 import logging
 import sys
+from typing import TYPE_CHECKING
 
 from graphrag_llm.completion import create_completion
 from graphrag_llm.embedding import create_embedding
 
 from graphrag.config.models.graph_rag_config import GraphRagConfig
 
+if TYPE_CHECKING:
+    from graphrag_llm.types import LLMEmbeddingResponse
+
 logger = logging.getLogger(__name__)
 
 
@@ -29,13 +33,40 @@ def validate_config_names(parameters: GraphRagConfig) -> None:
     for id, config in parameters.embedding_models.items():
         embed_llm = create_embedding(config)
         try:
-            asyncio.run(
+            response = asyncio.run(
                 embed_llm.embedding_async(
                     input=["This is an LLM Embedding Test String"]
                 )
             )
             logger.info("Embedding LLM Config Params Validated")
+
+            if id == parameters.embed_text.embedding_model_id:
+                _sync_vector_store_dimensions(parameters, response)
         except Exception as e:  # noqa: BLE001
             logger.error(f"Embedding configuration error detected.\n{e}")  # noqa
             print(f"Failed to validate embedding model ({id}) params", e)  # noqa: T201
             sys.exit(1)
+
+
+def _sync_vector_store_dimensions(
+    parameters: GraphRagConfig,
+    response: "LLMEmbeddingResponse",
+) -> None:
+    """Mutate the config so vector sizes match the embedding model's output width."""
+    detected = len(response.first_embedding)
+    if detected == 0:
+        return  # empty embedding: nothing to compare against
+
+    configured = parameters.vector_store.vector_size
+    if detected == configured:
+        return
+
+    logger.warning(
+        "Embedding model produces %d-dimensional vectors but vector_size is "
+        "configured as %d. Overriding vector_size to match the model.",
+        detected,
+        configured,
+    )
+    parameters.vector_store.vector_size = detected
+    for schema in parameters.vector_store.index_schema.values():
+        schema.vector_size = detected  # keep each schema in step with the store