diff --git a/dotnet/src/GraphRag/DataModel/Entity.cs b/dotnet/src/GraphRag/DataModel/Entity.cs
index f98c779df3..ef0db2e431 100644
--- a/dotnet/src/GraphRag/DataModel/Entity.cs
+++ b/dotnet/src/GraphRag/DataModel/Entity.cs
@@ -38,6 +38,16 @@ public sealed record Entity : Named
     /// </summary>
     public IReadOnlyList? TextUnitIds { get; init; }
 
+    /// <summary>
+    /// Gets the frequency count indicating how many times this entity was extracted (0 when not supplied).
+    /// </summary>
+    public int Frequency { get; init; }
+
+    /// <summary>
+    /// Gets the degree of the entity in the graph (number of relationships involving this entity; 0 when not supplied).
+    /// </summary>
+    public int Degree { get; init; }
+
     /// <summary>
     /// Gets the rank of the entity.
     /// </summary>
@@ -66,6 +76,9 @@ public static Entity FromDictionary(Dictionary data)
             NameEmbedding = data.TryGetValue("name_embedding", out var nameEmb) ? nameEmb as IReadOnlyList : null,
             CommunityIds = data.TryGetValue("community_ids", out var commIds) ? commIds as IReadOnlyList : null,
             TextUnitIds = data.TryGetValue("text_unit_ids", out var tuIds) ? tuIds as IReadOnlyList : null,
+            // SafeInt also converts long and numeric-string values; an "is int" test would silently map them to 0.
+            Frequency = data.TryGetValue("frequency", out var freq) ? RowTransformers.SafeInt(freq, 0) : 0,
+            Degree = data.TryGetValue("degree", out var deg) ? RowTransformers.SafeInt(deg, 0) : 0,
             Rank = data.TryGetValue("rank", out var rank) && rank is int r ? r : 1,
             Attributes = data.TryGetValue("attributes", out var attrs) ? attrs as Dictionary : null,
         };
diff --git a/dotnet/src/GraphRag/DataModel/Relationship.cs b/dotnet/src/GraphRag/DataModel/Relationship.cs
index 41fc6ae15a..bba99983e7 100644
--- a/dotnet/src/GraphRag/DataModel/Relationship.cs
+++ b/dotnet/src/GraphRag/DataModel/Relationship.cs
@@ -38,6 +38,11 @@ public sealed record Relationship : Identified
     /// </summary>
     public IReadOnlyList? TextUnitIds { get; init; }
 
+    /// <summary>
+    /// Gets the combined degree of the source and target entities in the graph.
+    /// </summary>
+    public int CombinedDegree { get; init; }
+
     /// <summary>
     /// Gets the rank of the relationship.
/// </summary>
@@ -65,6 +70,8 @@ public static Relationship FromDictionary(Dictionary data)
             Description = data.TryGetValue("description", out var desc) ? desc?.ToString() : null,
             DescriptionEmbedding = data.TryGetValue("description_embedding", out var descEmb) ? descEmb as IReadOnlyList : null,
             TextUnitIds = data.TryGetValue("text_unit_ids", out var tuIds) ? tuIds as IReadOnlyList : null,
+            // SafeInt also converts long and numeric-string values; an "is int" test would silently map them to 0.
+            CombinedDegree = data.TryGetValue("combined_degree", out var cd) ? RowTransformers.SafeInt(cd, 0) : 0,
             Rank = data.TryGetValue("rank", out var rank) && rank is int r ? r : 1,
             Attributes = data.TryGetValue("attributes", out var attrs) ? attrs as Dictionary : null,
         };
diff --git a/dotnet/src/GraphRag/DataModel/RowTransformers.cs b/dotnet/src/GraphRag/DataModel/RowTransformers.cs
new file mode 100644
index 0000000000..fb51d789cf
--- /dev/null
+++ b/dotnet/src/GraphRag/DataModel/RowTransformers.cs
@@ -0,0 +1,333 @@
+// Copyright (c) 2025 Microsoft Corporation.
+// Licensed under the MIT License
+
+using System.Globalization;
+
+namespace GraphRag.DataModel;
+
+/// <summary>
+/// Row-level type coercion helpers for streaming table reads.
+/// </summary>
+/// <remarks>
+/// Each transformer converts a raw <see cref="Dictionary{TKey, TValue}"/>
+/// row (as produced by CSV/Parquet readers, where values may be strings)
+/// into a dictionary with properly typed fields. The row is updated in
+/// place and the same instance is returned.
+/// </remarks>
+public static class RowTransformers
+{
+    /// <summary>
+    /// Coerces a value to an integer, returning <paramref name="fill"/> when the value is
+    /// missing, empty, not a valid integer, or outside the <see cref="int"/> range.
+    /// </summary>
+    /// <param name="value">The value to convert.</param>
+    /// <param name="fill">The default value when conversion fails.</param>
+    /// <returns>The converted integer value.</returns>
+    public static int SafeInt(object? value, int fill = -1)
+    {
+        switch (value)
+        {
+            case null:
+                return fill;
+            case int i:
+                return i;
+            case long l:
+                // A bare (int) cast would wrap (or throw in a checked build) when out of range.
+                return l is >= int.MinValue and <= int.MaxValue ? (int)l : fill;
+        }
+
+        // Format with the invariant culture so the text matches the invariant
+        // parse below whatever the current thread culture is.
+        var s = Convert.ToString(value, CultureInfo.InvariantCulture);
+        if (string.IsNullOrEmpty(s))
+        {
+            return fill;
+        }
+
+        return int.TryParse(s, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed)
+            ? parsed
+            : fill;
+    }
+
+    /// <summary>
+    /// Coerces a value to a double, returning <paramref name="fill"/> when the value is
+    /// missing, empty, not a valid number, or NaN.
+    /// </summary>
+    /// <param name="value">The value to convert.</param>
+    /// <param name="fill">The default value when conversion fails.</param>
+    /// <returns>The converted double value.</returns>
+    public static double SafeFloat(object? value, double fill = 0.0)
+    {
+        switch (value)
+        {
+            case null:
+                return fill;
+            case double d:
+                return double.IsNaN(d) ? fill : d;
+            case float f:
+                return float.IsNaN(f) ? fill : f;
+        }
+
+        // Invariant formatting keeps e.g. a decimal 1.5 as "1.5"; text such as "1,5"
+        // would be read below as 15 because ',' is the invariant thousands separator.
+        var s = Convert.ToString(value, CultureInfo.InvariantCulture);
+        if (string.IsNullOrEmpty(s))
+        {
+            return fill;
+        }
+
+        var isNumber = double.TryParse(
+            s,
+            NumberStyles.Float | NumberStyles.AllowThousands,
+            CultureInfo.InvariantCulture,
+            out var parsed);
+
+        return isNumber && !double.IsNaN(parsed) ? parsed : fill;
+    }
+
+    /// <summary>
+    /// Parses a value into a list, handling CSV-encoded strings and array types.
+    /// </summary>
+    /// <param name="value">The value to parse.</param>
+    /// <returns>
+    /// A list of string values. A <see cref="List{T}"/> of strings is returned as-is
+    /// (not copied); null and unsupported values yield an empty list.
+    /// </returns>
+    public static List<string> CoerceList(object? value)
+    {
+        switch (value)
+        {
+            case List<string> list:
+                return list;
+            case string s:
+                return ParseListString(s);
+            case IEnumerable<string> strings:
+                return [.. strings];
+            case IEnumerable<object> items:
+                // Loosely typed collections (object[], List<object>, ...) would otherwise
+                // fall through to the empty result and lose their contents.
+                return StringifyItems(items);
+            default:
+                return [];
+        }
+    }
+
+    /// <summary>
+    /// Coerces types for an entity row.
+    /// </summary>
+    /// <param name="row">The raw row dictionary.</param>
+    /// <returns>The same dictionary with coerced types.</returns>
+    public static Dictionary<string, object?> TransformEntityRow(Dictionary<string, object?> row)
+    {
+        CoerceIntIfPresent(row, "human_readable_id");
+        CoerceListIfPresent(row, "text_unit_ids");
+        CoerceIntIfPresent(row, "frequency", 0);
+        CoerceIntIfPresent(row, "degree", 0);
+
+        return row;
+    }
+
+    /// <summary>
+    /// Adds a title_description column for embedding generation.
+    /// </summary>
+    /// <param name="row">The raw row dictionary.</param>
+    /// <returns>The same dictionary with the added column.</returns>
+    public static Dictionary<string, object?> TransformEntityRowForEmbedding(Dictionary<string, object?> row)
+    {
+        var title = row.GetValueOrDefault("title")?.ToString() ?? string.Empty;
+        var description = row.GetValueOrDefault("description")?.ToString() ?? string.Empty;
+        row["title_description"] = $"{title}:{description}";
+        return row;
+    }
+
+    /// <summary>
+    /// Coerces types for a relationship row.
+    /// </summary>
+    /// <param name="row">The raw row dictionary.</param>
+    /// <returns>The same dictionary with coerced types.</returns>
+    public static Dictionary<string, object?> TransformRelationshipRow(Dictionary<string, object?> row)
+    {
+        CoerceIntIfPresent(row, "human_readable_id");
+
+        if (row.TryGetValue("weight", out var weight))
+        {
+            row["weight"] = SafeFloat(weight);
+        }
+
+        CoerceIntIfPresent(row, "combined_degree", 0);
+        CoerceListIfPresent(row, "text_unit_ids");
+
+        return row;
+    }
+
+    /// <summary>
+    /// Coerces types for a community row.
+    /// </summary>
+    /// <remarks>
+    /// The community, level, children, period and size columns are always written,
+    /// using their fallback values when the row does not contain them.
+    /// </remarks>
+    /// <param name="row">The raw row dictionary.</param>
+    /// <returns>The same dictionary with coerced types.</returns>
+    public static Dictionary<string, object?> TransformCommunityRow(Dictionary<string, object?> row)
+    {
+        CoerceIntIfPresent(row, "human_readable_id");
+
+        row["community"] = SafeInt(row.GetValueOrDefault("community"));
+        row["level"] = SafeInt(row.GetValueOrDefault("level"));
+        row["children"] = CoerceList(row.GetValueOrDefault("children"));
+
+        CoerceListIfPresent(row, "entity_ids");
+        CoerceListIfPresent(row, "relationship_ids");
+        CoerceListIfPresent(row, "text_unit_ids");
+
+        row["period"] = row.GetValueOrDefault("period")?.ToString() ?? string.Empty;
+        row["size"] = SafeInt(row.GetValueOrDefault("size"), 0);
+
+        return row;
+    }
+
+    /// <summary>
+    /// Coerces types for a community report row.
+    /// </summary>
+    /// <param name="row">The raw row dictionary.</param>
+    /// <returns>The same dictionary with coerced types.</returns>
+    public static Dictionary<string, object?> TransformCommunityReportRow(Dictionary<string, object?> row)
+    {
+        CoerceIntIfPresent(row, "human_readable_id");
+
+        row["community"] = SafeInt(row.GetValueOrDefault("community"));
+        row["level"] = SafeInt(row.GetValueOrDefault("level"));
+        row["children"] = CoerceList(row.GetValueOrDefault("children"));
+        row["rank"] = SafeFloat(row.GetValueOrDefault("rank"));
+        row["findings"] = CoerceList(row.GetValueOrDefault("findings"));
+        row["size"] = SafeInt(row.GetValueOrDefault("size"), 0);
+
+        return row;
+    }
+
+    /// <summary>
+    /// Coerces types for a covariate row.
+    /// </summary>
+    /// <param name="row">The raw row dictionary.</param>
+    /// <returns>The same dictionary with coerced types.</returns>
+    public static Dictionary<string, object?> TransformCovariateRow(Dictionary<string, object?> row)
+    {
+        CoerceIntIfPresent(row, "human_readable_id");
+        return row;
+    }
+
+    /// <summary>
+    /// Coerces types for a text unit row.
+    /// </summary>
+    /// <param name="row">The raw row dictionary.</param>
+    /// <returns>The same dictionary with coerced types.</returns>
+    public static Dictionary<string, object?> TransformTextUnitRow(Dictionary<string, object?> row)
+    {
+        CoerceIntIfPresent(row, "human_readable_id");
+
+        row["n_tokens"] = SafeInt(row.GetValueOrDefault("n_tokens"), 0);
+
+        CoerceListIfPresent(row, "entity_ids");
+        CoerceListIfPresent(row, "relationship_ids");
+        CoerceListIfPresent(row, "covariate_ids");
+
+        return row;
+    }
+
+    /// <summary>
+    /// Coerces types for a document row.
+    /// </summary>
+    /// <param name="row">The raw row dictionary.</param>
+    /// <returns>The same dictionary with coerced types.</returns>
+    public static Dictionary<string, object?> TransformDocumentRow(Dictionary<string, object?> row)
+    {
+        CoerceIntIfPresent(row, "human_readable_id");
+        CoerceListIfPresent(row, "text_unit_ids");
+
+        return row;
+    }
+
+    /// <summary>
+    /// Parses a CSV-encoded list string (e.g. "[a, b, c]") into a list.
+    /// </summary>
+    /// <remarks>
+    /// The text is split on every comma, so a quoted item that itself contains
+    /// a comma comes back as separate items.
+    /// </remarks>
+    /// <param name="value">The string value to parse.</param>
+    /// <returns>A list of trimmed, non-empty string values.</returns>
+    internal static List<string> ParseListString(string value)
+    {
+        if (string.IsNullOrWhiteSpace(value))
+        {
+            return [];
+        }
+
+        // Strip surrounding brackets if present.
+        var trimmed = value.Trim();
+        if (trimmed.StartsWith('[') && trimmed.EndsWith(']'))
+        {
+            trimmed = trimmed[1..^1];
+        }
+
+        if (string.IsNullOrWhiteSpace(trimmed))
+        {
+            return [];
+        }
+
+        var parts = trimmed.Split(',');
+        var result = new List<string>(parts.Length);
+        foreach (var part in parts)
+        {
+            // Drop padding first, then any surrounding quote characters.
+            var item = part.Trim().Trim('\'', '"');
+            if (!string.IsNullOrEmpty(item))
+            {
+                result.Add(item);
+            }
+        }
+
+        return result;
+    }
+
+    /// <summary>
+    /// Replaces the value stored under <paramref name="key"/> with its integer form when the key is present.
+    /// </summary>
+    private static void CoerceIntIfPresent(Dictionary<string, object?> row, string key, int fill = -1)
+    {
+        if (row.TryGetValue(key, out var raw))
+        {
+            row[key] = SafeInt(raw, fill);
+        }
+    }
+
+    /// <summary>
+    /// Replaces the value stored under <paramref name="key"/> with its list form when the key is present.
+    /// </summary>
+    private static void CoerceListIfPresent(Dictionary<string, object?> row, string key)
+    {
+        if (row.TryGetValue(key, out var raw))
+        {
+            row[key] = CoerceList(raw);
+        }
+    }
+
+    /// <summary>
+    /// Converts each item to invariant-culture text, skipping null and empty results.
+    /// </summary>
+    private static List<string> StringifyItems(IEnumerable<object> items)
+    {
+        var result = new List<string>();
+        foreach (var item in items)
+        {
+            var text = Convert.ToString(item, CultureInfo.InvariantCulture);
+            if (!string.IsNullOrEmpty(text))
+            {
+                result.Add(text);
+            }
+        }
+
+        return result;
+    }
+}
diff --git a/dotnet/src/GraphRag/Index/Operations/GraphUtils.cs b/dotnet/src/GraphRag/Index/Operations/GraphUtils.cs
index 89676ee8ee..2f17851bf4 100644
--- a/dotnet/src/GraphRag/Index/Operations/GraphUtils.cs
+++ b/dotnet/src/GraphRag/Index/Operations/GraphUtils.cs
@@ -32,6 +32,49 @@ public static int ComputeNodeDegree(string nodeId, IReadOnlyList r
         return degree;
     }
 
+    /// <summary>
+    /// Filters out relationships whose source or target entity does not exist.
+    /// </summary>
+    /// <remarks>
+    /// After LLM graph extraction the model may hallucinate entity names in
+    /// relationships that have no corresponding entity. This method drops
+    /// those dangling references so downstream processing never encounters
+    /// broken graph edges.
+    /// </remarks>
+    /// <param name="relationships">The relationships to filter.</param>
+    /// <param name="entities">The entities whose titles form the valid set.</param>
+    /// <returns>A new list containing only relationships whose source and target both appear in <paramref name="entities"/>.</returns>
+    public static IReadOnlyList<Relationship> FilterOrphanRelationships(
+        IReadOnlyList<Relationship> relationships,
+        IReadOnlyList<Entity> entities)
+    {
+        if (entities.Count == 0 || relationships.Count == 0)
+        {
+            return [];
+        }
+
+        // Collect the known titles once so each endpoint check is a set lookup.
+        var knownTitles = new HashSet<string>(StringComparer.Ordinal);
+        foreach (var entity in entities)
+        {
+            if (entity.Title is { } title)
+            {
+                knownTitles.Add(title);
+            }
+        }
+
+        var kept = new List<Relationship>();
+        foreach (var relationship in relationships)
+        {
+            if (knownTitles.Contains(relationship.Source) && knownTitles.Contains(relationship.Target))
+            {
+                kept.Add(relationship);
+            }
+        }
+
+        return kept.AsReadOnly();
+    }
+
     /// <summary>
     /// Finds connected components in the graph using union-find.
     /// </summary>
diff --git a/dotnet/src/GraphRag/Index/Run/PipelineRunner.cs b/dotnet/src/GraphRag/Index/Run/PipelineRunner.cs
index 4d69174eaa..15847f3aae 100644
--- a/dotnet/src/GraphRag/Index/Run/PipelineRunner.cs
+++ b/dotnet/src/GraphRag/Index/Run/PipelineRunner.cs
@@ -2,6 +2,7 @@
 // Licensed under the MIT License
 
 using System.Runtime.CompilerServices;
+using System.Text.Json;
 
 using GraphRag.Callbacks;
 using GraphRag.Config.Models;
@@ -14,6 +15,11 @@ namespace GraphRag.Index.Run;
 /// </summary>
 public static class PipelineRunner
 {
+    private static readonly JsonSerializerOptions JsonOptions = new()
+    {
+        WriteIndented = true,
+    };
+
     /// <summary>
     /// Runs all workflows in the given pipeline, yielding a result for each one.
     /// </summary>
@@ -42,6 +48,10 @@ public static async IAsyncEnumerable RunPipelineAsync(
 
         var results = new List();
 
+        // Initial stats dump before workflows start, matching Python behavior.
+        // This creates the stats.json file so monitoring tools can detect pipeline startup.
+        await DumpStatsAsync(context, ct).ConfigureAwait(false);
+
         foreach (var (name, function) in pipeline.Run())
         {
             ct.ThrowIfCancellationRequested();
@@ -100,6 +110,9 @@ public static async IAsyncEnumerable RunPipelineAsync(
                     Error: ex);
             }
 
+            // Dump stats after each workflow, matching Python's per-workflow stats persistence.
+            await DumpStatsAsync(context, ct).ConfigureAwait(false);
+
             yield return result;
 
             if (error is not null || stop)
@@ -114,4 +127,39 @@ public static async IAsyncEnumerable RunPipelineAsync(
                 cb.OnPipelineEnd(results);
             }
         }
+
+    /// <summary>
+    /// Writes a snapshot of the current pipeline run statistics to the output storage as stats.json.
+    /// </summary>
+    /// <param name="context">The pipeline run context containing stats and storage.</param>
+    /// <param name="ct">A token to cancel the operation.</param>
+    /// <returns>A task representing the asynchronous operation.</returns>
+    internal static async Task DumpStatsAsync(PipelineRunContext context, CancellationToken ct = default)
+    {
+        var stats = context.Stats;
+        var payload = new Dictionary<string, object?>
+        {
+            ["total_runtime"] = stats.TotalRuntime,
+            ["num_documents"] = stats.NumDocuments,
+            ["update_documents"] = stats.UpdateDocuments,
+            ["input_load_time"] = stats.InputLoadTime,
+        };
+
+        // One nested object per workflow, keyed exactly as in context.Stats.Workflows.
+        var workflows = new Dictionary<string, object?>();
+        foreach (var entry in stats.Workflows)
+        {
+            workflows.Add(entry.Key, new Dictionary<string, object?>
+            {
+                ["overall_time_seconds"] = entry.Value.OverallTimeSeconds,
+                ["peak_memory_bytes"] = entry.Value.PeakMemoryBytes,
+                ["memory_delta_bytes"] = entry.Value.MemoryDeltaBytes,
+            });
+        }
+
+        payload["workflows"] = workflows;
+
+        var json = JsonSerializer.Serialize(payload, JsonOptions);
+        await context.Storage.SetAsync("stats.json", json, cancellationToken: ct).ConfigureAwait(false);
+    }
 }
diff --git a/dotnet/tests/GraphRag.Tests.Unit/DataModel/RowTransformersTests.cs b/dotnet/tests/GraphRag.Tests.Unit/DataModel/RowTransformersTests.cs
new file mode 100644
index 0000000000..55c06f16a0
--- /dev/null
+++ b/dotnet/tests/GraphRag.Tests.Unit/DataModel/RowTransformersTests.cs
@@ -0,0 +1,266 @@
+// Copyright (c) 2025 Microsoft Corporation.
+// Licensed under the MIT License
+
+using FluentAssertions;
+using GraphRag.DataModel;
+
+namespace GraphRag.Tests.Unit.DataModel;
+
+public class RowTransformersTests
+{
+    private static readonly string[] AbcList = ["a", "b", "c"];
+    private static readonly string[] XyList = ["x", "y"];
+    private static readonly string[] AbList = ["a", "b"];
+    private static readonly string[] HelloWorldList = ["hello", "world"];
+    private static readonly string[] Tu1Tu2List = ["tu1", "tu2"];
+    private static readonly string[] Tu1List = ["tu1"];
+    private static readonly string[] Tu1Tu2Tu3List = ["tu1", "tu2", "tu3"];
+
+    [Theory]
+    [InlineData(null, -1)]
+    [InlineData("", -1)]
+    [InlineData("42", 42)]
+    [InlineData("notanumber", -1)]
+    public void SafeInt_CoercesCorrectly(object? value, int expected)
+    {
+        RowTransformers.SafeInt(value).Should().Be(expected);
+    }
+
+    [Fact]
+    public void SafeInt_IntValue_ReturnsDirectly()
+    {
+        RowTransformers.SafeInt(7).Should().Be(7);
+    }
+
+    [Fact]
+    public void SafeInt_LongValue_CastsToInt()
+    {
+        RowTransformers.SafeInt(42L).Should().Be(42);
+    }
+
+    [Fact]
+    public void SafeInt_CustomFill_ReturnsOnFailure()
+    {
+        RowTransformers.SafeInt(null, 99).Should().Be(99);
+    }
+
+    [Theory]
+    [InlineData(null, 0.0)]
+    [InlineData("", 0.0)]
+    [InlineData("3.14", 3.14)]
+    [InlineData("notanumber", 0.0)]
+    public void SafeFloat_CoercesCorrectly(object? value, double expected)
+    {
+        RowTransformers.SafeFloat(value).Should().BeApproximately(expected, 0.001);
+    }
+
+    [Fact]
+    public void SafeFloat_DoubleNaN_ReturnsFill()
+    {
+        RowTransformers.SafeFloat(double.NaN).Should().Be(0.0);
+    }
+
+    [Fact]
+    public void SafeFloat_FloatNaN_ReturnsFill()
+    {
+        RowTransformers.SafeFloat(float.NaN).Should().Be(0.0);
+    }
+
+    [Fact]
+    public void SafeFloat_DoubleValue_ReturnsDirect()
+    {
+        RowTransformers.SafeFloat(2.5).Should().Be(2.5);
+    }
+
+    [Fact]
+    public void CoerceList_Null_ReturnsEmpty()
+    {
+        RowTransformers.CoerceList(null).Should().BeEmpty();
+    }
+
+    [Fact]
+    public void CoerceList_StringList_ParsesCsv()
+    {
+        var result = RowTransformers.CoerceList("[a, b, c]");
+        result.Should().Equal(AbcList);
+    }
+
+    [Fact]
+    public void CoerceList_ExistingList_ReturnsSame()
+    {
+        var input = new List<string> { "x", "y" };
+        var result = RowTransformers.CoerceList(input);
+        result.Should().BeSameAs(input).And.Equal(XyList);
+    }
+
+    [Fact]
+    public void CoerceList_ReadOnlyList_CopiesList()
+    {
+        IReadOnlyList<string> input = AbList;
+        var result = RowTransformers.CoerceList(input);
+        result.Should().NotBeSameAs(input).And.Equal(AbList);
+    }
+
+    [Fact]
+    public void CoerceList_QuotedCsv_StripsQuotes()
+    {
+        var result = RowTransformers.CoerceList("['hello', \"world\"]");
+        result.Should().Equal(HelloWorldList);
+    }
+
+    [Fact]
+    public void TransformEntityRow_CoercesTypes()
+    {
+        var row = new Dictionary<string, object?>
+        {
+            ["human_readable_id"] = "5",
+            ["text_unit_ids"] = "[tu1, tu2]",
+            ["frequency"] = "3",
+            ["degree"] = "7",
+        };
+
+        var result = RowTransformers.TransformEntityRow(row);
+
+        result["human_readable_id"].Should().Be(5);
+        result["text_unit_ids"].Should().BeEquivalentTo(Tu1Tu2List);
+        result["frequency"].Should().Be(3);
+        result["degree"].Should().Be(7);
+    }
+
+    [Fact]
+    public void TransformEntityRowForEmbedding_AddsTitleDescription()
+    {
+        var row = new Dictionary<string, object?>
+        {
+            ["title"] = "Entity1",
+            ["description"] = "A test entity",
+        };
+
+        var result = RowTransformers.TransformEntityRowForEmbedding(row);
+
+        result["title_description"].Should().Be("Entity1:A test entity");
+    }
+
+    [Fact]
+    public void TransformRelationshipRow_CoercesTypes()
+    {
+        var row = new Dictionary<string, object?>
+        {
+            ["human_readable_id"] = "2",
+            ["weight"] = "1.5",
+            ["combined_degree"] = "10",
+            ["text_unit_ids"] = "[tu1]",
+        };
+
+        var result = RowTransformers.TransformRelationshipRow(row);
+
+        result["human_readable_id"].Should().Be(2);
+        result["weight"].Should().Be(1.5);
+        result["combined_degree"].Should().Be(10);
+        result["text_unit_ids"].Should().BeEquivalentTo(Tu1List);
+    }
+
+    [Fact]
+    public void TransformCommunityRow_CoercesTypes()
+    {
+        var row = new Dictionary<string, object?>
+        {
+            ["human_readable_id"] = "1",
+            ["community"] = "3",
+            ["level"] = "2",
+            ["children"] = "[c1, c2]",
+            ["entity_ids"] = "[e1]",
+            ["relationship_ids"] = "[r1]",
+            ["text_unit_ids"] = "[tu1]",
+            ["period"] = "2025-01",
+            ["size"] = "42",
+        };
+
+        var result = RowTransformers.TransformCommunityRow(row);
+
+        result["human_readable_id"].Should().Be(1);
+        result["community"].Should().Be(3);
+        result["level"].Should().Be(2);
+        result["size"].Should().Be(42);
+        result["period"].Should().Be("2025-01");
+    }
+
+    [Fact]
+    public void TransformCommunityReportRow_CoercesTypes()
+    {
+        var row = new Dictionary<string, object?>
+        {
+            ["human_readable_id"] = "0",
+            ["community"] = "1",
+            ["level"] = "0",
+            ["children"] = string.Empty,
+            ["rank"] = "4.5",
+            ["findings"] = "[f1, f2]",
+            ["size"] = "10",
+        };
+
+        var result = RowTransformers.TransformCommunityReportRow(row);
+
+        result["rank"].Should().Be(4.5);
+        result["size"].Should().Be(10);
+    }
+
+    [Fact]
+    public void TransformTextUnitRow_CoercesTypes()
+    {
+        var row = new Dictionary<string, object?>
+        {
+            ["human_readable_id"] = "3",
+            ["n_tokens"] = "100",
+            ["entity_ids"] = "[e1, e2]",
+            ["relationship_ids"] = "[r1]",
+            ["covariate_ids"] = "[cv1]",
+        };
+
+        var result = RowTransformers.TransformTextUnitRow(row);
+
+        result["human_readable_id"].Should().Be(3);
+        result["n_tokens"].Should().Be(100);
+    }
+
+    [Fact]
+    public void TransformDocumentRow_CoercesTypes()
+    {
+        var row = new Dictionary<string, object?>
+        {
+            ["human_readable_id"] = "1",
+            ["text_unit_ids"] = "[tu1, tu2, tu3]",
+        };
+
+        var result = RowTransformers.TransformDocumentRow(row);
+
+        result["human_readable_id"].Should().Be(1);
+        result["text_unit_ids"].Should().BeEquivalentTo(Tu1Tu2Tu3List);
+    }
+
+    [Fact]
+    public void TransformCovariateRow_CoercesTypes()
+    {
+        var row = new Dictionary<string, object?>
+        {
+            ["human_readable_id"] = "7",
+        };
+
+        var result = RowTransformers.TransformCovariateRow(row);
+
+        result["human_readable_id"].Should().Be(7);
+    }
+
+    [Fact]
+    public void CoerceList_EmptyBrackets_ReturnsEmpty()
+    {
+        RowTransformers.CoerceList("[]").Should().BeEmpty();
+    }
+
+    [Fact]
+    public void CoerceList_NoBrackets_ParsesAsCsv()
+    {
+        var result = RowTransformers.CoerceList("a, b, c");
+        result.Should().Equal(AbcList);
+    }
+}
diff --git a/dotnet/tests/GraphRag.Tests.Unit/Index/GraphUtilsFilterOrphanTests.cs b/dotnet/tests/GraphRag.Tests.Unit/Index/GraphUtilsFilterOrphanTests.cs
new file mode 100644
index 0000000000..1856c8f5b8
--- /dev/null
+++ b/dotnet/tests/GraphRag.Tests.Unit/Index/GraphUtilsFilterOrphanTests.cs
@@ -0,0 +1,139 @@
+// Copyright (c) 2025 Microsoft Corporation.
+// Licensed under the MIT License
+
+using FluentAssertions;
+using GraphRag.DataModel;
+using GraphRag.Index.Operations;
+
+namespace GraphRag.Tests.Unit.Index;
+
+public class GraphUtilsFilterOrphanTests
+{
+    [Fact]
+    public void FilterOrphanRelationships_EmptyRelationships_ReturnsEmpty()
+    {
+        List<Entity> entities =
+        [
+            new() { Id = "1", Title = "A" },
+        ];
+
+        var kept = GraphUtils.FilterOrphanRelationships([], entities);
+
+        kept.Should().BeEmpty();
+    }
+
+    [Fact]
+    public void FilterOrphanRelationships_EmptyEntities_ReturnsEmpty()
+    {
+        List<Relationship> relationships =
+        [
+            new() { Id = "r1", Source = "A", Target = "B" },
+        ];
+
+        var kept = GraphUtils.FilterOrphanRelationships(relationships, []);
+
+        kept.Should().BeEmpty();
+    }
+
+    [Fact]
+    public void FilterOrphanRelationships_AllValid_ReturnsAll()
+    {
+        List<Entity> entities =
+        [
+            new() { Id = "1", Title = "A" },
+            new() { Id = "2", Title = "B" },
+            new() { Id = "3", Title = "C" },
+        ];
+
+        List<Relationship> relationships =
+        [
+            new() { Id = "r1", Source = "A", Target = "B" },
+            new() { Id = "r2", Source = "B", Target = "C" },
+        ];
+
+        var kept = GraphUtils.FilterOrphanRelationships(relationships, entities);
+
+        kept.Should().HaveCount(2);
+    }
+
+    [Fact]
+    public void FilterOrphanRelationships_OrphanSource_DropsRelationship()
+    {
+        List<Entity> entities =
+        [
+            new() { Id = "1", Title = "A" },
+            new() { Id = "2", Title = "B" },
+        ];
+
+        List<Relationship> relationships =
+        [
+            new() { Id = "r1", Source = "A", Target = "B" },
+            new() { Id = "r2", Source = "PHANTOM", Target = "B" },
+        ];
+
+        var kept = GraphUtils.FilterOrphanRelationships(relationships, entities);
+
+        kept.Should().ContainSingle();
+        kept[0].Id.Should().Be("r1");
+    }
+
+    [Fact]
+    public void FilterOrphanRelationships_OrphanTarget_DropsRelationship()
+    {
+        List<Entity> entities =
+        [
+            new() { Id = "1", Title = "A" },
+            new() { Id = "2", Title = "B" },
+        ];
+
+        List<Relationship> relationships =
+        [
+            new() { Id = "r1", Source = "A", Target = "B" },
+            new() { Id = "r2", Source = "A", Target = "PHANTOM" },
+        ];
+
+        var kept = GraphUtils.FilterOrphanRelationships(relationships, entities);
+
+        kept.Should().ContainSingle();
+        kept[0].Id.Should().Be("r1");
+    }
+
+    [Fact]
+    public void FilterOrphanRelationships_BothOrphan_DropsAll()
+    {
+        List<Entity> entities =
+        [
+            new() { Id = "1", Title = "A" },
+        ];
+
+        List<Relationship> relationships =
+        [
+            new() { Id = "r1", Source = "X", Target = "Y" },
+        ];
+
+        var kept = GraphUtils.FilterOrphanRelationships(relationships, entities);
+
+        kept.Should().BeEmpty();
+    }
+
+    [Fact]
+    public void FilterOrphanRelationships_CaseSensitive_MatchesExactly()
+    {
+        List<Entity> entities =
+        [
+            new() { Id = "1", Title = "Alice" },
+            new() { Id = "2", Title = "Bob" },
+        ];
+
+        List<Relationship> relationships =
+        [
+            new() { Id = "r1", Source = "Alice", Target = "Bob" },
+            new() { Id = "r2", Source = "alice", Target = "bob" },
+        ];
+
+        var kept = GraphUtils.FilterOrphanRelationships(relationships, entities);
+
+        kept.Should().ContainSingle();
+        kept[0].Id.Should().Be("r1");
+    }
+}