From 344d7f2edc23a927f80a419707ba0b378ebc7d54 Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 3 Apr 2026 08:38:39 -0600 Subject: [PATCH 1/4] fix: deduplicate and filter null join keys in lazy join snapshot requests When a lazy join collected foreign keys for subset queries, duplicate IDs and null values were passed through to the ANY() SQL param, producing bloated queries. Filter nulls, deduplicate via Set, and skip the request entirely when no valid keys remain. Co-Authored-By: Claude Opus 4.6 --- packages/db/src/query/compiler/joins.ts | 16 +- .../tests/query/live-query-collection.test.ts | 187 ++++++++++++++++++ 2 files changed, 201 insertions(+), 2 deletions(-) diff --git a/packages/db/src/query/compiler/joins.ts b/packages/db/src/query/compiler/joins.ts index 69b833cf0..dde137585 100644 --- a/packages/db/src/query/compiler/joins.ts +++ b/packages/db/src/query/compiler/joins.ts @@ -302,8 +302,20 @@ function processJoin( return } - // Request filtered snapshot from lazy collection for matching join keys - const joinKeys = data.getInner().map(([[joinKey]]) => joinKey) + // Deduplicate and filter null keys before requesting snapshot + const joinKeys = [ + ...new Set( + data + .getInner() + .map(([[joinKey]]) => joinKey) + .filter((key) => key != null), + ), + ] + + if (joinKeys.length === 0) { + return + } + const lazyJoinRef = new PropRef(followRefResult.path) const loaded = lazySourceSubscription.requestSnapshot({ where: inArray(lazyJoinRef, joinKeys), diff --git a/packages/db/tests/query/live-query-collection.test.ts b/packages/db/tests/query/live-query-collection.test.ts index d56e5a715..2225f2a1f 100644 --- a/packages/db/tests/query/live-query-collection.test.ts +++ b/packages/db/tests/query/live-query-collection.test.ts @@ -18,6 +18,7 @@ import { } from '../utils.js' import { createDeferred } from '../../src/deferred' import { BTreeIndex } from '../../src/indexes/btree-index' +import { Func, Value } from '../../src/query/ir.js' import type { ChangeMessage, LoadSubsetOptions } from '../../src/types.js' // Sample user type for tests @@ -2585,4 +2586,190 @@ describe(`createLiveQueryCollection`, () => { await preloadPromise }) }) + + describe(`lazy join key deduplication`, () => { + it(`should deduplicate join keys and filter nulls when requesting snapshot for lazy joins`, async () => { + type Task = { + id: number + name: string + project_id: number | null + } + + type Project = { + id: number + name: string + } + + // Main collection with duplicate foreign keys and null foreign keys + const taskCollection = createCollection({ + id: `tasks-dedup`, + getKey: (task) => task.id, + sync: { + sync: ({ begin, write, commit, markReady }) => { + begin() + // Multiple tasks pointing to the same project (duplicates) + write({ type: `insert`, value: { id: 1, name: `Task 1`, project_id: 10 } }) + write({ type: `insert`, value: { id: 2, name: `Task 2`, project_id: 10 } }) + write({ type: `insert`, value: { id: 3, name: `Task 3`, project_id: 10 } }) + write({ type: `insert`, value: { id: 4, name: `Task 4`, project_id: 20 } }) + write({ type: `insert`, value: { id: 5, name: `Task 5`, project_id: 20 } }) + // Tasks with null foreign key + write({ type: `insert`, value: { id: 6, name: `Task 6`, project_id: null } }) + write({ type: `insert`, value: { id: 7, name: `Task 7`, project_id: null } }) + commit() + markReady() + }, + }, + }) + + // Lazy joined collection that tracks loadSubset calls + const capturedOptions: Array = [] + + const projectCollection = createCollection({ + id: `projects-dedup`, + getKey: (project) => project.id, + syncMode: `on-demand`, + sync: { + sync: ({ begin, write, commit, markReady }) => { + begin() + write({ type: `insert`, value: { id: 10, name: `Project A` } }) + write({ type: `insert`, value: { id: 20, name: `Project B` } }) + commit() + markReady() + return { + loadSubset: (options: LoadSubsetOptions) => { + capturedOptions.push(options) + return true + }, + } + }, + }, + }) + + const liveQuery = createLiveQueryCollection((q) => + q + .from({ task: taskCollection }) + .leftJoin({ project: projectCollection }, ({ task, project }) => + eq(task.project_id, project.id), + ), + ) + + await liveQuery.preload() + await flushPromises() + + // Find the inArray expression in loadSubset calls + // It may be wrapped in `and` since requestSnapshot combines expressions + const findInArrayExpr = ( + expr: LoadSubsetOptions[`where`], + ): Func | undefined => { + if (!(expr instanceof Func)) return undefined + if (expr.name === `in`) return expr + if (expr.name === `and` || expr.name === `or`) { + for (const arg of expr.args) { + const found = findInArrayExpr(arg) + if (found) return found + } + } + return undefined + } + + const inExpr = capturedOptions + .map((opt) => findInArrayExpr(opt.where)) + .find((expr) => expr !== undefined) + + expect(inExpr).toBeDefined() + + // The second arg of inArray is the array of values + const arrayArg = inExpr!.args[1] + expect(arrayArg).toBeInstanceOf(Value) + const valuesArg = arrayArg as Value> + const values = valuesArg.value.slice().sort() + + // Should contain only the 2 unique project IDs -- no nulls, no duplicates + expect(values).toEqual([10, 20]) + }) + + it(`should skip loadSubset when all join keys are null`, async () => { + type Task = { + id: number + name: string + project_id: number | null + } + + type Project = { + id: number + name: string + } + + const taskCollection = createCollection({ + id: `tasks-all-null`, + getKey: (task) => task.id, + sync: { + sync: ({ begin, write, commit, markReady }) => { + begin() + write({ type: `insert`, value: { id: 1, name: `Task 1`, project_id: null } }) + write({ type: `insert`, value: { id: 2, name: `Task 2`, project_id: null } }) + write({ type: `insert`, value: { id: 3, name: `Task 3`, project_id: null } }) + commit() + markReady() + }, + }, + }) + + const capturedOptions: Array = [] + + const projectCollection = createCollection({ + id: `projects-all-null`, + getKey: (project) => project.id, + syncMode: `on-demand`, + sync: { + sync: ({ begin, write, commit, markReady }) => { + begin() + write({ type: `insert`, value: { id: 10, name: `Project A` } }) + commit() + markReady() + return { + loadSubset: (options: LoadSubsetOptions) => { + capturedOptions.push(options) + return true + }, + } + }, + }, + }) + + const liveQuery = createLiveQueryCollection((q) => + q + .from({ task: taskCollection }) + .leftJoin({ project: projectCollection }, ({ task, project }) => + eq(task.project_id, project.id), + ), + ) + + await liveQuery.preload() + await flushPromises() + + // No loadSubset call should contain an inArray expression + // since all keys were null and filtered out + const findInArrayExpr = ( + expr: LoadSubsetOptions[`where`], + ): boolean => { + if (!(expr instanceof Func)) return false + if (expr.name === `in`) return true + if (expr.name === `and` || expr.name === `or`) { + return expr.args.some((arg) => findInArrayExpr(arg)) + } + return false + } + + const hasInArrayCall = capturedOptions.some((opt) => findInArrayExpr(opt.where)) + expect(hasInArrayCall).toBe(false) + + // All tasks should still appear in results with null project + expect(liveQuery.toArray).toHaveLength(3) + for (const row of liveQuery.toArray) { + expect(row.project).toBeUndefined() + } + }) + }) }) From ad13046efa2d623c94ad1a7e2b6f452bea74bd31 Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 3 Apr 2026 08:39:55 -0600 Subject: [PATCH 2/4] chore: add changeset for lazy join key dedup fix Co-Authored-By: Claude Opus 4.6 --- .changeset/fix-lazy-join-key-dedup.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/fix-lazy-join-key-dedup.md diff --git a/.changeset/fix-lazy-join-key-dedup.md b/.changeset/fix-lazy-join-key-dedup.md new file mode 100644 index 000000000..dab2d9a45 --- /dev/null +++ b/.changeset/fix-lazy-join-key-dedup.md @@ -0,0 +1,5 @@ +--- +'@tanstack/db': patch +--- + +Deduplicate and filter null join keys in lazy join subset queries. Previously, when multiple rows referenced the same foreign key or had null foreign keys, the full unfiltered array was passed to `inArray()`, producing bloated `ANY()` SQL params with repeated IDs and NULLs. From f6e70381f8447960ddfe7595da063f47d40961dd Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Fri, 3 Apr 2026 15:04:34 +0000 Subject: [PATCH 3/4] ci: apply automated fixes --- .../tests/query/live-query-collection.test.ts | 58 ++++++++++++++----- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/packages/db/tests/query/live-query-collection.test.ts b/packages/db/tests/query/live-query-collection.test.ts index e207e72f5..7184b7f05 100644 --- a/packages/db/tests/query/live-query-collection.test.ts +++ b/packages/db/tests/query/live-query-collection.test.ts @@ -2608,14 +2608,35 @@ describe(`createLiveQueryCollection`, () => { sync: ({ begin, write, commit, markReady }) => { begin() // Multiple tasks pointing to the same project (duplicates) - write({ type: `insert`, value: { id: 1, name: `Task 1`, project_id: 10 } }) - write({ type: `insert`, value: { id: 2, name: `Task 2`, project_id: 10 } }) - write({ type: `insert`, value: { id: 3, name: `Task 3`, project_id: 10 } }) - write({ type: `insert`, value: { id: 4, name: `Task 4`, project_id: 20 } }) - write({ type: `insert`, value: { id: 5, name: `Task 5`, project_id: 20 } }) + write({ + type: `insert`, + value: { id: 1, name: `Task 1`, project_id: 10 }, + }) + write({ + type: `insert`, + value: { id: 2, name: `Task 2`, project_id: 10 }, + }) + write({ + type: `insert`, + value: { id: 3, name: `Task 3`, project_id: 10 }, + }) + write({ + type: `insert`, + value: { id: 4, name: `Task 4`, project_id: 20 }, + }) + write({ + type: `insert`, + value: { id: 5, name: `Task 5`, project_id: 20 }, + }) // Tasks with null foreign key - write({ type: `insert`, value: { id: 6, name: `Task 6`, project_id: null } }) - write({ type: `insert`, value: { id: 7, name: `Task 7`, project_id: null } }) + write({ + type: `insert`, + value: { id: 6, name: `Task 6`, project_id: null }, + }) + write({ + type: `insert`, + value: { id: 7, name: `Task 7`, project_id: null }, + }) commit() markReady() }, @@ -2707,9 +2728,18 @@ describe(`createLiveQueryCollection`, () => { sync: { sync: ({ begin, write, commit, markReady }) => { begin() - write({ type: `insert`, value: { id: 1, name: `Task 1`, project_id: null } }) - write({ type: `insert`, value: { id: 2, name: `Task 2`, project_id: null } }) - write({ type: `insert`, value: { id: 3, name: `Task 3`, project_id: null } }) + write({ + type: `insert`, + value: { id: 1, name: `Task 1`, project_id: null }, + }) + write({ + type: `insert`, + value: { id: 2, name: `Task 2`, project_id: null }, + }) + write({ + type: `insert`, + value: { id: 3, name: `Task 3`, project_id: null }, + }) commit() markReady() }, @@ -2751,9 +2781,7 @@ describe(`createLiveQueryCollection`, () => { // No loadSubset call should contain an inArray expression // since all keys were null and filtered out - const findInArrayExpr = ( - expr: LoadSubsetOptions[`where`], - ): boolean => { + const findInArrayExpr = (expr: LoadSubsetOptions[`where`]): boolean => { if (!(expr instanceof Func)) return false if (expr.name === `in`) return true if (expr.name === `and` || expr.name === `or`) { @@ -2762,7 +2790,9 @@ describe(`createLiveQueryCollection`, () => { return false } - const hasInArrayCall = capturedOptions.some((opt) => findInArrayExpr(opt.where)) + const hasInArrayCall = capturedOptions.some((opt) => + findInArrayExpr(opt.where), + ) expect(hasInArrayCall).toBe(false) // All tasks should still appear in results with null project From 39bdc413d7d0c7509e600698c68ce54005843e9a Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 3 Apr 2026 09:08:26 -0600 Subject: [PATCH 4/4] test: strengthen all-null-keys test to assert no loadSubset calls Assert capturedOptions is empty rather than checking for absence of inArray expressions. This catches regressions where the fallback path issues an expensive full collection load with no where clause. Co-Authored-By: Claude Opus 4.6 --- .../db/tests/query/live-query-collection.test.ts | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/packages/db/tests/query/live-query-collection.test.ts b/packages/db/tests/query/live-query-collection.test.ts index 7184b7f05..8b2b60901 100644 --- a/packages/db/tests/query/live-query-collection.test.ts +++ b/packages/db/tests/query/live-query-collection.test.ts @@ -2779,21 +2779,9 @@ describe(`createLiveQueryCollection`, () => { await liveQuery.preload() await flushPromises() - // No loadSubset call should contain an inArray expression + // No loadSubset call should have been made for the lazy join // since all keys were null and filtered out - const findInArrayExpr = (expr: LoadSubsetOptions[`where`]): boolean => { - if (!(expr instanceof Func)) return false - if (expr.name === `in`) return true - if (expr.name === `and` || expr.name === `or`) { - return expr.args.some((arg) => findInArrayExpr(arg)) - } - return false - } - - const hasInArrayCall = capturedOptions.some((opt) => - findInArrayExpr(opt.where), - ) - expect(hasInArrayCall).toBe(false) + expect(capturedOptions).toHaveLength(0) // All tasks should still appear in results with null project expect(liveQuery.toArray).toHaveLength(3)