diff --git a/.changeset/fix-lazy-join-key-dedup.md b/.changeset/fix-lazy-join-key-dedup.md new file mode 100644 index 000000000..dab2d9a45 --- /dev/null +++ b/.changeset/fix-lazy-join-key-dedup.md @@ -0,0 +1,5 @@ +--- +'@tanstack/db': patch +--- + +Deduplicate and filter null join keys in lazy join subset queries. Previously, when multiple rows referenced the same foreign key or had null foreign keys, the full unfiltered array was passed to `inArray()`, producing bloated `ANY()` SQL params with repeated IDs and NULLs. diff --git a/packages/db/src/query/compiler/joins.ts b/packages/db/src/query/compiler/joins.ts index 69b833cf0..dde137585 100644 --- a/packages/db/src/query/compiler/joins.ts +++ b/packages/db/src/query/compiler/joins.ts @@ -302,8 +302,20 @@ function processJoin( return } - // Request filtered snapshot from lazy collection for matching join keys - const joinKeys = data.getInner().map(([[joinKey]]) => joinKey) + // Deduplicate and filter null keys before requesting snapshot + const joinKeys = [ + ...new Set( + data + .getInner() + .map(([[joinKey]]) => joinKey) + .filter((key) => key != null), + ), + ] + + if (joinKeys.length === 0) { + return + } + const lazyJoinRef = new PropRef(followRefResult.path) const loaded = lazySourceSubscription.requestSnapshot({ where: inArray(lazyJoinRef, joinKeys), diff --git a/packages/db/tests/query/live-query-collection.test.ts b/packages/db/tests/query/live-query-collection.test.ts index 7179cd388..8b2b60901 100644 --- a/packages/db/tests/query/live-query-collection.test.ts +++ b/packages/db/tests/query/live-query-collection.test.ts @@ -18,6 +18,7 @@ import { } from '../utils.js' import { createDeferred } from '../../src/deferred' import { BTreeIndex } from '../../src/indexes/btree-index' +import { Func, Value } from '../../src/query/ir.js' import type { ChangeMessage, LoadSubsetOptions } from '../../src/types.js' // Sample user type for tests @@ -2586,6 +2587,210 @@ describe(`createLiveQueryCollection`, () => { }) }) + describe(`lazy join key deduplication`, () => { + it(`should deduplicate join keys and filter nulls when requesting snapshot for lazy joins`, async () => { + type Task = { + id: number + name: string + project_id: number | null + } + + type Project = { + id: number + name: string + } + + // Main collection with duplicate foreign keys and null foreign keys + const taskCollection = createCollection({ + id: `tasks-dedup`, + getKey: (task) => task.id, + sync: { + sync: ({ begin, write, commit, markReady }) => { + begin() + // Multiple tasks pointing to the same project (duplicates) + write({ + type: `insert`, + value: { id: 1, name: `Task 1`, project_id: 10 }, + }) + write({ + type: `insert`, + value: { id: 2, name: `Task 2`, project_id: 10 }, + }) + write({ + type: `insert`, + value: { id: 3, name: `Task 3`, project_id: 10 }, + }) + write({ + type: `insert`, + value: { id: 4, name: `Task 4`, project_id: 20 }, + }) + write({ + type: `insert`, + value: { id: 5, name: `Task 5`, project_id: 20 }, + }) + // Tasks with null foreign key + write({ + type: `insert`, + value: { id: 6, name: `Task 6`, project_id: null }, + }) + write({ + type: `insert`, + value: { id: 7, name: `Task 7`, project_id: null }, + }) + commit() + markReady() + }, + }, + }) + + // Lazy joined collection that tracks loadSubset calls + const capturedOptions: Array = [] + + const projectCollection = createCollection({ + id: `projects-dedup`, + getKey: (project) => project.id, + syncMode: `on-demand`, + sync: { + sync: ({ begin, write, commit, markReady }) => { + begin() + write({ type: `insert`, value: { id: 10, name: `Project A` } }) + write({ type: `insert`, value: { id: 20, name: `Project B` } }) + commit() + markReady() + return { + loadSubset: (options: LoadSubsetOptions) => { + capturedOptions.push(options) + return true + }, + } + }, + }, + }) + + const liveQuery = createLiveQueryCollection((q) => + q + .from({ task: taskCollection }) + .leftJoin({ project: projectCollection }, ({ task, project }) => + eq(task.project_id, project.id), + ), + ) + + await liveQuery.preload() + await flushPromises() + + // Find the inArray expression in loadSubset calls + // It may be wrapped in `and` since requestSnapshot combines expressions + const findInArrayExpr = ( + expr: LoadSubsetOptions[`where`], + ): Func | undefined => { + if (!(expr instanceof Func)) return undefined + if (expr.name === `in`) return expr + if (expr.name === `and` || expr.name === `or`) { + for (const arg of expr.args) { + const found = findInArrayExpr(arg) + if (found) return found + } + } + return undefined + } + + const inExpr = capturedOptions + .map((opt) => findInArrayExpr(opt.where)) + .find((expr) => expr !== undefined) + + expect(inExpr).toBeDefined() + + // The second arg of inArray is the array of values + const arrayArg = inExpr!.args[1] + expect(arrayArg).toBeInstanceOf(Value) + const valuesArg = arrayArg as Value> + const values = valuesArg.value.slice().sort() + + // Should contain only the 2 unique project IDs -- no nulls, no duplicates + expect(values).toEqual([10, 20]) + }) + + it(`should skip loadSubset when all join keys are null`, async () => { + type Task = { + id: number + name: string + project_id: number | null + } + + type Project = { + id: number + name: string + } + + const taskCollection = createCollection({ + id: `tasks-all-null`, + getKey: (task) => task.id, + sync: { + sync: ({ begin, write, commit, markReady }) => { + begin() + write({ + type: `insert`, + value: { id: 1, name: `Task 1`, project_id: null }, + }) + write({ + type: `insert`, + value: { id: 2, name: `Task 2`, project_id: null }, + }) + write({ + type: `insert`, + value: { id: 3, name: `Task 3`, project_id: null }, + }) + commit() + markReady() + }, + }, + }) + + const capturedOptions: Array = [] + + const projectCollection = createCollection({ + id: `projects-all-null`, + getKey: (project) => project.id, + syncMode: `on-demand`, + sync: { + sync: ({ begin, write, commit, markReady }) => { + begin() + write({ type: `insert`, value: { id: 10, name: `Project A` } }) + commit() + markReady() + return { + loadSubset: (options: LoadSubsetOptions) => { + capturedOptions.push(options) + return true + }, + } + }, + }, + }) + + const liveQuery = createLiveQueryCollection((q) => + q + .from({ task: taskCollection }) + .leftJoin({ project: projectCollection }, ({ task, project }) => + eq(task.project_id, project.id), + ), + ) + + await liveQuery.preload() + await flushPromises() + + // No loadSubset call should have been made for the lazy join + // since all keys were null and filtered out + expect(capturedOptions).toHaveLength(0) + + // All tasks should still appear in results with null project + expect(liveQuery.toArray).toHaveLength(3) + for (const row of liveQuery.toArray) { + expect(row.project).toBeUndefined() + } + }) + }) + describe(`chained live query collections without custom getKey`, () => { it(`should return all items when a live query collection without getKey is used as a source`, async () => { // Create a live query collection with the default (internal) getKey