Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-lazy-join-key-dedup.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@tanstack/db': patch
---

Deduplicate and filter null join keys in lazy join subset queries. Previously, when multiple rows referenced the same foreign key or had null foreign keys, the full unfiltered array was passed to `inArray()`, producing bloated `ANY()` SQL params with repeated IDs and NULLs.
16 changes: 14 additions & 2 deletions packages/db/src/query/compiler/joins.ts
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,20 @@ function processJoin(
return
}

// Request filtered snapshot from lazy collection for matching join keys
const joinKeys = data.getInner().map(([[joinKey]]) => joinKey)
// Deduplicate and filter null keys before requesting snapshot
const joinKeys = [
...new Set(
data
.getInner()
.map(([[joinKey]]) => joinKey)
.filter((key) => key != null),
),
]

if (joinKeys.length === 0) {
return
}

const lazyJoinRef = new PropRef(followRefResult.path)
const loaded = lazySourceSubscription.requestSnapshot({
where: inArray(lazyJoinRef, joinKeys),
Expand Down
205 changes: 205 additions & 0 deletions packages/db/tests/query/live-query-collection.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
} from '../utils.js'
import { createDeferred } from '../../src/deferred'
import { BTreeIndex } from '../../src/indexes/btree-index'
import { Func, Value } from '../../src/query/ir.js'
import type { ChangeMessage, LoadSubsetOptions } from '../../src/types.js'

// Sample user type for tests
Expand Down Expand Up @@ -2586,6 +2587,210 @@ describe(`createLiveQueryCollection`, () => {
})
})

describe(`lazy join key deduplication`, () => {
it(`should deduplicate join keys and filter nulls when requesting snapshot for lazy joins`, async () => {
type Task = {
id: number
name: string
project_id: number | null
}

type Project = {
id: number
name: string
}

// Main collection with duplicate foreign keys and null foreign keys
const taskCollection = createCollection<Task>({
id: `tasks-dedup`,
getKey: (task) => task.id,
sync: {
sync: ({ begin, write, commit, markReady }) => {
begin()
// Multiple tasks pointing to the same project (duplicates)
write({
type: `insert`,
value: { id: 1, name: `Task 1`, project_id: 10 },
})
write({
type: `insert`,
value: { id: 2, name: `Task 2`, project_id: 10 },
})
write({
type: `insert`,
value: { id: 3, name: `Task 3`, project_id: 10 },
})
write({
type: `insert`,
value: { id: 4, name: `Task 4`, project_id: 20 },
})
write({
type: `insert`,
value: { id: 5, name: `Task 5`, project_id: 20 },
})
// Tasks with null foreign key
write({
type: `insert`,
value: { id: 6, name: `Task 6`, project_id: null },
})
write({
type: `insert`,
value: { id: 7, name: `Task 7`, project_id: null },
})
commit()
markReady()
},
},
})

// Lazy joined collection that tracks loadSubset calls
const capturedOptions: Array<LoadSubsetOptions> = []

const projectCollection = createCollection<Project>({
id: `projects-dedup`,
getKey: (project) => project.id,
syncMode: `on-demand`,
sync: {
sync: ({ begin, write, commit, markReady }) => {
begin()
write({ type: `insert`, value: { id: 10, name: `Project A` } })
write({ type: `insert`, value: { id: 20, name: `Project B` } })
commit()
markReady()
return {
loadSubset: (options: LoadSubsetOptions) => {
capturedOptions.push(options)
return true
},
}
},
},
})

const liveQuery = createLiveQueryCollection((q) =>
q
.from({ task: taskCollection })
.leftJoin({ project: projectCollection }, ({ task, project }) =>
eq(task.project_id, project.id),
),
)

await liveQuery.preload()
await flushPromises()

// Find the inArray expression in loadSubset calls
// It may be wrapped in `and` since requestSnapshot combines expressions
const findInArrayExpr = (
expr: LoadSubsetOptions[`where`],
): Func | undefined => {
if (!(expr instanceof Func)) return undefined
if (expr.name === `in`) return expr
if (expr.name === `and` || expr.name === `or`) {
for (const arg of expr.args) {
const found = findInArrayExpr(arg)
if (found) return found
}
}
return undefined
}

const inExpr = capturedOptions
.map((opt) => findInArrayExpr(opt.where))
.find((expr) => expr !== undefined)

expect(inExpr).toBeDefined()

// The second arg of inArray is the array of values
const arrayArg = inExpr!.args[1]
expect(arrayArg).toBeInstanceOf(Value)
const valuesArg = arrayArg as Value<Array<number>>
const values = valuesArg.value.slice().sort()

// Should contain only the 2 unique project IDs -- no nulls, no duplicates
expect(values).toEqual([10, 20])
})

it(`should skip loadSubset when all join keys are null`, async () => {
type Task = {
id: number
name: string
project_id: number | null
}

type Project = {
id: number
name: string
}

const taskCollection = createCollection<Task>({
id: `tasks-all-null`,
getKey: (task) => task.id,
sync: {
sync: ({ begin, write, commit, markReady }) => {
begin()
write({
type: `insert`,
value: { id: 1, name: `Task 1`, project_id: null },
})
write({
type: `insert`,
value: { id: 2, name: `Task 2`, project_id: null },
})
write({
type: `insert`,
value: { id: 3, name: `Task 3`, project_id: null },
})
commit()
markReady()
},
},
})

const capturedOptions: Array<LoadSubsetOptions> = []

const projectCollection = createCollection<Project>({
id: `projects-all-null`,
getKey: (project) => project.id,
syncMode: `on-demand`,
sync: {
sync: ({ begin, write, commit, markReady }) => {
begin()
write({ type: `insert`, value: { id: 10, name: `Project A` } })
commit()
markReady()
return {
loadSubset: (options: LoadSubsetOptions) => {
capturedOptions.push(options)
return true
},
}
},
},
})

const liveQuery = createLiveQueryCollection((q) =>
q
.from({ task: taskCollection })
.leftJoin({ project: projectCollection }, ({ task, project }) =>
eq(task.project_id, project.id),
),
)

await liveQuery.preload()
await flushPromises()

// No loadSubset call should have been made for the lazy join
// since all keys were null and filtered out
expect(capturedOptions).toHaveLength(0)

// All tasks should still appear in results with null project
expect(liveQuery.toArray).toHaveLength(3)
for (const row of liveQuery.toArray) {
expect(row.project).toBeUndefined()
}
})
})

describe(`chained live query collections without custom getKey`, () => {
it(`should return all items when a live query collection without getKey is used as a source`, async () => {
// Create a live query collection with the default (internal) getKey
Expand Down
Loading