Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/fix-left-join-where-filter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
'@tanstack/db': patch
---

fix(db): don't push WHERE clauses to nullable side of outer joins

The query optimizer incorrectly pushed single-source WHERE clauses into subqueries and collection index optimization for the nullable side of outer joins. This pre-filtered the data before the join, converting rows that should have been excluded by the WHERE into unmatched outer-join rows that incorrectly survived the residual filter.
74 changes: 63 additions & 11 deletions packages/db/src/query/optimizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,11 @@ export function optimizeQuery(query: QueryIR): OptimizationResult {

/**
* Extracts collection-specific WHERE clauses from a query for index optimization.
* This analyzes the original query to identify WHERE clauses that can be pushed down
* to specific collections, but only for simple queries without joins.
* This analyzes the original query to identify single-source WHERE clauses that
* reference collection sources (not subqueries), including joined collections.
*
* For outer joins, clauses referencing the nullable side are excluded because
* using them to pre-filter collection data would change join semantics.
*
* @param query - The original QueryIR to analyze
* @returns Map of source aliases to their WHERE clauses
Expand All @@ -246,10 +249,19 @@ function extractSourceWhereClauses(
// Group clauses by single-source vs multi-source
const groupedClauses = groupWhereClauses(analyzedClauses)

// Determine which source aliases are on the nullable side of outer joins.
// WHERE clauses for these sources must not be used for index optimization
// because they should filter the final joined result, not the input data.
const nullableSources = getNullableJoinSources(query)

// Only include single-source clauses that reference collections directly
// and are not on the nullable side of an outer join
for (const [sourceAlias, whereClause] of groupedClauses.singleSource) {
// Check if this source alias corresponds to a collection reference
if (isCollectionReference(query, sourceAlias)) {
if (
isCollectionReference(query, sourceAlias) &&
!nullableSources.has(sourceAlias)
) {
sourceWhereClauses.set(sourceAlias, whereClause)
}
}
Expand Down Expand Up @@ -283,6 +295,36 @@ function isCollectionReference(query: QueryIR, sourceAlias: string): boolean {
return false
}

/**
* Returns the set of source aliases that are on the nullable side of outer joins.
*
* For a LEFT join the joined (right) side is nullable.
* For a RIGHT join the main (left/from) side is nullable.
* For a FULL join both sides are nullable.
*
* WHERE clauses that reference only a nullable source must not be pushed down
* into that source's subquery or used for index optimization, because doing so
* changes the join semantics: rows that should be excluded by the WHERE become
* unmatched outer-join rows (with the nullable side set to undefined) and
* incorrectly survive residual filtering.
*/
function getNullableJoinSources(query: QueryIR): Set<string> {
const nullable = new Set<string>()
if (query.join) {
const mainAlias = query.from.alias
for (const join of query.join) {
const joinedAlias = join.from.alias
if (join.type === `left` || join.type === `full`) {
nullable.add(joinedAlias)
}
if (join.type === `right` || join.type === `full`) {
nullable.add(mainAlias)
}
}
}
return nullable
}

/**
* Applies recursive predicate pushdown optimization.
*
Expand Down Expand Up @@ -635,10 +677,25 @@ function applyOptimizations(
// Track which single-source clauses were actually optimized
const actuallyOptimized = new Set<string>()

// Determine which source aliases are on the nullable side of outer joins.
const nullableSources = getNullableJoinSources(query)

// Build a filtered copy of singleSource that excludes nullable-side clauses.
// Pushing a WHERE clause into the nullable side's subquery pre-filters the
// data before the join, converting "matched but WHERE-excluded" rows into
// "unmatched" outer-join rows. These are indistinguishable from genuinely
// unmatched rows, so the residual WHERE cannot correct the result.
const pushableSingleSource = new Map<string, BasicExpression<boolean>>()
for (const [source, clause] of groupedClauses.singleSource) {
if (!nullableSources.has(source)) {
pushableSingleSource.set(source, clause)
}
}

// Optimize the main FROM clause and track what was optimized
const optimizedFrom = optimizeFromWithTracking(
query.from,
groupedClauses.singleSource,
pushableSingleSource,
actuallyOptimized,
)

Expand All @@ -648,7 +705,7 @@ function applyOptimizations(
...joinClause,
from: optimizeFromWithTracking(
joinClause.from,
groupedClauses.singleSource,
pushableSingleSource,
actuallyOptimized,
),
}))
Expand All @@ -663,12 +720,7 @@ function applyOptimizations(
}

// Determine if we need residual clauses (when query has outer JOINs)
const hasOuterJoins =
query.join &&
query.join.some(
(join) =>
join.type === `left` || join.type === `right` || join.type === `full`,
)
const hasOuterJoins = nullableSources.size > 0

// Add single-source clauses
for (const [source, clause] of groupedClauses.singleSource) {
Expand Down
34 changes: 15 additions & 19 deletions packages/db/tests/query/indexes.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -679,13 +679,14 @@ describe(`Query Index Optimization`, () => {
],
}

// Should use index optimization for both WHERE clauses
// since they each touch only a single source and both sources are indexed
// The WHERE clause on the non-nullable (left) side uses its index.
// The WHERE clause on the nullable (right) side of the LEFT JOIN is NOT
// pushed down to avoid changing join semantics, so the right side does a full scan.
expectIndexUsage(combinedStats, {
shouldUseIndex: true,
shouldUseFullScan: false,
indexCallCount: 2, // Both item.status='active' and other.status='active' can use indexes
fullScanCallCount: 0,
shouldUseFullScan: true,
indexCallCount: 1, // Only item.status='active' uses index (non-nullable side)
fullScanCallCount: 1, // other collection does full scan (nullable side)
})
} finally {
tracker1.restore()
Expand Down Expand Up @@ -1177,21 +1178,18 @@ describe(`Query Index Optimization`, () => {
{ id: `1`, name: `Alice`, otherName: `Other Active Item` },
])

// We should have done a full scan of the right collection
// The right collection does a full scan (no index on status)
expect(tracker2.stats.queriesExecuted).toEqual([
{
type: `fullScan`,
},
])

// We should have done an index lookup on the 1st collection to find active items
// In a RIGHT join, the left (from) side is nullable. The WHERE clause
// eq(item.status, 'active') is NOT pushed down to avoid changing join
// semantics, so the left collection does NOT do an index lookup for status.
// It only does the index lookup for the join key (id) used by lazy loading.
expect(tracker1.stats.queriesExecuted).toEqual([
{
field: `status`,
operation: `eq`,
type: `index`,
value: `active`,
},
{
type: `index`,
operation: `in`,
Expand Down Expand Up @@ -1281,14 +1279,12 @@ describe(`Query Index Optimization`, () => {
},
])

// We should have done an index lookup on the left collection to find active items
// because it has an index on the join key
// In a RIGHT join, the left (from) side is nullable. The WHERE clause
// eq(item.status, 'active') is NOT pushed down to avoid changing join
// semantics, so the left collection does a full scan.
expect(tracker1.stats.queriesExecuted).toEqual([
{
type: `index`,
operation: `eq`,
field: `status`,
value: `active`,
type: `fullScan`,
},
])
} finally {
Expand Down
60 changes: 60 additions & 0 deletions packages/db/tests/query/join.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1840,6 +1840,66 @@ function createJoinTests(autoIndex: `off` | `eager`): void {
expect(result.join2!.value).toBe(1)
expect(result.join2!.other).toBe(30)
})

test(`where with isUndefined on right-side field should filter entire joined rows`, () => {
type Left = {
id: string
rightId: string | null
}

type Right = {
id: string
payload: string | null | undefined
}

const leftCollection = createCollection(
mockSyncCollectionOptions<Left>({
id: `test-left-isundefined-field`,
getKey: (item) => item.id,
initialData: [
{ id: `l1`, rightId: `r1` },
{ id: `l2`, rightId: `r2` },
{ id: `l3`, rightId: `r3` },
{ id: `l4`, rightId: null },
],
autoIndex,
}),
)

const rightCollection = createCollection(
mockSyncCollectionOptions<Right>({
id: `test-right-isundefined-field`,
getKey: (item) => item.id,
initialData: [
{ id: `r1`, payload: `ok` },
{ id: `r2`, payload: null },
{ id: `r3`, payload: undefined },
],
autoIndex,
}),
)

const lq = createLiveQueryCollection({
startSync: true,
query: (q) =>
q
.from({ l: leftCollection })
.leftJoin({ r: rightCollection }, ({ l, r }) => eq(l.rightId, r.id))
.where(({ r }) => isUndefined(r?.payload))
.select(({ l, r }) => ({ leftId: l.id, right: r })),
})

const data = lq.toArray

// l1 joins r1 (payload='ok') → payload is defined → exclude
// l2 joins r2 (payload=null) → payload is null, not undefined → exclude
// l3 joins r3 (payload=undefined) → payload is undefined → include
// l4 has no match (rightId=null) → right is undefined, so r?.payload is undefined → include
expect(data.sort((a, b) => a.leftId.localeCompare(b.leftId))).toEqual([
{ leftId: `l3`, right: { id: `r3`, payload: undefined } },
{ leftId: `l4`, right: undefined },
])
})
}

describe(`Query JOIN Operations`, () => {
Expand Down
98 changes: 20 additions & 78 deletions packages/db/tests/query/optimizer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1649,35 +1649,15 @@ describe(`Query Optimizer`, () => {
const { optimizedQuery } = optimizeQuery(query)

// The WHERE clause should remain in the main query to preserve LEFT JOIN semantics
// It should NOT be completely moved to the subquery
// It must NOT be pushed down to the nullable (right) side because that would
// pre-filter the right data, converting excluded matches into unmatched rows
expect(optimizedQuery.where).toHaveLength(1)
expect(optimizedQuery.where![0]).toEqual({
expression: createEq(
createPropRef(`teamMember`, `user_id`),
createValue(100),
),
residual: true,
})
expect(optimizedQuery.where![0]).toEqual(
createEq(createPropRef(`teamMember`, `user_id`), createValue(100)),
)

// If the optimizer creates a subquery for teamMember, the WHERE clause should also be copied there
// but a residual copy must remain in the main query
if (
optimizedQuery.join &&
optimizedQuery.join[0]?.from.type === `queryRef`
) {
const teamMemberSubquery = optimizedQuery.join[0].from.query
// The subquery may have the WHERE clause for optimization
if (teamMemberSubquery.where && teamMemberSubquery.where.length > 0) {
// But the main query MUST still have it to preserve semantics
expect(optimizedQuery.where).toContainEqual({
expression: createEq(
createPropRef(`teamMember`, `user_id`),
createValue(100),
),
residual: true,
})
}
}
// The optimizer should NOT create a subquery for the nullable side
expect(optimizedQuery.join![0]!.from.type).toBe(`collectionRef`)
})

test(`should preserve WHERE clause semantics when pushing down to RIGHT JOIN`, () => {
Expand Down Expand Up @@ -1715,32 +1695,14 @@ describe(`Query Optimizer`, () => {
const { optimizedQuery } = optimizeQuery(query)

// The WHERE clause should remain in the main query to preserve RIGHT JOIN semantics
// It should NOT be completely moved to the subquery
// It must NOT be pushed down to the nullable (left/from) side
expect(optimizedQuery.where).toHaveLength(1)
expect(optimizedQuery.where![0]).toEqual({
expression: createEq(
createPropRef(`user`, `department_id`),
createValue(1),
),
residual: true,
})
expect(optimizedQuery.where![0]).toEqual(
createEq(createPropRef(`user`, `department_id`), createValue(1)),
)

// If the optimizer creates a subquery for users, the WHERE clause should also be copied there
// but a residual copy must remain in the main query
if (optimizedQuery.from.type === `queryRef`) {
const userSubquery = optimizedQuery.from.query
// The subquery may have the WHERE clause for optimization
if (userSubquery.where && userSubquery.where.length > 0) {
// But the main query MUST still have it to preserve semantics
expect(optimizedQuery.where).toContainEqual({
expression: createEq(
createPropRef(`user`, `department_id`),
createValue(1),
),
residual: true,
})
}
}
// The optimizer should NOT create a subquery for the nullable side (from)
expect(optimizedQuery.from.type).toBe(`collectionRef`)
})

test(`should preserve WHERE clause semantics when pushing down to FULL JOIN`, () => {
Expand Down Expand Up @@ -1778,35 +1740,15 @@ describe(`Query Optimizer`, () => {
const { optimizedQuery } = optimizeQuery(query)

// The WHERE clause should remain in the main query to preserve FULL JOIN semantics
// It should NOT be completely moved to the subquery
// It must NOT be pushed down to any nullable side
expect(optimizedQuery.where).toHaveLength(1)
expect(optimizedQuery.where![0]).toEqual({
expression: createGt(
createPropRef(`payment`, `amount`),
createValue(100),
),
residual: true,
})
expect(optimizedQuery.where![0]).toEqual(
createGt(createPropRef(`payment`, `amount`), createValue(100)),
)

// If the optimizer creates a subquery for payments, the WHERE clause should also be copied there
// but a residual copy must remain in the main query
if (
optimizedQuery.join &&
optimizedQuery.join[0]?.from.type === `queryRef`
) {
const paymentSubquery = optimizedQuery.join[0].from.query
// The subquery may have the WHERE clause for optimization
if (paymentSubquery.where && paymentSubquery.where.length > 0) {
// But the main query MUST still have it to preserve semantics
expect(optimizedQuery.where).toContainEqual({
expression: createGt(
createPropRef(`payment`, `amount`),
createValue(100),
),
residual: true,
})
}
}
// The optimizer should NOT create subqueries for nullable sides
expect(optimizedQuery.from.type).toBe(`collectionRef`)
expect(optimizedQuery.join![0]!.from.type).toBe(`collectionRef`)
})

test(`should allow WHERE clause pushdown for INNER JOIN (semantics preserved)`, () => {
Expand Down
Loading