Skip to content

Commit 7d28a48

Browse files
authored
Refactor join types (#8606)
* Refactor join types in RSE and RecordSource. Stricter processing of special joins. The optimizer is mostly ready for anti-joins.
* Postfixes for special joins
* Follow Adriano's suggestion
* Further refactoring of the joins: move common code into the base class
* Correction
1 parent 7b79c01 commit 7d28a48

File tree

11 files changed

+429
-655
lines changed

11 files changed

+429
-655
lines changed

src/jrd/RecordSourceNodes.cpp

Lines changed: 60 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,16 @@ static ValueExprNode* resolveUsingField(DsqlCompilerScratch* dsqlScratch, const
5555

5656
namespace
5757
{
58-
void appendContextAlias(DsqlCompilerScratch* dsqlScratch, const string& alias)
58+
struct SpecialJoinItem
59+
{
60+
RseNode* rse;
61+
bool semiJoin;
62+
BoolExprNode* boolean;
63+
};
64+
65+
typedef HalfStaticArray<SpecialJoinItem, 4> SpecialJoinList;
66+
67+
void appendContextAlias(DsqlCompilerScratch* dsqlScratch, const string& alias)
5968
{
6069
const auto len = alias.length();
6170
if (len <= MAX_UCHAR)
@@ -126,18 +135,17 @@ namespace
126135
bool findPossibleJoins(CompilerScratch* csb,
127136
const StreamList& rseStreams,
128137
BoolExprNode** parentBoolean,
129-
RecordSourceNodeStack& rseStack,
130-
BoolExprNodeStack& booleanStack)
138+
SpecialJoinList& result)
131139
{
132140
auto boolNode = *parentBoolean;
133141

134142
const auto binaryNode = nodeAs<BinaryBoolNode>(boolNode);
135143
if (binaryNode && binaryNode->blrOp == blr_and)
136144
{
137145
const bool found1 = findPossibleJoins(csb, rseStreams,
138-
binaryNode->arg1.getAddress(), rseStack, booleanStack);
146+
binaryNode->arg1.getAddress(), result);
139147
const bool found2 = findPossibleJoins(csb, rseStreams,
140-
binaryNode->arg2.getAddress(), rseStack, booleanStack);
148+
binaryNode->arg2.getAddress(), result);
141149

142150
if (!binaryNode->arg1 && !binaryNode->arg2)
143151
*parentBoolean = nullptr;
@@ -156,7 +164,7 @@ namespace
156164
auto rse = rseNode->rse;
157165
fb_assert(rse && (rse->flags & RseNode::FLAG_SUB_QUERY));
158166

159-
if (rse->rse_boolean && rse->rse_jointype == blr_inner &&
167+
if (rse->rse_boolean && rse->isInnerJoin() &&
160168
!rse->rse_first && !rse->rse_skip && !rse->rse_plan)
161169
{
162170
// Find booleans convertable into semi-joins
@@ -201,9 +209,7 @@ namespace
201209
if (!dependent)
202210
{
203211
rse->flags &= ~RseNode::FLAG_SUB_QUERY;
204-
rse->flags |= RseNode::FLAG_SEMI_JOINED;
205-
rseStack.push(rse);
206-
booleanStack.push(boolean);
212+
result.push({rse, true, boolean});
207213
*parentBoolean = nullptr;
208214
return true;
209215
}
@@ -1019,7 +1025,7 @@ void RelationSourceNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseN
10191025
// 1) If the view has a projection, sort, first/skip or explicit plan.
10201026
// 2) If it's part of an outer join.
10211027

1022-
if (rse->rse_jointype != blr_inner || // viewRse->rse_jointype != blr_inner || ???
1028+
if (!rse->isInnerJoin() || // !viewRse->isInnerJoin() || ???
10231029
viewRse->rse_sorted || viewRse->rse_projection || viewRse->rse_first ||
10241030
viewRse->rse_skip || viewRse->rse_plan)
10251031
{
@@ -2970,19 +2976,19 @@ RseNode* RseNode::dsqlPass(DsqlCompilerScratch* dsqlScratch)
29702976

29712977
switch (rse_jointype)
29722978
{
2973-
case blr_inner:
2979+
case INNER_JOIN:
29742980
streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]);
29752981
streamList->items[1] = doDsqlPass(dsqlScratch, fromList->items[1]);
29762982
break;
29772983

2978-
case blr_left:
2984+
case LEFT_JOIN:
29792985
streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]);
29802986
++dsqlScratch->inOuterJoin;
29812987
streamList->items[1] = doDsqlPass(dsqlScratch, fromList->items[1]);
29822988
--dsqlScratch->inOuterJoin;
29832989
break;
29842990

2985-
case blr_right:
2991+
case RIGHT_JOIN:
29862992
++dsqlScratch->inOuterJoin;
29872993
streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]);
29882994
--dsqlScratch->inOuterJoin;
@@ -2993,7 +2999,7 @@ RseNode* RseNode::dsqlPass(DsqlCompilerScratch* dsqlScratch)
29932999
streamList->items[1] = doDsqlPass(dsqlScratch, fromList->items[1]);
29943000
break;
29953001

2996-
case blr_full:
3002+
case FULL_JOIN:
29973003
++dsqlScratch->inOuterJoin;
29983004
streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]);
29993005
// Temporarily remove just created context(s) from the stack,
@@ -3065,7 +3071,7 @@ RseNode* RseNode::dsqlPass(DsqlCompilerScratch* dsqlScratch)
30653071
if (matched->items.isEmpty())
30663072
{
30673073
// There is no match. Transform to CROSS JOIN.
3068-
node->rse_jointype = blr_inner;
3074+
node->rse_jointype = INNER_JOIN;
30693075
usingList = NULL;
30703076

30713077
delete matched;
@@ -3280,14 +3286,14 @@ RseNode* RseNode::pass1(thread_db* tdbb, CompilerScratch* csb)
32803286
ValueExprNode* skip = rse_skip;
32813287
PlanNode* plan = rse_plan;
32823288

3283-
if (rse_jointype == blr_inner)
3289+
if (isInnerJoin())
32843290
csb->csb_inner_booleans.push(rse_boolean);
32853291

32863292
// zip thru RseNode expanding views and inner joins
32873293
for (auto sub : rse_relations)
32883294
processSource(tdbb, csb, this, sub, &boolean, stack);
32893295

3290-
if (rse_jointype == blr_inner)
3296+
if (isInnerJoin())
32913297
csb->csb_inner_booleans.pop();
32923298

32933299
// Now, rebuild the RseNode block.
@@ -3362,7 +3368,7 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse,
33623368
return;
33633369
}
33643370

3365-
if (rse_jointype != blr_inner)
3371+
if (isOuterJoin())
33663372
{
33673373
// Check whether any of the upper level booleans (those belonging to the WHERE clause)
33683374
// is able to filter out rows from the "inner" streams. If this is the case,
@@ -3377,15 +3383,15 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse,
33773383
StreamList streams;
33783384

33793385
// First check the left stream of the full outer join
3380-
if (rse_jointype == blr_full)
3386+
if (isFullJoin())
33813387
{
33823388
rse1->computeRseStreams(streams);
33833389

33843390
for (const auto boolean : csb->csb_inner_booleans)
33853391
{
33863392
if (boolean && boolean->ignoreNulls(streams))
33873393
{
3388-
rse_jointype = blr_left;
3394+
rse_jointype = LEFT_JOIN;
33893395
break;
33903396
}
33913397
}
@@ -3399,16 +3405,16 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse,
33993405
{
34003406
if (boolean && boolean->ignoreNulls(streams))
34013407
{
3402-
if (rse_jointype == blr_full)
3408+
if (isFullJoin())
34033409
{
34043410
// We should transform FULL join to RIGHT join,
34053411
// but as we don't allow them inside the engine
34063412
// just swap the sides and insist it's LEFT join
34073413
std::swap(rse_relations[0], rse_relations[1]);
3408-
rse_jointype = blr_left;
3414+
rse_jointype = LEFT_JOIN;
34093415
}
34103416
else
3411-
rse_jointype = blr_inner;
3417+
rse_jointype = INNER_JOIN;
34123418

34133419
break;
34143420
}
@@ -3423,11 +3429,9 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse,
34233429
// where we are just trying to inner join more than 2 streams. If possible,
34243430
// try to flatten the tree out before we go any further.
34253431

3426-
if (!isLateral() && !isSemiJoined() &&
3427-
rse->rse_jointype == blr_inner &&
3428-
rse_jointype == blr_inner &&
3429-
!rse_sorted && !rse_projection &&
3430-
!rse_first && !rse_skip && !rse_plan)
3432+
if (!isLateral() &&
3433+
rse->isInnerJoin() && isInnerJoin() &&
3434+
!rse_sorted && !rse_projection && !rse_first && !rse_skip && !rse_plan)
34313435
{
34323436
for (auto sub : rse_relations)
34333437
processSource(tdbb, csb, rse, sub, boolean, stack);
@@ -3518,58 +3522,47 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr
35183522
computeRseStreams(rseStreams);
35193523

35203524
BoolExprNodeStack conjunctStack;
3525+
StreamStateHolder stateHolder(csb, opt->getOuterStreams());
35213526

3522-
// pass RseNode boolean only to inner substreams because join condition
3527+
// Pass RseNode boolean only to inner substreams because join condition
35233528
// should never exclude records from outer substreams
3524-
if (opt->isInnerJoin() || (opt->isLeftJoin() && innerSubStream))
3529+
if (opt->isInnerJoin() || ((opt->isLeftJoin() || opt->isSpecialJoin()) && innerSubStream))
35253530
{
35263531
// AB: For an (X LEFT JOIN Y) mark the outer-streams (X) as
35273532
// active because the inner-streams (Y) are always "dependent"
35283533
// on the outer-streams. So that index retrieval nodes could be made.
35293534
//
35303535
// dimitr: the same for lateral derived tables in inner joins
35313536

3532-
StreamStateHolder stateHolder(csb, opt->getOuterStreams());
3533-
3534-
if (opt->isLeftJoin() || isLateral() || isSemiJoined())
3535-
{
3537+
if (!opt->isInnerJoin() || isLateral())
35363538
stateHolder.activate();
35373539

3538-
if (opt->isLeftJoin() || isSemiJoined())
3539-
{
3540-
// Push all conjuncts except "missing" ones (e.g. IS NULL)
3541-
for (auto iter = opt->getConjuncts(false, true); iter.hasData(); ++iter)
3542-
{
3543-
if (iter->containsAnyStream(rseStreams))
3544-
conjunctStack.push(iter);
3545-
}
3546-
}
3547-
}
3548-
else
3540+
// For the LEFT JOIN, push all conjuncts except "missing" ones (e.g. IS NULL)
3541+
for (auto iter = opt->getConjuncts(false, opt->isLeftJoin()); iter.hasData(); ++iter)
35493542
{
3550-
for (auto iter = opt->getConjuncts(); iter.hasData(); ++iter)
3551-
{
3552-
if (iter->containsAnyStream(rseStreams))
3553-
conjunctStack.push(iter);
3554-
}
3543+
if (iter->containsAnyStream(rseStreams))
3544+
conjunctStack.push(iter);
35553545
}
35563546

3557-
return opt->compile(this, &conjunctStack);
3547+
if (opt->isSpecialJoin() && !opt->deliverJoinConjuncts(conjunctStack))
3548+
conjunctStack.clear();
35583549
}
3559-
3560-
// Push only parent conjuncts to the outer stream
3561-
for (auto iter = opt->getConjuncts(true, false); iter.hasData(); ++iter)
3550+
else
35623551
{
3563-
if (iter->containsAnyStream(rseStreams))
3564-
conjunctStack.push(iter);
3552+
// Push only parent conjuncts to the outer stream
3553+
for (auto iter = opt->getConjuncts(true, false); iter.hasData(); ++iter)
3554+
{
3555+
if (iter->containsAnyStream(rseStreams))
3556+
conjunctStack.push(iter);
3557+
}
35653558
}
35663559

35673560
return opt->compile(this, &conjunctStack);
35683561
}
35693562

35703563
RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb)
35713564
{
3572-
if (rse_jointype != blr_inner || !rse_boolean || rse_plan)
3565+
if (!isInnerJoin() || !rse_boolean || rse_plan)
35733566
return nullptr;
35743567

35753568
// If the sub-query is nested inside the other sub-query which wasn't converted into semi-join,
@@ -3589,19 +3582,16 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb)
35893582
}
35903583
}
35913584

3592-
RecordSourceNodeStack rseStack;
3593-
BoolExprNodeStack booleanStack;
3594-
35953585
// Find possibly joinable sub-queries
35963586

35973587
StreamList rseStreams;
35983588
computeRseStreams(rseStreams);
3589+
SpecialJoinList specialJoins;
35993590

3600-
if (!findPossibleJoins(csb, rseStreams, rse_boolean.getAddress(), rseStack, booleanStack))
3591+
if (!findPossibleJoins(csb, rseStreams, rse_boolean.getAddress(), specialJoins))
36013592
return nullptr;
36023593

3603-
fb_assert(rseStack.hasData() && booleanStack.hasData());
3604-
fb_assert(rseStack.getCount() == booleanStack.getCount());
3594+
fb_assert(specialJoins.hasData());
36053595

36063596
// Create joins between the original node and detected joinable nodes.
36073597
// Preserve FIRST/SKIP nodes at their original position, i.e. outside semi-joins.
@@ -3616,16 +3606,18 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb)
36163606
flags = 0;
36173607

36183608
auto rse = this;
3619-
while (rseStack.hasData())
3609+
while (specialJoins.hasData())
36203610
{
36213611
const auto newRse = FB_NEW_POOL(*tdbb->getDefaultPool())
36223612
RseNode(*tdbb->getDefaultPool());
36233613

3614+
const auto item = specialJoins.pop();
3615+
36243616
newRse->rse_relations.add(rse);
3625-
newRse->rse_relations.add(rseStack.pop());
3617+
newRse->rse_relations.add(item.rse);
36263618

3627-
newRse->rse_jointype = blr_inner;
3628-
newRse->rse_boolean = booleanStack.pop();
3619+
newRse->rse_jointype = item.semiJoin ? SEMI_JOIN : ANTI_JOIN;
3620+
newRse->rse_boolean = item.boolean;
36293621

36303622
rse = newRse;
36313623
}
@@ -3636,7 +3628,7 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb)
36363628
RseNode(*tdbb->getDefaultPool());
36373629

36383630
newRse->rse_relations.add(rse);
3639-
newRse->rse_jointype = blr_inner;
3631+
newRse->rse_jointype = INNER_JOIN;
36403632
newRse->rse_first = first;
36413633
newRse->rse_skip = skip;
36423634

0 commit comments

Comments
 (0)