@@ -54,6 +54,165 @@ static ValueExprNode* resolveUsingField(DsqlCompilerScratch* dsqlScratch, const
54
54
55
55
namespace
56
56
{
57
+ // Search through the list of ANDed booleans to find comparisons
58
+ // referring streams of parent select expressions.
59
+ // Extract those booleans and return them to the caller.
60
+
61
+ bool findDependentBooleans (CompilerScratch* csb,
62
+ const StreamList& rseStreams,
63
+ BoolExprNode** parentBoolean,
64
+ BoolExprNodeStack& booleanStack)
65
+ {
66
+ const auto boolean = *parentBoolean;
67
+
68
+ const auto binaryNode = nodeAs<BinaryBoolNode>(boolean);
69
+ if (binaryNode && binaryNode->blrOp == blr_and)
70
+ {
71
+ const bool found1 = findDependentBooleans (csb, rseStreams,
72
+ binaryNode->arg1 .getAddress (), booleanStack);
73
+ const bool found2 = findDependentBooleans (csb, rseStreams,
74
+ binaryNode->arg2 .getAddress (), booleanStack);
75
+
76
+ if (!binaryNode->arg1 && !binaryNode->arg2 )
77
+ *parentBoolean = nullptr ;
78
+ else if (!binaryNode->arg1 )
79
+ *parentBoolean = binaryNode->arg2 ;
80
+ else if (!binaryNode->arg2 )
81
+ *parentBoolean = binaryNode->arg1 ;
82
+
83
+ return (found1 || found2);
84
+ }
85
+
86
+ if (const auto cmpNode = nodeAs<ComparativeBoolNode>(boolean))
87
+ {
88
+ if (cmpNode->blrOp == blr_eql || cmpNode->blrOp == blr_equiv)
89
+ {
90
+ SortedStreamList streams;
91
+ cmpNode->collectStreams (streams);
92
+
93
+ for (const auto stream : streams)
94
+ {
95
+ if (rseStreams.exist (stream))
96
+ {
97
+ booleanStack.push (boolean);
98
+ *parentBoolean = nullptr ;
99
+ return true ;
100
+ }
101
+ }
102
+ }
103
+ }
104
+
105
+ return false ;
106
+ }
107
+
108
+ // Search through the list of ANDed booleans to find correlated EXISTS/IN sub-queries.
109
+ // They are candidates to be converted into semi- or anti-joins.
110
+
111
+ bool findPossibleJoins (CompilerScratch* csb,
112
+ const StreamList& rseStreams,
113
+ BoolExprNode** parentBoolean,
114
+ RecordSourceNodeStack& rseStack,
115
+ BoolExprNodeStack& booleanStack)
116
+ {
117
+ auto boolNode = *parentBoolean;
118
+
119
+ const auto binaryNode = nodeAs<BinaryBoolNode>(boolNode);
120
+ if (binaryNode && binaryNode->blrOp == blr_and)
121
+ {
122
+ const bool found1 = findPossibleJoins (csb, rseStreams,
123
+ binaryNode->arg1 .getAddress (), rseStack, booleanStack);
124
+ const bool found2 = findPossibleJoins (csb, rseStreams,
125
+ binaryNode->arg2 .getAddress (), rseStack, booleanStack);
126
+
127
+ if (!binaryNode->arg1 && !binaryNode->arg2 )
128
+ *parentBoolean = nullptr ;
129
+ else if (!binaryNode->arg1 )
130
+ *parentBoolean = binaryNode->arg2 ;
131
+ else if (!binaryNode->arg2 )
132
+ *parentBoolean = binaryNode->arg1 ;
133
+
134
+ return (found1 || found2);
135
+ }
136
+
137
+ const auto rseNode = nodeAs<RseBoolNode>(boolNode);
138
+ // Both EXISTS (blr_any) and IN (blr_ansi_any) sub-queries are handled
139
+ if (rseNode && (rseNode->blrOp == blr_any || rseNode->blrOp == blr_ansi_any))
140
+ {
141
+ auto rse = rseNode->rse ;
142
+ fb_assert (rse && (rse->flags & RseNode::FLAG_SUB_QUERY));
143
+
144
+ if (rse->rse_boolean && rse->rse_jointype == blr_inner &&
145
+ !rse->rse_first && !rse->rse_skip && !rse->rse_plan )
146
+ {
147
+ // Find booleans convertable into semi-joins
148
+
149
+ StreamList streams;
150
+ rse->computeRseStreams (streams);
151
+
152
+ BoolExprNodeStack booleans;
153
+ if (findDependentBooleans (csb, rseStreams,
154
+ rse->rse_boolean .getAddress (),
155
+ booleans))
156
+ {
157
+ // Compose the conjunct boolean
158
+
159
+ fb_assert (booleans.hasData ());
160
+ auto boolean = booleans.pop ();
161
+ while (booleans.hasData ())
162
+ {
163
+ const auto andNode = FB_NEW_POOL (csb->csb_pool )
164
+ BinaryBoolNode (csb->csb_pool , blr_and);
165
+ andNode->arg1 = boolean;
166
+ andNode->arg2 = booleans.pop ();
167
+ boolean = andNode;
168
+ }
169
+
170
+ // Ensure that no external references are left inside the subquery.
171
+ // If so, mark the RSE as joined and add it to the stack.
172
+
173
+ SortedStreamList streams;
174
+ rse->collectStreams (streams);
175
+
176
+ bool dependent = false ;
177
+ for (const auto stream : streams)
178
+ {
179
+ if (rseStreams.exist (stream))
180
+ {
181
+ dependent = true ;
182
+ break ;
183
+ }
184
+ }
185
+
186
+ if (!dependent)
187
+ {
188
+ rse->flags &= ~RseNode::FLAG_SUB_QUERY;
189
+ rse->flags |= RseNode::FLAG_SEMI_JOINED;
190
+ rseStack.push (rse);
191
+ booleanStack.push (boolean);
192
+ *parentBoolean = nullptr ;
193
+ return true ;
194
+ }
195
+
196
+ // Otherwise, restore the original sub-query by adding
197
+ // the collected booleans back to the RSE.
198
+
199
+ if (rse->rse_boolean )
200
+ {
201
+ const auto andNode = FB_NEW_POOL (csb->csb_pool )
202
+ BinaryBoolNode (csb->csb_pool , blr_and);
203
+ andNode->arg1 = boolean;
204
+ andNode->arg2 = rse->rse_boolean ;
205
+ boolean = andNode;
206
+ }
207
+
208
+ rse->rse_boolean = boolean;
209
+ }
210
+ }
211
+ }
212
+
213
+ return false ;
214
+ }
215
+
57
216
class AutoActivateResetStreams : public AutoStorage
58
217
{
59
218
public:
@@ -3025,6 +3184,9 @@ RseNode* RseNode::pass1(thread_db* tdbb, CompilerScratch* csb)
3025
3184
{
3026
3185
SET_TDBB (tdbb);
3027
3186
3187
+ if (const auto newRse = processPossibleJoins (tdbb, csb))
3188
+ return newRse->pass1 (tdbb, csb);
3189
+
3028
3190
// for scoping purposes, maintain a stack of RseNode's which are
3029
3191
// currently being parsed; if there are none on the stack as
3030
3192
// yet, mark the RseNode as variant to make sure that statement-
@@ -3130,6 +3292,12 @@ RseNode* RseNode::pass1(thread_db* tdbb, CompilerScratch* csb)
3130
3292
void RseNode::pass1Source (thread_db* tdbb, CompilerScratch* csb, RseNode* rse,
3131
3293
BoolExprNode** boolean, RecordSourceNodeStack& stack)
3132
3294
{
3295
+ if (const auto newRse = processPossibleJoins (tdbb, csb))
3296
+ {
3297
+ newRse->pass1Source (tdbb, csb, rse, boolean, stack);
3298
+ return ;
3299
+ }
3300
+
3133
3301
if (rse_jointype != blr_inner)
3134
3302
{
3135
3303
// Check whether any of the upper level booleans (those belonging to the WHERE clause)
@@ -3183,15 +3351,15 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse,
3183
3351
}
3184
3352
}
3185
3353
3186
- // in the case of an RseNode, it is possible that a new RseNode will be generated,
3354
+ // In the case of an RseNode, it is possible that a new RseNode will be generated,
3187
3355
// so wait to process the source before we push it on the stack (bug 8039)
3188
3356
3189
3357
// The addition of the JOIN syntax for specifying inner joins causes an
3190
3358
// RseNode tree to be generated, which is undesirable in the simplest case
3191
3359
// where we are just trying to inner join more than 2 streams. If possible,
3192
3360
// try to flatten the tree out before we go any further.
3193
3361
3194
- if (!isLateral () &&
3362
+ if (!isLateral () && ! isSemiJoined () &&
3195
3363
rse->rse_jointype == blr_inner &&
3196
3364
rse_jointype == blr_inner &&
3197
3365
!rse_sorted && !rse_projection &&
@@ -3296,11 +3464,11 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr
3296
3464
3297
3465
StreamStateHolder stateHolder (csb, opt->getOuterStreams ());
3298
3466
3299
- if (opt->isLeftJoin () || isLateral ())
3467
+ if (opt->isLeftJoin () || isLateral () || isSemiJoined () )
3300
3468
{
3301
3469
stateHolder.activate ();
3302
3470
3303
- if (opt->isLeftJoin ())
3471
+ if (opt->isLeftJoin () || isSemiJoined () )
3304
3472
{
3305
3473
// Push all conjuncts except "missing" ones (e.g. IS NULL)
3306
3474
for (auto iter = opt->getConjuncts (false , true ); iter.hasData (); ++iter)
@@ -3323,6 +3491,87 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr
3323
3491
return opt->compile (this , &conjunctStack);
3324
3492
}
3325
3493
3494
+ RseNode* RseNode::processPossibleJoins (thread_db* tdbb, CompilerScratch* csb)
3495
+ {
3496
+ if (rse_jointype != blr_inner || !rse_boolean || rse_plan)
3497
+ return nullptr ;
3498
+
3499
+ // If the sub-query is nested inside the other sub-query which wasn't converted into semi-join,
3500
+ // it makes no sense to apply a semi-join at the deeper levels, as a sub-query is expected
3501
+ // to be executed repeatedly.
3502
+ // This is a temporary fix until nested loop semi-joins are allowed by the optimizer.
3503
+
3504
+ if (flags & FLAG_SUB_QUERY)
3505
+ return nullptr ;
3506
+
3507
+ for (const auto node : csb->csb_current_nodes )
3508
+ {
3509
+ if (const auto rse = nodeAs<RseNode>(node))
3510
+ {
3511
+ if (rse->flags & FLAG_SUB_QUERY)
3512
+ return nullptr ;
3513
+ }
3514
+ }
3515
+
3516
+ RecordSourceNodeStack rseStack;
3517
+ BoolExprNodeStack booleanStack;
3518
+
3519
+ // Find possibly joinable sub-queries
3520
+
3521
+ StreamList rseStreams;
3522
+ computeRseStreams (rseStreams);
3523
+
3524
+ if (!findPossibleJoins (csb, rseStreams, rse_boolean.getAddress (), rseStack, booleanStack))
3525
+ return nullptr ;
3526
+
3527
+ fb_assert (rseStack.hasData () && booleanStack.hasData ());
3528
+ fb_assert (rseStack.getCount () == booleanStack.getCount ());
3529
+
3530
+ // Create joins between the original node and detected joinable nodes.
3531
+ // Preserve FIRST/SKIP nodes at their original position, i.e. outside semi-joins.
3532
+
3533
+ const auto first = rse_first;
3534
+ rse_first = nullptr ;
3535
+
3536
+ const auto skip = rse_skip;
3537
+ rse_skip = nullptr ;
3538
+
3539
+ const auto orgFlags = flags;
3540
+ flags = 0 ;
3541
+
3542
+ auto rse = this ;
3543
+ while (rseStack.hasData ())
3544
+ {
3545
+ const auto newRse = FB_NEW_POOL (*tdbb->getDefaultPool ())
3546
+ RseNode (*tdbb->getDefaultPool ());
3547
+
3548
+ newRse->rse_relations .add (rse);
3549
+ newRse->rse_relations .add (rseStack.pop ());
3550
+
3551
+ newRse->rse_jointype = blr_inner;
3552
+ newRse->rse_boolean = booleanStack.pop ();
3553
+
3554
+ rse = newRse;
3555
+ }
3556
+
3557
+ if (first || skip)
3558
+ {
3559
+ const auto newRse = FB_NEW_POOL (*tdbb->getDefaultPool ())
3560
+ RseNode (*tdbb->getDefaultPool ());
3561
+
3562
+ newRse->rse_relations .add (rse);
3563
+ newRse->rse_jointype = blr_inner;
3564
+ newRse->rse_first = first;
3565
+ newRse->rse_skip = skip;
3566
+
3567
+ rse = newRse;
3568
+ }
3569
+
3570
+ rse->flags = orgFlags;
3571
+
3572
+ return rse;
3573
+ }
3574
+
3326
3575
// Check that all streams in the RseNode have a plan specified for them.
3327
3576
// If they are not, there are streams in the RseNode which were not mentioned in the plan.
3328
3577
void RseNode::planCheck (const CompilerScratch* csb) const
0 commit comments