@@ -66,50 +66,47 @@ class TJoinReorderer {
66
66
}
67
67
68
68
TYtJoinNodeOp::TPtr Do () {
69
- CollectRels (Root);
70
- if (!CollectOps (Root)) {
71
- return Root;
72
- }
73
-
74
- IOptimizer::TInput input;
75
- input.EqClasses = std::move (EqClasses);
76
- input.Left = std::move (Left);
77
- input.Right = std::move (Right);
78
- input.Rels = std::move (Rels);
79
- input.Normalize ();
80
- YQL_CLOG (INFO, ProviderYt) << " Input: " << input.ToString ();
69
+ std::shared_ptr<IBaseOptimizerNode> tree;
70
+ std::shared_ptr<IProviderContext> ctx;
71
+ BuildOptimizerJoinTree (tree, ctx, Root);
81
72
82
73
std::function<void (const TString& str)> log;
83
74
84
75
log = [](const TString& str) {
85
76
YQL_CLOG (INFO, ProviderYt) << str;
86
77
};
87
78
88
- std::unique_ptr<IOptimizer > opt;
79
+ std::unique_ptr<IOptimizerNew > opt;
89
80
90
81
switch (State->Types ->CostBasedOptimizer ) {
91
82
case ECostBasedOptimizerType::PG:
92
- opt = std::unique_ptr<IOptimizer>( MakePgOptimizer (input , log));
83
+ opt = std::unique_ptr<IOptimizerNew>( MakePgOptimizerNew (*ctx, Ctx , log));
93
84
break ;
94
85
case ECostBasedOptimizerType::Native:
95
- opt = std::unique_ptr<IOptimizer >(NDq::MakeNativeOptimizer (input, log ));
86
+ opt = std::unique_ptr<IOptimizerNew >(NDq::MakeNativeOptimizerNew (*ctx, 100000 ));
96
87
break ;
97
88
default :
98
89
YQL_CLOG (ERROR, ProviderYt) << " Unknown optimizer type " << ToString (State->Types ->CostBasedOptimizer );
99
90
return Root;
100
91
}
101
92
93
+ std::shared_ptr<TJoinOptimizerNode> result;
94
+
102
95
try {
103
- Result = opt->JoinSearch ();
96
+ result = opt->JoinSearch (std::dynamic_pointer_cast<TJoinOptimizerNode>(tree));
97
+ if (tree == result) { return Root; }
104
98
} catch (...) {
105
99
YQL_CLOG (ERROR, ProviderYt) << " Cannot do join search " << CurrentExceptionMessage ();
106
100
return Root;
107
101
}
108
102
109
- YQL_CLOG (INFO, ProviderYt) << " Result: " << Result.ToString ();
103
+ std::stringstream ss;
104
+ result->Print (ss);
105
+
106
+ YQL_CLOG (INFO, ProviderYt) << " Result: " << ss.str ();
110
107
111
108
TVector<TString> scope;
112
- TYtJoinNodeOp::TPtr res = dynamic_cast <TYtJoinNodeOp*>(Convert ( 0 , scope ).Get ());
109
+ TYtJoinNodeOp::TPtr res = dynamic_cast <TYtJoinNodeOp*>(BuildYtJoinTree (result, Ctx, {} ).Get ());
113
110
114
111
YQL_ENSURE (res);
115
112
if (Debug) {
@@ -120,268 +117,10 @@ class TJoinReorderer {
120
117
}
121
118
122
119
private:
123
- int GetVarId (int relId, TStringBuf column) {
124
- int varId = 0 ;
125
- auto maybeVarId = VarIds[relId-1 ].find (column);
126
- if (maybeVarId != VarIds[relId-1 ].end ()) {
127
- varId = maybeVarId->second ;
128
- } else {
129
- varId = Rels[relId - 1 ].TargetVars .size () + 1 ;
130
- VarIds[relId - 1 ][column] = varId;
131
- Rels[relId - 1 ].TargetVars .emplace_back ();
132
- Var2TableCol[relId - 1 ].emplace_back ();
133
- }
134
- return varId;
135
- }
136
-
137
- void ExtractVars (auto & vars, TExprNode::TPtr labels) {
138
- for (ui32 i = 0 ; i < labels->ChildrenSize (); i += 2 ) {
139
- auto table = labels->Child (i)->Content ();
140
- auto column = labels->Child (i + 1 )->Content ();
141
-
142
- const auto & relIds = Table2RelIds[table];
143
- YQL_ENSURE (!relIds.empty ());
144
-
145
- for (int relId : relIds) {
146
- int varId = GetVarId (relId, column);
147
-
148
- vars.emplace_back (std::make_tuple (relId, varId, table, column));
149
- }
150
- }
151
- };
152
-
153
- std::vector<TStringBuf> GetTables (TExprNode::TPtr label)
154
- {
155
- if (label->ChildrenSize () == 0 ) {
156
- return {label->Content ()};
157
- } else {
158
- std::vector<TStringBuf> tables;
159
- tables.reserve (label->ChildrenSize ());
160
- for (ui32 i = 0 ; i < label->ChildrenSize (); i++) {
161
- tables.emplace_back (label->Child (i)->Content ());
162
- }
163
- return tables;
164
- }
165
- }
166
-
167
- void OnLeaf (TYtJoinNodeLeaf* leaf) {
168
- int relId = Rels.size () + 1 ;
169
- Rels.emplace_back (IOptimizer::TRel{});
170
- Var2TableCol.emplace_back ();
171
- // rel -> varIds
172
- VarIds.emplace_back (THashMap<TStringBuf, int >{});
173
- // rel -> tables
174
- RelTables.emplace_back (std::vector<TStringBuf>{});
175
- for (const auto & table : GetTables (leaf->Label )) {
176
- RelTables.back ().emplace_back (table);
177
- Table2RelIds[table].emplace_back (relId);
178
- }
179
- auto & rel = Rels[relId - 1 ];
180
-
181
- TYtSection section{leaf->Section };
182
- if (Y_UNLIKELY (!section.Settings ().Empty ()) && Y_UNLIKELY (section.Settings ().Item (0 ).Name () == " Test" )) {
183
- // ut
184
- for (const auto & setting : section.Settings ()) {
185
- if (setting.Name () == " Rows" ) {
186
- rel.Rows += FromString<ui64>(setting.Value ().Ref ().Content ());
187
- } else if (setting.Name () == " Size" ) {
188
- rel.TotalCost += FromString<ui64>(setting.Value ().Ref ().Content ());
189
- }
190
- }
191
- } else {
192
- for (auto path: section.Paths ()) {
193
- auto stat = TYtTableBaseInfo::GetStat (path.Table ());
194
- rel.TotalCost += stat->DataSize ;
195
- rel.Rows += stat->RecordsCount ;
196
- }
197
- if (!(rel.Rows > 0 )) {
198
- YQL_CLOG (INFO, ProviderYt) << " Cannot read stats from: " << NCommon::ExprToPrettyString (Ctx, *section.Ptr ());
199
- }
200
- }
201
-
202
- int leafIndex = relId - 1 ;
203
- if (leafIndex >= static_cast <int >(Leafs.size ())) {
204
- Leafs.resize (leafIndex + 1 );
205
- }
206
- Leafs[leafIndex] = leaf;
207
- };
208
-
209
- IOptimizer::TEq MakeEqClass (const auto & vars) {
210
- IOptimizer::TEq eqClass;
211
-
212
- for (auto & [relId, varId, table, column] : vars) {
213
- eqClass.Vars .emplace_back (std::make_tuple (relId, varId));
214
- Var2TableCol[relId - 1 ][varId - 1 ] = std::make_tuple (table, column);
215
- }
216
-
217
- return eqClass;
218
- }
219
-
220
- void MakeEqClasses (std::vector<IOptimizer::TEq>& res, const auto & leftVars, const auto & rightVars) {
221
- for (int i = 0 ; i < (int )leftVars.size (); i++) {
222
- auto & [lrelId, lvarId, ltable, lcolumn] = leftVars[i];
223
- auto & [rrelId, rvarId, rtable, rcolumn] = rightVars[i];
224
-
225
- IOptimizer::TEq eqClass; eqClass.Vars .reserve (2 );
226
- eqClass.Vars .emplace_back (std::make_tuple (lrelId, lvarId));
227
- eqClass.Vars .emplace_back (std::make_tuple (rrelId, rvarId));
228
-
229
- Var2TableCol[lrelId - 1 ][lvarId - 1 ] = std::make_tuple (ltable, lcolumn);
230
- Var2TableCol[rrelId - 1 ][rvarId - 1 ] = std::make_tuple (rtable, rcolumn);
231
-
232
- res.emplace_back (std::move (eqClass));
233
- }
234
- }
235
-
236
- bool OnOp (TYtJoinNodeOp* op) {
237
- #define CHECK (A, B ) \
238
- if (Y_UNLIKELY (!(A))) { \
239
- TIssues issues; \
240
- issues.AddIssue (TIssue (B).SetCode (0 , NYql::TSeverityIds::S_INFO)); \
241
- Ctx.IssueManager .AddIssues (issues); \
242
- return false ; \
243
- }
244
-
245
- CHECK (!op->Output , " Non empty output" );
246
- CHECK (op->StarOptions .empty (), " Non empty StarOptions" );
247
-
248
- const auto & joinKind = op->JoinKind ->Content ();
249
-
250
- if (joinKind == " Inner" ) {
251
- // relId, varId, table, column
252
- std::vector<std::tuple<int ,int ,TStringBuf,TStringBuf>> leftVars;
253
- std::vector<std::tuple<int ,int ,TStringBuf,TStringBuf>> rightVars;
254
-
255
- ExtractVars (leftVars, op->LeftLabel );
256
- ExtractVars (rightVars, op->RightLabel );
257
-
258
- CHECK (leftVars.size () == rightVars.size (), " Left and right labels must have the same size" );
259
-
260
- MakeEqClasses (EqClasses, leftVars, rightVars);
261
- } else if (joinKind == " Left" || joinKind == " Right" ) {
262
- CHECK (op->LeftLabel ->ChildrenSize () == 2 , " Only 1 var per join supported" );
263
- CHECK (op->RightLabel ->ChildrenSize () == 2 , " Only 1 var per join supported" );
264
-
265
- std::vector<std::tuple<int ,int ,TStringBuf,TStringBuf>> leftVars, rightVars;
266
- ExtractVars (leftVars, op->LeftLabel );
267
- ExtractVars (rightVars, op->RightLabel );
268
-
269
- IOptimizer::TEq leftEqClass = MakeEqClass (leftVars);
270
- IOptimizer::TEq rightEqClass = MakeEqClass (rightVars);
271
- IOptimizer::TEq eqClass = leftEqClass;
272
- eqClass.Vars .insert (eqClass.Vars .end (), rightEqClass.Vars .begin (), rightEqClass.Vars .end ());
273
-
274
- CHECK (eqClass.Vars .size () == 2 , " Only a=b left|right join supported yet" );
275
-
276
- EqClasses.emplace_back (std::move (leftEqClass));
277
- EqClasses.emplace_back (std::move (rightEqClass));
278
- if (joinKind == " Left" ) {
279
- Left.emplace_back (eqClass);
280
- } else {
281
- Right.emplace_back (eqClass);
282
- }
283
- } else {
284
- CHECK (false , " Unsupported join type" );
285
- }
286
-
287
- #undef CHECK
288
- return true ;
289
- }
290
-
291
- bool CollectOps (TYtJoinNode::TPtr node)
292
- {
293
- if (auto * op = dynamic_cast <TYtJoinNodeOp*>(node.Get ())) {
294
- return OnOp (op)
295
- && CollectOps (op->Left )
296
- && CollectOps (op->Right );
297
- }
298
- return true ;
299
- }
300
-
301
- void CollectRels (TYtJoinNode::TPtr node)
302
- {
303
- if (auto * op = dynamic_cast <TYtJoinNodeOp*>(node.Get ())) {
304
- CollectRels (op->Left );
305
- CollectRels (op->Right );
306
- } else if (auto * leaf = dynamic_cast <TYtJoinNodeLeaf*>(node.Get ())) {
307
- OnLeaf (leaf);
308
- }
309
- }
310
-
311
- TExprNode::TPtr MakeLabel (const std::vector<IOptimizer::TVarId>& vars) const {
312
- TVector<TExprNodePtr> label; label.reserve (vars.size () * 2 );
313
-
314
- for (auto [relId, varId] : vars) {
315
- auto [table, column] = Var2TableCol[relId - 1 ][varId - 1 ];
316
-
317
- label.emplace_back (Ctx.NewAtom (Root->JoinKind ->Pos (), table));
318
- label.emplace_back (Ctx.NewAtom (Root->JoinKind ->Pos (), column));
319
- }
320
-
321
- return Build<TCoAtomList>(Ctx, Root->JoinKind ->Pos ())
322
- .Add (label)
323
- .Done ()
324
- .Ptr ();
325
- }
326
-
327
- TYtJoinNode::TPtr Convert (int nodeId, TVector<TString>& scope) const
328
- {
329
- const IOptimizer::TJoinNode* node = &Result.Nodes [nodeId];
330
- if (node->Outer == -1 && node->Inner == -1 ) {
331
- YQL_ENSURE (node->Rels .size () == 1 );
332
- auto leaf = Leafs[node->Rels [0 ]-1 ];
333
- YQL_ENSURE (leaf);
334
- YQL_ENSURE (!leaf->Scope .empty ());
335
- scope.insert (scope.end (), leaf->Scope .begin (), leaf->Scope .end ());
336
- return leaf;
337
- } else if (node->Outer != -1 && node->Inner != -1 ) {
338
- auto ret = MakeIntrusive<TYtJoinNodeOp>();
339
- TString joinKind;
340
- switch (node->Mode ) {
341
- case IOptimizer::EJoinType::Inner:
342
- joinKind = " Inner" ;
343
- break ;
344
- case IOptimizer::EJoinType::Left:
345
- joinKind = " Left" ;
346
- break ;
347
- case IOptimizer::EJoinType::Right:
348
- joinKind = " Right" ;
349
- break ;
350
- default :
351
- YQL_ENSURE (false , " Unsupported join type" );
352
- break ;
353
- }
354
- ret->JoinKind = Ctx.NewAtom (Root->JoinKind ->Pos (), joinKind);
355
- ret->LeftLabel = MakeLabel (node->LeftVars );
356
- ret->RightLabel = MakeLabel (node->RightVars );
357
- int index = scope.size ();
358
- ret->Left = Convert (node->Outer , scope);
359
- ret->Right = Convert (node->Inner , scope);
360
- ret->Scope .insert (ret->Scope .end (), scope.begin () + index, scope.end ());
361
- return ret;
362
- } else {
363
- YQL_ENSURE (false , " Wrong CBO node" );
364
- }
365
- }
366
-
367
120
TYtJoinNodeOp::TPtr Root;
368
121
const TYtState::TPtr& State;
369
122
TExprContext& Ctx;
370
123
bool Debug;
371
-
372
- THashMap<TStringBuf, std::vector<int >> Table2RelIds;
373
- std::vector<IOptimizer::TRel> Rels;
374
- std::vector<std::vector<TStringBuf>> RelTables;
375
- std::vector<TYtJoinNodeLeaf*> Leafs;
376
- std::vector<std::vector<std::tuple<TStringBuf, TStringBuf>>> Var2TableCol;
377
-
378
- std::vector<THashMap<TStringBuf, int >> VarIds;
379
-
380
- std::vector<IOptimizer::TEq> EqClasses;
381
- std::vector<IOptimizer::TEq> Left;
382
- std::vector<IOptimizer::TEq> Right;
383
-
384
- IOptimizer::TOutput Result;
385
124
};
386
125
387
126
class TYtRelOptimizerNode : public TRelOptimizerNode {
0 commit comments