@@ -66,50 +66,47 @@ class TJoinReorderer {
6666 }
6767
6868 TYtJoinNodeOp::TPtr Do () {
69- CollectRels (Root);
70- if (!CollectOps (Root)) {
71- return Root;
72- }
73-
74- IOptimizer::TInput input;
75- input.EqClasses = std::move (EqClasses);
76- input.Left = std::move (Left);
77- input.Right = std::move (Right);
78- input.Rels = std::move (Rels);
79- input.Normalize ();
80- YQL_CLOG (INFO, ProviderYt) << " Input: " << input.ToString ();
69+ std::shared_ptr<IBaseOptimizerNode> tree;
70+ std::shared_ptr<IProviderContext> ctx;
71+ BuildOptimizerJoinTree (tree, ctx, Root);
8172
8273 std::function<void (const TString& str)> log;
8374
8475 log = [](const TString& str) {
8576 YQL_CLOG (INFO, ProviderYt) << str;
8677 };
8778
88- std::unique_ptr<IOptimizer > opt;
79+ std::unique_ptr<IOptimizerNew > opt;
8980
9081 switch (State->Types ->CostBasedOptimizer ) {
9182 case ECostBasedOptimizerType::PG:
92- opt = std::unique_ptr<IOptimizer>( MakePgOptimizer (input , log));
83+ opt = std::unique_ptr<IOptimizerNew>( MakePgOptimizerNew (*ctx, Ctx , log));
9384 break ;
9485 case ECostBasedOptimizerType::Native:
95- opt = std::unique_ptr<IOptimizer >(NDq::MakeNativeOptimizer (input, log ));
86+ opt = std::unique_ptr<IOptimizerNew >(NDq::MakeNativeOptimizerNew (*ctx, 100000 ));
9687 break ;
9788 default :
9889 YQL_CLOG (ERROR, ProviderYt) << " Unknown optimizer type " << ToString (State->Types ->CostBasedOptimizer );
9990 return Root;
10091 }
10192
93+ std::shared_ptr<TJoinOptimizerNode> result;
94+
10295 try {
103- Result = opt->JoinSearch ();
96+ result = opt->JoinSearch (std::dynamic_pointer_cast<TJoinOptimizerNode>(tree));
97+ if (tree == result) { return Root; }
10498 } catch (...) {
10599 YQL_CLOG (ERROR, ProviderYt) << " Cannot do join search " << CurrentExceptionMessage ();
106100 return Root;
107101 }
108102
109- YQL_CLOG (INFO, ProviderYt) << " Result: " << Result.ToString ();
103+ std::stringstream ss;
104+ result->Print (ss);
105+
106+ YQL_CLOG (INFO, ProviderYt) << " Result: " << ss.str ();
110107
111108 TVector<TString> scope;
112- TYtJoinNodeOp::TPtr res = dynamic_cast <TYtJoinNodeOp*>(Convert ( 0 , scope ).Get ());
109+ TYtJoinNodeOp::TPtr res = dynamic_cast <TYtJoinNodeOp*>(BuildYtJoinTree (result, Ctx, {} ).Get ());
113110
114111 YQL_ENSURE (res);
115112 if (Debug) {
@@ -120,268 +117,10 @@ class TJoinReorderer {
120117 }
121118
122119private:
123- int GetVarId (int relId, TStringBuf column) {
124- int varId = 0 ;
125- auto maybeVarId = VarIds[relId-1 ].find (column);
126- if (maybeVarId != VarIds[relId-1 ].end ()) {
127- varId = maybeVarId->second ;
128- } else {
129- varId = Rels[relId - 1 ].TargetVars .size () + 1 ;
130- VarIds[relId - 1 ][column] = varId;
131- Rels[relId - 1 ].TargetVars .emplace_back ();
132- Var2TableCol[relId - 1 ].emplace_back ();
133- }
134- return varId;
135- }
136-
137- void ExtractVars (auto & vars, TExprNode::TPtr labels) {
138- for (ui32 i = 0 ; i < labels->ChildrenSize (); i += 2 ) {
139- auto table = labels->Child (i)->Content ();
140- auto column = labels->Child (i + 1 )->Content ();
141-
142- const auto & relIds = Table2RelIds[table];
143- YQL_ENSURE (!relIds.empty ());
144-
145- for (int relId : relIds) {
146- int varId = GetVarId (relId, column);
147-
148- vars.emplace_back (std::make_tuple (relId, varId, table, column));
149- }
150- }
151- };
152-
153- std::vector<TStringBuf> GetTables (TExprNode::TPtr label)
154- {
155- if (label->ChildrenSize () == 0 ) {
156- return {label->Content ()};
157- } else {
158- std::vector<TStringBuf> tables;
159- tables.reserve (label->ChildrenSize ());
160- for (ui32 i = 0 ; i < label->ChildrenSize (); i++) {
161- tables.emplace_back (label->Child (i)->Content ());
162- }
163- return tables;
164- }
165- }
166-
167- void OnLeaf (TYtJoinNodeLeaf* leaf) {
168- int relId = Rels.size () + 1 ;
169- Rels.emplace_back (IOptimizer::TRel{});
170- Var2TableCol.emplace_back ();
171- // rel -> varIds
172- VarIds.emplace_back (THashMap<TStringBuf, int >{});
173- // rel -> tables
174- RelTables.emplace_back (std::vector<TStringBuf>{});
175- for (const auto & table : GetTables (leaf->Label )) {
176- RelTables.back ().emplace_back (table);
177- Table2RelIds[table].emplace_back (relId);
178- }
179- auto & rel = Rels[relId - 1 ];
180-
181- TYtSection section{leaf->Section };
182- if (Y_UNLIKELY (!section.Settings ().Empty ()) && Y_UNLIKELY (section.Settings ().Item (0 ).Name () == " Test" )) {
183- // ut
184- for (const auto & setting : section.Settings ()) {
185- if (setting.Name () == " Rows" ) {
186- rel.Rows += FromString<ui64>(setting.Value ().Ref ().Content ());
187- } else if (setting.Name () == " Size" ) {
188- rel.TotalCost += FromString<ui64>(setting.Value ().Ref ().Content ());
189- }
190- }
191- } else {
192- for (auto path: section.Paths ()) {
193- auto stat = TYtTableBaseInfo::GetStat (path.Table ());
194- rel.TotalCost += stat->DataSize ;
195- rel.Rows += stat->RecordsCount ;
196- }
197- if (!(rel.Rows > 0 )) {
198- YQL_CLOG (INFO, ProviderYt) << " Cannot read stats from: " << NCommon::ExprToPrettyString (Ctx, *section.Ptr ());
199- }
200- }
201-
202- int leafIndex = relId - 1 ;
203- if (leafIndex >= static_cast <int >(Leafs.size ())) {
204- Leafs.resize (leafIndex + 1 );
205- }
206- Leafs[leafIndex] = leaf;
207- };
208-
209- IOptimizer::TEq MakeEqClass (const auto & vars) {
210- IOptimizer::TEq eqClass;
211-
212- for (auto & [relId, varId, table, column] : vars) {
213- eqClass.Vars .emplace_back (std::make_tuple (relId, varId));
214- Var2TableCol[relId - 1 ][varId - 1 ] = std::make_tuple (table, column);
215- }
216-
217- return eqClass;
218- }
219-
220- void MakeEqClasses (std::vector<IOptimizer::TEq>& res, const auto & leftVars, const auto & rightVars) {
221- for (int i = 0 ; i < (int )leftVars.size (); i++) {
222- auto & [lrelId, lvarId, ltable, lcolumn] = leftVars[i];
223- auto & [rrelId, rvarId, rtable, rcolumn] = rightVars[i];
224-
225- IOptimizer::TEq eqClass; eqClass.Vars .reserve (2 );
226- eqClass.Vars .emplace_back (std::make_tuple (lrelId, lvarId));
227- eqClass.Vars .emplace_back (std::make_tuple (rrelId, rvarId));
228-
229- Var2TableCol[lrelId - 1 ][lvarId - 1 ] = std::make_tuple (ltable, lcolumn);
230- Var2TableCol[rrelId - 1 ][rvarId - 1 ] = std::make_tuple (rtable, rcolumn);
231-
232- res.emplace_back (std::move (eqClass));
233- }
234- }
235-
236- bool OnOp (TYtJoinNodeOp* op) {
237- #define CHECK (A, B ) \
238- if (Y_UNLIKELY (!(A))) { \
239- TIssues issues; \
240- issues.AddIssue (TIssue (B).SetCode (0 , NYql::TSeverityIds::S_INFO)); \
241- Ctx.IssueManager .AddIssues (issues); \
242- return false ; \
243- }
244-
245- CHECK (!op->Output , " Non empty output" );
246- CHECK (op->StarOptions .empty (), " Non empty StarOptions" );
247-
248- const auto & joinKind = op->JoinKind ->Content ();
249-
250- if (joinKind == " Inner" ) {
251- // relId, varId, table, column
252- std::vector<std::tuple<int ,int ,TStringBuf,TStringBuf>> leftVars;
253- std::vector<std::tuple<int ,int ,TStringBuf,TStringBuf>> rightVars;
254-
255- ExtractVars (leftVars, op->LeftLabel );
256- ExtractVars (rightVars, op->RightLabel );
257-
258- CHECK (leftVars.size () == rightVars.size (), " Left and right labels must have the same size" );
259-
260- MakeEqClasses (EqClasses, leftVars, rightVars);
261- } else if (joinKind == " Left" || joinKind == " Right" ) {
262- CHECK (op->LeftLabel ->ChildrenSize () == 2 , " Only 1 var per join supported" );
263- CHECK (op->RightLabel ->ChildrenSize () == 2 , " Only 1 var per join supported" );
264-
265- std::vector<std::tuple<int ,int ,TStringBuf,TStringBuf>> leftVars, rightVars;
266- ExtractVars (leftVars, op->LeftLabel );
267- ExtractVars (rightVars, op->RightLabel );
268-
269- IOptimizer::TEq leftEqClass = MakeEqClass (leftVars);
270- IOptimizer::TEq rightEqClass = MakeEqClass (rightVars);
271- IOptimizer::TEq eqClass = leftEqClass;
272- eqClass.Vars .insert (eqClass.Vars .end (), rightEqClass.Vars .begin (), rightEqClass.Vars .end ());
273-
274- CHECK (eqClass.Vars .size () == 2 , " Only a=b left|right join supported yet" );
275-
276- EqClasses.emplace_back (std::move (leftEqClass));
277- EqClasses.emplace_back (std::move (rightEqClass));
278- if (joinKind == " Left" ) {
279- Left.emplace_back (eqClass);
280- } else {
281- Right.emplace_back (eqClass);
282- }
283- } else {
284- CHECK (false , " Unsupported join type" );
285- }
286-
287- #undef CHECK
288- return true ;
289- }
290-
291- bool CollectOps (TYtJoinNode::TPtr node)
292- {
293- if (auto * op = dynamic_cast <TYtJoinNodeOp*>(node.Get ())) {
294- return OnOp (op)
295- && CollectOps (op->Left )
296- && CollectOps (op->Right );
297- }
298- return true ;
299- }
300-
301- void CollectRels (TYtJoinNode::TPtr node)
302- {
303- if (auto * op = dynamic_cast <TYtJoinNodeOp*>(node.Get ())) {
304- CollectRels (op->Left );
305- CollectRels (op->Right );
306- } else if (auto * leaf = dynamic_cast <TYtJoinNodeLeaf*>(node.Get ())) {
307- OnLeaf (leaf);
308- }
309- }
310-
311- TExprNode::TPtr MakeLabel (const std::vector<IOptimizer::TVarId>& vars) const {
312- TVector<TExprNodePtr> label; label.reserve (vars.size () * 2 );
313-
314- for (auto [relId, varId] : vars) {
315- auto [table, column] = Var2TableCol[relId - 1 ][varId - 1 ];
316-
317- label.emplace_back (Ctx.NewAtom (Root->JoinKind ->Pos (), table));
318- label.emplace_back (Ctx.NewAtom (Root->JoinKind ->Pos (), column));
319- }
320-
321- return Build<TCoAtomList>(Ctx, Root->JoinKind ->Pos ())
322- .Add (label)
323- .Done ()
324- .Ptr ();
325- }
326-
327- TYtJoinNode::TPtr Convert (int nodeId, TVector<TString>& scope) const
328- {
329- const IOptimizer::TJoinNode* node = &Result.Nodes [nodeId];
330- if (node->Outer == -1 && node->Inner == -1 ) {
331- YQL_ENSURE (node->Rels .size () == 1 );
332- auto leaf = Leafs[node->Rels [0 ]-1 ];
333- YQL_ENSURE (leaf);
334- YQL_ENSURE (!leaf->Scope .empty ());
335- scope.insert (scope.end (), leaf->Scope .begin (), leaf->Scope .end ());
336- return leaf;
337- } else if (node->Outer != -1 && node->Inner != -1 ) {
338- auto ret = MakeIntrusive<TYtJoinNodeOp>();
339- TString joinKind;
340- switch (node->Mode ) {
341- case IOptimizer::EJoinType::Inner:
342- joinKind = " Inner" ;
343- break ;
344- case IOptimizer::EJoinType::Left:
345- joinKind = " Left" ;
346- break ;
347- case IOptimizer::EJoinType::Right:
348- joinKind = " Right" ;
349- break ;
350- default :
351- YQL_ENSURE (false , " Unsupported join type" );
352- break ;
353- }
354- ret->JoinKind = Ctx.NewAtom (Root->JoinKind ->Pos (), joinKind);
355- ret->LeftLabel = MakeLabel (node->LeftVars );
356- ret->RightLabel = MakeLabel (node->RightVars );
357- int index = scope.size ();
358- ret->Left = Convert (node->Outer , scope);
359- ret->Right = Convert (node->Inner , scope);
360- ret->Scope .insert (ret->Scope .end (), scope.begin () + index, scope.end ());
361- return ret;
362- } else {
363- YQL_ENSURE (false , " Wrong CBO node" );
364- }
365- }
366-
367120 TYtJoinNodeOp::TPtr Root;
368121 const TYtState::TPtr& State;
369122 TExprContext& Ctx;
370123 bool Debug;
371-
372- THashMap<TStringBuf, std::vector<int >> Table2RelIds;
373- std::vector<IOptimizer::TRel> Rels;
374- std::vector<std::vector<TStringBuf>> RelTables;
375- std::vector<TYtJoinNodeLeaf*> Leafs;
376- std::vector<std::vector<std::tuple<TStringBuf, TStringBuf>>> Var2TableCol;
377-
378- std::vector<THashMap<TStringBuf, int >> VarIds;
379-
380- std::vector<IOptimizer::TEq> EqClasses;
381- std::vector<IOptimizer::TEq> Left;
382- std::vector<IOptimizer::TEq> Right;
383-
384- IOptimizer::TOutput Result;
385124};
386125
387126class TYtRelOptimizerNode : public TRelOptimizerNode {
0 commit comments