Skip to content

Commit f49e354

Browse files
authored
Switch yt provider to new CBO api YQL-17437 (#1868)
1 parent 29e13aa commit f49e354

File tree

5 files changed

+323
-278
lines changed

5 files changed

+323
-278
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
#pragma once
22

33
#include <ydb/library/yql/core/cbo/cbo_optimizer.h>
4+
#include <ydb/library/yql/core/cbo/cbo_optimizer_new.h>
45

56
#include <functional>
67

78
namespace NYql {
89

10+
struct TExprContext;
11+
912
IOptimizer* MakePgOptimizer(const IOptimizer::TInput& input, const std::function<void(const TString&)>& log = {});
1013

14+
IOptimizerNew* MakePgOptimizerNew(IProviderContext& pctx, TExprContext& ctx, const std::function<void(const TString&)>& log = {});
15+
1116
} // namespace NYql

ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,6 @@ Y_UNIT_TEST(BuildYtJoinTree2TablesTableIn2Rels)
171171
Name(ECostBasedOptimizerType::Native); \
172172
}
173173

174-
175174
void OrderJoins2Tables(auto optimizerType) {
176175
TExprContext exprCtx;
177176
auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n"}, exprCtx);
@@ -256,7 +255,7 @@ Y_UNIT_TEST(UnsupportedJoin)
256255
auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n"}, exprCtx);
257256
tree->Left = MakeLeaf({"c"}, {"c"}, 1000000, 1233333, exprCtx);
258257
tree->Right = MakeLeaf({"n"}, {"n"}, 10000, 12333, exprCtx);
259-
tree->JoinKind = exprCtx.NewAtom(exprCtx.AppendPosition({}), "Full");
258+
tree->JoinKind = exprCtx.NewAtom(exprCtx.AppendPosition({}), "RightSemi");
260259

261260
TTypeAnnotationContext typeCtx;
262261
TYtState::TPtr state = MakeIntrusive<TYtState>();

ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp

Lines changed: 15 additions & 276 deletions
Original file line numberDiff line numberDiff line change
@@ -66,50 +66,47 @@ class TJoinReorderer {
6666
}
6767

6868
TYtJoinNodeOp::TPtr Do() {
69-
CollectRels(Root);
70-
if (!CollectOps(Root)) {
71-
return Root;
72-
}
73-
74-
IOptimizer::TInput input;
75-
input.EqClasses = std::move(EqClasses);
76-
input.Left = std::move(Left);
77-
input.Right = std::move(Right);
78-
input.Rels = std::move(Rels);
79-
input.Normalize();
80-
YQL_CLOG(INFO, ProviderYt) << "Input: " << input.ToString();
69+
std::shared_ptr<IBaseOptimizerNode> tree;
70+
std::shared_ptr<IProviderContext> ctx;
71+
BuildOptimizerJoinTree(tree, ctx, Root);
8172

8273
std::function<void(const TString& str)> log;
8374

8475
log = [](const TString& str) {
8576
YQL_CLOG(INFO, ProviderYt) << str;
8677
};
8778

88-
std::unique_ptr<IOptimizer> opt;
79+
std::unique_ptr<IOptimizerNew> opt;
8980

9081
switch (State->Types->CostBasedOptimizer) {
9182
case ECostBasedOptimizerType::PG:
92-
opt = std::unique_ptr<IOptimizer>(MakePgOptimizer(input, log));
83+
opt = std::unique_ptr<IOptimizerNew>(MakePgOptimizerNew(*ctx, Ctx, log));
9384
break;
9485
case ECostBasedOptimizerType::Native:
95-
opt = std::unique_ptr<IOptimizer>(NDq::MakeNativeOptimizer(input, log));
86+
opt = std::unique_ptr<IOptimizerNew>(NDq::MakeNativeOptimizerNew(*ctx, 100000));
9687
break;
9788
default:
9889
YQL_CLOG(ERROR, ProviderYt) << "Unknown optimizer type " << ToString(State->Types->CostBasedOptimizer);
9990
return Root;
10091
}
10192

93+
std::shared_ptr<TJoinOptimizerNode> result;
94+
10295
try {
103-
Result = opt->JoinSearch();
96+
result = opt->JoinSearch(std::dynamic_pointer_cast<TJoinOptimizerNode>(tree));
97+
if (tree == result) { return Root; }
10498
} catch (...) {
10599
YQL_CLOG(ERROR, ProviderYt) << "Cannot do join search " << CurrentExceptionMessage();
106100
return Root;
107101
}
108102

109-
YQL_CLOG(INFO, ProviderYt) << "Result: " << Result.ToString();
103+
std::stringstream ss;
104+
result->Print(ss);
105+
106+
YQL_CLOG(INFO, ProviderYt) << "Result: " << ss.str();
110107

111108
TVector<TString> scope;
112-
TYtJoinNodeOp::TPtr res = dynamic_cast<TYtJoinNodeOp*>(Convert(0, scope).Get());
109+
TYtJoinNodeOp::TPtr res = dynamic_cast<TYtJoinNodeOp*>(BuildYtJoinTree(result, Ctx, {}).Get());
113110

114111
YQL_ENSURE(res);
115112
if (Debug) {
@@ -120,268 +117,10 @@ class TJoinReorderer {
120117
}
121118

122119
private:
123-
int GetVarId(int relId, TStringBuf column) {
124-
int varId = 0;
125-
auto maybeVarId = VarIds[relId-1].find(column);
126-
if (maybeVarId != VarIds[relId-1].end()) {
127-
varId = maybeVarId->second;
128-
} else {
129-
varId = Rels[relId - 1].TargetVars.size() + 1;
130-
VarIds[relId - 1][column] = varId;
131-
Rels[relId - 1].TargetVars.emplace_back();
132-
Var2TableCol[relId - 1].emplace_back();
133-
}
134-
return varId;
135-
}
136-
137-
void ExtractVars(auto& vars, TExprNode::TPtr labels) {
138-
for (ui32 i = 0; i < labels->ChildrenSize(); i += 2) {
139-
auto table = labels->Child(i)->Content();
140-
auto column = labels->Child(i + 1)->Content();
141-
142-
const auto& relIds = Table2RelIds[table];
143-
YQL_ENSURE(!relIds.empty());
144-
145-
for (int relId : relIds) {
146-
int varId = GetVarId(relId, column);
147-
148-
vars.emplace_back(std::make_tuple(relId, varId, table, column));
149-
}
150-
}
151-
};
152-
153-
std::vector<TStringBuf> GetTables(TExprNode::TPtr label)
154-
{
155-
if (label->ChildrenSize() == 0) {
156-
return {label->Content()};
157-
} else {
158-
std::vector<TStringBuf> tables;
159-
tables.reserve(label->ChildrenSize());
160-
for (ui32 i = 0; i < label->ChildrenSize(); i++) {
161-
tables.emplace_back(label->Child(i)->Content());
162-
}
163-
return tables;
164-
}
165-
}
166-
167-
void OnLeaf(TYtJoinNodeLeaf* leaf) {
168-
int relId = Rels.size() + 1;
169-
Rels.emplace_back(IOptimizer::TRel{});
170-
Var2TableCol.emplace_back();
171-
// rel -> varIds
172-
VarIds.emplace_back(THashMap<TStringBuf, int>{});
173-
// rel -> tables
174-
RelTables.emplace_back(std::vector<TStringBuf>{});
175-
for (const auto& table : GetTables(leaf->Label)) {
176-
RelTables.back().emplace_back(table);
177-
Table2RelIds[table].emplace_back(relId);
178-
}
179-
auto& rel = Rels[relId - 1];
180-
181-
TYtSection section{leaf->Section};
182-
if (Y_UNLIKELY(!section.Settings().Empty()) && Y_UNLIKELY(section.Settings().Item(0).Name() == "Test")) {
183-
// ut
184-
for (const auto& setting : section.Settings()) {
185-
if (setting.Name() == "Rows") {
186-
rel.Rows += FromString<ui64>(setting.Value().Ref().Content());
187-
} else if (setting.Name() == "Size") {
188-
rel.TotalCost += FromString<ui64>(setting.Value().Ref().Content());
189-
}
190-
}
191-
} else {
192-
for (auto path: section.Paths()) {
193-
auto stat = TYtTableBaseInfo::GetStat(path.Table());
194-
rel.TotalCost += stat->DataSize;
195-
rel.Rows += stat->RecordsCount;
196-
}
197-
if (!(rel.Rows > 0)) {
198-
YQL_CLOG(INFO, ProviderYt) << "Cannot read stats from: " << NCommon::ExprToPrettyString(Ctx, *section.Ptr());
199-
}
200-
}
201-
202-
int leafIndex = relId - 1;
203-
if (leafIndex >= static_cast<int>(Leafs.size())) {
204-
Leafs.resize(leafIndex + 1);
205-
}
206-
Leafs[leafIndex] = leaf;
207-
};
208-
209-
IOptimizer::TEq MakeEqClass(const auto& vars) {
210-
IOptimizer::TEq eqClass;
211-
212-
for (auto& [relId, varId, table, column] : vars) {
213-
eqClass.Vars.emplace_back(std::make_tuple(relId, varId));
214-
Var2TableCol[relId - 1][varId - 1] = std::make_tuple(table, column);
215-
}
216-
217-
return eqClass;
218-
}
219-
220-
void MakeEqClasses(std::vector<IOptimizer::TEq>& res, const auto& leftVars, const auto& rightVars) {
221-
for (int i = 0; i < (int)leftVars.size(); i++) {
222-
auto& [lrelId, lvarId, ltable, lcolumn] = leftVars[i];
223-
auto& [rrelId, rvarId, rtable, rcolumn] = rightVars[i];
224-
225-
IOptimizer::TEq eqClass; eqClass.Vars.reserve(2);
226-
eqClass.Vars.emplace_back(std::make_tuple(lrelId, lvarId));
227-
eqClass.Vars.emplace_back(std::make_tuple(rrelId, rvarId));
228-
229-
Var2TableCol[lrelId - 1][lvarId - 1] = std::make_tuple(ltable, lcolumn);
230-
Var2TableCol[rrelId - 1][rvarId - 1] = std::make_tuple(rtable, rcolumn);
231-
232-
res.emplace_back(std::move(eqClass));
233-
}
234-
}
235-
236-
bool OnOp(TYtJoinNodeOp* op) {
237-
#define CHECK(A, B) \
238-
if (Y_UNLIKELY(!(A))) { \
239-
TIssues issues; \
240-
issues.AddIssue(TIssue(B).SetCode(0, NYql::TSeverityIds::S_INFO)); \
241-
Ctx.IssueManager.AddIssues(issues); \
242-
return false; \
243-
}
244-
245-
CHECK(!op->Output, "Non empty output");
246-
CHECK(op->StarOptions.empty(), "Non empty StarOptions");
247-
248-
const auto& joinKind = op->JoinKind->Content();
249-
250-
if (joinKind == "Inner") {
251-
// relId, varId, table, column
252-
std::vector<std::tuple<int,int,TStringBuf,TStringBuf>> leftVars;
253-
std::vector<std::tuple<int,int,TStringBuf,TStringBuf>> rightVars;
254-
255-
ExtractVars(leftVars, op->LeftLabel);
256-
ExtractVars(rightVars, op->RightLabel);
257-
258-
CHECK(leftVars.size() == rightVars.size(), "Left and right labels must have the same size");
259-
260-
MakeEqClasses(EqClasses, leftVars, rightVars);
261-
} else if (joinKind == "Left" || joinKind == "Right") {
262-
CHECK(op->LeftLabel->ChildrenSize() == 2, "Only 1 var per join supported");
263-
CHECK(op->RightLabel->ChildrenSize() == 2, "Only 1 var per join supported");
264-
265-
std::vector<std::tuple<int,int,TStringBuf,TStringBuf>> leftVars, rightVars;
266-
ExtractVars(leftVars, op->LeftLabel);
267-
ExtractVars(rightVars, op->RightLabel);
268-
269-
IOptimizer::TEq leftEqClass = MakeEqClass(leftVars);
270-
IOptimizer::TEq rightEqClass = MakeEqClass(rightVars);
271-
IOptimizer::TEq eqClass = leftEqClass;
272-
eqClass.Vars.insert(eqClass.Vars.end(), rightEqClass.Vars.begin(), rightEqClass.Vars.end());
273-
274-
CHECK(eqClass.Vars.size() == 2, "Only a=b left|right join supported yet");
275-
276-
EqClasses.emplace_back(std::move(leftEqClass));
277-
EqClasses.emplace_back(std::move(rightEqClass));
278-
if (joinKind == "Left") {
279-
Left.emplace_back(eqClass);
280-
} else {
281-
Right.emplace_back(eqClass);
282-
}
283-
} else {
284-
CHECK(false, "Unsupported join type");
285-
}
286-
287-
#undef CHECK
288-
return true;
289-
}
290-
291-
bool CollectOps(TYtJoinNode::TPtr node)
292-
{
293-
if (auto* op = dynamic_cast<TYtJoinNodeOp*>(node.Get())) {
294-
return OnOp(op)
295-
&& CollectOps(op->Left)
296-
&& CollectOps(op->Right);
297-
}
298-
return true;
299-
}
300-
301-
void CollectRels(TYtJoinNode::TPtr node)
302-
{
303-
if (auto* op = dynamic_cast<TYtJoinNodeOp*>(node.Get())) {
304-
CollectRels(op->Left);
305-
CollectRels(op->Right);
306-
} else if (auto* leaf = dynamic_cast<TYtJoinNodeLeaf*>(node.Get())) {
307-
OnLeaf(leaf);
308-
}
309-
}
310-
311-
TExprNode::TPtr MakeLabel(const std::vector<IOptimizer::TVarId>& vars) const {
312-
TVector<TExprNodePtr> label; label.reserve(vars.size() * 2);
313-
314-
for (auto [relId, varId] : vars) {
315-
auto [table, column] = Var2TableCol[relId - 1][varId - 1];
316-
317-
label.emplace_back(Ctx.NewAtom(Root->JoinKind->Pos(), table));
318-
label.emplace_back(Ctx.NewAtom(Root->JoinKind->Pos(), column));
319-
}
320-
321-
return Build<TCoAtomList>(Ctx, Root->JoinKind->Pos())
322-
.Add(label)
323-
.Done()
324-
.Ptr();
325-
}
326-
327-
TYtJoinNode::TPtr Convert(int nodeId, TVector<TString>& scope) const
328-
{
329-
const IOptimizer::TJoinNode* node = &Result.Nodes[nodeId];
330-
if (node->Outer == -1 && node->Inner == -1) {
331-
YQL_ENSURE(node->Rels.size() == 1);
332-
auto leaf = Leafs[node->Rels[0]-1];
333-
YQL_ENSURE(leaf);
334-
YQL_ENSURE(!leaf->Scope.empty());
335-
scope.insert(scope.end(), leaf->Scope.begin(), leaf->Scope.end());
336-
return leaf;
337-
} else if (node->Outer != -1 && node->Inner != -1) {
338-
auto ret = MakeIntrusive<TYtJoinNodeOp>();
339-
TString joinKind;
340-
switch (node->Mode) {
341-
case IOptimizer::EJoinType::Inner:
342-
joinKind = "Inner";
343-
break;
344-
case IOptimizer::EJoinType::Left:
345-
joinKind = "Left";
346-
break;
347-
case IOptimizer::EJoinType::Right:
348-
joinKind = "Right";
349-
break;
350-
default:
351-
YQL_ENSURE(false, "Unsupported join type");
352-
break;
353-
}
354-
ret->JoinKind = Ctx.NewAtom(Root->JoinKind->Pos(), joinKind);
355-
ret->LeftLabel = MakeLabel(node->LeftVars);
356-
ret->RightLabel = MakeLabel(node->RightVars);
357-
int index = scope.size();
358-
ret->Left = Convert(node->Outer, scope);
359-
ret->Right = Convert(node->Inner, scope);
360-
ret->Scope.insert(ret->Scope.end(), scope.begin() + index, scope.end());
361-
return ret;
362-
} else {
363-
YQL_ENSURE(false, "Wrong CBO node");
364-
}
365-
}
366-
367120
TYtJoinNodeOp::TPtr Root;
368121
const TYtState::TPtr& State;
369122
TExprContext& Ctx;
370123
bool Debug;
371-
372-
THashMap<TStringBuf, std::vector<int>> Table2RelIds;
373-
std::vector<IOptimizer::TRel> Rels;
374-
std::vector<std::vector<TStringBuf>> RelTables;
375-
std::vector<TYtJoinNodeLeaf*> Leafs;
376-
std::vector<std::vector<std::tuple<TStringBuf, TStringBuf>>> Var2TableCol;
377-
378-
std::vector<THashMap<TStringBuf, int>> VarIds;
379-
380-
std::vector<IOptimizer::TEq> EqClasses;
381-
std::vector<IOptimizer::TEq> Left;
382-
std::vector<IOptimizer::TEq> Right;
383-
384-
IOptimizer::TOutput Result;
385124
};
386125

387126
class TYtRelOptimizerNode: public TRelOptimizerNode {

0 commit comments

Comments
 (0)