Skip to content

Commit b7130eb

Browse files
committed
[KQP] Shuffle elimination flag bug fix
1 parent 357fb73 commit b7130eb

File tree

4 files changed

+32
-20
lines changed

4 files changed

+32
-20
lines changed

ydb/core/kqp/executer_actor/kqp_executer_impl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1652,11 +1652,12 @@ class TKqpExecuterBase : public TActor<TDerived> {
16521652
auto& stage = stageInfo.Meta.GetStage(stageInfo.Id);
16531653

16541654
auto& columnShardHashV1Params = stageInfo.Meta.ColumnShardHashV1Params;
1655-
if (enableShuffleElimination && stageInfo.Meta.ColumnTableInfoPtr) {
1655+
if (enableShuffleElimination && stage.GetIsShuffleEliminated() && stageInfo.Meta.ColumnTableInfoPtr) {
16561656
const auto& tableDesc = stageInfo.Meta.ColumnTableInfoPtr->Description;
16571657
columnShardHashV1Params.SourceShardCount = tableDesc.GetColumnShardCount();
16581658
columnShardHashV1Params.SourceTableKeyColumnTypes = std::make_shared<TVector<NScheme::TTypeInfo>>();
16591659
for (const auto& column: tableDesc.GetSharding().GetHashSharding().GetColumns()) {
1660+
Y_ENSURE(stageInfo.Meta.TableConstInfo->Columns.contains(column), TStringBuilder{} << "Table doesn't have column: " << column);
16601661
auto columnType = stageInfo.Meta.TableConstInfo->Columns.at(column).Type;
16611662
columnShardHashV1Params.SourceTableKeyColumnTypes->push_back(columnType);
16621663
}

ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ class TOptimizerNativeNew: public IOptimizerNew {
350350
if (postEnumerationShuffleElimination) {
351351
EliminateShuffles(hypergraph, bestJoinOrder, orderingsFSM);
352352
}
353-
auto resTree = ConvertFromInternal(bestJoinOrder, fdStorage);
353+
auto resTree = ConvertFromInternal(bestJoinOrder, fdStorage, EnableShuffleElimination);
354354
AddMissingConditions(hypergraph, resTree);
355355
return resTree;
356356
}

ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal(
2424

2525
std::shared_ptr<TJoinOptimizerNode> ConvertFromInternal(
2626
const std::shared_ptr<IBaseOptimizerNode>& internal,
27-
const TFDStorage& fdStorage
27+
const TFDStorage& fdStorage,
28+
bool enableShuffleElimination
2829
) {
2930
Y_ENSURE(internal->Kind == EOptimizerNodeKind::JoinNodeType);
3031

@@ -38,31 +39,40 @@ std::shared_ptr<TJoinOptimizerNode> ConvertFromInternal(
3839
auto right = join->RightArg;
3940

4041
if (left->Kind == EOptimizerNodeKind::JoinNodeType) {
41-
left = ConvertFromInternal(left, fdStorage);
42+
left = ConvertFromInternal(left, fdStorage, enableShuffleElimination);
4243
}
4344
if (right->Kind == EOptimizerNodeKind::JoinNodeType) {
44-
right = ConvertFromInternal(right, fdStorage);
45+
right = ConvertFromInternal(right, fdStorage, enableShuffleElimination);
4546
}
4647

4748
auto newJoin = std::make_shared<TJoinOptimizerNode>(left, right, join->LeftJoinKeys, join->RightJoinKeys, join->JoinType, join->JoinAlgo, join->LeftAny, join->RightAny);
4849
newJoin->Stats = std::move(join->Stats);
4950

50-
51-
if (join->ShuffleLeftSideByOrderingIdx != -1) {
51+
if (!enableShuffleElimination && join->JoinAlgo == EJoinAlgoType::GraceJoin) {
52+
left->Stats.ShuffledByColumns =
53+
TIntrusivePtr<TOptimizerStatistics::TShuffledByColumns>(
54+
new TOptimizerStatistics::TShuffledByColumns(join->LeftJoinKeys)
55+
);
56+
} else if (join->ShuffleLeftSideByOrderingIdx != -1) {
5257
auto shuffledBy = fdStorage.GetInterestingOrderingsColumnNamesByIdx(join->ShuffleLeftSideByOrderingIdx);
5358

54-
left->Stats.ShuffledByColumns =
59+
left->Stats.ShuffledByColumns =
5560
TIntrusivePtr<TOptimizerStatistics::TShuffledByColumns>(
5661
new TOptimizerStatistics::TShuffledByColumns(std::move(shuffledBy))
5762
);
5863
} else {
5964
left->Stats.ShuffledByColumns = nullptr;
6065
}
6166

62-
if (join->ShuffleRightSideByOrderingIdx != -1) {
67+
if (!enableShuffleElimination && join->JoinAlgo == EJoinAlgoType::GraceJoin) {
68+
right->Stats.ShuffledByColumns =
69+
TIntrusivePtr<TOptimizerStatistics::TShuffledByColumns>(
70+
new TOptimizerStatistics::TShuffledByColumns(join->RightJoinKeys)
71+
);
72+
} else if (join->ShuffleRightSideByOrderingIdx != -1) {
6373
auto shuffledBy = fdStorage.GetInterestingOrderingsColumnNamesByIdx(join->ShuffleRightSideByOrderingIdx);
6474

65-
right->Stats.ShuffledByColumns =
75+
right->Stats.ShuffledByColumns =
6676
TIntrusivePtr<TOptimizerStatistics::TShuffledByColumns>(
6777
new TOptimizerStatistics::TShuffledByColumns(std::move(shuffledBy))
6878
);

ydb/library/yql/dq/opt/dq_opt_join_tree_node.h

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#pragma once
22

3-
#include <yql/essentials/core/cbo/cbo_optimizer_new.h>
3+
#include <yql/essentials/core/cbo/cbo_optimizer_new.h>
44

55
const TString& ToString(NYql::EJoinKind);
66
const TString& ToString(NYql::EJoinAlgoType);
@@ -19,15 +19,15 @@ namespace NYql::NDq {
1919
*/
2020
struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
2121
TJoinOptimizerNodeInternal(
22-
const std::shared_ptr<IBaseOptimizerNode>& left,
22+
const std::shared_ptr<IBaseOptimizerNode>& left,
2323
const std::shared_ptr<IBaseOptimizerNode>& right,
2424
const TVector<TJoinColumn>& leftJoinKeys,
25-
const TVector<TJoinColumn>& rightJoinKeys,
26-
const EJoinKind joinType,
25+
const TVector<TJoinColumn>& rightJoinKeys,
26+
const EJoinKind joinType,
2727
const EJoinAlgoType joinAlgo,
2828
const bool leftAny,
2929
const bool rightAny
30-
)
30+
)
3131
: IBaseOptimizerNode(JoinNodeType)
3232
, LeftArg(left)
3333
, RightArg(right)
@@ -55,9 +55,9 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
5555
stream << ToString(JoinType) << "," << ToString(JoinAlgo) << " ";
5656

5757
for (size_t i = 0; i < LeftJoinKeys.size(); ++i){
58-
stream
58+
stream
5959
<< LeftJoinKeys[i].RelName << "." << LeftJoinKeys[i].AttributeName
60-
<< "="
60+
<< "="
6161
<< RightJoinKeys[i].RelName << "." << RightJoinKeys[i].AttributeName << ",";
6262
}
6363
stream << "\n";
@@ -75,7 +75,7 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
7575
stream << " ";
7676
}
7777
stream << " ";
78-
stream << "Shuffled By: " << ShuffleRightSideByOrderingIdx << "\n";
78+
stream << "Shuffled By: " << ShuffleRightSideByOrderingIdx << "\n";
7979
RightArg->Print(stream, ntabs + 1);
8080
}
8181

@@ -90,7 +90,7 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
9090

9191
// for interesting orderings framework
9292
std::int64_t ShuffleLeftSideByOrderingIdx = -1;
93-
std::int64_t ShuffleRightSideByOrderingIdx = -1;
93+
std::int64_t ShuffleRightSideByOrderingIdx = -1;
9494
};
9595

9696
/**
@@ -118,7 +118,8 @@ std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal(
118118
*/
119119
std::shared_ptr<TJoinOptimizerNode> ConvertFromInternal(
120120
const std::shared_ptr<IBaseOptimizerNode>& internal,
121-
const TFDStorage& fdStorage
121+
const TFDStorage& fdStorage,
122+
bool enableShuffleElimination
122123
);
123124

124125
} // namespace NYql::NDq

0 commit comments

Comments
 (0)