Skip to content

Commit 7a34ae7

Browse files
authored
Add SMJ support (#97)
1 parent c19bda6 commit 7a34ae7

File tree

4 files changed

+75
-44
lines changed

4 files changed

+75
-44
lines changed

velox/substrait/SubstraitParser.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -144,14 +144,12 @@ std::shared_ptr<SubstraitParser::SubstraitType> SubstraitParser::parseType(
144144
return std::make_shared<SubstraitType>(type);
145145
}
146146

147-
std::string SubstraitParser::parseType(
148-
const std::string& substraitType) {
149-
auto it = typeMap_.find(substraitType);
150-
if (it == typeMap_.end()) {
151-
VELOX_NYI(
152-
"Substrait parsing for type {} not supported.", substraitType);
153-
}
154-
return it->second;
147+
std::string SubstraitParser::parseType(const std::string& substraitType) {
148+
auto it = typeMap_.find(substraitType);
149+
if (it == typeMap_.end()) {
150+
VELOX_NYI("Substrait parsing for type {} not supported.", substraitType);
151+
}
152+
return it->second;
155153
};
156154

157155
std::vector<std::shared_ptr<SubstraitParser::SubstraitType>>
@@ -286,7 +284,7 @@ void SubstraitParser::getSubFunctionTypes(
286284
std::string delimiter = "_";
287285
while ((pos = funcTypes.find(delimiter)) != std::string::npos) {
288286
auto type = funcTypes.substr(0, pos);
289-
if (type != "opt" && type !="req") {
287+
if (type != "opt" && type != "req") {
290288
types.emplace_back(type);
291289
}
292290
funcTypes.erase(0, pos + delimiter.length());
@@ -314,4 +312,19 @@ std::string SubstraitParser::mapToVeloxFunction(
314312
return subFunc;
315313
}
316314

315+
bool SubstraitParser::configSetInOptimization(
316+
const ::substrait::extensions::AdvancedExtension& extension,
317+
const std::string& config) const {
318+
if (extension.has_optimization()) {
319+
google::protobuf::StringValue msg;
320+
extension.optimization().UnpackTo(&msg);
321+
std::size_t pos = msg.value().find(config);
322+
if ((pos != std::string::npos) &&
323+
(msg.value().substr(pos + config.size(), 1) == "1")) {
324+
return true;
325+
}
326+
}
327+
return false;
328+
}
329+
317330
} // namespace facebook::velox::substrait

velox/substrait/SubstraitParser.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include "velox/substrait/proto/substrait/type.pb.h"
2626
#include "velox/substrait/proto/substrait/type_expressions.pb.h"
2727

28+
#include <google/protobuf/wrappers.pb.h>
29+
2830
namespace facebook::velox::substrait {
2931

3032
/// This class contains some common functions used to parse Substrait
@@ -94,6 +96,15 @@ class SubstraitParser {
9496
/// Map the Substrait function keyword into Velox function keyword.
9597
std::string mapToVeloxFunction(const std::string& substraitFunction) const;
9698

99+
/// @brief Return whether a config is set as true in AdvancedExtension
100+
/// optimization.
101+
/// @param extension Substrait advanced extension.
102+
/// @param config the key string of a config.
103+
/// @return Whether the config is set as true.
104+
bool configSetInOptimization(
105+
const ::substrait::extensions::AdvancedExtension& extension,
106+
const std::string& config) const;
107+
97108
private:
98109
/// A map used for mapping Substrait function keywords into Velox functions'
99110
/// keywords. Key: the Substrait function keyword, Value: the Velox function

velox/substrait/SubstraitToVeloxPlan.cpp

Lines changed: 29 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616

1717
#include "velox/substrait/SubstraitToVeloxPlan.h"
1818

19-
#include <google/protobuf/wrappers.pb.h>
20-
2119
#include "velox/substrait/TypeUtils.h"
2220
#include "velox/substrait/VariantToVectorConverter.h"
2321
#include "velox/type/Type.h"
@@ -63,26 +61,6 @@ const std::string sNot = "not";
6361
const std::string sI32 = "i32";
6462
const std::string sI64 = "i64";
6563

66-
/// @brief Return whether a config is set as true in AdvancedExtension
67-
/// optimization.
68-
/// @param extension Substrait advanced extension.
69-
/// @param config the key string of a config.
70-
/// @return Whether the config is set as true.
71-
bool configSetInOptimization(
72-
const ::substrait::extensions::AdvancedExtension& extension,
73-
const std::string& config) {
74-
if (extension.has_optimization()) {
75-
google::protobuf::StringValue msg;
76-
extension.optimization().UnpackTo(&msg);
77-
std::size_t pos = msg.value().find(config);
78-
if ((pos != std::string::npos) &&
79-
(msg.value().substr(pos + config.size(), 1) == "1")) {
80-
return true;
81-
}
82-
}
83-
return false;
84-
}
85-
8664
/// @brief Get the input type from both sides of join.
8765
/// @param leftNode the plan node of left side.
8866
/// @param rightNode the plan node of right side.
@@ -219,7 +197,7 @@ core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan(
219197
case ::substrait::JoinRel_JoinType::JoinRel_JoinType_JOIN_TYPE_LEFT_SEMI:
220198
// Determine the semi join type based on extracted information.
221199
if (sJoin.has_advanced_extension() &&
222-
configSetInOptimization(
200+
subParser_->configSetInOptimization(
223201
sJoin.advanced_extension(), "isExistenceJoin=")) {
224202
joinType = core::JoinType::kLeftSemiProject;
225203
} else {
@@ -229,7 +207,7 @@ core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan(
229207
case ::substrait::JoinRel_JoinType::JoinRel_JoinType_JOIN_TYPE_RIGHT_SEMI:
230208
// Determine the semi join type based on extracted information.
231209
if (sJoin.has_advanced_extension() &&
232-
configSetInOptimization(
210+
subParser_->configSetInOptimization(
233211
sJoin.advanced_extension(), "isExistenceJoin=")) {
234212
joinType = core::JoinType::kRightSemiProject;
235213
} else {
@@ -239,7 +217,7 @@ core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan(
239217
case ::substrait::JoinRel_JoinType::JoinRel_JoinType_JOIN_TYPE_ANTI: {
240218
// Determine the anti join type based on extracted information.
241219
if (sJoin.has_advanced_extension() &&
242-
configSetInOptimization(
220+
subParser_->configSetInOptimization(
243221
sJoin.advanced_extension(), "isNullAwareAntiJoin=")) {
244222
joinType = core::JoinType::kNullAwareAnti;
245223
} else {
@@ -276,16 +254,32 @@ core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan(
276254
exprConverter_->toVeloxExpr(sJoin.post_join_filter(), inputRowType);
277255
}
278256

279-
// Create join node
280-
return std::make_shared<core::HashJoinNode>(
281-
nextPlanNodeId(),
282-
joinType,
283-
leftKeys,
284-
rightKeys,
285-
filter,
286-
leftNode,
287-
rightNode,
288-
getJoinOutputType(leftNode, rightNode, joinType));
257+
if (sJoin.has_advanced_extension() &&
258+
subParser_->configSetInOptimization(
259+
sJoin.advanced_extension(), "isSMJ=")) {
260+
// Create MergeJoinNode node
261+
return std::make_shared<core::MergeJoinNode>(
262+
nextPlanNodeId(),
263+
joinType,
264+
leftKeys,
265+
rightKeys,
266+
filter,
267+
leftNode,
268+
rightNode,
269+
getJoinOutputType(leftNode, rightNode, joinType));
270+
271+
} else {
272+
// Create HashJoinNode node
273+
return std::make_shared<core::HashJoinNode>(
274+
nextPlanNodeId(),
275+
joinType,
276+
leftKeys,
277+
rightKeys,
278+
filter,
279+
leftNode,
280+
rightNode,
281+
getJoinOutputType(leftNode, rightNode, joinType));
282+
}
289283
}
290284

291285
core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan(

velox/substrait/SubstraitToVeloxPlanValidator.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,19 @@ bool SubstraitToVeloxPlanValidator::validate(
429429
return false;
430430
}
431431

432+
if (sJoin.has_advanced_extension() &&
433+
subParser_->configSetInOptimization(
434+
sJoin.advanced_extension(), "isSMJ=")) {
435+
switch (sJoin.type()) {
436+
case ::substrait::JoinRel_JoinType_JOIN_TYPE_INNER:
437+
case ::substrait::JoinRel_JoinType_JOIN_TYPE_LEFT:
438+
break;
439+
default:
440+
std::cout << "Sort merge join only support inner and left join"
441+
<< std::endl;
442+
return false;
443+
}
444+
}
432445
switch (sJoin.type()) {
433446
case ::substrait::JoinRel_JoinType_JOIN_TYPE_INNER:
434447
case ::substrait::JoinRel_JoinType_JOIN_TYPE_OUTER:

0 commit comments

Comments
 (0)