Skip to content

Commit 6dceba0

Browse files
author
Vadim Averin
authored
Add builtins for struct operations (#4058)
1 parent e3a8844 commit 6dceba0

File tree

7 files changed

+229
-0
lines changed

7 files changed

+229
-0
lines changed

ydb/library/yql/core/type_ann/type_ann_core.cpp

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6074,6 +6074,142 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
60746074
return IGraphTransformer::TStatus::Ok;
60756075
}
60766076

6077+
// Type-annotation handler shared by the StructUnion, StructIntersection,
// StructDifference and StructSymmetricDifference callables.
//
// Arguments of `input`:
//   0 - left struct
//   1 - right struct
//   2 - optional three-parameter merge lambda (name, left, right); only
//       accepted for StructUnion / StructIntersection (the two difference
//       flavours are limited to exactly two arguments).
//
// The callable is rewritten into an AsStruct whose members are the selected
// member names, each paired with the result of applying the merge lambda to
// (String(name), optional left value, optional right value). A side that lacks
// the member is passed as a typed Nothing. Members coming from the left struct
// are emitted first (in the left struct's item order), followed by members that
// exist only in the right struct.
//
// Returns Error when the argument count or argument types are invalid, whatever
// non-Ok status ConvertToLambda reports for a user-supplied lambda, and Repeat
// once `output` has been replaced with the AsStruct expression.
IGraphTransformer::TStatus StructMergeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
    const auto callableName = input->Content();
    const bool isUnion = callableName == "StructUnion";
    const bool isIntersection = callableName == "StructIntersection";
    const bool isDifference = callableName == "StructDifference";
    const bool isSymmDifference = callableName == "StructSymmetricDifference";

    // The difference flavours never see a member on both sides, so they take no merge lambda.
    const auto maxArgc = (isDifference || isSymmDifference) ? 2 : 3;
    if (!EnsureMinMaxArgsCount(*input, 2, maxArgc, ctx.Expr)) {
        return IGraphTransformer::TStatus::Error;
    }

    auto left = input->Child(0);
    auto right = input->Child(1);

    if (HasError(left->GetTypeAnn(), ctx.Expr) || HasError(right->GetTypeAnn(), ctx.Expr)) {
        return IGraphTransformer::TStatus::Error;
    }

    if (!EnsureStructType(*left, ctx.Expr) || !EnsureStructType(*right, ctx.Expr)) {
        return IGraphTransformer::TStatus::Error;
    }

    auto leftType = left->GetTypeAnn()->Cast<TStructExprType>();
    auto rightType = right->GetTypeAnn()->Cast<TStructExprType>();

    const auto pos = input->Pos();

    TExprNode::TPtr mergeLambda;
    if (input->ChildrenSize() != 3) {
        // Default merge: take the left value when present, otherwise the right one.
        mergeLambda = ctx.Expr.Builder(pos)
            .Lambda()
                .Param("name")
                .Param("left")
                .Param("right")
                .Callable("Unwrap")
                    .Callable(0, "Coalesce")
                        .Arg(0, "left")
                        .Arg(1, "right")
                    .Seal()
                .Seal()
            .Seal()
            .Build();
    } else {
        // NOTE(review): ConvertToLambda receives a local copy of the child pointer; if it can
        // rewrite the node and report Repeat, that rewrite is not stored back into `input` - confirm.
        mergeLambda = input->ChildPtr(2);
        auto status = ConvertToLambda(mergeLambda, ctx.Expr, 3);
        if (status.Level != IGraphTransformer::TStatus::Ok) {
            return status;
        }
    }

    // Just(Member(owner, name)) - the value of a member that exists on this side.
    const auto presentMember = [&](const TExprNode::TPtr& owner, const TStringBuf& name) -> TExprNode::TPtr {
        return ctx.Expr.Builder(pos)
            .Callable("Just")
                .Callable(0, "Member")
                    .Add(0, owner)
                    .Atom(1, name)
                .Seal()
            .Seal()
            .Build();
    };

    // Nothing(Optional<T>) for a side that lacks the member; T is borrowed from the
    // struct that does have it, so both lambda arguments share one optional type.
    const auto absentMember = [&](const TStructExprType& donor, const TStringBuf& name) -> TExprNode::TPtr {
        const auto optionalType = ctx.Expr.MakeType<TOptionalExprType>(donor.FindItemType(name));
        return ctx.Expr.NewCallable(pos, "Nothing", {
            ExpandType(pos, *optionalType, ctx.Expr)
        });
    };

    // Builds the (name, mergeLambda(String(name), leftOptional, rightOptional)) pair for AsStruct.
    const auto mergeMembers = [&](const TStringBuf& name, bool hasLeft, bool hasRight) -> TExprNode::TPtr {
        TExprNode::TPtr leftMaybe = hasLeft ? presentMember(left, name) : absentMember(*rightType, name);
        TExprNode::TPtr rightMaybe = hasRight ? presentMember(right, name) : absentMember(*leftType, name);

        return ctx.Expr.Builder(pos)
            .List()
                .Atom(0, name)
                .Apply(1, mergeLambda)
                    .With(0)
                        .Callable("String")
                            .Atom(0, name)
                        .Seal()
                    .Done()
                    .With(1, leftMaybe)
                    .With(2, rightMaybe)
                .Seal()
            .Seal()
            .Build();
    };

    TExprNode::TListType children;

    // Pass 1: members of the left struct, in declaration order.
    for (const auto* leftItem : leftType->GetItems()) {
        const auto& name = leftItem->GetName();
        const bool inRight = rightType->FindItem(name) ? true : false;

        const bool keep = isUnion
            || (isIntersection && inRight)
            || ((isDifference || isSymmDifference) && !inRight);
        if (keep) {
            children.push_back(mergeMembers(name, true, inRight));
        }
    }

    // Pass 2: members that exist only in the right struct (union and symmetric difference).
    if (isUnion || isSymmDifference) {
        for (const auto* rightItem : rightType->GetItems()) {
            const auto& name = rightItem->GetName();
            if (!leftType->FindItem(name)) {
                children.push_back(mergeMembers(name, false, true));
            }
        }
    }

    output = ctx.Expr.NewCallable(pos, "AsStruct", std::move(children));
    return IGraphTransformer::TStatus::Repeat;
}
6212+
60776213
IGraphTransformer::TStatus StaticMapWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
60786214
if (!EnsureArgsCount(*input, 2, ctx.Expr)) {
60796215
return IGraphTransformer::TStatus::Error;
@@ -12238,6 +12374,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
1223812374
Functions["PgGrouping"] = &PgGroupingWrapper;
1223912375
Functions["PgGroupingSet"] = &PgGroupingSetWrapper;
1224012376
Functions["PgToRecord"] = &PgToRecordWrapper;
12377+
Functions["StructUnion"] = &StructMergeWrapper;
12378+
Functions["StructIntersection"] = &StructMergeWrapper;
12379+
Functions["StructDifference"] = &StructMergeWrapper;
12380+
Functions["StructSymmetricDifference"] = &StructMergeWrapper;
1224112381

1224212382
Functions["AutoDemux"] = &AutoDemuxWrapper;
1224312383
Functions["AggrCountInit"] = &AggrCountInitWrapper;

ydb/library/yql/sql/v1/builtin.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3070,6 +3070,10 @@ struct TBuiltinFuncData {
30703070
{"flattenmembers", BuildNamedBuiltinFactoryCallback<TFlattenMembers>("FlattenMembers")},
30713071
{"staticmap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticMap", 2, 2) },
30723072
{"staticzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticZip", 1, -1) },
3073+
{"structunion", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructUnion", 2, 3)},
3074+
{"structintersection", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructIntersection", 2, 3)},
3075+
{"structdifference", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructDifference", 2, 2)},
3076+
{"structsymmetricdifference", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructSymmetricDifference", 2, 2)},
30733077
{"staticfold", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticFold", 3, 3)},
30743078
{"staticfold1", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticFold1", 3, 3)},
30753079

ydb/library/yql/tests/sql/dq_file/part12/canondata/result.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,28 @@
975975
"uri": "file://test.test_expr-non_persistable_group_by_column_fail--Results_/extracted"
976976
}
977977
],
978+
"test.test[expr-struct_merge-default.txt-Analyze]": [
979+
{
980+
"checksum": "b4dd508a329723c74293d80f0278c705",
981+
"size": 505,
982+
"uri": "https://{canondata_backend}/1775059/79f40817d9be6347f8a0a937bdd3c46c326ab7d3/resource.tar.gz#test.test_expr-struct_merge-default.txt-Analyze_/plan.txt"
983+
}
984+
],
985+
"test.test[expr-struct_merge-default.txt-Debug]": [
986+
{
987+
"checksum": "7ed8bb90b0fd6a7a9c734d5e24ec3a79",
988+
"size": 867,
989+
"uri": "https://{canondata_backend}/1689644/d939c79f1c25569f7b8f4e5b740e070ad72d7ad7/resource.tar.gz#test.test_expr-struct_merge-default.txt-Debug_/opt.yql_patched"
990+
}
991+
],
992+
"test.test[expr-struct_merge-default.txt-Plan]": [
993+
{
994+
"checksum": "b4dd508a329723c74293d80f0278c705",
995+
"size": 505,
996+
"uri": "https://{canondata_backend}/1775059/79f40817d9be6347f8a0a937bdd3c46c326ab7d3/resource.tar.gz#test.test_expr-struct_merge-default.txt-Plan_/plan.txt"
997+
}
998+
],
999+
"test.test[expr-struct_merge-default.txt-Results]": [],
9781000
"test.test[flatten_by-flatten_one_field--Analyze]": [
9791001
{
9801002
"checksum": "dfeb435c40cd1a0a98c74310e1507366",

ydb/library/yql/tests/sql/hybrid_file/part6/canondata/result.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -923,6 +923,20 @@
923923
"uri": "https://{canondata_backend}/1775059/3cb7d014d70b84dbcb84645fa987dd9d47d7fd6c/resource.tar.gz#test.test_expr-many_opt_comp-default.txt-Plan_/plan.txt"
924924
}
925925
],
926+
"test.test[expr-struct_merge-default.txt-Debug]": [
927+
{
928+
"checksum": "ab77466270296597939428807f0af395",
929+
"size": 866,
930+
"uri": "https://{canondata_backend}/1942415/ecf45b8d311b13ba55e2de94295cabed9b642863/resource.tar.gz#test.test_expr-struct_merge-default.txt-Debug_/opt.yql_patched"
931+
}
932+
],
933+
"test.test[expr-struct_merge-default.txt-Plan]": [
934+
{
935+
"checksum": "b4dd508a329723c74293d80f0278c705",
936+
"size": 505,
937+
"uri": "https://{canondata_backend}/1936842/e15468da5c6a430935df259a2106604daa68ad66/resource.tar.gz#test.test_expr-struct_merge-default.txt-Plan_/plan.txt"
938+
}
939+
],
926940
"test.test[expr-uuid_order-default.txt-Debug]": [
927941
{
928942
"checksum": "dd888f0b22d793979dbf237917d203dd",

ydb/library/yql/tests/sql/sql2yql/canondata/result.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5991,6 +5991,13 @@
59915991
"uri": "https://{canondata_backend}/1784117/d56ae82ad9d30397a41490647be1bd2124718f98/resource.tar.gz#test_sql2yql.test_expr-struct_literal_members_/sql.yql"
59925992
}
59935993
],
5994+
"test_sql2yql.test[expr-struct_merge]": [
5995+
{
5996+
"checksum": "e3781bd00212a17b07691294caa0c1b0",
5997+
"size": 3095,
5998+
"uri": "https://{canondata_backend}/1916746/116cafe28e270e7917dbeab5e0d1b5f2357e5c16/resource.tar.gz#test_sql2yql.test_expr-struct_merge_/sql.yql"
5999+
}
6000+
],
59946001
"test_sql2yql.test[expr-struct_slice]": [
59956002
{
59966003
"checksum": "4d0f79865e785d3f3b0e9e0110bb1f86",
@@ -24443,6 +24450,13 @@
2444324450
"uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_expr-struct_literal_members_/formatted.sql"
2444424451
}
2444524452
],
24453+
"test_sql_format.test[expr-struct_merge]": [
24454+
{
24455+
"checksum": "509cfc4518e9c467b2cd05a5e568c00b",
24456+
"size": 413,
24457+
"uri": "https://{canondata_backend}/1916746/116cafe28e270e7917dbeab5e0d1b5f2357e5c16/resource.tar.gz#test_sql_format.test_expr-struct_merge_/formatted.sql"
24458+
}
24459+
],
2444624460
"test_sql_format.test[expr-struct_slice]": [
2444724461
{
2444824462
"checksum": "8a9f027371f1722b5753e5b53cf62346",
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/* syntax version 1 */
2+
3+
$merge = ($_name, $l, $r) -> { return Coalesce($l, 0) + Coalesce($r, 0); };
4+
$left = <|a: 1, b: 2, c: 3|>;
5+
$right = <|c: 1, d: 2, e: 3|>;
6+
7+
SELECT
8+
StructUnion($left, $right),
9+
StructUnion($left, $right, $merge),
10+
StructIntersection($left, $right),
11+
StructIntersection($left, $right, $merge),
12+
StructDifference($left, $right),
13+
StructSymmetricDifference($left, $right)
14+
;

ydb/library/yql/tests/sql/yt_native_file/part12/canondata/result.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,27 @@
10031003
"uri": "file://test.test_expr-non_persistable_group_by_column_fail--Results_/extracted"
10041004
}
10051005
],
1006+
"test.test[expr-struct_merge-default.txt-Debug]": [
1007+
{
1008+
"checksum": "54a15a62a123d1a72d9190f26324aa13",
1009+
"size": 796,
1010+
"uri": "https://{canondata_backend}/1784826/6c4ac0f02ea872d52d4b59ee9d0f2b2963fe6800/resource.tar.gz#test.test_expr-struct_merge-default.txt-Debug_/opt.yql"
1011+
}
1012+
],
1013+
"test.test[expr-struct_merge-default.txt-Plan]": [
1014+
{
1015+
"checksum": "b4dd508a329723c74293d80f0278c705",
1016+
"size": 505,
1017+
"uri": "https://{canondata_backend}/1881367/84017fd57088f9d554efcf1a44f82b22e5b164b7/resource.tar.gz#test.test_expr-struct_merge-default.txt-Plan_/plan.txt"
1018+
}
1019+
],
1020+
"test.test[expr-struct_merge-default.txt-Results]": [
1021+
{
1022+
"checksum": "83bf749394c8035e5f04d3cf2e23c44c",
1023+
"size": 9246,
1024+
"uri": "https://{canondata_backend}/1784826/6c4ac0f02ea872d52d4b59ee9d0f2b2963fe6800/resource.tar.gz#test.test_expr-struct_merge-default.txt-Results_/results.txt"
1025+
}
1026+
],
10061027
"test.test[flatten_by-flatten_one_field--Debug]": [
10071028
{
10081029
"checksum": "1e1f4bdf8614f3314eb9a5b53d01d8db",

0 commit comments

Comments
 (0)