Skip to content

Commit 59298b8

Browse files
authored
[yt provider] Properly check output tables limit in FuseMultiOutsWithOuterMaps optimizer (#5799)
1 parent 9822fa1 commit 59298b8

File tree

4 files changed

+30
-22
lines changed

4 files changed

+30
-22
lines changed

ydb/library/yql/providers/yt/provider/yql_yt_physical_finalizing.cpp

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2339,6 +2339,7 @@ class TYtPhysicalFinalizingTransformer : public TSyncTransformerBase {
23392339
continue;
23402340
}
23412341

2342+
const size_t opOutTables = op.Output().Size();
23422343
std::map<size_t, std::pair<std::vector<const TExprNode*>, std::vector<const TExprNode*>>> maps; // output -> pair<vector<YtMap>, vector<other YtOutput's>>
23432344
for (size_t i = 0; i < x.second.size(); ++i) {
23442345
auto reader = std::get<0>(x.second[i]);
@@ -2354,7 +2355,9 @@ class TYtPhysicalFinalizingTransformer : public TSyncTransformerBase {
23542355
if (newPair && TYtMap::Match(reader)) {
23552356
const auto outerMap = TYtMap(reader);
23562357
if ((outerMap.World().Ref().IsWorld() || outerMap.World().Raw() == op.World().Raw())
2357-
&& outerMap.Input().Size() == 1 && outerMap.DataSink().Cluster().Value() == op.DataSink().Cluster().Value()
2358+
&& outerMap.Input().Size() == 1
2359+
&& outerMap.Output().Size() + item.first.size() <= maxOutTables // fast check for too many operations
2360+
&& outerMap.DataSink().Cluster().Value() == op.DataSink().Cluster().Value()
23582361
&& NYql::HasSetting(op.Settings().Ref(), EYtSettingType::Flow) == NYql::HasSetting(outerMap.Settings().Ref(), EYtSettingType::Flow)
23592362
&& !NYql::HasSetting(op.Settings().Ref(), EYtSettingType::JobCount)
23602363
&& !NYql::HasSetting(outerMap.Settings().Ref(), EYtSettingType::JobCount)
@@ -2382,7 +2385,7 @@ class TYtPhysicalFinalizingTransformer : public TSyncTransformerBase {
23822385
if (AnyOf(maps, [](const auto& item) { return item.second.first.size() > 0; })) {
23832386
TMap<TStringBuf, ui64> memUsage;
23842387
size_t currenFiles = 1; // jobstate. Take into account only once
2385-
size_t currOutTables = op.Output().Size();
2388+
size_t currOutTables = opOutTables;
23862389

23872390
TExprNode::TPtr updatedBody = lambda.Body().Ptr();
23882391
if (maxJobMemoryLimit) {
@@ -2397,10 +2400,11 @@ class TYtPhysicalFinalizingTransformer : public TSyncTransformerBase {
23972400
TMap<TStringBuf, double> cpuUsage;
23982401
for (auto& item: maps) {
23992402
if (!item.second.first.empty()) {
2403+
size_t otherTablesDelta = item.second.second.empty() ? 1 : 0;
24002404
for (auto it = item.second.first.begin(); it != item.second.first.end(); ) {
24012405
const auto outerMap = TYtMap(*it);
24022406

2403-
const size_t outTablesDelta = outerMap.Output().Size() - size_t(item.second.second.empty());
2407+
const size_t outTablesDelta = outerMap.Output().Size() - otherTablesDelta;
24042408

24052409
updatedBody = outerMap.Mapper().Body().Ptr();
24062410
if (maxJobMemoryLimit) {
@@ -2418,7 +2422,7 @@ class TYtPhysicalFinalizingTransformer : public TSyncTransformerBase {
24182422
cpuUsage.clear();
24192423
ScanResourceUsage(*updatedBody, *State_->Configuration, State_->Types, pMemUsage, &cpuUsage, &newCurrenFiles);
24202424

2421-
auto usedMemory = Accumulate(memUsage.begin(), memUsage.end(), switchLimit,
2425+
auto usedMemory = Accumulate(newMemUsage.begin(), newMemUsage.end(), switchLimit,
24222426
[](ui64 sum, const std::pair<const TStringBuf, ui64>& val) { return sum + val.second; });
24232427

24242428
// Take into account codec input/output buffers (one for all inputs and one per output)
@@ -2453,12 +2457,16 @@ class TYtPhysicalFinalizingTransformer : public TSyncTransformerBase {
24532457
if (skip) {
24542458
// Move to other usages
24552459
it = item.second.first.erase(it);
2460+
if (item.second.second.empty()) {
2461+
++currOutTables;
2462+
}
24562463
item.second.second.push_back(outerMap.Input().Item(0).Paths().Item(0).Table().Raw());
24572464
continue;
24582465
}
24592466
currenFiles = newCurrenFiles;
24602467
memUsage = std::move(newMemUsage);
24612468
currOutTables += outTablesDelta;
2469+
otherTablesDelta = 0; // Take into account only once
24622470
++it;
24632471
}
24642472
}

ydb/library/yql/tests/sql/hybrid_file/part1/canondata/result.json

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1065,16 +1065,16 @@
10651065
],
10661066
"test.test[hor_join-out_mem_limit-default.txt-Debug]": [
10671067
{
1068-
"checksum": "83d9bc57921df8c6209bf72011b0893f",
1069-
"size": 9613,
1070-
"uri": "https://{canondata_backend}/1871182/cf13957d635dc8c77a65ef70797b7c6b8d4646c5/resource.tar.gz#test.test_hor_join-out_mem_limit-default.txt-Debug_/opt.yql_patched"
1068+
"checksum": "60cda73222095c4fc4bae6fadafa15ec",
1069+
"size": 10408,
1070+
"uri": "https://{canondata_backend}/1937429/280843cc8d8c85d96284e8fe059b340a3a45c757/resource.tar.gz#test.test_hor_join-out_mem_limit-default.txt-Debug_/opt.yql_patched"
10711071
}
10721072
],
10731073
"test.test[hor_join-out_mem_limit-default.txt-Plan]": [
10741074
{
1075-
"checksum": "2b487d8bb3e3a2a92a3b64c0509b296a",
1076-
"size": 19513,
1077-
"uri": "https://{canondata_backend}/1925842/70942689b7ce63cefca5f7da5343fab5153230a8/resource.tar.gz#test.test_hor_join-out_mem_limit-default.txt-Plan_/plan.txt"
1075+
"checksum": "a8280f86c88685688891cc00595867dc",
1076+
"size": 22131,
1077+
"uri": "https://{canondata_backend}/1937429/280843cc8d8c85d96284e8fe059b340a3a45c757/resource.tar.gz#test.test_hor_join-out_mem_limit-default.txt-Plan_/plan.txt"
10781078
}
10791079
],
10801080
"test.test[in-in_compact_distinct--Debug]": [

ydb/library/yql/tests/sql/yt_native_file/part0/canondata/result.json

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -936,16 +936,16 @@
936936
],
937937
"test.test[hor_join-out_mem_limit-default.txt-Debug]": [
938938
{
939-
"checksum": "5ca93bcce545f2ffa0f71efea30559d9",
940-
"size": 6699,
941-
"uri": "https://{canondata_backend}/1903280/821c0bdbea6623cbd0729714ca013fc70fe76044/resource.tar.gz#test.test_hor_join-out_mem_limit-default.txt-Debug_/opt.yql"
939+
"checksum": "99f6fba578021392f588f49a7a16d230",
940+
"size": 6967,
941+
"uri": "https://{canondata_backend}/1920236/d3e4f2c8fc7c4fdc1e1e409b3c3feb97251def9a/resource.tar.gz#test.test_hor_join-out_mem_limit-default.txt-Debug_/opt.yql"
942942
}
943943
],
944944
"test.test[hor_join-out_mem_limit-default.txt-Plan]": [
945945
{
946-
"checksum": "09d06e666cf775302f456dece40c9a0e",
947-
"size": 13329,
948-
"uri": "https://{canondata_backend}/1917492/63a0d945c75b6f9740a7e74d552f005bcf78e318/resource.tar.gz#test.test_hor_join-out_mem_limit-default.txt-Plan_/plan.txt"
946+
"checksum": "efaad6dc78b38f1ea7d29851927794ee",
947+
"size": 14233,
948+
"uri": "https://{canondata_backend}/1920236/d3e4f2c8fc7c4fdc1e1e409b3c3feb97251def9a/resource.tar.gz#test.test_hor_join-out_mem_limit-default.txt-Plan_/plan.txt"
949949
}
950950
],
951951
"test.test[hor_join-out_mem_limit-default.txt-Results]": [

ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1343,16 +1343,16 @@
13431343
],
13441344
"test.test[hor_join-fuse_multi_outs1-outlimit-Debug]": [
13451345
{
1346-
"checksum": "6798cb9a915abe1d2c99a3cdf736af95",
1347-
"size": 4249,
1348-
"uri": "https://{canondata_backend}/1903885/d48e78489ae557f8510451b183c5b7207f7bc38a/resource.tar.gz#test.test_hor_join-fuse_multi_outs1-outlimit-Debug_/opt.yql"
1346+
"checksum": "0cdf8bf3e0d09cc5b66a464966455cb3",
1347+
"size": 4634,
1348+
"uri": "https://{canondata_backend}/1942671/4ddfdf14b7530d609576cab63522db8c8ffc57e6/resource.tar.gz#test.test_hor_join-fuse_multi_outs1-outlimit-Debug_/opt.yql"
13491349
}
13501350
],
13511351
"test.test[hor_join-fuse_multi_outs1-outlimit-Plan]": [
13521352
{
1353-
"checksum": "3834a1cf9cea5acaa5e1c071283de426",
1354-
"size": 12461,
1355-
"uri": "https://{canondata_backend}/1923547/5154c8bd8ef9ead4f609771f831f20c15e795571/resource.tar.gz#test.test_hor_join-fuse_multi_outs1-outlimit-Plan_/plan.txt"
1353+
"checksum": "293adad074847f403910b7325d3bb304",
1354+
"size": 12561,
1355+
"uri": "https://{canondata_backend}/1942671/4ddfdf14b7530d609576cab63522db8c8ffc57e6/resource.tar.gz#test.test_hor_join-fuse_multi_outs1-outlimit-Plan_/plan.txt"
13561356
}
13571357
],
13581358
"test.test[hor_join-fuse_multi_outs1-outlimit-Results]": [

0 commit comments

Comments
 (0)