Skip to content

Commit 3221456

Browse files
authored
Merge 66eed5d into 63dd2da
2 parents 63dd2da + 66eed5d commit 3221456

File tree

13 files changed

+1202
-583
lines changed

13 files changed

+1202
-583
lines changed

ydb/core/kqp/ut/common/kqp_ut_common.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,5 +1401,33 @@ bool JoinOrderAndAlgosMatch(const TString& optimized, const TString& reference){
14011401
return JoinOrderAndAlgosMatch(optRoot, refRoot);
14021402
}
14031403

1404+
/* Temporary solution to canonize tests */
1405+
NJson::TJsonValue CanonizeJoinOrderImpl(const NJson::TJsonValue& opt) {
1406+
NJson::TJsonValue res;
1407+
1408+
auto op = opt.GetMapSafe().at("Operators").GetArraySafe()[0];
1409+
res["op_name"] = op.GetMapSafe().at("Name").GetStringSafe();
1410+
1411+
1412+
if (!opt.GetMapSafe().contains("Plans")) {
1413+
res["table"] = op.GetMapSafe().at("Table").GetStringSafe();
1414+
return res;
1415+
}
1416+
1417+
auto subplans = opt.GetMapSafe().at("Plans").GetArraySafe();
1418+
for (size_t i = 0; i< subplans.size(); ++i) {
1419+
res["args"].AppendValue(CanonizeJoinOrderImpl(subplans[i]));
1420+
}
1421+
return res;
1422+
}
1423+
1424+
/* Temporary solution to canonize tests */
1425+
NJson::TJsonValue CanonizeJoinOrder(const TString& deserializedPlan) {
1426+
NJson::TJsonValue optRoot;
1427+
NJson::ReadJsonTree(deserializedPlan, &optRoot, true);
1428+
optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan"));
1429+
return CanonizeJoinOrderImpl(SimplifyPlan(optRoot));
1430+
}
1431+
14041432
} // namspace NKqp
14051433
} // namespace NKikimr

ydb/core/kqp/ut/common/kqp_ut_common.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,5 +337,8 @@ void WaitForZeroSessions(const NKqp::TKqpCounters& counters);
337337

338338
bool JoinOrderAndAlgosMatch(const TString& optimized, const TString& reference);
339339

340+
/* Temporary solution to canonize tests */
341+
NJson::TJsonValue CanonizeJoinOrder(const TString& deserializedPlan);
342+
340343
} // namespace NKqp
341344
} // namespace NKikimr
Lines changed: 1 addition & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1 @@
1-
{
2-
"op_name": "InnerJoin (Grace)",
3-
"args": [
4-
{
5-
"op_name": "InnerJoin (MapJoin)",
6-
"args": [
7-
{
8-
"op_name": "TableFullScan",
9-
"table": "partsupp"
10-
},
11-
{
12-
"op_name": "InnerJoin (MapJoin)",
13-
"args": [
14-
{
15-
"op_name": "TableFullScan",
16-
"table": "supplier"
17-
},
18-
{
19-
"op_name": "InnerJoin (MapJoin)",
20-
"args": [
21-
{
22-
"op_name": "TableFullScan",
23-
"table": "nation"
24-
},
25-
{
26-
"op_name": "TableFullScan",
27-
"table": "region"
28-
}
29-
]
30-
}
31-
]
32-
}
33-
]
34-
},
35-
{
36-
"op_name": "InnerJoin (Grace)",
37-
"args": [
38-
{
39-
"op_name": "InnerJoin (MapJoin)",
40-
"args": [
41-
{
42-
"op_name": "TableFullScan",
43-
"table": "partsupp"
44-
},
45-
{
46-
"op_name": "InnerJoin (MapJoin)",
47-
"args": [
48-
{
49-
"op_name": "TableFullScan",
50-
"table": "supplier"
51-
},
52-
{
53-
"op_name": "InnerJoin (MapJoin)",
54-
"args": [
55-
{
56-
"op_name": "TableFullScan",
57-
"table": "nation"
58-
},
59-
{
60-
"op_name": "TableFullScan",
61-
"table": "region"
62-
}
63-
]
64-
}
65-
]
66-
}
67-
]
68-
},
69-
{
70-
"op_name": "TableFullScan",
71-
"table": "part"
72-
}
73-
]
74-
}
75-
]
76-
}
1+
{"op_name":"InnerJoin (Grace)","args":[{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"partsupp"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"supplier"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"nation"},{"op_name":"TableFullScan","table":"region"}]}]}]},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"partsupp"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"supplier"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"nation"},{"op_name":"TableFullScan","table":"region"}]}]}]},{"op_name":"TableFullScan","table":"part"}]}]}

ydb/core/kqp/ut/join/data/stats/tpcds1000s.json

Lines changed: 850 additions & 425 deletions
Large diffs are not rendered by default.

ydb/core/kqp/ut/join/data/stats/tpch1000s.json

Lines changed: 123 additions & 62 deletions
Large diffs are not rendered by default.

ydb/core/kqp/ut/join/kqp_join_order_ut.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ static TKikimrRunner GetKikimrWithJoinSettings(bool useStreamLookupJoin = false,
6868

6969
NKikimrConfig::TAppConfig appConfig;
7070
appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamIdxLookupJoin(useStreamLookupJoin);
71+
appConfig.MutableTableServiceConfig()->SetCompileTimeoutMs(TDuration::Minutes(10).MilliSeconds());
72+
7173
auto serverSettings = TKikimrSettings().SetAppConfig(appConfig);
7274
serverSettings.SetKqpSettings(settings);
7375
return TKikimrRunner(serverSettings);
@@ -274,7 +276,6 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
274276
ExplainJoinOrderTestDataQuery("queries/tpcds96.sql", StreamLookupJoin);
275277
}
276278

277-
278279
void JoinOrderTestWithOverridenStats(const TString& queryPath, const TString& statsPath, const TString& correctJoinOrderPath, bool useStreamLookupJoin) {
279280
auto kikimr = GetKikimrWithJoinSettings(useStreamLookupJoin, GetStatic(statsPath));
280281
auto db = kikimr.GetTableClient();
@@ -294,6 +295,9 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
294295

295296
TString ref = GetStatic(correctJoinOrderPath);
296297

298+
/* correct canonized join order in cout, change corresponding join_order/.json file */
299+
Cout << CanonizeJoinOrder(*res.PlanJson) << Endl;
300+
297301
UNIT_ASSERT(JoinOrderAndAlgosMatch(*res.PlanJson, ref));
298302
}
299303
}

ydb/core/ydb_convert/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ SRCS(
1313
PEERDIR(
1414
ydb/core/base
1515
ydb/core/engine
16+
ydb/core/kqp/provider
1617
ydb/core/protos
1718
ydb/core/scheme
1819
ydb/core/util

ydb/core/ydb_convert/ydb_convert.cpp

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <ydb/library/yql/minikql/dom/json.h>
1616
#include <ydb/library/yql/minikql/dom/yson.h>
1717
#include <ydb/library/yql/public/udf/udf_types.h>
18+
#include <ydb/core/kqp/provider/yql_kikimr_expr_nodes.h>
1819
#include <ydb/library/yql/utils/utf8.h>
1920

2021
namespace NKikimr {
@@ -1167,7 +1168,141 @@ bool CheckValueData(NScheme::TTypeInfo type, const TCell& cell, TString& err) {
11671168
return ok;
11681169
}
11691170

1171+
bool CellFromLiteralExprNode(NYql::NNodes::TExprBase maybeLiteral, TCell& cell, TMemoryPool& valueDataPool) {
1172+
if (auto maybeJust = maybeLiteral.Maybe<NYql::NNodes::TCoJust>() ) {
1173+
maybeLiteral = maybeJust.Cast().Input();
1174+
}
1175+
1176+
if (maybeLiteral.Maybe<NYql::NNodes::TCoDataCtor>()) {
1177+
auto literal = maybeLiteral.Maybe<NYql::NNodes::TCoDataCtor>().Cast();
11701178

1179+
auto type = literal.Ref().GetTypeAnn();
1180+
auto slot = type->Cast<NYql::TDataExprType>()->GetSlot();
1181+
auto value = literal.Literal().Value();
1182+
1183+
switch (slot) {
1184+
case NYql::NUdf::EDataSlot::Bool: {
1185+
ui8 v = FromString<bool>(value);
1186+
cell = TCell((const char*)&v, sizeof(v));
1187+
break;
1188+
}
1189+
case NYql::NUdf::EDataSlot::Uint8: {
1190+
ui8 v = FromString<ui8>(value);
1191+
cell = TCell((const char*)&v, sizeof(v));
1192+
break;
1193+
}
1194+
case NYql::NUdf::EDataSlot::Int8: {
1195+
i8 v = FromString<i8>(value);
1196+
cell = TCell((const char*)&v, sizeof(v));
1197+
break;
1198+
}
1199+
case NYql::NUdf::EDataSlot::Uint32: {
1200+
ui32 v = FromString<ui32>(value);
1201+
cell = TCell((const char*)&v, sizeof(v));
1202+
break;
1203+
}
1204+
case NYql::NUdf::EDataSlot::Int32: {
1205+
i32 v = FromString<i32>(value);
1206+
cell = TCell((const char*)&v, sizeof(v));
1207+
break;
1208+
}
1209+
case NYql::NUdf::EDataSlot::Uint64: {
1210+
ui64 v = FromString<ui64>(value);
1211+
cell = TCell((const char*)&v, sizeof(v));
1212+
break;
1213+
}
1214+
case NYql::NUdf::EDataSlot::Int64: {
1215+
i64 v = FromString<i64>(value);
1216+
cell = TCell((const char*)&v, sizeof(v));
1217+
break;
1218+
}
1219+
case NYql::NUdf::EDataSlot::Float: {
1220+
float v = FromString<float>(value);
1221+
cell = TCell((const char*)&v, sizeof(v));
1222+
break;
1223+
}
1224+
case NYql::NUdf::EDataSlot::Double: {
1225+
double v = FromString<double>(value);
1226+
cell = TCell((const char*)&v, sizeof(v));
1227+
break;
1228+
}
1229+
case NYql::NUdf::EDataSlot::Date: {
1230+
ui16 v = FromString<ui32>(value);
1231+
cell = TCell((const char*)&v, sizeof(v));
1232+
break;
1233+
}
1234+
case NYql::NUdf::EDataSlot::Datetime: {
1235+
ui32 v = FromString<ui32>(value);
1236+
cell = TCell((const char*)&v, sizeof(v));
1237+
break;
1238+
}
1239+
case NYql::NUdf::EDataSlot::Utf8:
1240+
case NYql::NUdf::EDataSlot::String:
1241+
case NYql::NUdf::EDataSlot::Yson:
1242+
case NYql::NUdf::EDataSlot::Json: {
1243+
cell = TCell(value.Data(), value.Size());
1244+
break;
1245+
}
1246+
case NYql::NUdf::EDataSlot::Interval: {
1247+
i64 v = FromString<i64>(value);
1248+
cell = TCell((const char*)&v, sizeof(v));
1249+
break;
1250+
}
1251+
case NYql::NUdf::EDataSlot::Interval64: {
1252+
i64 v = FromString<i64>(value);
1253+
cell = TCell((const char*)&v, sizeof(v));
1254+
break;
1255+
}
1256+
case NYql::NUdf::EDataSlot::Timestamp: {
1257+
ui64 v = FromString<ui64>(value);
1258+
cell = TCell((const char*)&v, sizeof(v));
1259+
break;
1260+
}
1261+
case NYql::NUdf::EDataSlot::Timestamp64: {
1262+
i64 v = FromString<i64>(value);
1263+
cell = TCell((const char*)&v, sizeof(v));
1264+
break;
1265+
}
1266+
case NYql::NUdf::EDataSlot::DyNumber: {
1267+
const auto dyNumber = NDyNumber::ParseDyNumberString(value);
1268+
if (!dyNumber.Defined()) {
1269+
return false;
1270+
}
1271+
const auto dyNumberInPool = valueDataPool.AppendString(TStringBuf(*dyNumber));
1272+
cell = TCell(dyNumberInPool.data(), dyNumberInPool.size());
1273+
}
1274+
case NYql::NUdf::EDataSlot::Decimal: {
1275+
const auto paramsDataType = type->Cast<NYql::TDataExprParamsType>();
1276+
auto precision = FromString<ui8>(paramsDataType->GetParamOne());
1277+
auto scale = FromString<ui8>(paramsDataType->GetParamTwo());
1278+
1279+
auto v = NYql::NDecimal::FromString(literal.Cast<NYql::NNodes::TCoDecimal>().Literal().Value(), precision, scale);
1280+
const auto p = reinterpret_cast<ui8*>(&v);
1281+
1282+
std::pair<ui64,ui64>& valInPool = *valueDataPool.Allocate<std::pair<ui64,ui64>>();
1283+
valInPool.first = *reinterpret_cast<ui64*>(p);
1284+
valInPool.second = *reinterpret_cast<ui64*>(p + 8);
1285+
cell = TCell((const char*)&valInPool, sizeof(valInPool));
1286+
}
1287+
case NYql::NUdf::EDataSlot::Uuid: {
1288+
const ui64* uuidData = reinterpret_cast<const ui64*>(value.Data());
1289+
1290+
std::pair<ui64,ui64>& valInPool = *valueDataPool.Allocate<std::pair<ui64,ui64>>();
1291+
valInPool.first = uuidData[0]; // low128
1292+
valInPool.second = uuidData[1]; // high128
1293+
cell = TCell((const char*)&valInPool, sizeof(valInPool));
1294+
break;
1295+
}
1296+
default:
1297+
YQL_ENSURE(false, "Unexpected type slot " << slot);
1298+
}
1299+
1300+
1301+
return true;
1302+
}
1303+
1304+
return false;
1305+
}
11711306

11721307
bool CellFromProtoVal(NScheme::TTypeInfo type, i32 typmod, const Ydb::Value* vp,
11731308
TCell& c, TString& err, TMemoryPool& valueDataPool)

ydb/core/ydb_convert/ydb_convert.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313

1414
#include <util/memory/pool.h>
1515

16+
namespace NYql::NNodes {
17+
class TExprBase;
18+
}
19+
1620
namespace NKikimr {
1721

1822
void ConvertMiniKQLTypeToYdbType(const NKikimrMiniKQL::TType& input, Ydb::Type& output);
@@ -51,6 +55,8 @@ void ConvertDirectoryEntry(const NKikimrSchemeOp::TPathDescription& from, Ydb::S
5155
bool CellFromProtoVal(NScheme::TTypeInfo type, i32 typmod, const Ydb::Value* vp,
5256
TCell& c, TString& err, TMemoryPool& valueDataPool);
5357

58+
bool CellFromLiteralExprNode(NYql::NNodes::TExprBase maybeLiteral, TCell& cell, TMemoryPool& valueDataPool);
59+
5460
void ProtoValueFromCell(NYdb::TValueBuilder& vb, const NScheme::TTypeInfo& typeInfo, const TCell& cell);
5561

5662

ydb/library/yql/core/yql_statistics.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "yql_statistics.h"
33

44
#include <library/cpp/json/json_reader.h>
5+
#include <library/cpp/string_utils/base64/base64.h>
56

67
using namespace NYql;
78

@@ -103,16 +104,24 @@ std::shared_ptr<TOptimizerStatistics> NYql::OverrideStatistics(const NYql::TOpti
103104

104105
TColumnStatistics cStat;
105106

106-
auto column_name = colMap.at("name").GetStringSafe();
107+
auto columnName = colMap.at("name").GetStringSafe();
107108

108109
if (auto numUniqueVals = colMap.find("n_unique_vals"); numUniqueVals != colMap.end()) {
109-
cStat.NumUniqueVals = numUniqueVals->second.IsNull()? 0.0f: numUniqueVals->second.GetDoubleSafe();
110+
cStat.NumUniqueVals = numUniqueVals->second.IsNull()? 0.0: numUniqueVals->second.GetDoubleSafe();
110111
}
111112
if (auto hll = colMap.find("hyperloglog"); hll != colMap.end()) {
112-
cStat.HyperLogLog = hll->second.IsNull()? 0.0f: hll->second.GetDoubleSafe();
113+
cStat.HyperLogLog = hll->second.IsNull()? 0.0: hll->second.GetDoubleSafe();
113114
}
115+
if (auto countMinSketch = colMap.find("count-min"); countMinSketch != colMap.end()) {
116+
TString countMinBase64 = countMinSketch->second.GetStringSafe();
114117

115-
res->ColumnStatistics->Data[column_name] = cStat;
118+
TString countMinRaw{};
119+
Base64StrictDecode(countMinBase64, countMinRaw);
120+
121+
cStat.CountMinSketch.reset(NKikimr::TCountMinSketch::FromString(countMinRaw.Data(), countMinRaw.Size()));
122+
}
123+
124+
res->ColumnStatistics->Data[columnName] = cStat;
116125
}
117126
}
118127

0 commit comments

Comments
 (0)