Skip to content

Commit 7f4747e

Browse files
committed
Case insensitive
1 parent f1eb9c0 commit 7f4747e

File tree

11 files changed

+197
-55
lines changed

11 files changed

+197
-55
lines changed

ydb/library/yql/core/common_opt/yql_co_pgselect.cpp

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,25 +1584,7 @@ std::tuple<TVector<ui32>, TExprNode::TListType> BuildJoinGroups(TPositionHandle
15841584
.Seal()
15851585
.Seal()
15861586
.Seal()
1587-
.Build();/*
1588-
for (size_t i = 0; i < leftColumns.size(); ++i) {
1589-
current = ctx.Builder(pos)
1590-
.Callable("OrderedMap")
1591-
.Add(0, current)
1592-
.Lambda(1)
1593-
.Param("row")
1594-
.Callable("AddMember")
1595-
.Arg(0, "row")
1596-
.Atom(1, rightColumns[i]->Content())
1597-
.Callable(2, "Member")
1598-
.Arg(0, "row")
1599-
.Atom(1, leftColumns[i]->Content())
1600-
.Seal()
1601-
.Seal()
1602-
.Seal()
1603-
.Seal()
1604-
.Build();
1605-
}*/
1587+
.Build();
16061588
continue;
16071589
}
16081590

ydb/library/yql/core/type_ann/type_ann_pg.cpp

Lines changed: 56 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,7 +1482,7 @@ void ScanSublinks(TExprNode::TPtr root, TNodeSet& sublinks) {
14821482

14831483
bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& possibleAliases,
14841484
bool* hasStar, bool& hasColumnRef, THashSet<TString>& refs, THashMap<TString, THashSet<TString>>* qualifiedRefs,
1485-
TExtContext& ctx, bool scanColumnsOnly, bool hasEmitPgStar = false, THashSet<TString> usedInUsing = {}) {
1485+
TExtContext& ctx, bool scanColumnsOnly, bool hasEmitPgStar = false, THashMap<TString, TString> usedInUsing = {}) {
14861486
bool isError = false;
14871487
VisitExpr(root, [&](const TExprNode::TPtr& node) {
14881488
if (node->IsCallable("PgSubLink")) {
@@ -1581,8 +1581,9 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>&
15811581
isError = true;
15821582
return false;
15831583
}
1584-
if (node->ChildrenSize() == 1 && usedInUsing.contains(node->Tail().Content())) {
1585-
refs.insert(TString(node->Tail().Content()));
1584+
auto lcase = to_lower(TString(node->Tail().Content()));
1585+
if (auto it = usedInUsing.find(lcase); node->ChildrenSize() == 1 && it != usedInUsing.end()) {
1586+
refs.insert(it->second);
15861587
} else {
15871588
TString foundAlias;
15881589
for (ui32 priority : {TInput::Projection, TInput::Current, TInput::External}) {
@@ -1773,7 +1774,7 @@ const TItemExprType* RemoveAlias(const TItemExprType* item, TExprContext& ctx) {
17731774

17741775
void AddColumns(const TInputs& inputs, const bool* hasStar, const THashSet<TString>& refs,
17751776
const THashMap<TString, THashSet<TString>>* qualifiedRefs,
1776-
TVector<const TItemExprType*>& items, TExprContext& ctx, const THashSet<TString>& usedInUsing = {}) {
1777+
TVector<const TItemExprType*>& items, TExprContext& ctx, const THashMap<TString, TString>& usedInUsing = {}) {
17771778
THashSet<TString> usedRefs;
17781779
THashSet<TString> usedAliases;
17791780
THashSet<TString> present;
@@ -1791,10 +1792,11 @@ void AddColumns(const TInputs& inputs, const bool* hasStar, const THashSet<TStri
17911792
for (ui32 i = 0; i < x.Type->GetSize(); ++i) {
17921793
auto item = x.Type->GetItems()[i];
17931794
if (!item->GetName().StartsWith("_yql_")) {
1794-
if (usedInUsing.contains(item->GetName())) {
1795-
if (!present.contains(item->GetName())) {
1796-
items.push_back(item);
1797-
present.emplace(item->GetName());
1795+
TString lcase = to_lower(TString(item->GetName()));
1796+
if (auto it = usedInUsing.find(lcase); it != usedInUsing.end()) {
1797+
if (!present.contains(lcase)) {
1798+
items.push_back(ctx.MakeType<TItemExprType>(it->second, item->GetItemType()));
1799+
present.emplace(lcase);
17981800
}
17991801
continue;
18001802
}
@@ -1814,9 +1816,10 @@ void AddColumns(const TInputs& inputs, const bool* hasStar, const THashSet<TStri
18141816
auto pos = x.Type->FindItemI(ref);
18151817
if (pos) {
18161818
auto item = x.Type->GetItems()[*pos];
1817-
if (usedInUsing.contains(item->GetName()) && !present.contains(item->GetName())) {
1818-
items.push_back(item);
1819-
present.emplace(item->GetName());
1819+
TString lcase = to_lower(TString(item->GetName()));
1820+
if (auto it = usedInUsing.find(lcase); it != usedInUsing.end() && !present.contains(lcase)) {
1821+
items.push_back(ctx.MakeType<TItemExprType>(it->second, item->GetItemType()));
1822+
present.emplace(lcase);
18201823
}
18211824
item = AddAlias(x.Alias, item, ctx);
18221825
items.push_back(item);
@@ -1846,7 +1849,7 @@ void AddColumns(const TInputs& inputs, const bool* hasStar, const THashSet<TStri
18461849

18471850
IGraphTransformer::TStatus RebuildLambdaColumns(const TExprNode::TPtr& root, const TExprNode::TPtr& argNode,
18481851
TExprNode::TPtr& newRoot, const TInputs& inputs, TExprNode::TPtr* expandedColumns, TExtContext& ctx,
1849-
THashSet<TString> usedInUsing={}) {
1852+
THashMap<TString, TString> usedInUsing={}) {
18501853
bool hasExternalInput = false;
18511854
for (const auto& i : inputs) {
18521855
if (i.Priority == TInput::External) {
@@ -1882,29 +1885,35 @@ IGraphTransformer::TStatus RebuildLambdaColumns(const TExprNode::TPtr& root, con
18821885
for (const auto& item : x.Type->GetItems()) {
18831886
if (!item->GetName().StartsWith("_yql_")) {
18841887
if (!order) {
1885-
if (usedFromUsing.contains(item->GetName())) {
1888+
auto lcase = to_lower(TString(item->GetName()));
1889+
if (usedFromUsing.contains(lcase)) {
18861890
continue;
18871891
}
1888-
if (usedInUsing.contains(item->GetName())) {
1889-
usedFromUsing.emplace(item->GetName());
1892+
if (usedInUsing.contains(lcase)) {
1893+
usedFromUsing.emplace(lcase);
1894+
orderAtoms.push_back(ctx.Expr.NewAtom(node->Pos(), usedInUsing[lcase]));
1895+
} else {
1896+
orderAtoms.push_back(ctx.Expr.NewAtom(node->Pos(),
1897+
NTypeAnnImpl::MakeAliasedColumn(hasExternalInput ? x.Alias : "", item->GetName())));
18901898
}
1891-
orderAtoms.push_back(ctx.Expr.NewAtom(node->Pos(),
1892-
NTypeAnnImpl::MakeAliasedColumn(hasExternalInput ? x.Alias : "", item->GetName())));
18931899
}
18941900
}
18951901
}
18961902

18971903
if (order) {
18981904
for (const auto& o : *order) {
18991905
if (!o.StartsWith("_yql_")) {
1900-
if (usedFromUsing.contains(o)) {
1906+
auto lcase = to_lower(o);
1907+
if (usedFromUsing.contains(lcase)) {
19011908
continue;
19021909
}
1903-
if (usedInUsing.contains(o)) {
1904-
usedFromUsing.emplace(o);
1910+
if (usedInUsing.contains(lcase)) {
1911+
usedFromUsing.emplace(lcase);
1912+
orderAtoms.push_back(ctx.Expr.NewAtom(node->Pos(), usedInUsing[lcase]));
1913+
} else {
1914+
orderAtoms.push_back(ctx.Expr.NewAtom(node->Pos(),
1915+
NTypeAnnImpl::MakeAliasedColumn(hasExternalInput ? x.Alias : "", o)));
19051916
}
1906-
orderAtoms.push_back(ctx.Expr.NewAtom(node->Pos(),
1907-
NTypeAnnImpl::MakeAliasedColumn(hasExternalInput ? x.Alias : "", o)));
19081917
}
19091918
}
19101919
}
@@ -2994,7 +3003,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
29943003
TExprNode::TPtr groupExprs;
29953004
TExprNode::TPtr result;
29963005
bool isUsing = 0;
2997-
THashSet<TString> repeatedColumnsInUsing;
3006+
THashMap<TString, TString> repeatedColumnsInUsing;
29983007
THashMap<TString, const TTypeAnnotationNode*> usingColumnsAnnotation;
29993008
// pass 0 - from/values
30003009
// pass 1 - join
@@ -3178,13 +3187,14 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
31783187
YQL_ENSURE(x.Order);
31793188
for (const auto& col : *x.Order) {
31803189
if (!col.StartsWith("_yql_")) {
3181-
if (alreadyPresent.contains(col)) {
3190+
auto lcase = to_lower(col);
3191+
if (alreadyPresent.contains(lcase)) {
31823192
continue;
31833193
}
3184-
if (repeatedColumnsInUsing.contains(col)) {
3185-
alreadyPresent.emplace(col);
3194+
if (repeatedColumnsInUsing.contains(lcase)) {
3195+
alreadyPresent.emplace(lcase);
31863196
// coalesce of two inputs
3187-
o.push_back(col);
3197+
o.push_back(repeatedColumnsInUsing[lcase]);
31883198
} else {
31893199
o.push_back(MakeAliasedColumn(x.Alias, col));
31903200
}
@@ -3332,12 +3342,13 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
33323342
outputItems[outputItemIndex[name]] = itemRef;
33333343
} else {
33343344
if (isUsing) {
3335-
if (repeatedColumnsInUsing.contains(itemRef->GetName())) {
3336-
usingColumnsAnnotation[itemRef->GetName()] = itemRef->GetItemType();
3345+
auto lcase = to_lower(TString(itemRef->GetName()));
3346+
if (auto it = repeatedColumnsInUsing.find(lcase); it != repeatedColumnsInUsing.end()) {
3347+
usingColumnsAnnotation[lcase] = itemRef->GetItemType();
33373348
outputItems.emplace_back(itemRef);
3338-
repeatedColumnsInUsing.erase(TString(itemRef->GetName()));
3339-
} else if (usingColumnsAnnotation.contains(itemRef->GetName())) {
3340-
if (usingColumnsAnnotation[itemRef->GetName()] != itemRef->GetItemType()) {
3349+
repeatedColumnsInUsing.erase(it);
3350+
} else if (usingColumnsAnnotation.contains(lcase)) {
3351+
if (usingColumnsAnnotation[lcase] != itemRef->GetItemType()) {
33413352
TStringStream ss;
33423353
ss << "Expected column of same type when USING: got " << *itemRef->GetItemType() << " != " << *usingColumnsAnnotation[itemRef->GetName()];
33433354
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), ss.Str()));
@@ -3809,11 +3820,12 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
38093820
auto columnNames = child->Child(2);
38103821
needRewriteUsing = child->ChildrenSize() == 3;
38113822
for (ui32 i = 0; i < columnNames->ChildrenSize(); ++i) {
3812-
if (repeatedColumnsInUsing.contains(columnNames->Child(i)->Content())) {
3823+
auto lcase = to_lower(TString(columnNames->Child(i)->Content()));
3824+
if (repeatedColumnsInUsing.contains(lcase)) {
38133825
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(child->Pos()), TStringBuilder() << "Duplicated column in USING clause: " << columnNames->Child(i)->Content()));
38143826
return IGraphTransformer::TStatus::Error;
38153827
}
3816-
repeatedColumnsInUsing.emplace(columnNames->Child(i)->Content());
3828+
repeatedColumnsInUsing.emplace(lcase, columnNames->Child(i)->Content());
38173829
}
38183830
} else {
38193831
const auto& quals = child->Tail();
@@ -3900,8 +3912,16 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
39003912
for (ui32 colIdx = 0; colIdx < inp->ChildrenSize(); ++colIdx) {
39013913
auto name = inp->Child(colIdx)->Content();
39023914
TExprNode::TListType lrNames(2);
3903-
lrNames[0] = ctx.Expr.NewAtom(inp->Pos(), MakeAliasedColumn(groupInputs[0].Alias, name));
3904-
lrNames[1] = ctx.Expr.NewAtom(inp->Pos(), MakeAliasedColumn(groupInputs[1].Alias, name));
3915+
auto pos = groupInputs[0].Type->FindItemI(name);
3916+
if (!pos) {
3917+
return IGraphTransformer::TStatus::Error;
3918+
}
3919+
lrNames[0] = ctx.Expr.NewAtom(inp->Pos(), MakeAliasedColumn(groupInputs[0].Alias, groupInputs[0].Type->GetItems()[*pos]->GetName()));
3920+
pos = groupInputs[1].Type->FindItemI(name);
3921+
if (!pos) {
3922+
return IGraphTransformer::TStatus::Error;
3923+
}
3924+
lrNames[1] = ctx.Expr.NewAtom(inp->Pos(), MakeAliasedColumn(groupInputs[1].Alias, groupInputs[1].Type->GetItems()[*pos]->GetName()));
39053925
nodes[colIdx] = ctx.Expr.NewList(inp->Pos(), std::move(lrNames));
39063926
}
39073927
TExprNode::TListType newJoin(4);

ydb/library/yql/tests/sql/dq_file/part13/canondata/result.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1756,6 +1756,28 @@
17561756
}
17571757
],
17581758
"test.test[pg-join_using7-default.txt-Results]": [],
1759+
"test.test[pg-join_using_case_insensetive1-default.txt-Analyze]": [
1760+
{
1761+
"checksum": "b4dd508a329723c74293d80f0278c705",
1762+
"size": 505,
1763+
"uri": "https://{canondata_backend}/1946324/6416045a0bb9d6e8e5b0b141a708474cc016eb51/resource.tar.gz#test.test_pg-join_using_case_insensetive1-default.txt-Analyze_/plan.txt"
1764+
}
1765+
],
1766+
"test.test[pg-join_using_case_insensetive1-default.txt-Debug]": [
1767+
{
1768+
"checksum": "b93af5844523a671a6ea7bfd3f3146c3",
1769+
"size": 680,
1770+
"uri": "https://{canondata_backend}/1946324/6416045a0bb9d6e8e5b0b141a708474cc016eb51/resource.tar.gz#test.test_pg-join_using_case_insensetive1-default.txt-Debug_/opt.yql_patched"
1771+
}
1772+
],
1773+
"test.test[pg-join_using_case_insensetive1-default.txt-Plan]": [
1774+
{
1775+
"checksum": "b4dd508a329723c74293d80f0278c705",
1776+
"size": 505,
1777+
"uri": "https://{canondata_backend}/1946324/6416045a0bb9d6e8e5b0b141a708474cc016eb51/resource.tar.gz#test.test_pg-join_using_case_insensetive1-default.txt-Plan_/plan.txt"
1778+
}
1779+
],
1780+
"test.test[pg-join_using_case_insensetive1-default.txt-Results]": [],
17591781
"test.test[pg-long_ident-default.txt-Analyze]": [
17601782
{
17611783
"checksum": "b4dd508a329723c74293d80f0278c705",

ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2134,6 +2134,28 @@
21342134
}
21352135
],
21362136
"test.test[pg-join_using4-default.txt-Results]": [],
2137+
"test.test[pg-join_using_case_insensetive2-default.txt-Analyze]": [
2138+
{
2139+
"checksum": "b4dd508a329723c74293d80f0278c705",
2140+
"size": 505,
2141+
"uri": "https://{canondata_backend}/1931696/c6aa257a7050331fd824bbdb0d587a5a0f000ab3/resource.tar.gz#test.test_pg-join_using_case_insensetive2-default.txt-Analyze_/plan.txt"
2142+
}
2143+
],
2144+
"test.test[pg-join_using_case_insensetive2-default.txt-Debug]": [
2145+
{
2146+
"checksum": "58dd711999198797209085fc5f8c1315",
2147+
"size": 652,
2148+
"uri": "https://{canondata_backend}/1931696/c6aa257a7050331fd824bbdb0d587a5a0f000ab3/resource.tar.gz#test.test_pg-join_using_case_insensetive2-default.txt-Debug_/opt.yql_patched"
2149+
}
2150+
],
2151+
"test.test[pg-join_using_case_insensetive2-default.txt-Plan]": [
2152+
{
2153+
"checksum": "b4dd508a329723c74293d80f0278c705",
2154+
"size": 505,
2155+
"uri": "https://{canondata_backend}/1931696/c6aa257a7050331fd824bbdb0d587a5a0f000ab3/resource.tar.gz#test.test_pg-join_using_case_insensetive2-default.txt-Plan_/plan.txt"
2156+
}
2157+
],
2158+
"test.test[pg-join_using_case_insensetive2-default.txt-Results]": [],
21372159
"test.test[pg-range_function_multi-default.txt-Analyze]": [
21382160
{
21392161
"checksum": "b2a2eb5d6b0a138ee924c128fc7738ef",

ydb/library/yql/tests/sql/hybrid_file/part4/canondata/result.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1875,6 +1875,20 @@
18751875
"uri": "https://{canondata_backend}/1889210/431569691fa60b20bf9ef4cc94610d8f1b1518e2/resource.tar.gz#test.test_pg-in_sorted-default.txt-Plan_/plan.txt"
18761876
}
18771877
],
1878+
"test.test[pg-join_using_case_insensetive1-default.txt-Debug]": [
1879+
{
1880+
"checksum": "f05158fb04e92f447279d343b7ec8261",
1881+
"size": 679,
1882+
"uri": "https://{canondata_backend}/1937367/ffc26952304424e6a4538295e7d27e30362a4e89/resource.tar.gz#test.test_pg-join_using_case_insensetive1-default.txt-Debug_/opt.yql_patched"
1883+
}
1884+
],
1885+
"test.test[pg-join_using_case_insensetive1-default.txt-Plan]": [
1886+
{
1887+
"checksum": "b4dd508a329723c74293d80f0278c705",
1888+
"size": 505,
1889+
"uri": "https://{canondata_backend}/1937367/ffc26952304424e6a4538295e7d27e30362a4e89/resource.tar.gz#test.test_pg-join_using_case_insensetive1-default.txt-Plan_/plan.txt"
1890+
}
1891+
],
18781892
"test.test[pg-nothing-default.txt-Debug]": [
18791893
{
18801894
"checksum": "e179a1dbbd7a35b6fef3223e4ab1afcc",

0 commit comments

Comments
 (0)