Skip to content

Commit 4c9c78d

Browse files
authored
Merge fb1c9fe into 2f985f5
2 parents 2f985f5 + fb1c9fe commit 4c9c78d

File tree

18 files changed

+306
-33
lines changed

18 files changed

+306
-33
lines changed

ydb/library/yql/core/type_ann/type_ann_core.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7590,11 +7590,16 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
75907590
return IGraphTransformer::TStatus::Error;
75917591
}
75927592

7593+
cached.NormalizedName = description.NormalizedName;
75937594
cached.FunctionType = description.CallableType;
75947595
cached.RunConfigType = description.RunConfigType ? description.RunConfigType : ctx.Expr.MakeType<TVoidExprType>();
75957596
cached.NormalizedUserType = description.NormalizedUserType ? description.NormalizedUserType : ctx.Expr.MakeType<TVoidExprType>();
75967597
cached.SupportsBlocks = description.SupportsBlocks;
75977598
cached.IsStrict = description.IsStrict;
7599+
7600+
if (name != cached.NormalizedName) {
7601+
ctx.Types.UdfTypeCache[std::make_tuple(cached.NormalizedName, TString(typeConfig), userType)] = cached;
7602+
}
75987603
}
75997604

76007605
TStringBuf typeConfig = "";
@@ -7623,7 +7628,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
76237628
TStringBuf fileAlias = udfInfo ? udfInfo->FileAlias : ""_sb;
76247629
auto ret = ctx.Expr.Builder(input->Pos())
76257630
.Callable("Udf")
7626-
.Add(0, input->HeadPtr())
7631+
.Atom(0, cached.NormalizedName)
76277632
.Add(1, runConfigValue)
76287633
.Add(2, ExpandType(input->Pos(), *cached.NormalizedUserType, ctx.Expr))
76297634
.Atom(3, typeConfig)

ydb/library/yql/core/yql_type_annotation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ enum class EBlockEngineMode {
273273
};
274274

275275
struct TUdfCachedInfo {
276+
TString NormalizedName;
276277
const TTypeAnnotationNode* FunctionType = nullptr;
277278
const TTypeAnnotationNode* RunConfigType = nullptr;
278279
const TTypeAnnotationNode* NormalizedUserType = nullptr;

ydb/library/yql/core/yql_udf_index.cpp

Lines changed: 111 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,40 +80,133 @@ void AddResolveResultToRegistry(const TResolveResult& resolveResult, const TMap<
8080
// Creates an index with no registered resources; all members are
// default-initialized.
TUdfIndex::TUdfIndex() = default;
8282

83-
TUdfIndex::TUdfIndex(const TMap<TString, TResourceInfo::TPtr>& resources)
83+
void TUdfIndex::SetCaseSentiveSearch(bool caseSensitive) {
84+
CaseSensitive_ = caseSensitive;
85+
}
86+
87+
// Copies the given module => resource map and builds the lower-cased
// module-name index (lower-cased name => set of original spellings) from it.
TUdfIndex::TUdfIndex(const TMap<TString, TResourceInfo::TPtr>& resources, bool caseSensitive)
    : Resources_(resources)
    , CaseSensitive_(caseSensitive)
{
    for (auto it = Resources_.begin(); it != Resources_.end(); ++it) {
        const TString& registeredName = it->first;
        ICaseModules_[to_lower(registeredName)].insert(registeredName);
    }
}
8895

89-
bool TUdfIndex::ContainsModule(const TString& moduleName) const {
96+
bool TUdfIndex::ContainsModuleStrict(const TString& moduleName) const {
9097
return Resources_.contains(moduleName);
9198
}
9299

100+
// Rewrites `moduleName` to the spelling under which the module is registered.
//
// Returns true when the name is registered as given (left untouched) or when,
// in case-insensitive mode, exactly one registered module matches it ignoring
// case (then `moduleName` is replaced by that registered spelling).
// Returns false when nothing matches or when several registered modules differ
// only by case; `moduleName` is left unchanged in that case.
bool TUdfIndex::CanonizeModule(TString& moduleName) const {
    // An exact hit is already canonical, whatever the search mode is.
    if (Resources_.contains(moduleName)) {
        return true;
    }

    if (!CaseSensitive_) {
        const auto* spellings = ICaseModules_.FindPtr(to_lower(moduleName));
        if (spellings) {
            // Index entries are removed once they become empty.
            Y_ENSURE(spellings->size() > 0);
            if (spellings->size() == 1) {
                moduleName = *spellings->begin();
                return true;
            }
        }
    }

    return false;
}
122+
123+
// Reports whether `moduleName` resolves to a registered module.
//
// Found     - the exact name is registered, or (case-insensitive mode only)
//             exactly one registered module matches ignoring case.
// Ambigious - case-insensitive mode and more than one registered module
//             matches ignoring case.
// NotFound  - otherwise.
TUdfIndex::EStatus TUdfIndex::ContainsModule(const TString& moduleName) const {
    if (Resources_.contains(moduleName)) {
        return EStatus::Found;
    }

    if (!CaseSensitive_) {
        if (const auto* spellings = ICaseModules_.FindPtr(to_lower(moduleName))) {
            Y_ENSURE(spellings->size() > 0);
            if (spellings->size() > 1) {
                return EStatus::Ambigious;
            }
            return EStatus::Found;
        }
    }

    return EStatus::NotFound;
}
140+
93141
bool TUdfIndex::ContainsAnyModule(const TSet<TString>& modules) const {
94142
return AnyOf(modules, [this](auto& m) {
95-
return this->ContainsModule(m);
143+
return Resources_.contains(m);
96144
});
97145
}
98146

99-
bool TUdfIndex::FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const {
100-
auto r = FindResourceByModule(moduleName);
147+
// Looks up `functionName` inside `moduleName` and, on success, copies its
// description into `function`.
//
// Both the module and the function are first searched by exact name. When
// that fails and case-insensitive search is enabled, the lower-cased indexes
// are consulted instead.
//
// Found     - `function` has been filled in.
// NotFound  - no module / function matches; `function` is untouched.
// Ambigious - several modules or several functions differ only by case;
//             `function` is untouched.
TUdfIndex::EStatus TUdfIndex::FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const {
    // Step 1: resolve the module to its resource.
    auto resource = Resources_.FindPtr(moduleName);
    if (!resource) {
        if (CaseSensitive_) {
            return EStatus::NotFound;
        }

        const auto moduleSpellings = ICaseModules_.FindPtr(to_lower(moduleName));
        if (!moduleSpellings) {
            return EStatus::NotFound;
        }

        Y_ENSURE(moduleSpellings->size() > 0);
        if (moduleSpellings->size() > 1) {
            return EStatus::Ambigious;
        }

        // The case-insensitive index must only reference registered modules.
        resource = Resources_.FindPtr(*moduleSpellings->begin());
        Y_ENSURE(resource);
    }

    // Step 2: resolve the function inside that resource.
    auto found = (*resource)->Functions.FindPtr(functionName);
    if (!found) {
        if (CaseSensitive_) {
            return EStatus::NotFound;
        }

        const auto functionSpellings = (*resource)->ICaseFuncNames.FindPtr(to_lower(functionName));
        if (!functionSpellings) {
            return EStatus::NotFound;
        }

        Y_ENSURE(functionSpellings->size() > 0);
        if (functionSpellings->size() > 1) {
            return EStatus::Ambigious;
        }

        found = (*resource)->Functions.FindPtr(*functionSpellings->begin());
        Y_ENSURE(found);
    }

    function = *found;
    return EStatus::Found;
}
113191

114192
// Returns the resource that provides `moduleName`, or nullptr when the module
// cannot be resolved.
//
// The exact name is tried first. In case-insensitive mode the lower-cased
// module index is consulted next; nullptr is returned when no module matches
// or when several registered modules differ only by case.
TResourceInfo::TPtr TUdfIndex::FindResourceByModule(const TString& moduleName) const {
    if (auto exact = Resources_.FindPtr(moduleName)) {
        return *exact;
    }

    if (CaseSensitive_) {
        return nullptr;
    }

    auto spellings = ICaseModules_.FindPtr(to_lower(moduleName));
    if (!spellings) {
        // Unknown module: FindPtr yields a null pointer here, which must not be
        // dereferenced. Report "not found" like the other lookups do.
        return nullptr;
    }

    Y_ENSURE(spellings->size() > 0);
    if (spellings->size() > 1) {
        // Ambiguous: several registered modules differ only by case.
        return nullptr;
    }

    // The case-insensitive index must only reference registered modules.
    auto resource = Resources_.FindPtr(*spellings->begin());
    Y_ENSURE(resource);
    return *resource;
}
118211

119212
TSet<TResourceInfo::TPtr> TUdfIndex::FindResourcesByModules(const TSet<TString>& modules) const {
@@ -130,6 +223,11 @@ TSet<TResourceInfo::TPtr> TUdfIndex::FindResourcesByModules(const TSet<TString>&
130223
// Removes every module of `resource` from the exact-name map and from the
// lower-cased module index, dropping index entries that become empty.
void TUdfIndex::UnregisterResource(TResourceInfo::TPtr resource) {
    for (const auto& moduleName : resource->Modules) {
        Resources_.erase(moduleName);

        const auto spellings = ICaseModules_.find(to_lower(moduleName));
        if (spellings == ICaseModules_.end()) {
            continue;
        }

        spellings->second.erase(moduleName);
        if (spellings->second.empty()) {
            ICaseModules_.erase(spellings);
        }
    }
    // resource pointer should be alive here to avoid problems with erase
}
@@ -170,11 +268,12 @@ void TUdfIndex::RegisterResource(const TResourceInfo::TPtr& resource, EOverrideM
170268

171269
for (auto& m : resource->Modules) {
172270
Resources_.emplace(m, resource);
271+
ICaseModules_[to_lower(m)].insert(m);
173272
}
174273
}
175274

176275
// Returns a new index holding a copy of the module => resource map and the
// same case-sensitivity setting; the lower-cased module index is rebuilt by
// the constructor.
TIntrusivePtr<TUdfIndex> TUdfIndex::Clone() const {
    TIntrusivePtr<TUdfIndex> copy(new TUdfIndex(Resources_, CaseSensitive_));
    return copy;
}
179278

180279
void TUdfIndex::RegisterResources(const TVector<TResourceInfo::TPtr>& resources, EOverrideMode mode) {

ydb/library/yql/core/yql_udf_index.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,12 @@ struct TResourceInfo : public TThrRefBase {
7272
TDownloadLink Link;
7373
TSet<TString> Modules;
7474
TMap<TString, TFunctionInfo> Functions;
75+
TMap<TString, TSet<TString>> ICaseFuncNames;
7576

7677
void SetFunctions(const TVector<TFunctionInfo>& functions) {
7778
for (auto& f : functions) {
7879
Functions.emplace(f.Name, f);
80+
ICaseFuncNames[to_lower(f.Name)].insert(f.Name);
7981
}
8082
}
8183
};
@@ -96,12 +98,21 @@ class TUdfIndex : public TThrRefBase {
9698
RaiseError
9799
};
98100

101+
enum class EStatus {
102+
Found,
103+
NotFound,
104+
Ambigious
105+
};
106+
99107
public:
100108
TUdfIndex();
101-
bool ContainsModule(const TString& moduleName) const;
102-
bool FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const;
109+
void SetCaseSentiveSearch(bool caseSensitive);
110+
bool CanonizeModule(TString& moduleName) const;
111+
EStatus ContainsModule(const TString& moduleName) const;
112+
EStatus FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const;
103113
TResourceInfo::TPtr FindResourceByModule(const TString& moduleName) const;
104114

115+
bool ContainsModuleStrict(const TString& moduleName) const;
105116
/*
106117
New resource can contain already registered module.
107118
In this case 'mode' will be used to resolve conflicts.
@@ -114,7 +125,7 @@ class TUdfIndex : public TThrRefBase {
114125
TIntrusivePtr<TUdfIndex> Clone() const;
115126

116127
private:
117-
explicit TUdfIndex(const TMap<TString, TResourceInfo::TPtr>& resources);
128+
explicit TUdfIndex(const TMap<TString, TResourceInfo::TPtr>& resources, bool caseSensitive);
118129

119130
bool ContainsAnyModule(const TSet<TString>& modules) const;
120131
TSet<TResourceInfo::TPtr> FindResourcesByModules(const TSet<TString>& modules) const;
@@ -123,6 +134,8 @@ class TUdfIndex : public TThrRefBase {
123134
private:
124135
// module => Resource
125136
TMap<TString, TResourceInfo::TPtr> Resources_;
137+
bool CaseSensitive_ = true;
138+
TMap<TString, TSet<TString>> ICaseModules_;
126139
};
127140

128141
void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TVector<TString>& paths, bool isTrusted, TUdfIndex::EOverrideMode mode, TUdfIndex& registry);

ydb/library/yql/core/yql_udf_resolver.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class IUdfResolver : public TThrRefBase {
4242
THashMap<TString, TString> SecureParams;
4343

4444
// output
45+
TString NormalizedName;
4546
const TTypeAnnotationNode* NormalizedUserType = nullptr;
4647
const TTypeAnnotationNode* RunConfigType = nullptr;
4748
const TTypeAnnotationNode* CallableType = nullptr;

ydb/library/yql/providers/common/udf_resolve/yql_outproc_udf_resolver.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ class TOutProcUdfResolver : public IUdfResolver {
346346
ctx.AddError(TIssue(udf->Pos, udfRes.GetError()));
347347
hasErrors = true;
348348
} else {
349+
udf->NormalizedName = udf->Name;
349350
udf->CallableType = ParseTypeFromYson(TStringBuf{udfRes.GetCallableType()}, ctx, udf->Pos);
350351
if (!udf->CallableType) {
351352
hasErrors = true;

ydb/library/yql/providers/common/udf_resolve/yql_simple_udf_resolver.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ bool LoadFunctionsMetadata(const TVector<IUdfResolver::TFunction*>& functions,
200200
continue;
201201
}
202202

203+
udf.NormalizedName = udf.Name;
203204
udf.CallableType = ConvertMiniKQLType(udf.Pos, funcInfo.FunctionType, ctx);
204205
YQL_ENSURE(udf.CallableType);
205206
if (funcInfo.RunConfigType) {

ydb/library/yql/providers/common/udf_resolve/yql_udf_resolver_with_index.cpp

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ class TUdfResolverWithIndex : public IUdfResolver {
6969
TMaybe<TFilePathWithMd5> GetSystemModulePath(const TStringBuf& moduleName) const override {
7070
with_lock(Lock_) {
7171
TString moduleNameStr(moduleName);
72-
if (!UdfIndex_->ContainsModule(moduleNameStr)) {
72+
if (!UdfIndex_->ContainsModuleStrict(moduleNameStr)) {
7373
return Nothing();
7474
}
7575

@@ -115,7 +115,7 @@ class TUdfResolverWithIndex : public IUdfResolver {
115115

116116
bool ContainsModule(const TStringBuf& moduleName) const override {
117117
TString moduleNameStr = TString(moduleName);
118-
if (UdfIndex_->ContainsModule(moduleNameStr)) {
118+
if (UdfIndex_->ContainsModuleStrict(moduleNameStr)) {
119119
return true;
120120
}
121121

@@ -142,17 +142,29 @@ class TUdfResolverWithIndex : public IUdfResolver {
142142
*/
143143

144144
TString moduleNameStr = TString(moduleName);
145-
if (!UdfIndex_->ContainsModule(moduleNameStr)) {
145+
auto moduleStatus = UdfIndex_->ContainsModule(moduleNameStr);
146+
if (moduleStatus == TUdfIndex::EStatus::NotFound) {
146147
fallbackFunction = &function;
147148
return true;
148149
}
149150

151+
if (moduleStatus == TUdfIndex::EStatus::Ambigious) {
152+
ctx.AddError(TIssue(function.Pos, TStringBuilder() << "Ambigious module name: " << moduleName));
153+
return false;
154+
}
155+
150156
TFunctionInfo info;
151-
if (!UdfIndex_->FindFunction(moduleNameStr, function.Name, info)) {
157+
auto functionStatus = UdfIndex_->FindFunction(moduleNameStr, function.Name, info);
158+
if (functionStatus == TUdfIndex::EStatus::NotFound) {
152159
ctx.AddError(TIssue(function.Pos, TStringBuilder() << "Function not found: " << function.Name));
153160
return false;
154161
}
155162

163+
if (functionStatus == TUdfIndex::EStatus::Ambigious) {
164+
ctx.AddError(TIssue(function.Pos, TStringBuilder() << "Ambigious function: " << function.Name));
165+
return false;
166+
}
167+
156168
TResourceFile::TPtr file = DownloadFileWithModule(moduleName, function.Pos, ctx);
157169
if (!file) {
158170
return false;
@@ -161,6 +173,7 @@ class TUdfResolverWithIndex : public IUdfResolver {
161173
additionalImport = &file->Import_;
162174

163175
if (info.IsTypeAwareness) {
176+
function.Name = info.Name;
164177
fallbackFunction = &function;
165178
return true;
166179
}
@@ -170,6 +183,7 @@ class TUdfResolverWithIndex : public IUdfResolver {
170183
return false;
171184
}
172185

186+
function.NormalizedName = info.Name;
173187
function.CallableType = ParseTypeFromYson(TStringBuf{info.CallableType}, ctx, function.Pos);
174188
if (!function.CallableType) {
175189
ctx.AddError(TIssue(function.Pos, TStringBuilder() << "Failed to build callable type from YSON for function " << function.Name));
@@ -205,26 +219,29 @@ class TUdfResolverWithIndex : public IUdfResolver {
205219
TResourceFile::TPtr DownloadFileWithModule(const TStringBuf& module) const {
206220
TString moduleName(module);
207221

208-
const auto it = DownloadedFiles_.find(module);
209-
if (it != DownloadedFiles_.end()) {
210-
return it->second;
211-
}
212-
213222
auto resource = UdfIndex_->FindResourceByModule(moduleName);
214223
if (!resource) {
215224
ythrow yexception() << "No resource has been found for registered module " << moduleName;
216225
}
217226

227+
auto canonizedModuleName = moduleName;
228+
Y_ENSURE(UdfIndex_->CanonizeModule(canonizedModuleName));
229+
230+
const auto it = DownloadedFiles_.find(canonizedModuleName);
231+
if (it != DownloadedFiles_.end()) {
232+
return it->second;
233+
}
234+
218235
// token is empty for urls for now
219236
// assumption: file path is frozen already, no need to put into file storage
220237
const TDownloadLink& downloadLink = resource->Link;
221238
TFileLinkPtr link = downloadLink.IsUrl ? FileStorage_->PutUrl(downloadLink.Path, {}) : CreateFakeFileLink(downloadLink.Path, downloadLink.Md5);
222-
TResourceFile::TPtr file = TResourceFile::Create(moduleName, resource->Modules, link);
239+
TResourceFile::TPtr file = TResourceFile::Create(canonizedModuleName, resource->Modules, link);
223240
for (auto& d : resource->Modules) {
224241
auto p = DownloadedFiles_.emplace(d, file);
225242
if (!p.second) {
226243
// should not happen because UdfIndex handles conflicts
227-
ythrow yexception() << "file already downloaded for module " << moduleName << ", conflicting path " << downloadLink.Path << ", existing local file " << p.first->second->Link_->GetPath();
244+
ythrow yexception() << "file already downloaded for module " << canonizedModuleName << ", conflicting path " << downloadLink.Path << ", existing local file " << p.first->second->Link_->GetPath();
228245
}
229246
}
230247

0 commit comments

Comments
 (0)