Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions ydb/library/yql/public/udf/arrow/block_item.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ class TBlockItem {
template <typename T, typename = std::enable_if_t<TPrimitiveDataType<T>::Result>>
inline explicit TBlockItem(T value);

// Builds an item carrying a boolean payload.
// The flag is stored in the ui8 `bool_` slot as 0 or 1, and the item's
// marker is set to EMarkers::Present.
inline explicit TBlockItem(bool value) {
    Raw.Simple.Meta = static_cast<ui8>(EMarkers::Present);
    // bool -> ui8 conversion yields exactly 0 (false) or 1 (true).
    Raw.Simple.bool_ = static_cast<ui8>(value);
}

inline explicit TBlockItem(TStringRef value) {
Raw.String.Value = value.Data();
Raw.String.Size = value.Size();
Expand Down Expand Up @@ -109,6 +114,10 @@ class TBlockItem {
#define FIELD(type) type type##_;
PRIMITIVE_VALUE_TYPES(FIELD);
#undef FIELD
// According to the YQL <-> arrow type mapping convention,
// boolean values are processed as 8-bit unsigned integers
// with either 0 or 1 as a condition payload.
ui8 bool_;
ui64 Count;
};
union {
Expand Down Expand Up @@ -167,6 +176,9 @@ UDF_ASSERT_TYPE_SIZE(TBlockItem, 16);
PRIMITIVE_VALUE_TYPES(VALUE_AS)
PRIMITIVE_VALUE_TYPES(VALUE_GET)
PRIMITIVE_VALUE_TYPES(VALUE_CONSTR)
// XXX: TBlockItem constructor with <bool> parameter is implemented above.
VALUE_AS(bool)
VALUE_GET(bool)

#undef VALUE_AS
#undef VALUE_GET
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -379,9 +379,9 @@
],
"test.test[ansi_idents-escaped_udf_name-default.txt-Debug]": [
{
"checksum": "21a4fe7fb93bc82cd8f67fed0ffad56e",
"size": 1780,
"uri": "https://{canondata_backend}/1936997/ad7538cf8edf8e81865f7eee42c2de851daf1211/resource.tar.gz#test.test_ansi_idents-escaped_udf_name-default.txt-Debug_/opt.yql_patched"
"checksum": "131c2b624f1fb925a389a58ee3d1ee12",
"size": 1793,
"uri": "https://{canondata_backend}/1936842/557f7ab03608bf231a6bd2276c94b8a7ee4523b0/resource.tar.gz#test.test_ansi_idents-escaped_udf_name-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[ansi_idents-escaped_udf_name-default.txt-Plan]": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,9 +449,9 @@
],
"test.test[ansi_idents-escaped_udf_name-default.txt-Debug]": [
{
"checksum": "4c8e1060cd6d4c7c0166346a5eb44739",
"size": 3018,
"uri": "https://{canondata_backend}/1936842/15d1b251a19a947bc78bcd914d26903ce91d665f/resource.tar.gz#test.test_ansi_idents-escaped_udf_name-default.txt-Debug_/opt.yql_patched"
"checksum": "b217896dea1c1d1811b86d0ec696f868",
"size": 3031,
"uri": "https://{canondata_backend}/1880306/c1aa0466ddf0d02543125e7a8f308326758dc098/resource.tar.gz#test.test_ansi_idents-escaped_udf_name-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[ansi_idents-escaped_udf_name-default.txt-Plan]": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,9 @@
],
"test.test[ansi_idents-escaped_udf_name-default.txt-Debug]": [
{
"checksum": "4988877c69725bebc3eb77a48625f5cd",
"size": 2011,
"uri": "https://{canondata_backend}/1923547/5154c8bd8ef9ead4f609771f831f20c15e795571/resource.tar.gz#test.test_ansi_idents-escaped_udf_name-default.txt-Debug_/opt.yql"
"checksum": "bba9b3784ee486de1c6c60c024b7ac96",
"size": 2024,
"uri": "https://{canondata_backend}/1942525/2596beba419499583afc5dd08cdd9d8cff9ec93b/resource.tar.gz#test.test_ansi_idents-escaped_udf_name-default.txt-Debug_/opt.yql"
}
],
"test.test[ansi_idents-escaped_udf_name-default.txt-Plan]": [
Expand Down
41 changes: 40 additions & 1 deletion ydb/library/yql/udfs/common/string/string_udf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,19 +174,27 @@ namespace {
XX(AsciiToUpper, to_upper) \
XX(AsciiToTitle, to_title)

// NOTE: The functions below are marked as deprecated, so a block
// implementation is not required for them. Hence, STROKA_FIND_UDF provides
// only the scalar one at the moment.
//
// X-macro list: applies XX to each (first, second) pair below.
// HasPrefix and HasSuffix share their second argument with StartsWith and
// EndsWith respectively.
// NOTE(review): presumably `first` is the exported UDF name and `second` is
// the string method it invokes -- confirm against STROKA_FIND_UDF.
#define STROKA_FIND_UDF_MAP(XX) \
XX(Contains, Contains) \
XX(StartsWith, StartsWith) \
XX(EndsWith, EndsWith) \
XX(HasPrefix, StartsWith) \
XX(HasSuffix, EndsWith)

// NOTE: The functions below are marked as deprecated, so a block
// implementation is not required for them. Hence, STRING_TWO_ARGS_UDF
// provides only the scalar one at the moment.
//
// X-macro list: applies XX to each (first, second) pair below.
// The *Prefix*/StartsWith* entries share AsciiHasPrefixIgnoreCase and the
// *Suffix*/EndsWith* entries share AsciiHasSuffixIgnoreCase.
// NOTE(review): presumably `first` is the exported UDF name and `second` is
// the free function it calls -- confirm against STRING_TWO_ARGS_UDF.
#define STRING_TWO_ARGS_UDF_MAP(XX) \
XX(StartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \
XX(EndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \
XX(HasPrefixIgnoreCase, AsciiHasPrefixIgnoreCase) \
XX(HasSuffixIgnoreCase, AsciiHasSuffixIgnoreCase)

// NOTE: The functions below are marked as deprecated, so a block
// implementation is not required for them. Hence, STROKA_UDF provides only
// the scalar one at the moment.
//
// X-macro list with a single (Reverse, ReverseInPlace) pair.
// NOTE(review): presumably the first argument is the exported UDF name and
// the second is the in-place string method it applies -- confirm against
// STROKA_UDF.
#define STROKA_UDF_MAP(XX) \
XX(Reverse, ReverseInPlace)

Expand All @@ -207,6 +215,30 @@ namespace {
return valueBuilder->NewString(input);
}

// String::Contains: scalar implementation followed by its block kernel.
// Both paths return false when the first argument (the haystack) is empty,
// otherwise they report whether the haystack contains the needle.
// NOTE(review): each call copies both arguments into TString only to run a
// substring search; a non-owning view would presumably avoid these per-row
// copies -- confirm the available TStringRef conversions before changing.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TContains, bool(TOptional<char*>, char*)) {
    Y_UNUSED(valueBuilder);
    if (!args[0]) {
        return TUnboxedValuePod(false);
    }

    const TString text(args[0].AsStringRef());
    const TString pattern(args[1].AsStringRef());
    const bool found = text.Contains(pattern);
    return TUnboxedValuePod(found);
}

// Block kernel for TContains: same per-row logic as the scalar body above,
// with the result handed to `sink` instead of being returned.
struct TContainsKernelExec : public TBinaryKernelExec<TContainsKernelExec> {
    template <typename TSink>
    static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
        if (!arg1) {
            // Empty haystack: emit false for this row.
            sink(TBlockItem(false));
            return;
        }

        const TString text(arg1.AsStringRef());
        const TString pattern(arg2.AsStringRef());
        const bool found = text.Contains(pattern);
        sink(TBlockItem(found));
    }
};

END_SIMPLE_ARROW_UDF(TContains, TContainsKernelExec::Do);

SIMPLE_STRICT_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) {
if (TString result(args[0].AsStringRef()); SubstGlobal(result, args[1].AsStringRef(), args[2].AsStringRef()))
return valueBuilder->NewString(result);
Expand Down Expand Up @@ -277,6 +309,8 @@ namespace {
return args[0];
}

// NOTE: String::Find is marked as deprecated, so a block implementation is
// not required for it. Hence, only the scalar one is provided.
SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) {
Y_UNUSED(valueBuilder);
const TString haystack(args[0].AsStringRef());
Expand All @@ -285,6 +319,9 @@ namespace {
return TUnboxedValuePod(haystack.find(needle, pos));
}

// NOTE: String::ReverseFind is marked as deprecated, so a block
// implementation is not required for it. Hence, only the scalar one is
// provided.
SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TReverseFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) {
Y_UNUSED(valueBuilder);
const TString haystack(args[0].AsStringRef());
Expand All @@ -293,6 +330,8 @@ namespace {
return TUnboxedValuePod(haystack.rfind(needle, pos));
}

// NOTE: String::Substring is marked as deprecated, so a block implementation
// is not required for it. Hence, only the scalar one is provided.
SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSubstring, char*(TAutoMap<char*>, TOptional<ui64>, TOptional<ui64>), 1) {
const TString input(args[0].AsStringRef());
const ui64 from = args[1].GetOrDefault<ui64>(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@
"String"
]
];
[
"contains";
[
"DataType";
"Bool"
]
];
[
"levenstein";
[
Expand All @@ -27,22 +34,27 @@
"Data" = [
[
"fdsa";
%false;
"3"
];
[
"aswedfg";
%true;
"5"
];
[
"asdadsaasd";
%true;
"8"
];
[
"gdsfsassas";
%true;
"8"
];
[
"";
%false;
"2"
]
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
pragma UseBlocks;
SELECT
value,
String::Contains(value, "as") AS contains,
String::LevensteinDistance(value, "as") AS levenstein
FROM Input;