Skip to content

Commit ec35b6a

Browse files
authored
Fixed ToBytes/FromBytes for decimal & big tz dates (#10348)
1 parent 7e39cf0 commit ec35b6a

File tree

21 files changed

+369
-72
lines changed

21 files changed

+369
-72
lines changed

ydb/library/yql/core/type_ann/type_ann_core.cpp

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3973,9 +3973,9 @@ namespace NTypeAnnImpl {
39733973
return IGraphTransformer::TStatus::Ok;
39743974
}
39753975

3976-
IGraphTransformer::TStatus FromBytesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
3976+
IGraphTransformer::TStatus FromBytesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) {
39773977
Y_UNUSED(output);
3978-
if (!EnsureMinArgsCount(*input, 2, ctx.Expr)) {
3978+
if (!EnsureArgsCount(*input, 2, ctx.Expr)) {
39793979
return IGraphTransformer::TStatus::Error;
39803980
}
39813981

@@ -3993,25 +3993,33 @@ namespace NTypeAnnImpl {
39933993
return IGraphTransformer::TStatus::Error;
39943994
}
39953995

3996-
auto dataTypeName = input->Child(1)->Content();
3997-
auto slot = NKikimr::NUdf::FindDataSlot(dataTypeName);
3998-
if (!slot) {
3999-
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Child(1)->Pos()), TStringBuilder() << "Unknown datatype: " << dataTypeName));
4000-
return IGraphTransformer::TStatus::Error;
4001-
}
3996+
auto targetDataTypeName = input->Child(1)->Content();
3997+
const TDataExprType* targetDataType = nullptr;
3998+
auto targetSlot = NKikimr::NUdf::FindDataSlot(targetDataTypeName);
3999+
if (!targetSlot) {
4000+
auto typeExpr = ctx.Expr.Builder(input->Child(1)->Pos()).Callable("ParseType")
4001+
.Add(0, input->ChildPtr(1))
4002+
.Seal().Build();
4003+
auto parseTypeResult = ParseTypeWrapper(typeExpr, typeExpr, ctx);
4004+
if (parseTypeResult == IGraphTransformer::TStatus::Error) {
4005+
return parseTypeResult;
4006+
}
40024007

4003-
const bool isDecimal = IsDataTypeDecimal(*slot);
4004-
if (!EnsureArgsCount(*input, isDecimal ? 4 : 2, ctx.Expr)) {
4005-
return IGraphTransformer::TStatus::Error;
4008+
if (!EnsureType(*typeExpr, ctx.Expr)) {
4009+
return IGraphTransformer::TStatus::Error;
4010+
}
4011+
4012+
auto type = typeExpr->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
4013+
if (!EnsureDataType(input->Child(1)->Pos(), *type, ctx.Expr)) {
4014+
return IGraphTransformer::TStatus::Error;
4015+
}
4016+
4017+
targetDataType = type->Cast<TDataExprType>();
4018+
targetSlot = targetDataType->GetSlot();
40064019
}
40074020

4008-
auto dataTypeAnn = isDecimal ?
4009-
ctx.Expr.MakeType<TDataExprParamsType>(*slot, input->Child(2)->Content(), input->Child(3)->Content()):
4010-
ctx.Expr.MakeType<TDataExprType>(*slot);
4021+
auto dataTypeAnn = targetDataType ? targetDataType : ctx.Expr.MakeType<TDataExprType>(*targetSlot);
40114022
input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(dataTypeAnn));
4012-
if (isDecimal && !input->GetTypeAnn()->Cast<TDataExprParamsType>()->Validate(input->Pos(), ctx.Expr)) {
4013-
return IGraphTransformer::TStatus::Error;
4014-
}
40154023
return IGraphTransformer::TStatus::Ok;
40164024
}
40174025

@@ -5441,11 +5449,15 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
54415449
}
54425450

54435451
IGraphTransformer::TStatus ToBytesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
5444-
Y_UNUSED(output);
54455452
if (!EnsureArgsCount(*input, 1, ctx.Expr)) {
54465453
return IGraphTransformer::TStatus::Error;
54475454
}
54485455

5456+
if (IsNull(input->Head())) {
5457+
output = input->HeadPtr();
5458+
return IGraphTransformer::TStatus::Repeat;
5459+
}
5460+
54495461
bool isOptional;
54505462
const TDataExprType* dataType;
54515463
if (!EnsureDataOrOptionalOfData(input->Head(), isOptional, dataType, ctx.Expr)) {
@@ -12209,7 +12221,6 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
1220912221
Functions["SessionWindowTraits"] = &SessionWindowTraitsWrapper;
1221012222
Functions["FromString"] = &FromStringWrapper;
1221112223
Functions["StrictFromString"] = &StrictFromStringWrapper;
12212-
Functions["FromBytes"] = &FromBytesWrapper;
1221312224
Functions["Convert"] = &ConvertWrapper;
1221412225
Functions["AlterTo"] = &AlterToWrapper;
1221512226
Functions["ToIntegral"] = &ToIntegralWrapper;
@@ -12725,6 +12736,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
1272512736
ExtFunctions["SafeCast"] = &CastWrapper<false>;
1272612737
ExtFunctions["StrictCast"] = &CastWrapper<true>;
1272712738
ExtFunctions["Version"] = &VersionWrapper;
12739+
ExtFunctions["FromBytes"] = &FromBytesWrapper;
1272812740

1272912741
ExtFunctions["Aggregate"] = &AggregateWrapper;
1273012742
ExtFunctions["AggregateCombine"] = &AggregateWrapper;

ydb/library/yql/minikql/comp_nodes/mkql_frombytes.cpp

Lines changed: 108 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,22 @@ template <bool IsOptional>
2121
class TFromBytesWrapper : public TMutableComputationNode<TFromBytesWrapper<IsOptional>> {
2222
typedef TMutableComputationNode<TFromBytesWrapper<IsOptional>> TBaseComputation;
2323
public:
24-
TFromBytesWrapper(TComputationMutables& mutables, IComputationNode* data, NUdf::TDataTypeId schemeType)
24+
TFromBytesWrapper(TComputationMutables& mutables, IComputationNode* data, NUdf::TDataTypeId schemeType, ui32 param1, ui32 param2)
2525
: TBaseComputation(mutables)
2626
, Data(data)
2727
, SchemeType(NUdf::GetDataSlot(schemeType))
28+
, Param1(param1)
29+
, Param2(param2)
2830
{
31+
if (SchemeType == NUdf::EDataSlot::Decimal) {
32+
DecimalBound = NYql::NDecimal::TInt128(1);
33+
NYql::NDecimal::TInt128 ten(10U);
34+
for (ui32 i = 0; i < Param1; ++i) {
35+
DecimalBound *= ten;
36+
}
37+
38+
NegDecimalBound = -DecimalBound;
39+
}
2940
}
3041

3142
NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const {
@@ -36,10 +47,6 @@ class TFromBytesWrapper : public TMutableComputationNode<TFromBytesWrapper<IsOpt
3647

3748
switch (SchemeType) {
3849
case NUdf::EDataSlot::TzDate: {
39-
if (!data) {
40-
return NUdf::TUnboxedValuePod();
41-
}
42-
4350
const auto& ref = data.AsStringRef();
4451
if (ref.Size() != 4) {
4552
return NUdf::TUnboxedValuePod();
@@ -57,10 +64,6 @@ class TFromBytesWrapper : public TMutableComputationNode<TFromBytesWrapper<IsOpt
5764
}
5865

5966
case NUdf::EDataSlot::TzDatetime: {
60-
if (!data) {
61-
return NUdf::TUnboxedValuePod();
62-
}
63-
6467
const auto& ref = data.AsStringRef();
6568
if (ref.Size() != 6) {
6669
return NUdf::TUnboxedValuePod();
@@ -78,18 +81,65 @@ class TFromBytesWrapper : public TMutableComputationNode<TFromBytesWrapper<IsOpt
7881
}
7982

8083
case NUdf::EDataSlot::TzTimestamp: {
81-
if (!data) {
84+
const auto& ref = data.AsStringRef();
85+
if (ref.Size() != 10) {
8286
return NUdf::TUnboxedValuePod();
8387
}
8488

89+
auto tzId = SwapBytes(ReadUnaligned<ui16>(ref.Data() + ref.Size() - sizeof(ui16)));
90+
auto value = SwapBytes(data.Get<ui64>());
91+
if (value < NUdf::MAX_TIMESTAMP && tzId < NUdf::GetTimezones().size()) {
92+
auto ret = NUdf::TUnboxedValuePod(value);
93+
ret.SetTimezoneId(tzId);
94+
return ret;
95+
}
96+
97+
return NUdf::TUnboxedValuePod();
98+
}
99+
100+
case NUdf::EDataSlot::TzDate32: {
101+
const auto& ref = data.AsStringRef();
102+
if (ref.Size() != 6) {
103+
return NUdf::TUnboxedValuePod();
104+
}
105+
106+
auto tzId = SwapBytes(ReadUnaligned<ui16>(ref.Data() + ref.Size() - sizeof(ui16)));
107+
auto value = SwapBytes(data.Get<i32>());
108+
if (value >= NUdf::MIN_DATE32 && value <= NUdf::MAX_DATE32 && tzId < NUdf::GetTimezones().size()) {
109+
auto ret = NUdf::TUnboxedValuePod(value);
110+
ret.SetTimezoneId(tzId);
111+
return ret;
112+
}
113+
114+
return NUdf::TUnboxedValuePod();
115+
}
116+
117+
case NUdf::EDataSlot::TzDatetime64: {
85118
const auto& ref = data.AsStringRef();
86119
if (ref.Size() != 10) {
87120
return NUdf::TUnboxedValuePod();
88121
}
89122

90123
auto tzId = SwapBytes(ReadUnaligned<ui16>(ref.Data() + ref.Size() - sizeof(ui16)));
91-
auto value = SwapBytes(data.Get<ui64>());
92-
if (value < NUdf::MAX_TIMESTAMP && tzId < NUdf::GetTimezones().size()) {
124+
auto value = SwapBytes(data.Get<i64>());
125+
if (value >= NUdf::MIN_DATETIME64 && value <= NUdf::MAX_DATETIME64 && tzId < NUdf::GetTimezones().size()) {
126+
auto ret = NUdf::TUnboxedValuePod(value);
127+
ret.SetTimezoneId(tzId);
128+
return ret;
129+
}
130+
131+
return NUdf::TUnboxedValuePod();
132+
}
133+
134+
case NUdf::EDataSlot::TzTimestamp64: {
135+
const auto& ref = data.AsStringRef();
136+
if (ref.Size() != 10) {
137+
return NUdf::TUnboxedValuePod();
138+
}
139+
140+
auto tzId = SwapBytes(ReadUnaligned<ui16>(ref.Data() + ref.Size() - sizeof(ui16)));
141+
auto value = SwapBytes(data.Get<i64>());
142+
if (value >= NUdf::MIN_TIMESTAMP64 && value <= NUdf::MAX_TIMESTAMP64 && tzId < NUdf::GetTimezones().size()) {
93143
auto ret = NUdf::TUnboxedValuePod(value);
94144
ret.SetTimezoneId(tzId);
95145
return ret;
@@ -105,6 +155,35 @@ class TFromBytesWrapper : public TMutableComputationNode<TFromBytesWrapper<IsOpt
105155
return data.Release();
106156
}
107157

158+
case NUdf::EDataSlot::Decimal: {
159+
const auto& ref = data.AsStringRef();
160+
if (ref.Size() != 15) {
161+
return NUdf::TUnboxedValuePod();
162+
}
163+
164+
NYql::NDecimal::TInt128 v = 0;
165+
ui8* p = (ui8*)&v;
166+
memcpy(p, ref.Data(), 15);
167+
p[0xF] = (p[0xE] & 0x80) ? 0xFF : 0x00;
168+
if (NYql::NDecimal::IsError(v)) {
169+
return NUdf::TUnboxedValuePod();
170+
}
171+
172+
if (!NYql::NDecimal::IsNormal(v)) {
173+
return NUdf::TUnboxedValuePod(v);
174+
}
175+
176+
if (v >= DecimalBound) {
177+
return NUdf::TUnboxedValuePod(NYql::NDecimal::Inf());
178+
}
179+
180+
if (v <= NegDecimalBound) {
181+
return NUdf::TUnboxedValuePod(-NYql::NDecimal::Inf());
182+
}
183+
184+
return NUdf::TUnboxedValuePod(v);
185+
}
186+
108187
default:
109188
if (IsValidValue(SchemeType, data)) {
110189
return data.Release();
@@ -121,25 +200,39 @@ class TFromBytesWrapper : public TMutableComputationNode<TFromBytesWrapper<IsOpt
121200

122201
IComputationNode* const Data;
123202
const NUdf::EDataSlot SchemeType;
203+
const ui32 Param1;
204+
const ui32 Param2;
205+
NYql::NDecimal::TInt128 DecimalBound, NegDecimalBound;
124206
};
125207

126208
}
127209

128210
IComputationNode* WrapFromBytes(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
129-
MKQL_ENSURE(callable.GetInputsCount() == 2, "Expected 2 args");
211+
MKQL_ENSURE(callable.GetInputsCount() == 2 || callable.GetInputsCount() == 4, "Expected 2 or 4 args");
130212

131213
bool isOptional;
132214
const auto dataType = UnpackOptionalData(callable.GetInput(0), isOptional);
133215
MKQL_ENSURE(dataType->GetSchemeType() == NUdf::TDataType<char*>::Id, "Expected String");
134216

135217
const auto schemeTypeData = AS_VALUE(TDataLiteral, callable.GetInput(1));
136218
const auto schemeType = schemeTypeData->AsValue().Get<ui32>();
219+
ui32 param1 = 0;
220+
ui32 param2 = 0;
221+
if (schemeType == NUdf::TDataType<NUdf::TDecimal>::Id) {
222+
MKQL_ENSURE(callable.GetInputsCount() == 4, "Expected 4 args");
223+
const auto param1Data = AS_VALUE(TDataLiteral, callable.GetInput(2));
224+
param1 = param1Data->AsValue().Get<ui32>();
225+
const auto param2Data = AS_VALUE(TDataLiteral, callable.GetInput(3));
226+
param2 = param2Data->AsValue().Get<ui32>();
227+
} else {
228+
MKQL_ENSURE(callable.GetInputsCount() == 2, "Expected 2 args");
229+
}
137230

138231
const auto data = LocateNode(ctx.NodeLocator, callable, 0);
139232
if (isOptional) {
140-
return new TFromBytesWrapper<true>(ctx.Mutables, data, static_cast<NUdf::TDataTypeId>(schemeType));
233+
return new TFromBytesWrapper<true>(ctx.Mutables, data, static_cast<NUdf::TDataTypeId>(schemeType), param1, param2);
141234
} else {
142-
return new TFromBytesWrapper<false>(ctx.Mutables, data, static_cast<NUdf::TDataTypeId>(schemeType));
235+
return new TFromBytesWrapper<false>(ctx.Mutables, data, static_cast<NUdf::TDataTypeId>(schemeType), param1, param2);
143236
}
144237
}
145238

ydb/library/yql/minikql/comp_nodes/mkql_tobytes.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <ydb/library/yql/minikql/computation/mkql_computation_node_codegen.h> // Y_IGNORE
33
#include <ydb/library/yql/minikql/mkql_node_cast.h>
44
#include <ydb/library/yql/minikql/mkql_node_builder.h>
5+
#include <ydb/library/yql/minikql/mkql_string_util.h>
56
#include <ydb/library/yql/utils/swap_bytes.h>
67

78
namespace NKikimr {
@@ -38,6 +39,30 @@ using TBaseComputation = TDecoratorCodegeneratorNode<TToBytesPrimitiveTypeWrappe
3839
#endif
3940
};
4041

42+
template<bool IsOptional>
43+
class TToBytesDecimalWrapper : public TMutableComputationNode<TToBytesDecimalWrapper<IsOptional>> {
44+
using TBaseComputation = TMutableComputationNode<TToBytesDecimalWrapper<IsOptional>>;
45+
public:
46+
TToBytesDecimalWrapper(TComputationMutables& mutables, IComputationNode* data)
47+
: TBaseComputation(mutables, EValueRepresentation::String)
48+
, Data(data)
49+
{}
50+
51+
NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const {
52+
const auto& value = this->Data->GetValue(ctx);
53+
if (IsOptional && !value)
54+
return NUdf::TUnboxedValuePod();
55+
56+
return MakeString(NUdf::TStringRef(reinterpret_cast<const char*>(&value), 15));
57+
}
58+
59+
void RegisterDependencies() const final {
60+
this->DependsOn(Data);
61+
}
62+
63+
IComputationNode* const Data;
64+
};
65+
4166
template<bool IsOptional, typename Type>
4267
class TToBytesTzTypeWrapper : public TDecoratorComputationNode<TToBytesTzTypeWrapper<IsOptional, Type>> {
4368
using TBaseComputation = TDecoratorComputationNode<TToBytesTzTypeWrapper<IsOptional, Type>>;
@@ -100,7 +125,16 @@ IComputationNode* WrapToBytes(TCallable& callable, const TComputationNodeFactory
100125
MAKE_TZ_TYPE_BYTES(NUdf::TTzDate, ui16);
101126
MAKE_TZ_TYPE_BYTES(NUdf::TTzDatetime, ui32);
102127
MAKE_TZ_TYPE_BYTES(NUdf::TTzTimestamp, ui64);
128+
MAKE_TZ_TYPE_BYTES(NUdf::TTzDate32, i32);
129+
MAKE_TZ_TYPE_BYTES(NUdf::TTzDatetime64, i64);
130+
MAKE_TZ_TYPE_BYTES(NUdf::TTzTimestamp64, i64);
103131
#undef MAKE_TZ_TYPE_BYTES
132+
case NUdf::TDataType<NUdf::TDecimal>::Id: {
133+
if (isOptional) \
134+
return new TToBytesDecimalWrapper<true>(ctx.Mutables, data);
135+
else
136+
return new TToBytesDecimalWrapper<false>(ctx.Mutables, data);
137+
}
104138
default:
105139
break;
106140
}

ydb/library/yql/minikql/comp_nodes/ut/mkql_computation_node_ut.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4414,9 +4414,9 @@ Y_UNIT_TEST_SUITE(TMiniKQLComputationNodeTest) {
44144414

44154415
TVector<TRuntimeNode> tupleItems;
44164416
const auto data1 = pb.NewDataLiteral<NUdf::EDataSlot::String>(TString("\xEA\x00\x00\x00", 4));
4417-
tupleItems.push_back(pb.FromBytes(data1, NUdf::TDataType<ui32>::Id));
4417+
tupleItems.push_back(pb.FromBytes(data1, pb.NewDataType(NUdf::TDataType<ui32>::Id)));
44184418
const auto data2 = pb.NewEmptyOptionalDataLiteral(NUdf::TDataType<const char*>::Id);
4419-
tupleItems.push_back(pb.FromBytes(data2, NUdf::TDataType<ui32>::Id));
4419+
tupleItems.push_back(pb.FromBytes(data2, pb.NewDataType(NUdf::TDataType<ui32>::Id)));
44204420
const auto pgmReturn = pb.NewTuple(tupleItems);
44214421

44224422
const auto graph = setup.BuildGraph(pgmReturn);

0 commit comments

Comments
 (0)