Skip to content

Commit 16036eb

Browse files
authored
Merge db207b4 into a8fb87e
2 parents a8fb87e + db207b4 commit 16036eb

File tree

1 file changed

+21
-2
lines changed

1 file changed

+21
-2
lines changed

ydb/library/yql/providers/yt/comp_nodes/dq/arrow_converter.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,10 @@ class IYsonBlockReaderWithNativeFlag : public IYsonBlockReader {
254254
return NUdf::TBlockItem();
255255
}
256256
YQL_ENSURE(prev == BeginListSymbol);
257+
if (buf.Current() == EndListSymbol) {
258+
buf.Next();
259+
return NUdf::TBlockItem();
260+
}
257261
auto result = GetNotNull(buf);
258262
if (buf.Current() == ListItemSeparatorSymbol) {
259263
buf.Next();
@@ -513,7 +517,7 @@ class TPrimitiveColumnConverter {
513517
case arrow::Type::UINT64: PrimitiveConverterImpl_ = GEN_TYPE(UInt64); break;
514518
case arrow::Type::DOUBLE: PrimitiveConverterImpl_ = GEN_TYPE(Double); break;
515519
case arrow::Type::FLOAT: PrimitiveConverterImpl_ = GEN_TYPE(Float); break;
516-
case arrow::Type::STRING: PrimitiveConverterImpl_ = GEN_TYPE_STR(String); break;
520+
case arrow::Type::STRING: PrimitiveConverterImpl_ = GEN_TYPE_STR(Binary); break; // all strings from yt is in binary format
517521
case arrow::Type::BINARY: PrimitiveConverterImpl_ = GEN_TYPE_STR(Binary); break;
518522
default:
519523
return; // will check in runtime
@@ -615,7 +619,11 @@ class TYtColumnConverter final : public IYtColumnConverter {
615619
: Settings_(std::move(settings))
616620
, DictYsonConverter_(Settings_)
617621
, YsonConverter_(Settings_)
618-
, DictPrimitiveConverter_(Settings_) {}
622+
, DictPrimitiveConverter_(Settings_)
623+
{
624+
auto type = Settings_.Type;
625+
IsJson_ = type->IsData() && static_cast<NKikimr::NMiniKQL::TDataType*>(type)->GetDataSlot() == NUdf::EDataSlot::Json || (Native && type->IsOptional() && static_cast<NKikimr::NMiniKQL::TDataType*>(static_cast<NKikimr::NMiniKQL::TOptionalType*>(type)->GetItemType())->GetDataSlot() == NUdf::EDataSlot::Json);
626+
}
619627

620628
arrow::Datum Convert(std::shared_ptr<arrow::ArrayData> block) override {
621629
if (arrow::Type::DICTIONARY == block->type->id()) {
@@ -628,6 +636,11 @@ class TYtColumnConverter final : public IYtColumnConverter {
628636
auto result = arrow::compute::Cast(DictPrimitiveConverter_.Convert(block), Settings_.ArrowType);
629637
YQL_ENSURE(result.ok());
630638
return *result;
639+
} else if (IsJson_ && arrow::Type::STRING == Settings_.ArrowType->id() && arrow::Type::BINARY == valType->id())
640+
{
641+
auto result = arrow::compute::Cast(DictPrimitiveConverter_.Convert(block), Settings_.ArrowType);
642+
YQL_ENSURE(result.ok());
643+
return *result;
631644
} else {
632645
return DictYsonConverter_.Convert(block);
633646
}
@@ -640,6 +653,11 @@ class TYtColumnConverter final : public IYtColumnConverter {
640653
auto result = arrow::compute::Cast(arrow::Datum(*block), Settings_.ArrowType);
641654
YQL_ENSURE(result.ok());
642655
return *result;
656+
} else if (IsJson_ && arrow::Type::STRING == Settings_.ArrowType->id() && arrow::Type::BINARY == blockType->id())
657+
{
658+
auto result = arrow::compute::Cast(arrow::Datum(*block), Settings_.ArrowType);
659+
YQL_ENSURE(result.ok());
660+
return *result;
643661
} else {
644662
YQL_ENSURE(arrow::Type::BINARY == blockType->id());
645663
return YsonConverter_.Convert(block);
@@ -651,6 +669,7 @@ class TYtColumnConverter final : public IYtColumnConverter {
651669
TYtYsonColumnConverter<Native, IsTopOptional, true> DictYsonConverter_;
652670
TYtYsonColumnConverter<Native, IsTopOptional, false> YsonConverter_;
653671
TPrimitiveColumnConverter<true> DictPrimitiveConverter_;
672+
bool IsJson_;
654673
};
655674

656675
TYtColumnConverterSettings::TYtColumnConverterSettings(NKikimr::NMiniKQL::TType* type, const NUdf::IPgBuilder* pgBuilder, arrow::MemoryPool& pool, bool isNative)

0 commit comments

Comments
 (0)