Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions ydb/library/yql/providers/yt/comp_nodes/dq/arrow_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <ydb/library/yql/utils/yql_panic.h>
#include <ydb/library/yql/minikql/mkql_type_builder.h>
#include <ydb/library/yql/minikql/mkql_type_ops.h>
#include <ydb/library/yql/minikql/mkql_node_cast.h>

#include <library/cpp/yson/node/node_io.h>
#include <library/cpp/yson/detail.h>
Expand Down Expand Up @@ -254,6 +255,10 @@ class IYsonBlockReaderWithNativeFlag : public IYsonBlockReader {
return NUdf::TBlockItem();
}
YQL_ENSURE(prev == BeginListSymbol);
if (buf.Current() == EndListSymbol) {
buf.Next();
return NUdf::TBlockItem();
}
auto result = GetNotNull(buf);
if (buf.Current() == ListItemSeparatorSymbol) {
buf.Next();
Expand Down Expand Up @@ -513,7 +518,7 @@ class TPrimitiveColumnConverter {
case arrow::Type::UINT64: PrimitiveConverterImpl_ = GEN_TYPE(UInt64); break;
case arrow::Type::DOUBLE: PrimitiveConverterImpl_ = GEN_TYPE(Double); break;
case arrow::Type::FLOAT: PrimitiveConverterImpl_ = GEN_TYPE(Float); break;
case arrow::Type::STRING: PrimitiveConverterImpl_ = GEN_TYPE_STR(String); break;
case arrow::Type::STRING: PrimitiveConverterImpl_ = GEN_TYPE_STR(Binary); break; // all strings from YT are in binary format
case arrow::Type::BINARY: PrimitiveConverterImpl_ = GEN_TYPE_STR(Binary); break;
default:
return; // will check in runtime
Expand Down Expand Up @@ -615,7 +620,13 @@ class TYtColumnConverter final : public IYtColumnConverter {
: Settings_(std::move(settings))
, DictYsonConverter_(Settings_)
, YsonConverter_(Settings_)
, DictPrimitiveConverter_(Settings_) {}
, DictPrimitiveConverter_(Settings_)
{
auto type = Settings_.Type;
IsJson_ = type->IsData() && AS_TYPE(TDataType, type)->GetDataSlot() == NUdf::EDataSlot::Json
|| (Native && type->IsOptional() && AS_TYPE(TOptionalType, type)->GetItemType()->IsData()
&& AS_TYPE(TDataType, AS_TYPE(TOptionalType, type)->GetItemType())->GetDataSlot() == NUdf::EDataSlot::Json);
}

arrow::Datum Convert(std::shared_ptr<arrow::ArrayData> block) override {
if (arrow::Type::DICTIONARY == block->type->id()) {
Expand All @@ -628,6 +639,11 @@ class TYtColumnConverter final : public IYtColumnConverter {
auto result = arrow::compute::Cast(DictPrimitiveConverter_.Convert(block), Settings_.ArrowType);
YQL_ENSURE(result.ok());
return *result;
} else if (IsJson_ && arrow::Type::STRING == Settings_.ArrowType->id() && arrow::Type::BINARY == valType->id())
{
auto result = arrow::compute::Cast(DictPrimitiveConverter_.Convert(block), Settings_.ArrowType);
YQL_ENSURE(result.ok());
return *result;
} else {
return DictYsonConverter_.Convert(block);
}
Expand All @@ -640,6 +656,11 @@ class TYtColumnConverter final : public IYtColumnConverter {
auto result = arrow::compute::Cast(arrow::Datum(*block), Settings_.ArrowType);
YQL_ENSURE(result.ok());
return *result;
} else if (IsJson_ && arrow::Type::STRING == Settings_.ArrowType->id() && arrow::Type::BINARY == blockType->id())
{
auto result = arrow::compute::Cast(arrow::Datum(*block), Settings_.ArrowType);
YQL_ENSURE(result.ok());
return *result;
} else {
YQL_ENSURE(arrow::Type::BINARY == blockType->id());
return YsonConverter_.Convert(block);
Expand All @@ -651,6 +672,7 @@ class TYtColumnConverter final : public IYtColumnConverter {
TYtYsonColumnConverter<Native, IsTopOptional, true> DictYsonConverter_;
TYtYsonColumnConverter<Native, IsTopOptional, false> YsonConverter_;
TPrimitiveColumnConverter<true> DictPrimitiveConverter_;
bool IsJson_;
};

TYtColumnConverterSettings::TYtColumnConverterSettings(NKikimr::NMiniKQL::TType* type, const NUdf::IPgBuilder* pgBuilder, arrow::MemoryPool& pool, bool isNative)
Expand Down