Skip to content

Commit ed7c207

Browse files
authored
[yt] Mixed gateway (#11146)
1 parent 5f71cfb commit ed7c207

File tree

19 files changed

+732
-86
lines changed

19 files changed

+732
-86
lines changed

ydb/library/yql/providers/yt/gateway/file/yql_yt_file.cpp

Lines changed: 98 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@
5858
#include <util/system/fstat.h>
5959
#include <util/string/split.h>
6060
#include <util/string/builder.h>
61+
#include <util/string/cast.h>
6162
#include <util/folder/path.h>
6263
#include <util/generic/yexception.h>
6364
#include <util/generic/xrange.h>
@@ -189,9 +190,11 @@ class TFileTransformProvider {
189190
const TString name(AS_VALUE(TDataLiteral, callable.GetInput(0))->AsValue().AsStringRef());
190191
auto block = TUserDataStorage::FindUserDataBlock(UserDataBlocks, name);
191192
MKQL_ENSURE(block, "File not found: " << name);
192-
MKQL_ENSURE(block->Type == EUserDataType::PATH, "FilePath not supported for non-file data block, name: "
193+
MKQL_ENSURE(block->Type == EUserDataType::PATH || block->FrozenFile, "File is not frozen, name: "
193194
<< name << ", block type: " << block->Type);
194-
return TProgramBuilder(env, *Services->GetFunctionRegistry()).NewDataLiteral<NUdf::EDataSlot::String>(block->Data);
195+
return TProgramBuilder(env, *Services->GetFunctionRegistry()).NewDataLiteral<NUdf::EDataSlot::String>(
196+
block->Type == EUserDataType::PATH ? block->Data : block->FrozenFile->GetPath().GetPath()
197+
);
195198
};
196199
}
197200

@@ -206,7 +209,7 @@ class TFileTransformProvider {
206209
continue;
207210
}
208211

209-
MKQL_ENSURE(x.second.Type == EUserDataType::PATH, "FilePath not supported for non-file data block, name: "
212+
MKQL_ENSURE(x.second.Type == EUserDataType::PATH, "FolderPath not supported for non-file data block, name: "
210213
<< x.first.Alias() << ", block type: " << x.second.Type);
211214
auto newFolderPath = x.second.Data.substr(0, x.second.Data.size() - (x.first.Alias().size() - folderName.size()));
212215
if (!folderPath) {
@@ -234,9 +237,8 @@ class TFileTransformProvider {
234237
else if (block->Type == EUserDataType::RAW_INLINE_DATA) {
235238
return pgmBuilder.NewDataLiteral<NUdf::EDataSlot::String>(block->Data);
236239
}
237-
else if (Services->GetFileStorage() && block->Type == EUserDataType::URL) {
238-
auto link = Services->GetFileStorage()->PutUrl(block->Data, "");
239-
auto content = TFileInput(link->GetPath()).ReadAll();
240+
else if (block->FrozenFile && block->Type == EUserDataType::URL) {
241+
auto content = TFileInput(block->FrozenFile->GetPath().GetPath()).ReadAll();
240242
return pgmBuilder.NewDataLiteral<NUdf::EDataSlot::String>(content);
241243
} else {
242244
MKQL_ENSURE(false, "Unsupported block type");
@@ -336,6 +338,23 @@ class TFileTransformProvider {
336338
std::shared_ptr<THashMap<TString, TRuntimeNode>> ExtraArgs;
337339
};
338340

341+
template <typename TType>
342+
static inline TType OptionFromString(const TStringBuf value) {
343+
if constexpr (std::is_same_v<TString, TType>) {
344+
return TString{value};
345+
} else if constexpr (std::is_same_v<NYT::TNode, TType>) {
346+
return NYT::NodeFromYsonString(value);
347+
} else {
348+
return FromString<TType>(value);
349+
}
350+
}
351+
352+
// Identity conversion: returns a reference to the very object it was given.
// The result refers to the argument itself, so it must not be kept beyond
// the argument's lifetime.
template <typename TType>
static inline auto NoOp(const TType& value) -> const TType& {
    return value;
}
356+
357+
339358
///////////////////////////////////////////////////////////////////////////////////////////////////////
340359

341360
class TYtFileGateway : public IYtGateway {
@@ -844,8 +863,12 @@ class TYtFileGateway : public IYtGateway {
844863

845864
auto publish = TYtPublish(node);
846865

847-
auto mode = NYql::GetSetting(publish.Settings().Ref(), EYtSettingType::Mode);
848-
bool append = mode && FromString<EYtWriteMode>(mode->Child(1)->Content()) == EYtWriteMode::Append;
866+
EYtWriteMode mode = EYtWriteMode::Renew;
867+
if (const auto modeSetting = NYql::GetSetting(publish.Settings().Ref(), EYtSettingType::Mode)) {
868+
mode = FromString<EYtWriteMode>(modeSetting->Child(1)->Content());
869+
}
870+
871+
bool append = mode == EYtWriteMode::Append;
849872
auto cluster = TString{publish.DataSink().Cluster().Value()};
850873

851874
bool isAnonymous = NYql::HasSetting(publish.Publish().Settings().Ref(), EYtSettingType::Anonymous);
@@ -939,9 +962,65 @@ class TYtFileGateway : public IYtGateway {
939962
columnGroupsSpec = NYT::NodeFromYsonString(setting->Tail().Content());
940963
}
941964
}
942-
if (!append || !attrs.HasKey("schema") || !columnGroupsSpec.IsUndefined()) {
965+
if (!append || !attrs.HasKey("schema") || !columnGroupsSpec.IsUndefined() || dstRowSpec->IsSorted()) {
943966
attrs["schema"] = RowSpecToYTSchema(spec[YqlRowSpecAttribute], nativeYtTypeCompatibility, columnGroupsSpec).ToNode();
944967
}
968+
969+
if (EYtWriteMode::Renew == mode || EYtWriteMode::RenewKeepMeta == mode) {
970+
bool isTimestamp = false, isDuration = false;
971+
TInstant stamp;
972+
TDuration duration;
973+
if (auto e = NYql::GetSetting(publish.Settings().Ref(), EYtSettingType::Expiration)) {
974+
isDuration = TDuration::TryParse(e->Tail().Content(), duration);
975+
if (!isDuration) {
976+
isTimestamp = TInstant::TryParseIso8601(e->Tail().Content(), stamp);
977+
}
978+
}
979+
const TMaybe<TInstant> deadline = options.Config()->ExpirationDeadline.Get(cluster);
980+
const TMaybe<TDuration> interval = options.Config()->ExpirationInterval.Get(cluster);
981+
if (deadline || isTimestamp) {
982+
attrs["expiration_time"] = isTimestamp ? stamp.ToStringUpToSeconds() : deadline->ToStringUpToSeconds();
983+
}
984+
if (interval || isDuration) {
985+
attrs["expiration_timeout"] = isDuration ? duration.MilliSeconds() : interval->MilliSeconds();
986+
}
987+
if (options.Config()->NightlyCompress.Get(cluster).GetOrElse(false)) {
988+
attrs["force_nightly_compress"] = true;
989+
}
990+
}
991+
992+
#define HANDLE_OPT(name, attr, conv) \
993+
auto dst##name = isAnonymous \
994+
? options.Config()->Temporary##name.Get(cluster) \
995+
: options.Config()->Published##name.Get(cluster); \
996+
if (auto s = NYql::GetSetting(publish.Settings().Ref(), EYtSettingType::name)) { \
997+
dst##name = OptionFromString<decltype(dst##name)::value_type>(s->Tail().Content()); \
998+
} \
999+
if (dst##name && dst##name != options.Config()->Temporary##name.Get(cluster)) { \
1000+
attrs[attr] = conv(*dst##name); \
1001+
}
1002+
1003+
HANDLE_OPT(CompressionCodec, "compression_codec", NoOp);
1004+
HANDLE_OPT(ErasureCodec, "erasure_codec", ToString);
1005+
HANDLE_OPT(ReplicationFactor, "replication_factor", static_cast<i64>);
1006+
HANDLE_OPT(Media, "media", NoOp);
1007+
HANDLE_OPT(PrimaryMedium, "primary_medium", NoOp);
1008+
#undef DEFINE_OPT
1009+
1010+
if (auto optimizeFor = options.Config()->OptimizeFor.Get(cluster)) {
1011+
if (dstRowSpec->GetType()->GetSize()) {
1012+
attrs["optimize_for"] = ToString(*optimizeFor);
1013+
}
1014+
}
1015+
1016+
if (auto ua = NYql::GetSetting(publish.Settings().Ref(), EYtSettingType::UserAttrs)) {
1017+
const NYT::TNode mapNode = NYT::NodeFromYsonString(ua->Tail().Content());
1018+
const auto& map = mapNode.AsMap();
1019+
for (auto it = map.cbegin(); it != map.cend(); ++it) {
1020+
attrs[it->first] = it->second;
1021+
}
1022+
}
1023+
9451024
TOFStream ofAttr(destFilePath + ".attr");
9461025
ofAttr.Write(NYT::NodeToYsonString(attrs, NYson::EYsonFormat::Pretty));
9471026
}
@@ -1037,6 +1116,16 @@ class TYtFileGateway : public IYtGateway {
10371116
return res;
10381117
}
10391118

1119+
// Stub implementation: the request options are not inspected, and the future
// produced by MakeFuture<TDownloadTablesResult>() with no value is returned.
// NOTE(review): whether that result reads as success or failure to callers
// depends on TDownloadTablesResult's defaults, which are not visible here — confirm.
TFuture<TDownloadTablesResult> DownloadTables([[maybe_unused]] TDownloadTablesOptions&& options) final {
    return MakeFuture<TDownloadTablesResult>();
}
1123+
1124+
// Stub implementation: the request options are not inspected, and the future
// produced by MakeFuture<TUploadTableResult>() with no value is returned.
// NOTE(review): whether that result reads as success or failure to callers
// depends on TUploadTableResult's defaults, which are not visible here — confirm.
TFuture<TUploadTableResult> UploadTable([[maybe_unused]] TUploadTableOptions&& options) final {
    return MakeFuture<TUploadTableResult>();
}
1128+
10401129
TFullResultTableResult PrepareFullResultTable(TFullResultTableOptions&& options) final {
10411130
try {
10421131
TString cluster = options.Cluster();
Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
# Build manifest for a library compiled from the single source yql_yt_mixed.cpp.
# NOTE(review): the path of this manifest is not shown in the diff header;
# presumably it sits next to yql_yt_mixed.cpp — confirm.
LIBRARY()
2+
3+
SRCS(
4+
yql_yt_mixed.cpp
5+
)
6+
7+
# Peer libraries: the list includes both the file and the native YT gateways
# alongside shared YT provider/common code.
PEERDIR(
8+
ydb/library/yql/utils/log
9+
ydb/library/yql/providers/yt/provider
10+
ydb/library/yql/providers/yt/gateway/file
11+
ydb/library/yql/providers/yt/gateway/native
12+
ydb/library/yql/providers/yt/gateway/lib
13+
ydb/library/yql/providers/yt/common
14+
ydb/library/yql/providers/common/provider
15+
16+
# Generic (non-YQL) libraries.
library/cpp/threading/future
17+
library/cpp/yson/node
18+
)
19+
20+
YQL_LAST_ABI_VERSION()
21+
22+
END()

0 commit comments

Comments
 (0)