Skip to content

Show create table (Column-oriented tables) #16509

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
314 changes: 311 additions & 3 deletions ydb/core/sys_view/show_create/create_table_formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,10 +612,11 @@ bool TCreateTableFormatter::Format(const TFamilyDescription& familyDesc) {
compression = "off";
break;
case NKikimrSchemeOp::ColumnCodecLZ4:
compression = "lz4";
compression = "lz4";
break;
case NKikimrSchemeOp::ColumnCodecZSTD:
ythrow TFormatFail(Ydb::StatusIds::UNSUPPORTED, "ZSTD COMPRESSION codec is not supported");
compression = "zstd";
break;
}
} else if (familyDesc.HasCodec()) {
if (familyDesc.GetCodec() == 1) {
Expand Down Expand Up @@ -643,7 +644,7 @@ bool TCreateTableFormatter::Format(const TFamilyDescription& familyDesc) {
del = ", ";
}

if (dataName) {
if (compression) {
Stream << del << "COMPRESSION = " << "\"" << compression << "\"";
}

Expand Down Expand Up @@ -884,5 +885,312 @@ bool TCreateTableFormatter::Format(const Ydb::Table::TtlSettings& ttlSettings, T
return true;
}

TCreateTableFormatter::TResult TCreateTableFormatter::Format(const TString& tablePath, const TColumnTableDescription& tableDesc, bool temporary) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

У YQL уже есть форматер, возможно ли его применить?

Copy link
Collaborator Author

@shnikd shnikd Apr 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Уже применяется)

Stream.Clear();

TStringStreamWrapper wrapper(Stream);

Ydb::Table::CreateTableRequest createRequest;
if (temporary) {
Stream << "CREATE TEMPORARY TABLE ";
} else {
Stream << "CREATE TABLE ";
}
EscapeName(tablePath);
Stream << " (\n";

const auto& schema = tableDesc.GetSchema();

std::map<ui32, const TOlapColumnDescription*> columns;
for (const auto& column : schema.GetColumns()) {
columns[column.GetId()] = &column;
}

try {
auto it = columns.cbegin();
Format(*it->second);
std::advance(it, 1);
for (; it != columns.end(); ++it) {
Stream << ",\n";
Format(*it->second);
}
} catch (const TFormatFail& ex) {
return TResult(ex.Status, ex.Error);
} catch (const yexception& e) {
return TResult(Ydb::StatusIds::UNSUPPORTED, e.what());
}
Stream << ",\n";

if (!schema.GetIndexes().empty()) {
return TResult(Ydb::StatusIds::UNSUPPORTED, "Indexes are not supported yet for column tables.");
}

bool isFamilyPrinted = false;
if (!schema.GetColumnFamilies().empty()) {
try {
isFamilyPrinted = Format(schema.GetColumnFamilies(0));
for (int i = 1; i < schema.GetColumnFamilies().size(); i++) {
if (isFamilyPrinted) {
Stream << ",\n";
}
isFamilyPrinted = Format(schema.GetColumnFamilies(i));
}
} catch (const TFormatFail& ex) {
return TResult(ex.Status, ex.Error);
} catch (const yexception& e) {
return TResult(Ydb::StatusIds::UNSUPPORTED, e.what());
}
}

Y_ENSURE(!schema.GetKeyColumnNames().empty());
if (isFamilyPrinted) {
Stream << ",\n";
}
Stream << "\tPRIMARY KEY (";
EscapeName(schema.GetKeyColumnNames(0));
for (int i = 1; i < schema.GetKeyColumnNames().size(); i++) {
Stream << ", ";
EscapeName(schema.GetKeyColumnNames(i));
}
Stream << ")\n";
Stream << ") ";

if (schema.HasOptions()) {
const auto& options = schema.GetOptions();
if (options.GetSchemeNeedActualization()) {
return TResult(Ydb::StatusIds::UNSUPPORTED, "Unsupported setting: SCHEME_NEED_ACTUALIZATION");
}
if (options.HasScanReaderPolicyName() && !options.GetScanReaderPolicyName().empty()) {
return TResult(Ydb::StatusIds::UNSUPPORTED, "Unsupported setting: SCAN_READER_POLICY_NAME");
}
if (options.HasCompactionPlannerConstructor()) {
return TResult(Ydb::StatusIds::UNSUPPORTED, "Unsupported setting: COMPACTION_PLANNER");
}
if (options.HasMetadataManagerConstructor()) {
return TResult(Ydb::StatusIds::UNSUPPORTED, "Unsupported setting: METADATA_MEMORY_MANAGER");
}
}

if (tableDesc.HasSharding()) {
Format(tableDesc.GetSharding());
}

Stream << "WITH (\n";
Stream << "\tSTORE = COLUMN";

if (tableDesc.HasColumnShardCount()) {
Stream << ",\n";
Stream << "\tAUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << tableDesc.GetColumnShardCount();
}

if (tableDesc.HasTtlSettings()) {
Format(tableDesc.GetTtlSettings());
}

Stream << "\n);";

TString statement = Stream.Str();
TString formattedStatement;
NYql::TIssues issues;
if (!NYdb::NDump::Format(statement, formattedStatement, issues)) {
return TResult(Ydb::StatusIds::INTERNAL_ERROR, issues.ToString());
}

auto result = TResult(std::move(formattedStatement));

return result;
}

void TCreateTableFormatter::Format(const TOlapColumnDescription& olapColumnDesc) {
Stream << "\t";
EscapeName(olapColumnDesc.GetName());
Stream << " " << olapColumnDesc.GetType();

if (olapColumnDesc.HasColumnFamilyName()) {
Stream << " FAMILY ";
EscapeName(olapColumnDesc.GetColumnFamilyName());
}
if (olapColumnDesc.GetNotNull()) {
Stream << " NOT NULL";
}
if (olapColumnDesc.HasDefaultValue()) {
Format(olapColumnDesc.GetDefaultValue());
}

if (olapColumnDesc.HasStorageId() && !olapColumnDesc.GetStorageId().empty()) {
ythrow TFormatFail(Ydb::StatusIds::UNSUPPORTED, "Unsupported setting: STORAGE_ID");
}

if (olapColumnDesc.HasDataAccessorConstructor()) {
ythrow TFormatFail(Ydb::StatusIds::UNSUPPORTED, "Unsupported setting: DATA_ACCESSOR_CONSTRUCTOR");
}

if (olapColumnDesc.HasDictionaryEncoding()) {
ythrow TFormatFail(Ydb::StatusIds::UNSUPPORTED, "Unsupported setting: ENCODING.DICTIONARY");
}
}

void TCreateTableFormatter::Format(const NKikimrColumnShardColumnDefaults::TColumnDefault& defaultValue) {
if (!defaultValue.HasScalar()) {
return;
}

Stream << " DEFAULT ";

TGuard<NMiniKQL::TScopedAlloc> guard(Alloc);
const auto& scalar = defaultValue.GetScalar();
if (scalar.HasBool()) {
if (scalar.GetBool() == true) {
Stream << "true";
} else {
Stream << "false";
}
} else if (scalar.HasUint8()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А точно нету готовых ToString scalar?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Мне кажется нет, во всяком случае не находил, кажется что этот протобуф только для дефолтов используется

const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Uint8, NUdf::TUnboxedValuePod(scalar.GetUint8()));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasUint16()) {
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Uint16, NUdf::TUnboxedValuePod(scalar.GetUint16()));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasUint32()) {
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Uint32, NUdf::TUnboxedValuePod(scalar.GetUint32()));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasUint64()) {
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Uint64, NUdf::TUnboxedValuePod(static_cast<ui64>(scalar.GetUint64())));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasInt8()) {
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Int8, NUdf::TUnboxedValuePod(scalar.GetInt8()));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasInt16()) {
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Int16, NUdf::TUnboxedValuePod(scalar.GetInt16()));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasInt32()) {
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Int32, NUdf::TUnboxedValuePod(scalar.GetInt32()));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasInt64()) {
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Int64, NUdf::TUnboxedValuePod(static_cast<i64>(scalar.GetInt64())));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasDouble()) {
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Double, NUdf::TUnboxedValuePod(scalar.GetDouble()));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasFloat()) {
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Float, NUdf::TUnboxedValuePod(scalar.GetFloat()));
Y_ENSURE(str.HasValue());
Stream << TString(str.AsStringRef());
} else if (scalar.HasTimestamp()) {
ui64 value = scalar.GetTimestamp().GetValue();
arrow::TimeUnit::type unit = arrow::TimeUnit::type(scalar.GetTimestamp().GetUnit());
switch (unit) {
case arrow::TimeUnit::SECOND:
value *= 1000000;
break;
case arrow::TimeUnit::MILLI:
value *= 1000;
break;
case arrow::TimeUnit::MICRO:
break;
case arrow::TimeUnit::NANO:
value /= 1000;
break;
}
Stream << "TIMESTAMP(";
const NUdf::TUnboxedValue str = NMiniKQL::ValueToString(NUdf::EDataSlot::Timestamp, NUdf::TUnboxedValuePod(value));
Y_ENSURE(str.HasValue());
EscapeString(TString(str.AsStringRef()));
Stream << ")";
} else if (scalar.HasString()) {
EscapeString(TString(scalar.GetString()));
} else {
ythrow TFormatFail(Ydb::StatusIds::UNSUPPORTED, "Unsupported type for default value");
}
}

void TCreateTableFormatter::Format(const NKikimrSchemeOp::TColumnTableSharding& sharding) {
switch (sharding.GetMethodCase()) {
case NKikimrSchemeOp::TColumnTableSharding::kHashSharding: {
const auto& hashSharding = sharding.GetHashSharding();
Y_ENSURE(!hashSharding.GetColumns().empty());
Stream << "PARTITION BY HASH(";
EscapeName(hashSharding.GetColumns(0));
for (int i = 1; i < hashSharding.GetColumns().size(); i++) {
Stream << ", ";
EscapeName(hashSharding.GetColumns(i));
}
Stream << ")\n";
break;
}
case NKikimrSchemeOp::TColumnTableSharding::kRandomSharding:
ythrow TFormatFail(Ydb::StatusIds::UNSUPPORTED, "Random sharding is not supported yet.");
Copy link
Collaborator

@dorooleg dorooleg Mar 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Боюсь что когда его добавят RandomSharding, то точно сломаю SHOW CREATE TABLE. Может тест добавить на этот случай чтобы пошли чинить его после реализации?

default:
ythrow TFormatFail(Ydb::StatusIds::INTERNAL_ERROR, "Unsupported unit");
}
}

void TCreateTableFormatter::Format(const NKikimrSchemeOp::TColumnDataLifeCycle& ttlSettings) {
if (!ttlSettings.HasEnabled()) {
return;
}

const auto& enabled = ttlSettings.GetEnabled();

if (enabled.HasExpireAfterBytes()) {
ythrow TFormatFail(Ydb::StatusIds::UNSUPPORTED, "TTL by size is not supported.");
}

Stream << ",\n";
Stream << "\tTTL =\n\t ";
bool first = true;

if (!enabled.TiersSize()) {
Y_ENSURE(enabled.HasExpireAfterSeconds());
Format(enabled.GetExpireAfterSeconds());
} else {
for (const auto& tier : enabled.GetTiers()) {
if (!first) {
Stream << ", ";
}
switch (tier.GetActionCase()) {
case NKikimrSchemeOp::TTTLSettings::TTier::ActionCase::kDelete:
Format(tier.GetApplyAfterSeconds());
break;
case NKikimrSchemeOp::TTTLSettings::TTier::ActionCase::kEvictToExternalStorage:
Format(tier.GetApplyAfterSeconds(), tier.GetEvictToExternalStorage().GetStorage());
break;
case NKikimrSchemeOp::TTTLSettings::TTier::ActionCase::ACTION_NOT_SET:
ythrow TFormatFail(Ydb::StatusIds::UNSUPPORTED, "Undefined tier action");
}
first = false;
}
}

Stream << "\n\t ON " << enabled.GetColumnName();
switch (enabled.GetColumnUnit()) {
case NKikimrSchemeOp::TTTLSettings::UNIT_AUTO:
break;
case NKikimrSchemeOp::TTTLSettings::UNIT_SECONDS:
Stream << " AS SECONDS";
break;
case NKikimrSchemeOp::TTTLSettings::UNIT_MILLISECONDS:
Stream << " AS MILLISECONDS";
break;
case NKikimrSchemeOp::TTTLSettings::UNIT_MICROSECONDS:
Stream << " AS MICROSECONDS";
break;
case NKikimrSchemeOp::TTTLSettings::UNIT_NANOSECONDS:
Stream << " AS NANOSECONDS";
break;
default:
ythrow TFormatFail(Ydb::StatusIds::INTERNAL_ERROR, "Unsupported unit");
}
}

} // NSysView
} // NKikimr
9 changes: 9 additions & 0 deletions ydb/core/sys_view/show_create/create_table_formatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include <ydb/core/protos/flat_scheme_op.pb.h>

#include <ydb/core/tx/columnshard/engines/scheme/defaults/protos/data.pb.h>

#include <ydb/public/api/protos/ydb_table.pb.h>

#include <yql/essentials/minikql/mkql_alloc.h>
Expand Down Expand Up @@ -74,6 +76,7 @@ class TCreateTableFormatter {
}

TResult Format(const TString& tablePath, const NKikimrSchemeOp::TTableDescription& tableDesc, bool temporary);
TResult Format(const TString& tablePath, const NKikimrSchemeOp::TColumnTableDescription& tableDesc, bool temporary);

private:

Expand All @@ -88,6 +91,12 @@ class TCreateTableFormatter {

void Format(ui64 expireAfterSeconds, std::optional<TString> storage = std::nullopt);

void Format(const NKikimrSchemeOp::TOlapColumnDescription& olapColumnDesc);
void Format(const NKikimrSchemeOp::TColumnTableSharding& tableSharding);
void Format(const NKikimrSchemeOp::TColumnDataLifeCycle& ttlSettings);

void Format(const NKikimrColumnShardColumnDefaults::TColumnDefault& defaultValue);

void Format(const Ydb::TypedValue& value, bool isPartition = false);
void FormatValue(NYdb::TValueParser& parser, bool isPartition = false, TString del = "");
void FormatPrimitive(NYdb::TValueParser& parser);
Expand Down
Loading
Loading