Skip to content

Commit 90c247b

Browse files
default value for columns (#6101)
1 parent dad8344 commit 90c247b

File tree

35 files changed: +395 additions, -100 deletions

ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@ TConclusionStatus TAlterColumnOperation::DoDeserialize(NYql::TObjectSettingsImpl
1010
}
1111
ColumnName = *fValue;
1212
}
13+
DefaultValue = features.Extract("DEFAULT_VALUE");
14+
1315
StorageId = features.Extract("STORAGE_ID");
1416
if (StorageId && !*StorageId) {
1517
return TConclusionStatus::Fail("STORAGE_ID cannot be empty string");
@@ -39,6 +41,9 @@ void TAlterColumnOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTable
3941
Serializer.SerializeToProto(*column->MutableSerializer());
4042
}
4143
*column->MutableDictionaryEncoding() = DictionaryEncodingDiff.SerializeToProto();
44+
if (DefaultValue) {
45+
column->SetDefaultValue(*DefaultValue);
46+
}
4247
}
4348

4449
}

ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ class TAlterColumnOperation : public ITableStoreOperation {
1717

1818
NArrow::NSerialization::TSerializerContainer Serializer;
1919
NArrow::NDictionary::TEncodingDiff DictionaryEncodingDiff;
20+
std::optional<TString> DefaultValue;
2021
public:
2122
TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override;
2223

ydb/core/kqp/ut/olap/write_ut.cpp

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
#include "helpers/local.h"
22
#include "helpers/writer.h"
3+
#include "helpers/typed_local.h"
4+
#include "helpers/query_executor.h"
5+
#include "helpers/get_value.h"
36

47
#include <library/cpp/testing/unittest/registar.h>
58
#include <ydb/core/tx/columnshard/hooks/testing/controller.h>
@@ -105,7 +108,27 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) {
105108
("writes", writesCountStart)("count", Singleton<NKikimr::NWrappers::NExternalStorage::TFakeExternalStorage>()->GetWritesCount());
106109
AFL_VERIFY(deletesCountStart == Singleton<NKikimr::NWrappers::NExternalStorage::TFakeExternalStorage>()->GetDeletesCount())
107110
("deletes", deletesCountStart)("count", Singleton<NKikimr::NWrappers::NExternalStorage::TFakeExternalStorage>()->GetDeletesCount());
111+
}
112+
113+
Y_UNIT_TEST(DefaultValues) {
114+
auto settings = TKikimrSettings().SetWithSampleTables(false);
115+
TKikimrRunner kikimr(settings);
116+
Tests::NCommon::TLoggerInit(kikimr).Initialize();
117+
TTypedLocalHelper helper("Utf8", kikimr);
118+
helper.CreateTestOlapTable();
119+
helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`, `DEFAULT_VALUE`=`abcde`);");
120+
helper.FillPKOnly(0, 800000);
108121

122+
auto selectQuery = TString(R"(
123+
SELECT
124+
count(*) as count,
125+
FROM `/Root/olapStore/olapTable`
126+
WHERE field = 'abcde'
127+
)");
128+
129+
auto tableClient = kikimr.GetTableClient();
130+
auto rows = ExecuteScanQuery(tableClient, selectQuery);
131+
UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("count")), 800000);
109132
}
110133

111134
}

ydb/core/protos/flat_scheme_op.proto

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ import "ydb/library/actors/protos/actors.proto";
1818
import "ydb/library/mkql_proto/protos/minikql.proto";
1919
import "ydb/core/protos/index_builder.proto";
2020
import "ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto";
21+
import "ydb/core/tx/columnshard/engines/scheme/defaults/protos/data.proto";
2122
import "ydb/core/tx/columnshard/common/protos/snapshot.proto";
2223

2324
import "google/protobuf/empty.proto";
@@ -421,6 +422,7 @@ message TOlapColumnDiff {
421422
optional TDictionaryEncodingSettings DictionaryEncoding = 4;
422423
optional TOlapColumn.TSerializer Serializer = 5;
423424
optional string StorageId = 6;
425+
optional string DefaultValue = 7;
424426
}
425427

426428
message TOlapColumnDescription {
@@ -439,6 +441,7 @@ message TOlapColumnDescription {
439441
optional TDictionaryEncodingSettings DictionaryEncoding = 9;
440442
optional TOlapColumn.TSerializer Serializer = 10;
441443
optional string StorageId = 11;
444+
optional NKikimrColumnShardColumnDefaults.TColumnDefault DefaultValue = 12;
442445
}
443446

444447
message TRequestedBloomFilter {

ydb/core/protos/ya.make

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,7 @@ PEERDIR(
166166
ydb/library/services
167167
ydb/library/ydb_issue/proto
168168
ydb/core/tx/columnshard/engines/scheme/statistics/protos
169+
ydb/core/tx/columnshard/engines/scheme/defaults/protos
169170
ydb/core/tx/columnshard/engines/protos
170171
ydb/core/formats/arrow/protos
171172
ydb/core/tx/columnshard/common/protos

ydb/core/tx/columnshard/blobs_reader/actor.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ class TActor: public TActorBootstrapped<TActor> {
2222
void Bootstrap();
2323

2424
STFUNC(StateWait) {
25-
TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD);
25+
TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("event_type", ev->GetTypeName());
2626
switch (ev->GetTypeRewrite()) {
2727
hFunc(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult, Handle);
2828
default:

ydb/core/tx/columnshard/engines/changes/general_compaction.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,7 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc
183183
} else {
184184
AFL_VERIFY(dataSchema->IsSpecialColumnId(columnId));
185185
}
186-
chunks.emplace_back(std::make_shared<NChunks::TDefaultChunkPreparation>(columnId, p.GetPortionInfo().GetRecordsCount(), resultField, resultSchema->GetDefaultReadValueVerified(columnId), resultSchema->GetColumnSaver(columnId)));
186+
chunks.emplace_back(std::make_shared<NChunks::TDefaultChunkPreparation>(columnId, p.GetPortionInfo().GetRecordsCount(), resultField, resultSchema->GetDefaultValueVerified(columnId), resultSchema->GetColumnSaver(columnId)));
187187
records = { nullptr };
188188
}
189189
AFL_VERIFY(!!loader);

ydb/core/tx/columnshard/engines/portions/portion_info.cpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -713,21 +713,23 @@ std::shared_ptr<arrow::ChunkedArray> TPortionInfo::TPreparedColumn::Assemble() c
713713
}
714714

715715
TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobInfo::BuildDeserializeChunk(const std::shared_ptr<TColumnLoader>& loader) const {
716-
if (NullRowsCount) {
716+
if (DefaultRowsCount) {
717717
Y_ABORT_UNLESS(!Data);
718-
auto emptyBatch = NArrow::MakeEmptyBatch(loader->GetExpectedSchema(), NullRowsCount);
719-
AFL_VERIFY(emptyBatch->num_columns() == 1);
720-
return TDeserializeChunkedArray::TChunk(emptyBatch->column(0));
718+
AFL_VERIFY(loader->GetExpectedSchema()->num_fields() == 1);
719+
auto col = NArrow::TThreadSimpleArraysCache::Get(loader->GetExpectedSchema()->field(0)->type(), DefaultValue, DefaultRowsCount);
720+
return TDeserializeChunkedArray::TChunk(col);
721721
} else {
722722
AFL_VERIFY(ExpectedRowsCount);
723723
return TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data);
724724
}
725725
}
726726

727727
std::shared_ptr<arrow::RecordBatch> TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const TColumnLoader& loader) const {
728-
if (NullRowsCount) {
728+
if (DefaultRowsCount) {
729729
Y_ABORT_UNLESS(!Data);
730-
return NArrow::MakeEmptyBatch(loader.GetExpectedSchema(), NullRowsCount);
730+
AFL_VERIFY(loader.GetExpectedSchema()->num_fields() == 1);
731+
return arrow::RecordBatch::Make(loader.GetExpectedSchema(), DefaultRowsCount,
732+
{ NArrow::TThreadSimpleArraysCache::Get(loader.GetExpectedSchema()->field(0)->type(), DefaultValue, DefaultRowsCount) });
731733
} else {
732734
auto result = loader.Apply(Data);
733735
if (!result.ok()) {

ydb/core/tx/columnshard/engines/portions/portion_info.h

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -613,7 +613,8 @@ class TPortionInfo {
613613
class TAssembleBlobInfo {
614614
private:
615615
YDB_READONLY_DEF(std::optional<ui32>, ExpectedRowsCount);
616-
ui32 NullRowsCount = 0;
616+
ui32 DefaultRowsCount = 0;
617+
std::shared_ptr<arrow::Scalar> DefaultValue;
617618
TString Data;
618619
public:
619620
ui32 GetExpectedRowsCountVerified() const {
@@ -625,34 +626,36 @@ class TPortionInfo {
625626
AFL_VERIFY(!ExpectedRowsCount);
626627
ExpectedRowsCount = expectedRowsCount;
627628
if (!Data) {
628-
AFL_VERIFY(*ExpectedRowsCount == NullRowsCount);
629+
AFL_VERIFY(*ExpectedRowsCount == DefaultRowsCount);
629630
}
630631
}
631632

632-
TAssembleBlobInfo(const ui32 rowsCount)
633-
: NullRowsCount(rowsCount) {
634-
AFL_VERIFY(NullRowsCount);
633+
TAssembleBlobInfo(const ui32 rowsCount, const std::shared_ptr<arrow::Scalar>& defValue)
634+
: DefaultRowsCount(rowsCount)
635+
, DefaultValue(defValue)
636+
{
637+
AFL_VERIFY(DefaultRowsCount);
635638
}
636639

637640
TAssembleBlobInfo(const TString& data)
638641
: Data(data) {
639642
AFL_VERIFY(!!Data);
640643
}
641644

642-
ui32 GetNullRowsCount() const noexcept {
643-
return NullRowsCount;
645+
ui32 GetDefaultRowsCount() const noexcept {
646+
return DefaultRowsCount;
644647
}
645648

646649
const TString& GetData() const noexcept {
647650
return Data;
648651
}
649652

650653
bool IsBlob() const {
651-
return !NullRowsCount && !!Data;
654+
return !DefaultRowsCount && !!Data;
652655
}
653656

654-
bool IsNull() const {
655-
return NullRowsCount && !Data;
657+
bool IsDefault() const {
658+
return DefaultRowsCount && !Data;
656659
}
657660

658661
std::shared_ptr<arrow::RecordBatch> BuildRecordBatch(const TColumnLoader& loader) const;
@@ -788,7 +791,7 @@ class TPortionInfo {
788791

789792
TPreparedColumn Compile() {
790793
if (BlobsInfo.empty()) {
791-
BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows));
794+
BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows, DataLoader->GetDefaultValue()));
792795
return TPreparedColumn(std::move(BlobsInfo), ResultLoader);
793796
} else {
794797
AFL_VERIFY(NumRowsByChunks == NumRows)("by_chunks", NumRowsByChunks)("expected", NumRows);

ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@ void TFetchedData::SyncTableColumns(const std::vector<std::shared_ptr<arrow::Fie
1010
if (Table->GetSchema()->GetFieldByName(i->name())) {
1111
continue;
1212
}
13-
Table->AddField(i, std::make_shared<NArrow::NAccessor::TTrivialArray>(NArrow::TThreadSimpleArraysCache::GetNull(i->type(), Table->num_rows())))
14-
.Validate();
13+
Table->AddField(i, std::make_shared<NArrow::NAccessor::TTrivialArray>(
14+
NArrow::TThreadSimpleArraysCache::GetNull(i->type(), Table->num_rows()))).Validate();
1515
}
1616
}
1717

0 commit comments

Comments (0)