Skip to content

speed up logging exp #9087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion ydb/core/formats/arrow/serializer/abstract.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,12 @@ NKikimr::TConclusionStatus TSerializerContainer::DeserializeFromRequest(NYql::TF
return TBase::GetObjectPtr()->DeserializeFromRequest(features);
}

std::shared_ptr<NKikimr::NArrow::NSerialization::ISerializer> TSerializerContainer::GetDefaultSerializer() {
std::shared_ptr<ISerializer> TSerializerContainer::GetDefaultSerializer() {
return std::make_shared<TNativeSerializer>();
}
// Returns a freshly allocated TNativeSerializer constructed with
// arrow::Compression::UNCOMPRESSED, exposed through the ISerializer interface.
std::shared_ptr<ISerializer> TSerializerContainer::GetFastestSerializer() {
    std::shared_ptr<ISerializer> uncompressedSerializer =
        std::make_shared<TNativeSerializer>(arrow::Compression::UNCOMPRESSED);
    return uncompressedSerializer;
}


}
1 change: 1 addition & 0 deletions ydb/core/formats/arrow/serializer/abstract.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ class TSerializerContainer: public NBackgroundTasks::TInterfaceProtoContainer<IS
using TBase::DeserializeFromProto;

static std::shared_ptr<ISerializer> GetDefaultSerializer();
static std::shared_ptr<ISerializer> GetFastestSerializer();

TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TCompressionOptions& proto);

Expand Down
13 changes: 8 additions & 5 deletions ydb/core/formats/arrow/size_calcer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,12 +242,15 @@ ui64 GetArrayDataSize(const std::shared_ptr<arrow::Array>& column) {
}

NKikimr::NArrow::TSerializedBatch TSerializedBatch::Build(std::shared_ptr<arrow::RecordBatch> batch, const TBatchSplitttingContext& context) {
std::optional<TString> specialKeys;
std::optional<TString> specialKeysPayload;
std::optional<TString> specialKeysFull;
if (context.GetFieldsForSpecialKeys().size()) {
specialKeys = TFirstLastSpecialKeys(batch, context.GetFieldsForSpecialKeys()).SerializeToString();
TFirstLastSpecialKeys specialKeys(batch, context.GetFieldsForSpecialKeys());
specialKeysPayload = specialKeys.SerializePayloadToString();
specialKeysFull = specialKeys.SerializeFullToString();
}
return TSerializedBatch(NArrow::SerializeSchema(*batch->schema()), NArrow::SerializeBatchNoCompression(batch), batch->num_rows(),
NArrow::GetBatchDataSize(batch), specialKeys);
return TSerializedBatch(NArrow::SerializeBatchNoCompression(batch), batch->num_rows(),
NArrow::GetBatchDataSize(batch), specialKeysPayload, specialKeysFull);
}

TConclusionStatus TSerializedBatch::BuildWithLimit(std::shared_ptr<arrow::RecordBatch> batch, const TBatchSplitttingContext& context, std::optional<TSerializedBatch>& sbL, std::optional<TSerializedBatch>& sbR) {
Expand Down Expand Up @@ -291,7 +294,7 @@ TConclusion<std::vector<TSerializedBatch>> TSerializedBatch::BuildWithLimit(std:
}

TString TSerializedBatch::DebugString() const {
return TStringBuilder() << "(data_size=" << Data.size() << ";schema_data_size=" << SchemaData.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << ";)";
return TStringBuilder() << "(data_size=" << Data.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << ";)";
}

}
30 changes: 18 additions & 12 deletions ydb/core/formats/arrow/size_calcer.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,23 +70,29 @@ class TBatchSplitttingContext {

class TSerializedBatch {
private:
YDB_READONLY_DEF(TString, SchemaData);
YDB_READONLY_DEF(TString, Data);
YDB_READONLY(ui32, RowsCount, 0);
YDB_READONLY(ui32, RawBytes, 0);
std::optional<TString> SpecialKeys;
std::optional<TString> SpecialKeysFull;
std::optional<TString> SpecialKeysPayload;

public:
// Size of the serialized batch: the length of the Data string.
size_t GetSize() const {
    const size_t serializedBytes = Data.size();
    return serializedBytes;
}

const TString& GetSpecialKeysSafe() const {
AFL_VERIFY(SpecialKeys);
return *SpecialKeys;
const TString& GetSpecialKeysPayloadSafe() const {
AFL_VERIFY(SpecialKeysPayload);
return *SpecialKeysPayload;
}

// Returns the stored SpecialKeysFull string by const reference.
// The optional is checked with AFL_VERIFY before it is dereferenced, so callers
// must only use this accessor when special keys were provided at construction
// (see HasSpecialKeys()).
// NOTE(review): AFL_VERIFY presumably aborts on a failed check - confirm against the macro definition.
const TString& GetSpecialKeysFullSafe() const {
AFL_VERIFY(SpecialKeysFull);
return *SpecialKeysFull;
}

bool HasSpecialKeys() const {
return !!SpecialKeys;
return !!SpecialKeysFull;
}

TString DebugString() const;
Expand All @@ -95,14 +101,14 @@ class TSerializedBatch {
static TConclusionStatus BuildWithLimit(std::shared_ptr<arrow::RecordBatch> batch, const TBatchSplitttingContext& context, std::optional<TSerializedBatch>& sbL, std::optional<TSerializedBatch>& sbR);
static TSerializedBatch Build(std::shared_ptr<arrow::RecordBatch> batch, const TBatchSplitttingContext& context);

TSerializedBatch(TString&& schemaData, TString&& data, const ui32 rowsCount, const ui32 rawBytes, const std::optional<TString>& specialKeys)
: SchemaData(schemaData)
, Data(data)
TSerializedBatch(TString&& data, const ui32 rowsCount, const ui32 rawBytes,
const std::optional<TString>& specialKeysPayload, const std::optional<TString>& specialKeysFull)
: Data(data)
, RowsCount(rowsCount)
, RawBytes(rawBytes)
, SpecialKeys(specialKeys)
{

, SpecialKeysFull(specialKeysFull)
, SpecialKeysPayload(specialKeysPayload) {
AFL_VERIFY(!!SpecialKeysPayload == !!SpecialKeysFull);
}
};

Expand Down
24 changes: 14 additions & 10 deletions ydb/core/formats/arrow/special_keys.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ NKikimr::NArrow::TReplaceKey TSpecialKeys::GetKeyByIndex(const ui32 position, co
}
}

TString TSpecialKeys::SerializeToString() const {
return NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()->SerializeFull(Data);
TString TSpecialKeys::SerializePayloadToString() const {
return NArrow::NSerialization::TSerializerContainer::GetFastestSerializer()->SerializePayload(Data);
}

TString TSpecialKeys::SerializeToStringDataOnlyNoCompression() const {
return NArrow::SerializeBatchNoCompression(Data);
TString TSpecialKeys::SerializeFullToString() const {
return NArrow::NSerialization::TSerializerContainer::GetFastestSerializer()->SerializeFull(Data);
}

ui64 TSpecialKeys::GetMemoryBytes() const {
Expand All @@ -50,13 +50,17 @@ TFirstLastSpecialKeys::TFirstLastSpecialKeys(const std::shared_ptr<arrow::Record
if (columnNames.size()) {
keyBatch = NArrow::TColumnOperator().VerifyIfAbsent().Extract(batch, columnNames);
}
std::vector<ui64> indexes = {0};
if (batch->num_rows() > 1) {
indexes.emplace_back(batch->num_rows() - 1);
}
if (keyBatch->num_rows() <= 2) {
Data = keyBatch;
} else {
std::vector<ui64> indexes = { 0 };
if (batch->num_rows() > 1) {
indexes.emplace_back(batch->num_rows() - 1);
}

Data = NArrow::CopyRecords(keyBatch, indexes);
Y_ABORT_UNLESS(Data->num_rows() == 1 || Data->num_rows() == 2);
Data = NArrow::CopyRecords(keyBatch, indexes);
Y_ABORT_UNLESS(Data->num_rows() == 1 || Data->num_rows() == 2);
}
}

TMinMaxSpecialKeys::TMinMaxSpecialKeys(std::shared_ptr<arrow::RecordBatch> batch, const std::shared_ptr<arrow::Schema>& schema) {
Expand Down
5 changes: 2 additions & 3 deletions ydb/core/formats/arrow/special_keys.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ class TSpecialKeys {
public:
ui64 GetMemoryBytes() const;

TString SerializeToStringDataOnlyNoCompression() const;

TSpecialKeys(const TString& data, const std::shared_ptr<arrow::Schema>& schema) {
Data = NArrow::DeserializeBatch(data, schema);
Y_ABORT_UNLESS(Data);
Expand All @@ -34,7 +32,8 @@ class TSpecialKeys {
Y_ABORT_UNLESS(DeserializeFromString(data));
}

TString SerializeToString() const;
TString SerializePayloadToString() const;
TString SerializeFullToString() const;
ui64 GetMemorySize() const;
};

Expand Down
1 change: 1 addition & 0 deletions ydb/core/protos/tx_columnshard.proto
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ message TLogicalMetadata {
optional string SpecialKeysRawData = 6;
optional TEvWrite.EModificationType ModificationType = 7;
optional NKikimrArrowSchema.TSchemaSubset SchemaSubset = 8;
optional string SpecialKeysPayloadData = 9;
}

message TEvWriteResult {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSeriali
meta.SetNumRows(batch->GetRowsCount());
meta.SetRawBytes(batch->GetRawBytes());
meta.SetDirtyWriteTimeSeconds(batch.GetStartInstant().Seconds());
meta.SetSpecialKeysRawData(batch->GetSpecialKeysSafe());
meta.SetSpecialKeysRawData(batch->GetSpecialKeysFullSafe());
meta.SetSpecialKeysPayloadData(batch->GetSpecialKeysPayloadSafe());

const auto& blobRange = batch.GetRange();
Y_ABORT_UNLESS(blobRange.GetBlobId().IsValid());
Expand Down
4 changes: 3 additions & 1 deletion ydb/core/tx/columnshard/columnshard__progress_tx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class TColumnShard::TTxProgressTx: public TTransactionBase<TColumnShard> {
std::optional<NOlap::TSnapshot> LastCompletedTx;
std::optional<TTxController::TPlanQueueItem> PlannedQueueItem;
std::optional<TMonotonic> StartExecution;
const TMonotonic ConstructionInstant = TMonotonic::Now();

public:
TTxProgressTx(TColumnShard* self)
Expand Down Expand Up @@ -95,7 +96,8 @@ class TColumnShard::TTxProgressTx: public TTransactionBase<TColumnShard> {
Self->LastCompletedTx = std::max(*LastCompletedTx, Self->LastCompletedTx);
}
if (StartExecution) {
Self->GetProgressTxController().GetCounters().OnTxProgressDuration(TxOperator->GetOpType(), TMonotonic::Now() - *StartExecution);
Self->GetProgressTxController().GetCounters().OnTxExecuteDuration(TxOperator->GetOpType(), TMonotonic::Now() - *StartExecution);
Self->GetProgressTxController().GetCounters().OnTxLiveDuration(TxOperator->GetOpType(), TMonotonic::Now() - ConstructionInstant);
}
Self->SetupIndexation();
}
Expand Down
14 changes: 10 additions & 4 deletions ydb/core/tx/columnshard/counters/tx_progress.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class TTxProgressCounters: public TCommonCountersOwner {
NMonitoring::TDynamicCounters::TCounterPtr FinishProposeOnComplete;
NMonitoring::TDynamicCounters::TCounterPtr FinishPlannedTx;
NMonitoring::TDynamicCounters::TCounterPtr AbortTx;
NMonitoring::THistogramPtr HistogramTxProgressDuration;
NMonitoring::THistogramPtr HistogramTxExecuteDuration;
NMonitoring::THistogramPtr HistogramTxLiveDuration;
NMonitoring::THistogramPtr HistogramTxProgressLag;

TProgressCounters(const TCommonCountersOwner& owner)
Expand All @@ -37,16 +38,21 @@ class TTxProgressCounters: public TCommonCountersOwner {
, FinishProposeOnComplete(TBase::GetDeriviative("FinishProposeOnComplete"))
, FinishPlannedTx(TBase::GetDeriviative("FinishPlannedTx"))
, AbortTx(TBase::GetDeriviative("AbortTx"))
, HistogramTxProgressDuration(TBase::GetHistogram("TxProgress/Execution/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)))
, HistogramTxExecuteDuration(TBase::GetHistogram("TxProgress/Execution/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)))
, HistogramTxLiveDuration(TBase::GetHistogram("TxProgress/Live/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)))
, HistogramTxProgressLag(TBase::GetHistogram("TxProgress/LagOnComplete/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) {
}
};

THashMap<TOpType, TProgressCounters> CountersByOpType;

public:
void OnTxProgressDuration(const TString& opType, const TDuration d) {
GetSubGroup(opType).HistogramTxProgressDuration->Collect(d.MilliSeconds());
void OnTxExecuteDuration(const TString& opType, const TDuration d) {
GetSubGroup(opType).HistogramTxExecuteDuration->Collect(d.MilliSeconds());
}

// Records the duration `d` (in milliseconds) into the HistogramTxLiveDuration
// histogram of the counter sub-group selected by `opType`.
void OnTxLiveDuration(const TString& opType, const TDuration d) {
    auto&& opCounters = GetSubGroup(opType);
    const auto elapsedMs = d.MilliSeconds();
    opCounters.HistogramTxLiveDuration->Collect(elapsedMs);
}

void OnTxProgressLag(const TString& opType, const TDuration d) {
Expand Down
23 changes: 16 additions & 7 deletions ydb/core/tx/columnshard/engines/insert_table/meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,23 @@ NKikimrTxColumnShard::TLogicalMetadata TInsertedDataMeta::SerializeToProto() con
return OriginalProto;
}

const std::optional<NKikimr::NArrow::TFirstLastSpecialKeys>& TInsertedDataMeta::GetSpecialKeys() const {
if (!KeysParsed) {
if (OriginalProto.HasSpecialKeysRawData()) {
SpecialKeysParsed = NArrow::TFirstLastSpecialKeys(OriginalProto.GetSpecialKeysRawData());
}
KeysParsed = true;
std::shared_ptr<NArrow::TFirstLastSpecialKeys> TInsertedDataMeta::GetSpecialKeys(const std::shared_ptr<arrow::Schema>& schema) const {
if (KeyInitialized.Val()) {
return SpecialKeysParsed;
}
return SpecialKeysParsed;
std::shared_ptr<NArrow::TFirstLastSpecialKeys> result;
if (OriginalProto.HasSpecialKeysPayloadData()) {
result = std::make_shared<NArrow::TFirstLastSpecialKeys>(OriginalProto.GetSpecialKeysPayloadData(), schema);
} else if (OriginalProto.HasSpecialKeysRawData()) {
result = std::make_shared<NArrow::TFirstLastSpecialKeys>(OriginalProto.GetSpecialKeysRawData());
} else {
AFL_VERIFY(false);
}
if (AtomicCas(&KeyInitialization, 1, 0)) {
SpecialKeysParsed = result;
KeyInitialized = 1;
}
return result;
}

}
26 changes: 10 additions & 16 deletions ydb/core/tx/columnshard/engines/insert_table/meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ class TInsertedDataMeta {
YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert);
YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset);

mutable bool KeysParsed = false;
mutable std::optional<NArrow::TFirstLastSpecialKeys> SpecialKeysParsed;

mutable TAtomicCounter KeyInitialized = 0;
mutable TAtomic KeyInitialization = 0;
mutable std::shared_ptr<NArrow::TFirstLastSpecialKeys> SpecialKeysParsed;
NKikimrTxColumnShard::TLogicalMetadata OriginalProto;
std::shared_ptr<NArrow::TFirstLastSpecialKeys> GetSpecialKeys(const std::shared_ptr<arrow::Schema>& schema) const;

const std::optional<NArrow::TFirstLastSpecialKeys>& GetSpecialKeys() const;
public:
ui64 GetTxVolume() const {
return 2 * sizeof(ui64) + sizeof(ui32) + sizeof(OriginalProto) + (SpecialKeysParsed ? SpecialKeysParsed->GetMemoryBytes() : 0);
Expand All @@ -43,19 +43,13 @@ class TInsertedDataMeta {
}
}

std::optional<NArrow::TReplaceKey> GetFirstPK(const std::shared_ptr<arrow::Schema>& schema) const {
if (GetSpecialKeys()) {
return GetSpecialKeys()->GetFirst(schema);
} else {
return {};
}
NArrow::TReplaceKey GetFirstPK(const std::shared_ptr<arrow::Schema>& schema) const {
AFL_VERIFY(schema);
return GetSpecialKeys(schema)->GetFirst();
}
std::optional<NArrow::TReplaceKey> GetLastPK(const std::shared_ptr<arrow::Schema>& schema) const {
if (GetSpecialKeys()) {
return GetSpecialKeys()->GetLast(schema);
} else {
return {};
}
NArrow::TReplaceKey GetLastPK(const std::shared_ptr<arrow::Schema>& schema) const {
AFL_VERIFY(schema);
return GetSpecialKeys(schema)->GetLast();
}

NKikimrTxColumnShard::TLogicalMetadata SerializeToProto() const;
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/tx/columnshard/engines/portions/meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ NKikimrTxColumnShard::TIndexPortionMeta TPortionMeta::SerializeToProto() const {
break;
}

portionMeta.SetPrimaryKeyBorders(ReplaceKeyEdges.SerializeToStringDataOnlyNoCompression());
portionMeta.SetPrimaryKeyBorders(ReplaceKeyEdges.SerializePayloadToString());

RecordSnapshotMin.SerializeToProto(*portionMeta.MutableRecordSnapshotMin());
RecordSnapshotMax.SerializeToProto(*portionMeta.MutableRecordSnapshotMax());
Expand Down
15 changes: 15 additions & 0 deletions ydb/core/tx/columnshard/engines/ut/helper.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
#include "helper.h"
#include <ydb/core/formats/arrow/simple_arrays_cache.h>

namespace NKikimr::NOlap::NEngines::NTest {

// Builds the schema used by the test helpers: a single uint64 field named "1".
std::shared_ptr<arrow::Schema> TLocalHelper::GetMetaSchema() {
    const auto keyField = std::make_shared<arrow::Field>("1", arrow::uint64());
    return std::make_shared<arrow::Schema>(arrow::FieldVector({ keyField }));
}

// Produces a TLogicalMetadata message for tests:
//  - DirtyWriteTimeSeconds is set to the current time in seconds;
//  - SpecialKeysPayloadData is the payload serialization of the first/last keys
//    of a one-row batch built over GetMetaSchema().
NKikimrTxColumnShard::TLogicalMetadata TLocalHelper::GetMetaProto() {
    NKikimrTxColumnShard::TLogicalMetadata meta;
    meta.SetDirtyWriteTimeSeconds(TInstant::Now().Seconds());

    const std::shared_ptr<arrow::Schema> metaSchema = GetMetaSchema();

    // One single-element array per schema field, filled with the value that
    // NArrow::DefaultScalar yields for the field's type.
    std::vector<std::shared_ptr<arrow::Array>> arrays;
    for (const auto& field : metaSchema->fields()) {
        const auto& fieldType = field->type();
        arrays.emplace_back(NArrow::TThreadSimpleArraysCache::Get(fieldType, NArrow::DefaultScalar(fieldType), 1));
    }
    const auto singleRowBatch = arrow::RecordBatch::Make(metaSchema, 1, arrays);

    // Only the payload form is stored; readers are expected to supply the schema.
    const NArrow::TFirstLastSpecialKeys borderKeys(singleRowBatch);
    meta.SetSpecialKeysPayloadData(borderKeys.SerializePayloadToString());
    return meta;
}

Expand Down
1 change: 1 addition & 0 deletions ydb/core/tx/columnshard/engines/ut/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace NKikimr::NOlap::NEngines::NTest {
// Collection of static helpers for the engine unit tests (namespace NTest).
class TLocalHelper {
public:
// Returns a TLogicalMetadata message filled with test data.
static NKikimrTxColumnShard::TLogicalMetadata GetMetaProto();
// Returns the arrow schema that matches the keys stored by GetMetaProto().
static std::shared_ptr<arrow::Schema> GetMetaSchema();
};

};
12 changes: 6 additions & 6 deletions ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) {
UNIT_ASSERT(!ok);

// read nothing
auto blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), nullptr);
auto blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema());
UNIT_ASSERT_EQUAL(blobs.size(), 0);
blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), nullptr);
blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema());
UNIT_ASSERT_EQUAL(blobs.size(), 0);

// commit
Expand All @@ -115,15 +115,15 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) {
// UNIT_ASSERT_EQUAL((*insertTable.GetPathPriorities().begin()->second.begin())->GetCommitted().size(), 1);

// read old snapshot
blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), nullptr);
blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema());
UNIT_ASSERT_EQUAL(blobs.size(), 0);
blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), nullptr);
blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema());
UNIT_ASSERT_EQUAL(blobs.size(), 0);

// read new snapshot
blobs = insertTable.Read(tableId, {}, TSnapshot(planStep, txId), nullptr);
blobs = insertTable.Read(tableId, {}, TSnapshot(planStep, txId), TLocalHelper::GetMetaSchema());
UNIT_ASSERT_EQUAL(blobs.size(), 1);
blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), nullptr);
blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema());
UNIT_ASSERT_EQUAL(blobs.size(), 0);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "broken_insertion_dedup.h"
#include "broken_dedup.h"

#include <ydb/core/tx/columnshard/columnshard_private_events.h>
#include <ydb/core/tx/columnshard/columnshard_schema.h>
Expand Down
Loading
Loading