Skip to content

add missing monitoring metrics for column shards #7314

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Aug 7, 2024
6 changes: 4 additions & 2 deletions ydb/core/protos/counters_columnshard.proto
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ enum ECumulativeCounters {
COUNTER_PLAN_STEP_ACCEPTED = 9 [(CounterOpts) = {Name: "PlanStepAccepted"}];
COUNTER_SCANNED_ROWS = 10 [(CounterOpts) = {Name: "ScannedRows"}];
COUNTER_SCANNED_BYTES = 11 [(CounterOpts) = {Name: "ScannedBytes"}];
COUNTER_UPSERT_BLOBS_WRITTEN = 12 [(CounterOpts) = {Name: "UpsertBlobsWritten"}];
COUNTER_UPSERT_BYTES_WRITTEN = 13 [(CounterOpts) = {Name: "UpsertBytesWritten"}];
COUNTER_OPERATIONS_BLOBS_WRITTEN = 12 [(CounterOpts) = {Name: "OperationsBlobsWritten"}];
COUNTER_OPERATIONS_BYTES_WRITTEN = 13 [(CounterOpts) = {Name: "OperationsBytesWritten"}];
COUNTER_INDEXING_BLOBS_WRITTEN = 14 [(CounterOpts) = {Name: "IndexingBlobsWritten"}];
COUNTER_INDEXING_BYTES_WRITTEN = 15 [(CounterOpts) = {Name: "IndexingBytesWritten"}];
COUNTER_COMPACTION_BLOBS_WRITTEN = 16 [(CounterOpts) = {Name: "CompactionBlobsWritten"}];
Expand Down Expand Up @@ -137,6 +137,8 @@ enum ECumulativeCounters {
COUNTER_READING_EXPORTED_RANGES = 81 [(CounterOpts) = {Name: "ReadingExportedRanges"}];
COUNTER_PLANNED_TX_COMPLETED = 82 [(CounterOpts) = {Name: "PlannedTxCompleted"}];
COUNTER_IMMEDIATE_TX_COMPLETED = 83 [(CounterOpts) = {Name: "ImmediateTxCompleted"}];
COUNTER_ROWS_ERASED = 84 [(CounterOpts) = {Name: "RowsErased"}];
COUNTER_OPERATIONS_ROWS_WRITTEN = 85 [(CounterOpts) = {Name: "OperationsRowsWritten"}];
}

enum EPercentileCounters {
Expand Down
49 changes: 43 additions & 6 deletions ydb/core/tablet/tablet_counters_aggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,10 +765,16 @@ class TTabletMon {
TCounterPtr DatashardSizeBytes;
TCounterPtr DatashardCacheHitBytes;
TCounterPtr DatashardCacheMissBytes;
TCounterPtr ColumnShardReadRows_;
TCounterPtr ColumnShardReadBytes_;
TCounterPtr ColumnShardScanRows_;
TCounterPtr ColumnShardScanBytes_;
TCounterPtr ColumnShardWriteRows_;
TCounterPtr ColumnShardWriteBytes_;
TCounterPtr ColumnShardBulkUpsertRows_;
TCounterPtr ColumnShardBulkUpsertBytes_;
TCounterPtr ColumnShardEraseRows_;
TCounterPtr ColumnShardEraseBytes_;
TCounterPtr ResourcesStorageUsedBytes;
TCounterPtr ResourcesStorageUsedBytesOnSsd;
TCounterPtr ResourcesStorageUsedBytesOnHdd;
Expand All @@ -787,6 +793,7 @@ class TTabletMon {
TCounterPtr ResourcesStreamReservedStorageLimit;

THistogramPtr ShardCpuUtilization;
THistogramPtr ColumnShardCpuUtilization;

TCounterPtr RowUpdates;
TCounterPtr RowUpdateBytes;
Expand All @@ -808,8 +815,11 @@ class TTabletMon {

TCounterPtr ColumnShardScannedBytes_;
TCounterPtr ColumnShardScannedRows_;
TCounterPtr ColumnShardUpsertBlobsWritten_;
TCounterPtr ColumnShardUpsertBytesWritten_;
TCounterPtr ColumnShardOperationsRowsWritten_;
TCounterPtr ColumnShardOperationsBytesWritten_;
TCounterPtr ColumnShardErasedBytes_;
TCounterPtr ColumnShardErasedRows_;
THistogramPtr ColumnShardConsumedCpuHistogram;

TCounterPtr DiskSpaceTablesTotalBytes;
TCounterPtr DiskSpaceTablesTotalBytesOnSsd;
Expand Down Expand Up @@ -859,14 +869,26 @@ class TTabletMon {
DatashardCacheMissBytes = ydbGroup->GetNamedCounter("name",
"table.datashard.cache_miss.bytes", true);

ColumnShardReadRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.read.rows", true);
ColumnShardReadBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.read.bytes", true);
ColumnShardScanRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.scan.rows", true);
ColumnShardScanBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.scan.bytes", true);
ColumnShardWriteRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.write.rows", true);
ColumnShardWriteBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.write.bytes", true);
ColumnShardBulkUpsertRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.bulk_upsert.rows", true);
ColumnShardBulkUpsertBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.bulk_upsert.bytes", true);
ColumnShardEraseRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.erase.rows", true);
ColumnShardEraseBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.erase.bytes", true);

ResourcesStorageUsedBytes = ydbGroup->GetNamedCounter("name",
"resources.storage.used_bytes", false);
Expand Down Expand Up @@ -908,6 +930,8 @@ class TTabletMon {

ShardCpuUtilization = ydbGroup->GetNamedHistogram("name",
"table.datashard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false);
ColumnShardCpuUtilization = ydbGroup->GetNamedHistogram("name",
"table.columnshard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false);
};

void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard, bool hasColumnShard) {
Expand Down Expand Up @@ -943,8 +967,11 @@ class TTabletMon {

ColumnShardScannedBytes_ = appGroup->GetCounter("ColumnShard/ScannedBytes");
ColumnShardScannedRows_ = appGroup->GetCounter("ColumnShard/ScannedRows");
ColumnShardUpsertBlobsWritten_ = appGroup->GetCounter("ColumnShard/UpsertBlobsWritten");
ColumnShardUpsertBytesWritten_ = appGroup->GetCounter("ColumnShard/UpsertBytesWritten");
ColumnShardOperationsRowsWritten_ = appGroup->GetCounter("ColumnShard/OperationsRowsWritten");
ColumnShardOperationsBytesWritten_ = appGroup->GetCounter("ColumnShard/OperationsBytesWritten");
ColumnShardErasedBytes_ = appGroup->GetCounter("ColumnShard/BytesErased");
ColumnShardErasedRows_ = appGroup->GetCounter("ColumnShard/RowsErased");
ColumnShardConsumedCpuHistogram = appGroup->FindHistogram("HIST(ConsumedCPU)");
}

if (hasSchemeshard && !DiskSpaceTablesTotalBytes) {
Expand Down Expand Up @@ -990,10 +1017,20 @@ class TTabletMon {
}

if (ColumnShardScannedBytes_) {
ColumnShardReadRows_->Set(0);
ColumnShardReadBytes_->Set(0);
ColumnShardScanRows_->Set(ColumnShardScannedRows_->Val());
ColumnShardScanBytes_->Set(ColumnShardScannedBytes_->Val());
ColumnShardBulkUpsertRows_->Set(ColumnShardUpsertBlobsWritten_->Val());
ColumnShardBulkUpsertBytes_->Set(ColumnShardUpsertBytesWritten_->Val());
ColumnShardWriteRows_->Set(ColumnShardOperationsRowsWritten_->Val());
Copy link
Collaborator Author

@swalrus1 swalrus1 Aug 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

В ColumnShardWrite* и ColumnShardBulkUpsert* одинаковые значения, потому что сейчас непонятно, как на стороне шарда различать записи через bulk upload от остальных -- когда поддержим транзакции, надо будет разделить эти метрики

ColumnShardWriteBytes_->Set(ColumnShardOperationsBytesWritten_->Val());
ColumnShardBulkUpsertRows_->Set(ColumnShardOperationsRowsWritten_->Val());
ColumnShardBulkUpsertBytes_->Set(ColumnShardOperationsBytesWritten_->Val());
ColumnShardEraseRows_->Set(ColumnShardErasedRows_->Val());
ColumnShardEraseBytes_->Set(ColumnShardErasedBytes_->Val());

if (ColumnShardConsumedCpuHistogram) {
TransferBuckets(ColumnShardCpuUtilization, ColumnShardConsumedCpuHistogram);
}
}

if (DiskSpaceTablesTotalBytes) {
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/tx/columnshard/columnshard__write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo
wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator());
} else {
const TMonotonic now = TMonotonic::Now();
Counters.GetCSCounters().OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant());
Counters.OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant(), aggr->GetRows());
Counters.GetCSCounters().OnWriteMiddle1PutBlobsSuccess(now - writeMeta.GetWriteMiddle1StartInstant());
Counters.GetCSCounters().OnWriteMiddle2PutBlobsSuccess(now - writeMeta.GetWriteMiddle2StartInstant());
Counters.GetCSCounters().OnWriteMiddle3PutBlobsSuccess(now - writeMeta.GetWriteMiddle3StartInstant());
Expand Down
5 changes: 5 additions & 0 deletions ydb/core/tx/columnshard/counters/counters_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ class TCountersManager {
BackgroundControllerCounters->FillTotalStats(tableStats);
ScanCounters.FillStats(tableStats);
}

// Reports a successful blob write to both counter sets held by the manager:
// `rowsWritten` is forwarded to TabletCounters and the duration `d` to CSCounters.
void OnWritePutBlobsSuccess(const TDuration d, const ui64 rowsWritten) const {
TabletCounters->OnWritePutBlobsSuccess(rowsWritten);
CSCounters.OnWritePutBlobsSuccess(d);
}
};

} // namespace NKikimr::NColumnShard
19 changes: 14 additions & 5 deletions ydb/core/tx/columnshard/counters/tablet_counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,8 @@ class TTabletCountersHandle {
}

// Records a successful write operation.
//   blobsWritten - amount passed to IncCounter for COUNTER_OPERATIONS_BLOBS_WRITTEN
//   bytesWritten - amount passed to IncCounter for COUNTER_OPERATIONS_BYTES_WRITTEN
// Also bumps COUNTER_WRITE_SUCCESS using IncCounter's default amount.
// Only the COUNTER_OPERATIONS_* counters are touched; the former
// COUNTER_UPSERT_* names refer to the same counter ids and must not be
// incremented alongside them, otherwise every write is counted twice.
void OnWriteSuccess(const ui64 blobsWritten, const ui64 bytesWritten) const {
    IncCounter(NColumnShard::COUNTER_OPERATIONS_BLOBS_WRITTEN, blobsWritten);
    IncCounter(NColumnShard::COUNTER_OPERATIONS_BYTES_WRITTEN, bytesWritten);
    IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS);
}

Expand Down Expand Up @@ -106,9 +105,19 @@ class TTabletCountersHandle {
IncCounter(NColumnShard::COUNTER_INDEXING_TIME, duration.MilliSeconds());
}

// Passes `rowsWritten` to IncCounter for COUNTER_OPERATIONS_ROWS_WRITTEN.
void OnWritePutBlobsSuccess(const ui64 rowsWritten) const {
IncCounter(NColumnShard::COUNTER_OPERATIONS_ROWS_WRITTEN, rowsWritten);
}

// Accounts for one dropped portion by bumping the three "erased" counters:
//   rawBytes  -> COUNTER_RAW_BYTES_ERASED
//   blobBytes -> COUNTER_BYTES_ERASED
//   rows      -> COUNTER_ROWS_ERASED
void OnDropPortionEvent(const ui64 rawBytes, const ui64 blobBytes, const ui64 rows) const {
IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, rawBytes);
IncCounter(NColumnShard::COUNTER_BYTES_ERASED, blobBytes);
IncCounter(NColumnShard::COUNTER_ROWS_ERASED, rows);
}

void FillStats(::NKikimrTableStats::TTableStats& output) const {
output.SetRowUpdates(GetValue(COUNTER_WRITE_SUCCESS));
output.SetRowDeletes(0); // manual deletes are not supported
output.SetRowUpdates(GetValue(COUNTER_OPERATIONS_ROWS_WRITTEN));
output.SetRowDeletes(GetValue(COUNTER_ROWS_ERASED));
output.SetRowReads(0); // all reads are range reads
output.SetRangeReadRows(GetValue(COUNTER_READ_INDEX_ROWS));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::T
if (self) {
self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size());
for (auto&& p : PortionsToDrop) {
self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, p.GetTotalRawBytes());
self->Counters.GetTabletCounters()->OnDropPortionEvent(p.GetTotalRawBytes(), p.GetTotalBlobBytes(), p.NumRows());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class TWriteAggregation {
NEvWrite::TWriteMeta WriteMeta;
YDB_READONLY(ui64, SchemaVersion, 0);
YDB_READONLY(ui64, Size, 0);
YDB_READONLY(ui64, Rows, 0);
YDB_ACCESSOR_DEF(std::vector<TWideSerializedBatch>, SplittedBlobs);
YDB_READONLY_DEF(TVector<TWriteId>, WriteIds);
YDB_READONLY_DEF(std::shared_ptr<NOlap::IBlobsWritingAction>, BlobsAction);
Expand Down Expand Up @@ -117,6 +118,9 @@ class TWriteAggregation {
for (auto&& s : splittedBlobs) {
SplittedBlobs.emplace_back(std::move(s), *this);
}
for (const auto& batch : SplittedBlobs) {
Rows += batch->GetRowsCount();
}
}

TWriteAggregation(const NEvWrite::TWriteData& writeData)
Expand Down
Loading