Skip to content

Commit bc46ed3

Browse files
Merge 09cf575 into 2490271
2 parents 2490271 + 09cf575 commit bc46ed3

File tree

9 files changed

+208
-57
lines changed

9 files changed

+208
-57
lines changed

ydb/core/tx/columnshard/columnshard.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include <ydb/core/protos/table_stats.pb.h>
1515
#include <ydb/core/tx/columnshard/bg_tasks/adapter/adapter.h>
16+
#include <ydb/core/tx/columnshard/tablet/write_queue.h>
1617
#include <ydb/core/tx/priorities/usage/service.h>
1718
#include <ydb/core/tx/tiering/manager.h>
1819

@@ -69,8 +70,8 @@ void TColumnShard::TrySwitchToWork(const TActorContext& ctx) {
6970
}
7071
ProgressTxController->OnTabletInit();
7172
{
72-
const TLogContextGuard gLogging =
73-
NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId())("process", "SwitchToWork");
73+
const TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())(
74+
"self_id", SelfId())("process", "SwitchToWork");
7475
AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "SwitchToWork");
7576
Become(&TThis::StateWork);
7677
SignalTabletActive(ctx);
@@ -250,6 +251,8 @@ void TColumnShard::Handle(NActors::TEvents::TEvWakeup::TPtr& ev, const TActorCon
250251
const TMonotonic now = TMonotonic::Now();
251252
GetProgressTxController().PingTimeouts(now);
252253
ctx.Schedule(TDuration::Seconds(1), new NActors::TEvents::TEvWakeup(0));
254+
} else if (ev->Get()->Tag == 1) {
255+
WriteTasksQueue->Drain(true, ctx);
253256
}
254257
}
255258

ydb/core/tx/columnshard/columnshard__write.cpp

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "transactions/operators/ev_write/secondary.h"
1414
#include "transactions/operators/ev_write/sync.h"
1515

16+
#include <ydb/core/tx/columnshard/tablet/write_queue.h>
1617
#include <ydb/core/tx/conveyor/usage/service.h>
1718
#include <ydb/core/tx/data_events/events.h>
1819

@@ -46,26 +47,27 @@ void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const
4647
Y_ABORT("invalid function usage");
4748
}
4849

49-
AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "write_overload")("size", writeSize)("path_id", writeMeta.GetTableId())(
50+
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "write_overload")("size", writeSize)("path_id", writeMeta.GetTableId())(
5051
"reason", overloadReason);
5152

5253
ctx.Send(writeMeta.GetSource(), event.release(), 0, cookie);
5354
}
5455

55-
TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 pathId) const {
56-
if (IsAnyChannelYellowStop()) {
57-
return EOverloadStatus::Disk;
58-
}
59-
56+
TColumnShard::EOverloadStatus TColumnShard::CheckOverloadedWait(const ui64 pathId) const {
6057
if (InsertTable && InsertTable->IsOverloadedByCommitted(pathId)) {
6158
return EOverloadStatus::InsertTable;
6259
}
63-
6460
Counters.GetCSCounters().OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit());
6561
if (TablesManager.GetPrimaryIndex() && TablesManager.GetPrimaryIndex()->IsOverloadedByMetadata(NOlap::IColumnEngine::GetMetadataLimit())) {
6662
return EOverloadStatus::OverloadMetadata;
6763
}
64+
return EOverloadStatus::None;
65+
}
6866

67+
TColumnShard::EOverloadStatus TColumnShard::CheckOverloadedImmediate(const ui64 pathId) const {
68+
if (IsAnyChannelYellowStop()) {
69+
return EOverloadStatus::Disk;
70+
}
6971
ui64 txLimit = Settings.OverloadTxInFlight;
7072
ui64 writesLimit = Settings.OverloadWritesInFlight;
7173
ui64 writesSizeLimit = Settings.OverloadWritesSizeInFlight;
@@ -93,7 +95,8 @@ void TColumnShard::Handle(NPrivateEvents::NWrite::TEvWritePortionResult::TPtr& e
9395
NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_WRITE)("tablet_id", TabletID())("event", "TEvWritePortionResult");
9496
std::vector<TNoDataWrite> noDataWrites = ev->Get()->DetachNoDataWrites();
9597
for (auto&& i : noDataWrites) {
96-
AFL_WARN(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "no_data_write_finished")("writing_size", i.GetDataSize())("writing_id", i.GetWriteMeta().GetId());
98+
AFL_WARN(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "no_data_write_finished")("writing_size", i.GetDataSize())(
99+
"writing_id", i.GetWriteMeta().GetId());
97100
Counters.GetWritesMonitor()->OnFinishWrite(i.GetDataSize(), 1);
98101
}
99102
if (ev->Get()->GetWriteStatus() == NKikimrProto::OK) {
@@ -255,7 +258,10 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex
255258

256259
NEvWrite::TWriteData writeData(writeMeta, arrowData, snapshotSchema->GetIndexInfo().GetReplaceKey(),
257260
StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING), false);
258-
auto overloadStatus = CheckOverloaded(pathId);
261+
auto overloadStatus = CheckOverloadedImmediate(pathId);
262+
if (overloadStatus == EOverloadStatus::None) {
263+
overloadStatus = CheckOverloadedWait(pathId);
264+
}
259265
if (overloadStatus != EOverloadStatus::None) {
260266
std::unique_ptr<NActors::IEventBase> result = std::make_unique<TEvColumnShard::TEvWriteResult>(
261267
TabletID(), writeData.GetWriteMeta(), NKikimrTxColumnShard::EResultStatus::OVERLOADED);
@@ -560,11 +566,17 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor
560566
return;
561567
}
562568

563-
auto overloadStatus = CheckOverloaded(pathId);
569+
if (!AppDataVerified().ColumnShardConfig.GetWritingEnabled()) {
570+
sendError("writing disabled", NKikimrDataEvents::TEvWriteResult::STATUS_CANCELLED);
571+
return;
572+
}
573+
574+
auto overloadStatus = CheckOverloadedImmediate(pathId);
564575
if (overloadStatus != EOverloadStatus::None) {
565576
std::unique_ptr<NActors::IEventBase> result = NEvents::TDataEvents::TEvWriteResult::BuildError(
566577
TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error");
567-
OverloadWriteFail(overloadStatus, NEvWrite::TWriteMeta(0, pathId, source, {}, TGUID::CreateTimebased().AsGuidString()), arrowData->GetSize(), cookie, std::move(result), ctx);
578+
OverloadWriteFail(overloadStatus, NEvWrite::TWriteMeta(0, pathId, source, {}, TGUID::CreateTimebased().AsGuidString()),
579+
arrowData->GetSize(), cookie, std::move(result), ctx);
568580
return;
569581
}
570582

@@ -580,25 +592,8 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor
580592
lockId = record.GetLockTxId();
581593
}
582594

583-
if (!AppDataVerified().ColumnShardConfig.GetWritingEnabled()) {
584-
sendError("writing disabled", NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED);
585-
return;
586-
}
587-
588-
OperationsManager->RegisterLock(lockId, Generation());
589-
auto writeOperation = OperationsManager->RegisterOperation(
590-
pathId, lockId, cookie, granuleShardingVersionId, *mType, AppDataVerified().FeatureFlags.GetEnableWritePortionsOnInsert());
591-
592-
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_WRITE)("writing_size", arrowData->GetSize())("operation_id", writeOperation->GetIdentifier())(
593-
"in_flight", Counters.GetWritesMonitor()->GetWritesInFlight())("size_in_flight", Counters.GetWritesMonitor()->GetWritesSizeInFlight());
594-
Counters.GetWritesMonitor()->OnStartWrite(arrowData->GetSize());
595-
596-
Y_ABORT_UNLESS(writeOperation);
597-
writeOperation->SetBehaviour(behaviour);
598-
NOlap::TWritingContext wContext(TabletID(), SelfId(), schema, StoragesManager, Counters.GetIndexationCounters().SplitterCounters,
599-
Counters.GetCSCounters().WritingCounters, NOlap::TSnapshot::Max(), writeOperation->GetActivityChecker());
600-
arrowData->SetSeparationPoints(GetIndexAs<NOlap::TColumnEngineForLogs>().GetGranulePtrVerified(pathId)->GetBucketPositions());
601-
writeOperation->Start(*this, arrowData, source, wContext);
595+
WriteTasksQueue->Enqueue(TWriteTask(arrowData, schema, source, granuleShardingVersionId, pathId, cookie, lockId, *mType, behaviour));
596+
WriteTasksQueue->Drain(false, ctx);
602597
}
603598

604599
} // namespace NKikimr::NColumnShard

ydb/core/tx/columnshard/columnshard_impl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <ydb/core/tx/columnshard/engines/changes/compaction.h>
4343
#include <ydb/core/tx/columnshard/engines/column_engine_logs.h>
4444
#include <ydb/core/tx/columnshard/engines/scheme/schema_version.h>
45+
#include <ydb/core/tx/columnshard/tablet/write_queue.h>
4546
#include <ydb/core/tx/conveyor/usage/service.h>
4647
#include <ydb/core/tx/priorities/usage/abstract.h>
4748
#include <ydb/core/tx/priorities/usage/events.h>
@@ -77,6 +78,7 @@ TColumnShard::TColumnShard(TTabletStorageInfo* info, const TActorId& tablet)
7778
, TabletCountersHolder(new TProtobufTabletCounters<ESimpleCounters_descriptor, ECumulativeCounters_descriptor,
7879
EPercentileCounters_descriptor, ETxTypes_descriptor>())
7980
, Counters(*TabletCountersHolder)
81+
, WriteTasksQueue(std::make_unique<TWriteTasksQueue>(this))
8082
, ProgressTxController(std::make_unique<TTxController>(*this))
8183
, StoragesManager(std::make_shared<NOlap::TStoragesManager>(*this))
8284
, DataLocksManager(std::make_shared<NOlap::NDataLocks::TManager>())

ydb/core/tx/columnshard/columnshard_impl.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ class TGeneralCompactColumnEngineChanges;
9090

9191
namespace NKikimr::NColumnShard {
9292

93+
class TArrowData;
9394
class TEvWriteCommitPrimaryTransactionOperator;
9495
class TEvWriteCommitSecondaryTransactionOperator;
9596
class TTxFinishAsyncTransaction;
@@ -99,6 +100,8 @@ class TOperationsManager;
99100
class TWaitEraseTablesTxSubscriber;
100101
class TTxBlobsWritingFinished;
101102
class TTxBlobsWritingFailed;
103+
class TWriteTasksQueue;
104+
class TWriteTask;
102105

103106
namespace NLoading {
104107
class TInsertTableInitializer;
@@ -228,6 +231,8 @@ class TColumnShard: public TActor<TColumnShard>, public NTabletFlatExecutor::TTa
228231
friend class NLoading::TInFlightReadsInitializer;
229232
friend class NLoading::TSpecialValuesInitializer;
230233
friend class NLoading::TTablesManagerInitializer;
234+
friend class TWriteTasksQueue;
235+
friend class TWriteTask;
231236

232237
class TTxProgressTx;
233238
class TTxProposeCancel;
@@ -373,7 +378,8 @@ class TColumnShard: public TActor<TColumnShard>, public NTabletFlatExecutor::TTa
373378
private:
374379
void OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie,
375380
std::unique_ptr<NActors::IEventBase>&& event, const TActorContext& ctx);
376-
EOverloadStatus CheckOverloaded(const ui64 tableId) const;
381+
EOverloadStatus CheckOverloadedImmediate(const ui64 tableId) const;
382+
EOverloadStatus CheckOverloadedWait(const ui64 tableId) const;
377383

378384
protected:
379385
STFUNC(StateInit) {
@@ -464,6 +470,7 @@ class TColumnShard: public TActor<TColumnShard>, public NTabletFlatExecutor::TTa
464470
private:
465471
std::unique_ptr<TTabletCountersBase> TabletCountersHolder;
466472
TCountersManager Counters;
473+
std::unique_ptr<TWriteTasksQueue> WriteTasksQueue;
467474

468475
std::unique_ptr<TTxController> ProgressTxController;
469476
std::unique_ptr<TOperationsManager> OperationsManager;

ydb/core/tx/columnshard/counters/columnshard.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#pragma once
2-
#include "common/owner.h"
32
#include "initialization.h"
43
#include "tx_progress.h"
54

5+
#include "common/owner.h"
6+
67
#include <ydb/core/tx/columnshard/counters/tablet_counters.h>
78

89
#include <library/cpp/monlib/dynamic_counters/counters.h>
@@ -26,14 +27,23 @@ class TWriteCounters: public TCommonCountersOwner {
2627
NMonitoring::TDynamicCounters::TCounterPtr VolumeWriteData;
2728
NMonitoring::THistogramPtr HistogramBytesWriteDataCount;
2829
NMonitoring::THistogramPtr HistogramBytesWriteDataBytes;
30+
NMonitoring::THistogramPtr HistogramDurationQueueWait;
2931

3032
public:
33+
const NMonitoring::TDynamicCounters::TCounterPtr QueueWaitSize;
34+
35+
void OnWritingTaskDequeue(const TDuration d){
36+
HistogramDurationQueueWait->Collect(d.MilliSeconds());
37+
}
38+
3139
TWriteCounters(TCommonCountersOwner& owner)
3240
: TBase(owner, "activity", "writing")
41+
, QueueWaitSize(TBase::GetValue("Write/Queue/Size"))
3342
{
3443
VolumeWriteData = TBase::GetDeriviative("Write/Incoming/Bytes");
3544
HistogramBytesWriteDataCount = TBase::GetHistogram("Write/Incoming/ByBytes/Count", NMonitoring::ExponentialHistogram(18, 2, 100));
3645
HistogramBytesWriteDataBytes = TBase::GetHistogram("Write/Incoming/ByBytes/Bytes", NMonitoring::ExponentialHistogram(18, 2, 100));
46+
HistogramDurationQueueWait = TBase::GetHistogram("Write/Queue/Waiting/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 100));
3747
}
3848

3949
void OnIncomingData(const ui64 dataSize) const {
@@ -231,4 +241,4 @@ class TCSCounters: public TCommonCountersOwner {
231241
TCSCounters();
232242
};
233243

234-
}
244+
} // namespace NKikimr::NColumnShard
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#include "write_queue.h"
2+
3+
#include <ydb/core/tx/columnshard/columnshard_impl.h>
4+
#include <ydb/core/tx/columnshard/operations/write_data.h>
5+
#include <ydb/core/tx/data_events/write_data.h>
6+
7+
namespace NKikimr::NColumnShard {
8+
9+
bool TWriteTask::Execute(TColumnShard* owner, const TActorContext& ctx) {
10+
auto overloadStatus = owner->CheckOverloadedWait(PathId);
11+
if (overloadStatus != TColumnShard::EOverloadStatus::None) {
12+
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "wait_overload")("status", overloadStatus);
13+
return false;
14+
}
15+
16+
owner->Counters.GetCSCounters().WritingCounters->OnWritingTaskDequeue(TMonotonic::Now() - Created);
17+
owner->OperationsManager->RegisterLock(LockId, owner->Generation());
18+
auto writeOperation = owner->OperationsManager->RegisterOperation(
19+
PathId, LockId, Cookie, GranuleShardingVersionId, ModificationType, AppDataVerified().FeatureFlags.GetEnableWritePortionsOnInsert());
20+
21+
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_WRITE)("writing_size", ArrowData->GetSize())("operation_id", writeOperation->GetIdentifier())(
22+
"in_flight", owner->Counters.GetWritesMonitor()->GetWritesInFlight())(
23+
"size_in_flight", owner->Counters.GetWritesMonitor()->GetWritesSizeInFlight());
24+
25+
AFL_VERIFY(writeOperation);
26+
writeOperation->SetBehaviour(Behaviour);
27+
NOlap::TWritingContext wContext(owner->TabletID(), owner->SelfId(), Schema, owner->StoragesManager,
28+
owner->Counters.GetIndexationCounters().SplitterCounters, owner->Counters.GetCSCounters().WritingCounters, NOlap::TSnapshot::Max(),
29+
writeOperation->GetActivityChecker());
30+
ArrowData->SetSeparationPoints(owner->GetIndexAs<NOlap::TColumnEngineForLogs>().GetGranulePtrVerified(PathId)->GetBucketPositions());
31+
writeOperation->Start(*owner, ArrowData, SourceId, wContext);
32+
return true;
33+
}
34+
35+
bool TWriteTasksQueue::Drain(const bool onWakeup, const TActorContext& ctx) {
36+
if (onWakeup) {
37+
WriteTasksOverloadCheckerScheduled = false;
38+
}
39+
while (WriteTasks.size() && WriteTasks.front().Execute(Owner, ctx)) {
40+
WriteTasks.pop_front();
41+
}
42+
if (WriteTasks.size() && !WriteTasksOverloadCheckerScheduled) {
43+
Owner->Schedule(TDuration::MilliSeconds(300), new NActors::TEvents::TEvWakeup(1));
44+
WriteTasksOverloadCheckerScheduled = true;
45+
AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "queue_on_write")("size", WriteTasks.size());
46+
}
47+
Owner->Counters.GetCSCounters().WritingCounters->QueueWaitSize->Add((i64)WriteTasks.size() - PredWriteTasksSize);
48+
PredWriteTasksSize = (i64)WriteTasks.size();
49+
return !WriteTasks.size();
50+
}
51+
52+
void TWriteTasksQueue::Enqueue(TWriteTask&& task) {
53+
WriteTasks.emplace_back(std::move(task));
54+
}
55+
56+
TWriteTasksQueue::~TWriteTasksQueue() {
57+
Owner->Counters.GetCSCounters().WritingCounters->QueueWaitSize->Sub(PredWriteTasksSize);
58+
}
59+
60+
} // namespace NKikimr::NColumnShard
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#pragma once
2+
#include <ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h>
3+
#include <ydb/core/tx/columnshard/operations/write.h>
4+
#include <ydb/core/tx/data_events/common/modification_type.h>
5+
6+
namespace NKikimr::NColumnShard {
7+
class TColumnShard;
8+
class TArrowData;
9+
class TWriteTask: TMoveOnly {
10+
private:
11+
std::shared_ptr<TArrowData> ArrowData;
12+
NOlap::ISnapshotSchema::TPtr Schema;
13+
const NActors::TActorId SourceId;
14+
const std::optional<ui32> GranuleShardingVersionId;
15+
const ui64 PathId;
16+
const ui64 Cookie;
17+
const ui64 LockId;
18+
const NEvWrite::EModificationType ModificationType;
19+
const EOperationBehaviour Behaviour;
20+
const TMonotonic Created = TMonotonic::Now();
21+
22+
public:
23+
TWriteTask(const std::shared_ptr<TArrowData>& arrowData, const NOlap::ISnapshotSchema::TPtr& schema, const NActors::TActorId sourceId,
24+
const std::optional<ui32>& granuleShardingVersionId, const ui64 pathId, const ui64 cookie, const ui64 lockId,
25+
const NEvWrite::EModificationType modificationType, const EOperationBehaviour behaviour)
26+
: ArrowData(arrowData)
27+
, Schema(schema)
28+
, SourceId(sourceId)
29+
, GranuleShardingVersionId(granuleShardingVersionId)
30+
, PathId(pathId)
31+
, Cookie(cookie)
32+
, LockId(lockId)
33+
, ModificationType(modificationType)
34+
, Behaviour(behaviour) {
35+
}
36+
37+
const TMonotonic& GetCreatedMonotonic() const {
38+
return Created;
39+
}
40+
41+
bool Execute(TColumnShard* owner, const TActorContext& ctx);
42+
};
43+
44+
class TWriteTasksQueue {
45+
private:
46+
bool WriteTasksOverloadCheckerScheduled = false;
47+
std::deque<TWriteTask> WriteTasks;
48+
i64 PredWriteTasksSize = 0;
49+
TColumnShard* Owner;
50+
51+
public:
52+
TWriteTasksQueue(TColumnShard* owner)
53+
: Owner(owner) {
54+
}
55+
56+
~TWriteTasksQueue();
57+
58+
void Enqueue(TWriteTask&& task);
59+
bool Drain(const bool onWakeup, const TActorContext& ctx);
60+
};
61+
62+
} // namespace NKikimr::NColumnShard
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
LIBRARY()
2+
3+
SRCS(
4+
write_queue.cpp
5+
)
6+
7+
PEERDIR(
8+
ydb/core/tx/columnshard/hooks/abstract
9+
)
10+
11+
END()

0 commit comments

Comments
 (0)