@@ -32,7 +32,8 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
3232 SVLOG_D (" NSysView::TPartitionStatsCollector bootstrapped" );
3333
3434 if (AppData ()->UsePartitionStatsCollectorForTests ) {
35- OverloadedPartitionBound = 0.0 ;
35+ OverloadedByCpuPartitionBound = 0.0 ;
36+ OverloadedByTliPartitionBound = 0 ;
3637 ProcessOverloadedInterval = TDuration::Seconds (1 );
3738 }
3839
@@ -88,25 +89,32 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
8889
8990 auto & oldPartitions = table.Partitions ;
9091 std::unordered_map<TShardIdx, TPartitionStats> newPartitions;
91- std::set<TOverloadedFollower> overloaded ;
92+ std::set<TFollowerStats> overloadedByCpu, overloadedByTli ;
9293
9394 for (auto shardIdx : ev->Get ()->ShardIndices ) {
9495 auto old = oldPartitions.find (shardIdx);
9596 if (old != oldPartitions.end ()) {
9697 newPartitions[shardIdx] = old->second ;
9798
9899 for (const auto & followerStat: old->second .FollowerStats ) {
99- if (IsPartitionOverloaded (followerStat.second ))
100- overloaded.insert ({shardIdx, followerStat.first });
100+ if (IsPartitionOverloadedByCpu (followerStat.second ))
101+ overloadedByCpu.insert ({shardIdx, followerStat.first });
102+ if (IsPartitionOverloadedByTli (followerStat.second ))
103+ overloadedByTli.insert ({shardIdx, followerStat.first });
101104 }
102105 }
103106 }
104107
105- if (!overloaded .empty ()) {
106- tables.Overloaded [pathId].swap (overloaded );
108+ if (!overloadedByCpu .empty ()) {
109+ tables.OverloadedByCpu [pathId].swap (overloadedByCpu );
107110 } else {
108- tables.Overloaded .erase (pathId);
111+ tables.OverloadedByCpu .erase (pathId);
109112 }
113+ if (!overloadedByTli.empty ()) {
114+ tables.OverloadedByTli [pathId].swap (overloadedByTli);
115+ } else {
116+ tables.OverloadedByTli .erase (pathId);
117+ }
110118
111119 oldPartitions.swap (newPartitions);
112120 table.ShardIndices .swap (ev->Get ()->ShardIndices );
@@ -125,7 +133,8 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
125133
126134 auto & tables = DomainTables[domainKey];
127135 tables.Stats .erase (pathId);
128- tables.Overloaded .erase (pathId);
136+ tables.OverloadedByCpu .erase (pathId);
137+ tables.OverloadedByTli .erase (pathId);
129138 }
130139
131140 void Handle (TEvSysView::TEvSendPartitionStats::TPtr& ev) {
@@ -153,18 +162,29 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
153162
154163 auto & followerStats = partitionStats.FollowerStats [followerId];
155164
156- TOverloadedFollower overloadedFollower = {shardIdx, followerId};
157- if (IsPartitionOverloaded (newStats)) {
158- tables.Overloaded [pathId].insert (overloadedFollower);
165+ TFollowerStats overloadedFollower = {shardIdx, followerId};
166+ if (IsPartitionOverloadedByCpu (newStats)) {
167+ tables.OverloadedByCpu [pathId].insert (overloadedFollower);
159168 } else {
160- auto overloadedFound = tables.Overloaded .find (pathId);
161- if (overloadedFound != tables.Overloaded .end ()) {
169+ auto overloadedFound = tables.OverloadedByCpu .find (pathId);
170+ if (overloadedFound != tables.OverloadedByCpu .end ()) {
162171 overloadedFound->second .erase (overloadedFollower);
163172 if (overloadedFound->second .empty ()) {
164- tables.Overloaded .erase (pathId);
173+ tables.OverloadedByCpu .erase (pathId);
165174 }
166175 }
167176 }
177+ if (IsPartitionOverloadedByTli (newStats)) {
178+ tables.OverloadedByTli [pathId].insert (overloadedFollower);
179+ } else {
180+ auto overloadedFound = tables.OverloadedByTli .find (pathId);
181+ if (overloadedFound != tables.OverloadedByTli .end ()) {
182+ overloadedFound->second .erase (overloadedFollower);
183+ if (overloadedFound->second .empty ()) {
184+ tables.OverloadedByTli .erase (pathId);
185+ }
186+ }
187+ }
168188
169189 if (followerStats.HasTtlStats ()) {
170190 newStats.MutableTtlStats ()->Swap (followerStats.MutableTtlStats ());
@@ -376,37 +396,54 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
376396 }
377397 auto & domainTables = domainFound->second ;
378398
379- struct TPartition {
399+ struct TPartitionByCpu {
380400 TPathId PathId;
381401 TShardIdx ShardIdx;
382402 ui32 FollowerId;
383403 double CPUCores;
384404 };
385- std::vector<TPartition> sorted ;
405+ std::vector<TPartitionByCpu> sortedByCpu ;
386406
387- for (const auto & [pathId, overloadedFollowers] : domainTables.Overloaded ) {
388- for (const TOverloadedFollower& overloadedFollower : overloadedFollowers) {
407+ struct TPartitionByTli {
408+ TPathId PathId;
409+ TShardIdx ShardIdx;
410+ ui32 FollowerId;
411+ ui64 LocksBroken;
412+ };
413+ std::vector<TPartitionByTli> sortedByTli;
414+
415+ for (const auto & [pathId, overloadedFollowers] : domainTables.OverloadedByCpu ) {
416+ for (const TFollowerStats& overloadedFollower : overloadedFollowers) {
389417 const auto & table = domainTables.Stats [pathId];
390418 const auto & partition = table.Partitions .at (overloadedFollower.ShardIdx ).FollowerStats .at (overloadedFollower.FollowerId );
391- sorted .emplace_back (TPartition {pathId, overloadedFollower.ShardIdx , overloadedFollower.FollowerId , partition.GetCPUCores ()});
419+ sortedByCpu .emplace_back (TPartitionByCpu {pathId, overloadedFollower.ShardIdx , overloadedFollower.FollowerId , partition.GetCPUCores ()});
392420 }
393421 }
422+ for (const auto & [pathId, overloadedFollowers] : domainTables.OverloadedByTli ) {
423+ for (const TFollowerStats& overloadedFollower : overloadedFollowers) {
424+ const auto & table = domainTables.Stats [pathId];
425+ const auto & partition = table.Partitions .at (overloadedFollower.ShardIdx ).FollowerStats .at (overloadedFollower.FollowerId );
426+ sortedByTli.emplace_back (TPartitionByTli{pathId, overloadedFollower.ShardIdx , overloadedFollower.FollowerId , partition.GetLocksBroken ()});
427+ }
428+ }
394429
395- std::sort (sorted .begin (), sorted .end (),
430+ std::sort (sortedByCpu .begin (), sortedByCpu .end (),
396431 [] (const auto & l, const auto & r) { return l.CPUCores > r.CPUCores ; });
432+ std::sort (sortedByTli.begin (), sortedByTli.end (),
433+ [] (const auto & l, const auto & r) { return l.LocksBroken > r.LocksBroken ; });
397434
398435 auto now = TActivationContext::Now ();
399436 auto nowUs = now.MicroSeconds ();
400437
401438 size_t count = 0 ;
402439 auto sendEvent = MakeHolder<TEvSysView::TEvSendTopPartitions>();
403- for (const auto & entry : sorted ) {
440+ for (const auto & entry : sortedByCpu ) {
404441 const auto & table = domainTables.Stats [entry.PathId ];
405442 const auto & followerStats = table.Partitions .at (entry.ShardIdx ).FollowerStats ;
406443 const auto & partition = followerStats.at (entry.FollowerId );
407444 const auto & leaderPartition = followerStats.at (0 );
408445
409- auto * result = sendEvent->Record .AddPartitions ();
446+ auto * result = sendEvent->Record .AddPartitionsByCpu ();
410447 result->SetTabletId (partition.GetTabletId ());
411448 result->SetPath (table.Path );
412449 result->SetPeakTimeUs (nowUs);
@@ -422,11 +459,34 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
422459 break ;
423460 }
424461 }
462+ for (const auto & entry : sortedByTli) {
463+ const auto & table = domainTables.Stats [entry.PathId ];
464+ const auto & followerStats = table.Partitions .at (entry.ShardIdx ).FollowerStats ;
465+ const auto & partition = followerStats.at (entry.FollowerId );
466+ const auto & leaderPartition = followerStats.at (0 );
467+
468+ auto * result = sendEvent->Record .AddPartitionsByTli ();
469+ result->SetTabletId (partition.GetTabletId ());
470+ result->SetPath (table.Path );
471+ result->SetLocksAcquired (partition.GetLocksAcquired ());
472+ result->SetLocksWholeShard (partition.GetLocksWholeShard ());
473+ result->SetLocksBroken (partition.GetLocksBroken ());
474+ result->SetNodeId (partition.GetNodeId ());
475+ result->SetDataSize (leaderPartition.GetDataSize ());
476+ result->SetRowCount (leaderPartition.GetRowCount ());
477+ result->SetIndexSize (leaderPartition.GetIndexSize ());
478+ result->SetFollowerId (partition.GetFollowerId ());
479+
480+ if (++count == TOP_PARTITIONS_COUNT) {
481+ break ;
482+ }
483+ }
425484
426485 sendEvent->Record .SetTimeUs (nowUs);
427486
428487 SVLOG_D (" NSysView::TPartitionStatsCollector: TEvProcessOverloaded "
429- << " top size# " << sorted.size ()
488+ << " , top size by CPU # " << sortedByCpu.size ()
489+ << " , top size by TLI # " << sortedByTli.size ()
430490 << " , time# " << now);
431491
432492 Send (MakePipePerNodeCacheID (false ),
@@ -447,8 +507,11 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
447507 TBase::PassAway ();
448508 }
449509
450- bool IsPartitionOverloaded (const NKikimrSysView::TPartitionStats& stats) const {
451- return stats.GetCPUCores () >= OverloadedPartitionBound;
510+ bool IsPartitionOverloadedByCpu (const NKikimrSysView::TPartitionStats& stats) const {
511+ return stats.GetCPUCores () >= OverloadedByCpuPartitionBound;
512+ }
513+ bool IsPartitionOverloadedByTli (const NKikimrSysView::TPartitionStats& stats) const {
514+ return stats.GetLocksBroken () >= OverloadedByTliPartitionBound;
452515 }
453516
454517private:
@@ -458,7 +521,8 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
458521 TPathId DomainKey;
459522 ui64 SysViewProcessorId = 0 ;
460523
461- double OverloadedPartitionBound = 0.7 ;
524+ double OverloadedByCpuPartitionBound = 0.7 ;
525+ ui64 OverloadedByTliPartitionBound = 1 ;
462526 TDuration ProcessOverloadedInterval = TDuration::Seconds(15 );
463527
464528 typedef ui32 TFollowerId;
@@ -473,22 +537,23 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
473537 TString Path;
474538 };
475539
476- struct TOverloadedFollower {
540+ struct TFollowerStats {
477541 TShardIdx ShardIdx;
478542 TFollowerId FollowerId;
479543
480- bool operator <(const TOverloadedFollower &other) const {
544+ bool operator <(const TFollowerStats &other) const {
481545 return std::tie (ShardIdx, FollowerId) < std::tie (other.ShardIdx , other.FollowerId );
482546 }
483547
484- bool operator ==(const TOverloadedFollower &other) const {
548+ bool operator ==(const TFollowerStats &other) const {
485549 return std::tie (ShardIdx, FollowerId) == std::tie (other.ShardIdx , other.FollowerId );
486550 }
487551 };
488552
489553 struct TDomainTables {
490554 std::map<TPathId, TTableStats> Stats;
491- std::unordered_map<TPathId, std::set<TOverloadedFollower>> Overloaded;
555+ std::unordered_map<TPathId, std::set<TFollowerStats>> OverloadedByCpu;
556+ std::unordered_map<TPathId, std::set<TFollowerStats>> OverloadedByTli;
492557 };
493558 std::unordered_map<TPathId, TDomainTables> DomainTables;
494559
@@ -694,6 +759,15 @@ class TPartitionStatsScan : public TScanActorBase<TPartitionStatsScan> {
694759 insert ({TSchema::FollowerId::ColumnId, [] (const TPartitionStatsResult&, const TPartitionStats&, const TPartitionStats& stats) {
695760 return TCell::Make<ui32>(stats.GetFollowerId ());
696761 }});
762+ insert ({TSchema::LocksAcquired::ColumnId, [] (const TPartitionStatsResult&, const TPartitionStats&, const TPartitionStats& stats) {
763+ return TCell::Make<ui64>(stats.GetLocksAcquired ());
764+ }});
765+ insert ({TSchema::LocksWholeShard::ColumnId, [] (const TPartitionStatsResult&, const TPartitionStats&, const TPartitionStats& stats) {
766+ return TCell::Make<ui64>(stats.GetLocksWholeShard ());
767+ }});
768+ insert ({TSchema::LocksBroken::ColumnId, [] (const TPartitionStatsResult&, const TPartitionStats&, const TPartitionStats& stats) {
769+ return TCell::Make<ui64>(stats.GetLocksBroken ());
770+ }});
697771 }
698772 };
699773 static TExtractorsMap extractors;
0 commit comments