Skip to content

Commit 7e32c10

Browse files
committed
schemeshard: fix copy-tables memory hog
Export launches copy-tables operation, copy-tables invokes multiple copy table suboperations, each copy-table suboperation grabs and saves aside metadata of table's subdomain (to be able to restore original subdomain state in case IgniteOperation() fails). Straightforward implementation of TMemChanges resulted in that the same and only subdomain being copied multiple times. Which for massive export actions with many-many tables copied is both cpu intensive and, more importantly, requires excessive amount of memory (up to several observed OOM events). This fix changes TMemChanges to store only one subdomain state version per operation. Which is correct and appropriate for copy-tables and all other (sub)operations. KIKIMR-20242
1 parent 8ce7edb commit 7e32c10

File tree

2 files changed

+19
-12
lines changed

2 files changed

+19
-12
lines changed

ydb/core/tx/schemeshard/schemeshard__operation_memory_changes.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,13 @@ void TMemoryChanges::GrabShard(TSchemeShard *ss, const TShardIdx &shardId) {
4747
}
4848

4949
void TMemoryChanges::GrabDomain(TSchemeShard* ss, const TPathId& pathId) {
50-
Grab<TSubDomainInfo>(pathId, ss->SubDomains, SubDomains);
50+
// Copy TSubDomainInfo from ss->SubDomains to local SubDomains.
51+
// Make sure that copy will be made only when needed.
52+
const auto found = ss->SubDomains.find(pathId);
53+
Y_ABORT_UNLESS(found != ss->SubDomains.end());
54+
if (!SubDomains.contains(pathId)) {
55+
SubDomains.emplace(pathId, MakeIntrusive<TSubDomainInfo>(*found->second));
56+
}
5157
}
5258

5359
void TMemoryChanges::GrabNewIndex(TSchemeShard* ss, const TPathId& pathId) {
@@ -178,15 +184,12 @@ void TMemoryChanges::UnDo(TSchemeShard* ss) {
178184
Shards.pop();
179185
}
180186

181-
while (SubDomains) {
182-
const auto& [id, elem] = SubDomains.top();
183-
if (elem) {
184-
ss->SubDomains[id] = elem;
185-
} else {
186-
ss->SubDomains.erase(id);
187-
}
188-
SubDomains.pop();
187+
// Restore ss->SubDomains entries to saved copies of TSubDomainInfo objects.
188+
// No copy, simple pointer replacement.
189+
for (const auto& [id, elem] : SubDomains) {
190+
ss->SubDomains[id] = elem;
189191
}
192+
SubDomains.clear();
190193

191194
while (TxStates) {
192195
const auto& [id, elem] = TxStates.top();

ydb/core/tx/schemeshard/schemeshard__operation_memory_changes.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,12 @@ class TMemoryChanges: public TSimpleRefCount<TMemoryChanges> {
3333
using TShardState = std::pair<TShardIdx, THolder<TShardInfo>>;
3434
TStack<TShardState> Shards;
3535

36-
using TSubDomainState = std::pair<TPathId, TSubDomainInfo::TPtr>;
37-
TStack<TSubDomainState> SubDomains;
36+
// Actually, any single subdomain should not be grabbed at more than one version
37+
// per transaction/operation.
38+
// And transaction/operation could not work on more than one subdomain.
39+
// But just to be on the safe side (migrated paths, anyone?) we allow several
40+
// subdomains to be grabbed.
41+
THashMap<TPathId, TSubDomainInfo::TPtr> SubDomains;
3842

3943
using TTxState = std::pair<TOperationId, THolder<TTxState>>;
4044
TStack<TTxState> TxStates;
@@ -78,7 +82,7 @@ class TMemoryChanges: public TSimpleRefCount<TMemoryChanges> {
7882
void GrabExternalTable(TSchemeShard* ss, const TPathId& pathId);
7983

8084
void GrabExternalDataSource(TSchemeShard* ss, const TPathId& pathId);
81-
85+
8286
void GrabNewView(TSchemeShard* ss, const TPathId& pathId);
8387
void GrabView(TSchemeShard* ss, const TPathId& pathId);
8488

0 commit comments

Comments
 (0)