Skip to content

Commit a426e65

Browse files
committed
Fix DataShard BuildStats error handling
1 parent b614914 commit a426e65

File tree

3 files changed

+96
-2
lines changed

3 files changed

+96
-2
lines changed

ydb/core/tx/datashard/datashard__stats.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,6 @@ class TAsyncTableStatsBuilder : public TActorBootstrapped<TAsyncTableStatsBuilde
179179
ctx.Send(ReplyTo, ev.Release());
180180

181181
FinishTask(ctx);
182-
183182
return Die(ctx);
184183
}
185184

@@ -206,6 +205,8 @@ class TAsyncTableStatsBuilder : public TActorBootstrapped<TAsyncTableStatsBuilde
206205
if (msg.Status != NKikimrProto::OK) {
207206
LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "Stats build failed at datashard " << TabletId << ", for tableId " << TableId
208207
<< " requested pages but got " << msg.Status);
208+
Send(ReplyTo, new TDataShard::TEvPrivate::TEvTableStatsError(TableId, TDataShard::TEvPrivate::TEvTableStatsError::ECode::FETCH_PAGE_FAILED));
209+
FinishTask(ctx);
209210
return Die(ctx);
210211
}
211212

@@ -409,6 +410,21 @@ void TDataShard::Handle(TEvPrivate::TEvAsyncTableStats::TPtr& ev, const TActorCo
409410
}
410411
}
411412

413+
/// Handles a TEvTableStatsError event reporting that a table stats build failed.
///
/// Removes the sender from Actors, logs the error, and, when the table is still
/// present in TableInfos, clears its in-progress flag and marks its stats as
/// needing an update.
///
/// @param ev   The error event; carries TableId, Code and Message.
/// @param ctx  Actor context used for logging.
void TDataShard::Handle(TEvPrivate::TEvTableStatsError::TPtr& ev, const TActorContext& ctx) {
    // The reporting actor is no longer tracked.
    Actors.erase(ev->Sender);

    const auto* error = ev->Get();

    LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD,
        "Stats rebuilt error '" << error->Message
        << "', code: " << static_cast<ui32>(error->Code)
        << ", datashard " << TabletID()
        << ", tableId " << error->TableId);

    auto tableIt = TableInfos.find(error->TableId);
    if (tableIt == TableInfos.end()) {
        // Unknown table: nothing to reset.
        return;
    }

    auto& tableInfo = tableIt->second;
    tableInfo->StatsUpdateInProgress = false;
    // An error implies a compaction should have happened in the meantime,
    // so request a fresh stats build regardless.
    tableInfo->StatsNeedUpdate = true;
}
412428

413429
class TDataShard::TTxInitiateStatsUpdate : public NTabletFlatExecutor::TTransactionBase<TDataShard> {
414430
private:

ydb/core/tx/datashard/datashard_impl.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ class TDataShard
364364
EvConfirmReadonlyLease,
365365
EvReadonlyLeaseConfirmation,
366366
EvPlanPredictedTxs,
367+
EvTableStatsError,
367368
EvEnd
368369
};
369370

@@ -400,6 +401,29 @@ class TDataShard
400401
ui64 SearchHeight = 0;
401402
};
402403

404+
/// Local event reporting that building table stats failed.
/// Carries the affected table id, an error code and an optional message.
struct TEvTableStatsError : public TEventLocal<TEvTableStatsError, EvTableStatsError> {
    /// Failure category.
    /// NOTE(review): the names suggest each code's meaning, but the conditions that
    /// produce them are defined by the senders — confirm against those.
    enum class ECode {
        FETCH_PAGE_FAILED,
        RESOURCE_ALLOCATION_FAILED,
        ACTOR_DIED,
        UNKNOWN
    };

    /// Creates an error event with an empty message.
    TEvTableStatsError(ui64 tableId, ECode code)
        : TEvTableStatsError(tableId, code, TString())
    {}

    /// Creates an error event with an explicit message.
    TEvTableStatsError(ui64 tableId, ECode code, const TString& msg)
        : TableId(tableId)
        , Code(code)
        , Message(msg)
    {}

    const ui64 TableId;     // table whose stats build failed
    const ECode Code;       // failure category
    const TString Message;  // free-form description; may be empty
};
426+
403427
struct TEvRemoveOldInReadSets : public TEventLocal<TEvRemoveOldInReadSets, EvRemoveOldInReadSets> {};
404428

405429
struct TEvRegisterScanActor : public TEventLocal<TEvRegisterScanActor, EvRegisterScanActor> {
@@ -1248,6 +1272,7 @@ class TDataShard
12481272
void Handle(TEvDataShard::TEvSplitPartitioningChanged::TPtr& ev, const TActorContext& ctx);
12491273
void Handle(TEvDataShard::TEvGetTableStats::TPtr& ev, const TActorContext& ctx);
12501274
void Handle(TEvPrivate::TEvAsyncTableStats::TPtr& ev, const TActorContext& ctx);
1275+
void Handle(TEvPrivate::TEvTableStatsError::TPtr& ev, const TActorContext& ctx);
12511276
void Handle(TEvDataShard::TEvKqpScan::TPtr& ev, const TActorContext& ctx);
12521277
void HandleSafe(TEvDataShard::TEvKqpScan::TPtr& ev, const TActorContext& ctx);
12531278
void Handle(TEvDataShard::TEvUploadRowsRequest::TPtr& ev, const TActorContext& ctx);
@@ -2945,6 +2970,7 @@ class TDataShard
29452970
HFunc(TEvDataShard::TEvSplitPartitioningChanged, Handle);
29462971
HFunc(TEvDataShard::TEvGetTableStats, Handle);
29472972
HFunc(TEvPrivate::TEvAsyncTableStats, Handle);
2973+
HFunc(TEvPrivate::TEvTableStatsError, Handle);
29482974
HFunc(TEvDataShard::TEvKqpScan, Handle);
29492975
HFunc(TEvDataShard::TEvUploadRowsRequest, Handle);
29502976
HFunc(TEvDataShard::TEvEraseRowsRequest, Handle);

ydb/core/tx/datashard/datashard_ut_stats.cpp

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#include <ydb/core/tx/datashard/ut_common/datashard_ut_common.h>
2-
#include "datashard_ut_common_kqp.h"
32
#include "ydb/core/tablet_flat/shared_sausagecache.h"
43

54
namespace NKikimr {
@@ -378,6 +377,59 @@ Y_UNIT_TEST_SUITE(DataShardStats) {
378377
UNIT_ASSERT_LE(counters->ActiveBytes->Val(), 800*1024); // one index
379378
}
380379

380+
// Checks that table stats are still produced after a shared cache page fetch
// answers with NODATA: the observer rewrites one NSharedCache::TEvResult into a
// NODATA reply, and the test then waits for stats reporting the three upserted rows.
Y_UNIT_TEST(NoData) {
    TPortManager pm;
    TServerSettings serverSettings(pm.GetPort(2134));
    serverSettings.SetDomainName("Root")
        .SetUseRealThreads(false);

    TServer::TPtr server = new TServer(serverSettings);
    auto& runtime = *server->GetRuntime();
    auto sender = runtime.AllocateEdgeActor();

    runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE);
    runtime.SetLogPriority(NKikimrServices::TABLET_SAUSAGECACHE, NLog::PRI_TRACE);

    InitRoot(server, sender);

    auto [shards, tableId1] = CreateShardedTable(server, sender, "/Root", "table-1", 1);
    const auto shard1 = GetTableShards(server, sender, "/Root/table-1").at(0);

    ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (1, 1), (2, 2), (3, 3)");

    bool captured = false;
    auto observer = runtime.AddObserver<NSharedCache::TEvResult>([&](NSharedCache::TEvResult::TPtr& event) {
        IActor *actor = runtime.FindActor(event->Recipient);

        // The recipient may not resolve to an actor; only query its activity
        // type when the lookup succeeded, instead of dereferencing a null pointer.
        Cerr << "Got SchemeShard NSharedCache::TEvResult from " << event->Sender << " to " << event->Recipient;
        if (actor) {
            Cerr << "(" << actor->GetActivityType() << ")";
        }
        Cerr << Endl;

        // NOTE(review): 288 is presumably the activity type of the stats builder
        // actor — confirm and replace with the named constant.
        if (actor && actor->GetActivityType() == 288) {
            auto& message = *event->Get();
            // Replace the reply with a NODATA result addressed to the same recipient.
            event.Reset(static_cast<TEventHandle<NSharedCache::TEvResult> *>(
                new IEventHandle(event->Recipient, event->Sender,
                    new NSharedCache::TEvResult(message.Origin, message.Cookie, NKikimrProto::NODATA))));
            captured = true;
        }
    });

    CompactTable(runtime, shard1, tableId1, false);

    // Dispatch events until the observer has rewritten a reply (at most 5 rounds).
    for (int i = 0; i < 5 && !captured; ++i) {
        TDispatchOptions options;
        options.CustomFinalCondition = [&]() { return captured; };
        runtime.DispatchEvents(options, TDuration::Seconds(5));
    }
    observer.Remove();

    {
        Cerr << "Waiting stats.." << Endl;
        auto stats = WaitTableStats(runtime, shard1, 1, 3);
        UNIT_ASSERT_VALUES_EQUAL(stats.GetDatashardId(), shard1);
        UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetRowCount(), 3);
        UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetPartCount(), 1);
    }
}
432+
381433
} // Y_UNIT_TEST_SUITE(DataShardStats)
382434

383435
} // namespace NKikimr

0 commit comments

Comments
 (0)