Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ydb/core/base/hive.h
Original file line number Diff line number Diff line change
Expand Up @@ -712,8 +712,9 @@ namespace NKikimr {
{
TEvLockTabletExecutionLost() = default;

explicit TEvLockTabletExecutionLost(ui64 tabletId) {
explicit TEvLockTabletExecutionLost(ui64 tabletId, NKikimrHive::ELockLostReason reason) {
Record.SetTabletID(tabletId);
Record.SetReason(reason);
}
};

Expand Down
9 changes: 6 additions & 3 deletions ydb/core/mind/hive/hive_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "hive.h"
#include "tablet_info.h"
#include "node_info.h"
#include "leader_tablet_info.h"

namespace NKikimr {
namespace NHive {
Expand Down Expand Up @@ -71,12 +72,14 @@ struct TEvPrivate {
struct TEvUnlockTabletReconnectTimeout : TEventLocal<TEvUnlockTabletReconnectTimeout, EvUnlockTabletReconnectTimeout> {
ui64 TabletId;
ui64 SeqNo;
NKikimrHive::ELockLostReason Reason;

TEvUnlockTabletReconnectTimeout() = default;

explicit TEvUnlockTabletReconnectTimeout(ui64 tabletId, ui64 seqNo)
: TabletId(tabletId)
, SeqNo(seqNo)
explicit TEvUnlockTabletReconnectTimeout(const TLeaderTabletInfo& tablet, NKikimrHive::ELockLostReason reason)
: TabletId(tablet.Id)
, SeqNo(tablet.PendingUnlockSeqNo)
, Reason(reason)
{}
};

Expand Down
4 changes: 2 additions & 2 deletions ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
ITransaction* CreateReassignGroupsOnDecommit(ui32 groupId, std::unique_ptr<IEventHandle> reply);
ITransaction* CreateLockTabletExecution(const NKikimrHive::TEvLockTabletExecution& rec, const TActorId& sender, const ui64 cookie);
ITransaction* CreateUnlockTabletExecution(const NKikimrHive::TEvUnlockTabletExecution& rec, const TActorId& sender, const ui64 cookie);
ITransaction* CreateUnlockTabletExecution(ui64 tabletId, ui64 seqNo);
ITransaction* CreateUnlockTabletExecution(ui64 tabletId, ui64 seqNo, NKikimrHive::ELockLostReason reason);
ITransaction* CreateRequestTabletSequence(TEvHive::TEvRequestTabletIdSequence::TPtr event);
ITransaction* CreateResponseTabletSequence(TEvHive::TEvResponseTabletIdSequence::TPtr event);
ITransaction* CreateDisconnectNode(THolder<TEvInterconnect::TEvNodeDisconnected> event);
Expand Down Expand Up @@ -995,7 +995,7 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId
void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet);
void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects& sideEffects);
TInstant GetAllowedBootingTime();
void ScheduleUnlockTabletExecution(TNodeInfo& node);
void ScheduleUnlockTabletExecution(TNodeInfo& node, NKikimrHive::ELockLostReason reason);
TString DebugDomainsActiveNodes() const;
TResourceNormalizedValues GetStDevResourceValues() const;
bool IsTabletMoveExpedient(const TTabletInfo& tablet, const TNodeInfo& node) const;
Expand Down
17 changes: 9 additions & 8 deletions ydb/core/mind/hive/hive_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5844,12 +5844,13 @@ Y_UNIT_TEST_SUITE(THiveTest) {
UNIT_ASSERT_VALUES_EQUAL(result->Record.GetStatus(), expectedStatus);
}

void VerifyLockTabletExecutionLost(TTestActorRuntime& runtime, ui64 tabletId, const TActorId& owner) {
void VerifyLockTabletExecutionLost(TTestActorRuntime& runtime, ui64 tabletId, const TActorId& owner, NKikimrHive::ELockLostReason reason) {
TAutoPtr<IEventHandle> handle;
auto result = runtime.GrabEdgeEventRethrow<TEvHive::TEvLockTabletExecutionLost>(handle);
UNIT_ASSERT(result);
UNIT_ASSERT_VALUES_EQUAL(handle->GetRecipientRewrite(), owner);
UNIT_ASSERT_VALUES_EQUAL(result->Record.GetTabletID(), tabletId);
UNIT_ASSERT_VALUES_EQUAL(static_cast<i32>(result->Record.GetReason()), static_cast<i32>(reason));
}

Y_UNIT_TEST(TestLockTabletExecution) {
Expand Down Expand Up @@ -5923,7 +5924,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
WaitForTabletIsUp(runtime, tabletId, 0);

// Hive should try to notify owner on unlocking
VerifyLockTabletExecutionLost(runtime, tabletId, owner);
VerifyLockTabletExecutionLost(runtime, tabletId, owner, NKikimrHive::LOCK_LOST_REASON_NODE_DISCONNECTED);
}

Y_UNIT_TEST(TestLockTabletExecutionRebootTimeout) {
Expand Down Expand Up @@ -5952,7 +5953,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
WaitForTabletIsUp(runtime, tabletId, 0);

// Hive should try to notify owner on unlocking
VerifyLockTabletExecutionLost(runtime, tabletId, owner);
VerifyLockTabletExecutionLost(runtime, tabletId, owner, NKikimrHive::LOCK_LOST_REASON_HIVE_RESTART);
}

Y_UNIT_TEST(TestLockTabletExecutionDelete) {
Expand Down Expand Up @@ -5988,7 +5989,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
}

// Hive should try to notify owner on unlocking
VerifyLockTabletExecutionLost(runtime, tabletId, owner);
VerifyLockTabletExecutionLost(runtime, tabletId, owner, NKikimrHive::LOCK_LOST_REASON_TABLET_DELETED);
}

Y_UNIT_TEST(TestLockTabletExecutionDeleteReboot) {
Expand Down Expand Up @@ -6029,7 +6030,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
RebootTablet(runtime, hiveTablet, runtime.AllocateEdgeActor(0));

// Hive should try to notify owner on unlocking
VerifyLockTabletExecutionLost(runtime, tabletId, owner);
VerifyLockTabletExecutionLost(runtime, tabletId, owner, NKikimrHive::LOCK_LOST_REASON_TABLET_DELETED);
}

void MakeSureTabletStaysDown(TTestActorRuntime& runtime, ui64 tabletId, const TDuration& timeout) {
Expand Down Expand Up @@ -6139,7 +6140,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
runtime.Send(new IEventHandle(proxy, disconnecter, new TEvInterconnect::TEvDisconnect()), 0);

// wait for the lost lock notification
VerifyLockTabletExecutionLost(runtime, tabletId, owner);
VerifyLockTabletExecutionLost(runtime, tabletId, owner, NKikimrHive::LOCK_LOST_REASON_NODE_DISCONNECTED);

// lock reconnect should fail
SendLockTabletExecution(runtime, hiveTablet, tabletId, 1, NKikimrProto::ERROR, owner, 500, true);
Expand Down Expand Up @@ -6208,7 +6209,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
WaitForTabletIsUp(runtime, tabletId, 0);

// Hive should try to notify owner on unlocking
VerifyLockTabletExecutionLost(runtime, tabletId, owner);
VerifyLockTabletExecutionLost(runtime, tabletId, owner, NKikimrHive::LOCK_LOST_REASON_UNLOCKED);
}

Y_UNIT_TEST(TestLockTabletExecutionStealLock) {
Expand All @@ -6234,7 +6235,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
SendLockTabletExecution(runtime, hiveTablet, tabletId, 1, NKikimrProto::OK, owner2);

// Hive should notify the old owner on unlocking
VerifyLockTabletExecutionLost(runtime, tabletId, owner);
VerifyLockTabletExecutionLost(runtime, tabletId, owner, NKikimrHive::LOCK_LOST_REASON_NEW_LOCK);
}

Y_UNIT_TEST(TestExternalBoot) {
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/hive/tx__delete_tablet_result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class TTxDeleteTabletResult : public TTransactionBase<THive> {
TActorId unlockedFromActor = tablet->ClearLockedToActor();
if (unlockedFromActor) {
// Notify lock owner that lock has been lost
SideEffects.Send(unlockedFromActor, new TEvHive::TEvLockTabletExecutionLost(TabletId));
SideEffects.Send(unlockedFromActor, new TEvHive::TEvLockTabletExecutionLost(TabletId, NKikimrHive::LOCK_LOST_REASON_TABLET_DELETED));
}
Self->PendingCreateTablets.erase({tablet->Owner.first, tablet->Owner.second});
Self->DeleteTablet(tablet->Id);
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/hive/tx__disconnect_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class TTxDisconnectNode : public TTransactionBase<THive> {
BLOG_D("THive::TTxDisconnectNode()::Execute");
TNodeInfo* node = Self->FindNode(Event->NodeId);
if (node != nullptr) {
Self->ScheduleUnlockTabletExecution(*node);
Self->ScheduleUnlockTabletExecution(*node, NKikimrHive::LOCK_LOST_REASON_NODE_DISCONNECTED);
if (node->BecomeDisconnecting()) {
THolder<TEvPrivate::TEvProcessDisconnectNode> event = MakeHolder<TEvPrivate::TEvProcessDisconnectNode>();
event->NodeId = node->Id;
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/hive/tx__load_everything.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ class TTxLoadEverything : public TTransactionBase<THive> {
ctx.Send(Self->SelfId(), new TEvPrivate::TEvBootTablets());

for (auto it = Self->Nodes.begin(); it != Self->Nodes.end(); ++it) {
Self->ScheduleUnlockTabletExecution(it->second);
Self->ScheduleUnlockTabletExecution(it->second, NKikimrHive::LOCK_LOST_REASON_HIVE_RESTART);
}
}
};
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/hive/tx__lock_tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class TTxLockTabletExecution : public TTransactionBase<THive> {
ui32 flags = 0;
if (PreviousOwner && PreviousOwner != OwnerActor) {
// Notify previous owner that its lock ownership has been lost
SideEffects.Send(PreviousOwner, new TEvHive::TEvLockTabletExecutionLost(TabletId));
SideEffects.Send(PreviousOwner, new TEvHive::TEvLockTabletExecutionLost(TabletId, NKikimrHive::LOCK_LOST_REASON_NEW_LOCK));
}

if (tablet->IsLockedToActor()) {
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/hive/tx__release_tablets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class TTxReleaseTablets : public TTransactionBase<THive> {
SideEffects.Complete(ctx);
for (const auto& unlockedFromActor : UnlockedFromActor) {
// Notify lock owner that lock has been lost
ctx.Send(unlockedFromActor.second, new TEvHive::TEvLockTabletExecutionLost(unlockedFromActor.first));
ctx.Send(unlockedFromActor.second, new TEvHive::TEvLockTabletExecutionLost(unlockedFromActor.first, NKikimrHive::LOCK_LOST_REASON_TABLET_RELEASED));
}
ctx.Send(Request->Sender, Response.Release());
if (NeedToProcessPendingOperations) {
Expand Down
17 changes: 10 additions & 7 deletions ydb/core/mind/hive/tx__unlock_tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class TTxUnlockTabletExecution : public TTransactionBase<THive> {
const ui64 TabletId;
const TActorId OwnerActor;
const ui64 SeqNo;
const NKikimrHive::ELockLostReason Reason = NKikimrHive::LOCK_LOST_REASON_UNKNOWN;

const TActorId Sender;
const ui64 Cookie;
Expand All @@ -21,6 +22,7 @@ class TTxUnlockTabletExecution : public TTransactionBase<THive> {
, TabletId(rec.GetTabletID())
, OwnerActor(GetOwnerActor(rec, sender))
, SeqNo(0)
, Reason(NKikimrHive::LOCK_LOST_REASON_UNLOCKED)
, Sender(sender)
, Cookie(cookie)
{
Expand All @@ -29,10 +31,11 @@ class TTxUnlockTabletExecution : public TTransactionBase<THive> {

TTxType GetTxType() const override { return NHive::TXTYPE_UNLOCK_TABLET_EXECUTION; }

TTxUnlockTabletExecution(ui64 tabletId, ui64 seqNo, THive* hive)
TTxUnlockTabletExecution(ui64 tabletId, ui64 seqNo, NKikimrHive::ELockLostReason reason, THive* hive)
: TBase(hive)
, TabletId(tabletId)
, SeqNo(seqNo)
, Reason(reason)
, Cookie(0)
{}

Expand Down Expand Up @@ -70,7 +73,7 @@ class TTxUnlockTabletExecution : public TTransactionBase<THive> {

if (PreviousOwner) {
// Notify previous owner that its lock ownership has been lost
SideEffects.Send(PreviousOwner, new TEvHive::TEvLockTabletExecutionLost(TabletId));
SideEffects.Send(PreviousOwner, new TEvHive::TEvLockTabletExecutionLost(TabletId, Reason));
}

if (!tablet->IsLockedToActor()) {
Expand Down Expand Up @@ -100,19 +103,19 @@ ITransaction* THive::CreateUnlockTabletExecution(const NKikimrHive::TEvUnlockTab
return new TTxUnlockTabletExecution(rec, sender, cookie, this);
}

ITransaction* THive::CreateUnlockTabletExecution(ui64 tabletId, ui64 seqNo) {
return new TTxUnlockTabletExecution(tabletId, seqNo, this);
ITransaction* THive::CreateUnlockTabletExecution(ui64 tabletId, ui64 seqNo, NKikimrHive::ELockLostReason reason) {
return new TTxUnlockTabletExecution(tabletId, seqNo, reason, this);
}

void THive::ScheduleUnlockTabletExecution(TNodeInfo& node) {
void THive::ScheduleUnlockTabletExecution(TNodeInfo& node, NKikimrHive::ELockLostReason reason) {
// Unlock tablets that have been locked by this node
for (TLeaderTabletInfo* tablet : node.LockedTablets) {
Y_ABORT_UNLESS(FindTabletEvenInDeleting(tablet->Id) == tablet);
Y_ABORT_UNLESS(tablet->LockedToActor.NodeId() == node.Id);
if (tablet->PendingUnlockSeqNo == 0) {
tablet->PendingUnlockSeqNo = NextTabletUnlockSeqNo++;
Y_ABORT_UNLESS(tablet->PendingUnlockSeqNo != 0);
auto event = new TEvPrivate::TEvUnlockTabletReconnectTimeout(tablet->Id, tablet->PendingUnlockSeqNo);
auto event = new TEvPrivate::TEvUnlockTabletReconnectTimeout(*tablet, reason);
if (tablet->LockedReconnectTimeout) {
Schedule(tablet->LockedReconnectTimeout, event);
} else {
Expand Down Expand Up @@ -141,7 +144,7 @@ void THive::Handle(TEvPrivate::TEvUnlockTabletReconnectTimeout::TPtr& ev) {
// - reconnect timeout (execute, failure)
// tablet is not unlocked, because logically lock/reconnect
// transaction was scheduled before the timeout really happened.
Execute(CreateUnlockTabletExecution(tabletId, seqNo));
Execute(CreateUnlockTabletExecution(tabletId, seqNo, ev->Get()->Reason));
}
}

Expand Down
11 changes: 11 additions & 0 deletions ydb/core/protos/hive.proto
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ enum EDrainDownPolicy {
DRAIN_POLICY_KEEP_DOWN = 2;
}

enum ELockLostReason {
LOCK_LOST_REASON_UNKNOWN = 0;
LOCK_LOST_REASON_NODE_DISCONNECTED = 1;
LOCK_LOST_REASON_HIVE_RESTART = 2;
LOCK_LOST_REASON_TABLET_DELETED = 3;
LOCK_LOST_REASON_NEW_LOCK = 4;
LOCK_LOST_REASON_TABLET_RELEASED = 5;
LOCK_LOST_REASON_UNLOCKED = 6;
}

message TChannelInfo {
message THistorySlot {
optional uint32 FromGeneration = 1;
Expand Down Expand Up @@ -405,6 +415,7 @@ message TEvLockTabletExecution {

message TEvLockTabletExecutionLost {
optional fixed64 TabletID = 1;
optional ELockLostReason Reason = 2;
}

message TEvLockTabletExecutionResult {
Expand Down
Loading