Skip to content

actually delete nodes if possible #3120

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions ydb/core/mind/hive/hive_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ struct TEvPrivate {
EvRestartCancelled,
EvProcessStorageBalancer,
EvStorageBalancerOut,
EvDeleteNode,
EvEnd
};

Expand Down Expand Up @@ -110,6 +111,12 @@ struct TEvPrivate {
// Payload-free local event tagged with EvProcessStorageBalancer.
struct TEvProcessStorageBalancer : TEventLocal<TEvProcessStorageBalancer, EvProcessStorageBalancer> {};

// Payload-free local event tagged with EvStorageBalancerOut.
struct TEvStorageBalancerOut : TEventLocal<TEvStorageBalancerOut, EvStorageBalancerOut> {};

// Local event tagged with EvDeleteNode that carries the id of the node
// whose deletion should be (re)attempted.
struct TEvDeleteNode : TEventLocal<TEvDeleteNode, EvDeleteNode> {
    // Id of the node the deletion attempt refers to.
    TNodeId NodeId;

    // `explicit` so that a bare TNodeId is never silently converted into an event.
    explicit TEvDeleteNode(TNodeId nodeId) : NodeId(nodeId) {}
};
};

} // NHive
Expand Down
29 changes: 27 additions & 2 deletions ydb/core/mind/hive/hive_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ void THive::RestartPipeTx(ui64 tabletId) {
}
}

// Attempts to delete `node`.
//
// If node->CanBeDeleted() holds, DeleteNode() is called for its id and true is
// returned. Otherwise a TEvDeleteNode for this node is scheduled after
// GetNodeDeletePeriod() -- at most one at a time, tracked by
// node->DeletionScheduled -- and false is returned.
//
// `node` must be non-null; it is dereferenced unconditionally.
// NOTE(review): after a `true` result the pointer presumably no longer refers
// to a live TNodeInfo -- confirm against DeleteNode().
bool THive::TryToDeleteNode(TNodeInfo* node) {
    if (!node->CanBeDeleted()) {
        // Not deletable yet: make sure exactly one retry is pending.
        if (!node->DeletionScheduled) {
            Schedule(GetNodeDeletePeriod(), new TEvPrivate::TEvDeleteNode(node->Id));
            node->DeletionScheduled = true;
        }
        return false;
    }

    DeleteNode(node->Id);
    return true;
}

void THive::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev) {
if (ev->Get()->TabletId == TabletID()) {
BLOG_TRACE("Handle TEvTabletPipe::TEvServerConnected(" << ev->Get()->ClientId << ") " << ev->Get()->ServerId);
Expand All @@ -108,9 +120,9 @@ void THive::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev) {
TNodeInfo* node = FindNode(ev->Get()->ClientId.NodeId());
if (node != nullptr) {
Erase(node->PipeServers, ev->Get()->ServerId);
if (node->PipeServers.empty() && node->IsUnknown() && node->CanBeDeleted()) {
if (node->PipeServers.empty() && node->IsUnknown()) {
ObjectDistributions.RemoveNode(*node);
DeleteNode(node->Id);
TryToDeleteNode(node);
}
}
}
Expand Down Expand Up @@ -2917,6 +2929,7 @@ void THive::ProcessEvent(std::unique_ptr<IEventHandle> event) {
hFunc(TEvPrivate::TEvStartStorageBalancer, Handle);
hFunc(TEvPrivate::TEvProcessStorageBalancer, Handle);
hFunc(TEvHive::TEvUpdateDomain, Handle);
hFunc(TEvPrivate::TEvDeleteNode, Handle);
}
}

Expand Down Expand Up @@ -3017,6 +3030,7 @@ STFUNC(THive::StateWork) {
fFunc(TEvPrivate::TEvStartStorageBalancer::EventType, EnqueueIncomingEvent);
fFunc(TEvHive::TEvUpdateDomain::EventType, EnqueueIncomingEvent);
fFunc(TEvPrivate::TEvProcessStorageBalancer::EventType, EnqueueIncomingEvent);
fFunc(TEvPrivate::TEvDeleteNode::EventType, EnqueueIncomingEvent);
hFunc(TEvPrivate::TEvProcessIncomingEvent, Handle);
default:
if (!HandleDefaultEvents(ev, SelfId())) {
Expand Down Expand Up @@ -3279,6 +3293,17 @@ void THive::Handle(TEvPrivate::TEvLogTabletMoves::TPtr&) {
TabletMovesByTypeForLog.clear();
}

// Handles a TEvDeleteNode event (the one scheduled by TryToDeleteNode).
//
// Looks the node up by the id carried in the event; an unknown id is ignored.
// For a known node the DeletionScheduled flag is cleared first, so that
// TryToDeleteNode() is able to schedule another attempt, and deletion is
// retried only when the node is not alive.
void THive::Handle(TEvPrivate::TEvDeleteNode::TPtr& ev) {
    TNodeInfo* node = FindNode(ev->Get()->NodeId);
    if (node != nullptr) {
        node->DeletionScheduled = false;
        const bool alive = node->IsAlive();
        if (!alive) {
            TryToDeleteNode(node);
        }
    }
}

TVector<TNodeId> THive::GetNodesForWhiteboardBroadcast(size_t maxNodesToReturn) {
TVector<TNodeId> nodes;
TNodeId selfNodeId = SelfId().NodeId();
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -566,9 +566,11 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
void Handle(TEvPrivate::TEvProcessStorageBalancer::TPtr& ev);
void Handle(TEvPrivate::TEvProcessIncomingEvent::TPtr& ev);
void Handle(TEvHive::TEvUpdateDomain::TPtr& ev);
void Handle(TEvPrivate::TEvDeleteNode::TPtr& ev);

protected:
void RestartPipeTx(ui64 tabletId);
bool TryToDeleteNode(TNodeInfo* node);

public:
static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
Expand Down
1 change: 1 addition & 0 deletions ydb/core/mind/hive/node_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ struct TNodeInfo {
THashSet<TLeaderTabletInfo*> LockedTablets;
mutable TInstant LastResourceChangeReaction;
NKikimrHive::TNodeStatistics Statistics;
bool DeletionScheduled = false;

TNodeInfo(TNodeId nodeId, THive& hive);
TNodeInfo(const TNodeInfo&) = delete;
Expand Down
3 changes: 1 addition & 2 deletions ydb/core/mind/hive/tx__kill_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,8 @@ class TTxKillNode : public TTransactionBase<THive> {
}
node->PipeServers.clear();
Self->ObjectDistributions.RemoveNode(*node);
if (node->CanBeDeleted()) {
if (Self->TryToDeleteNode(node)) {
db.Table<Schema::Node>().Key(NodeId).Delete();
Self->DeleteNode(NodeId);
} else {
db.Table<Schema::Node>().Key(NodeId).Update<Schema::Node::Local>(TActorId());
}
Expand Down
Loading