@@ -45,21 +45,47 @@ class TSessionInfo {
4545 return Workers.contains (id);
4646 }
4747
48- void RegisterWorker (IActorOps* ops, const TWorkerId& id, IActor* actor) {
48+ bool HasWorker (const TActorId& id) const {
49+ return ActorIdToWorkerId.contains (id);
50+ }
51+
52+ TActorId GetWorkerActorId (const TWorkerId& id) const {
53+ auto it = Workers.find (id);
54+ Y_ABORT_UNLESS (it != Workers.end ());
55+ return it->second ;
56+ }
57+
58+ TActorId RegisterWorker (IActorOps* ops, const TWorkerId& id, IActor* actor) {
4959 auto res = Workers.emplace (id, ops->Register (actor));
5060 Y_ABORT_UNLESS (res.second );
5161
62+ const auto actorId = res.first ->second ;
63+ ActorIdToWorkerId.emplace (actorId, id);
64+
5265 ops->Send (ActorId, new TEvService::TEvWorkerStatus (id, NKikimrReplication::TEvWorkerStatus::RUNNING));
66+ return actorId;
5367 }
5468
5569 void StopWorker (IActorOps* ops, const TWorkerId& id) {
5670 auto it = Workers.find (id);
5771 Y_ABORT_UNLESS (it != Workers.end ());
5872
5973 ops->Send (it->second , new TEvents::TEvPoison ());
74+ ops->Send (ActorId, new TEvService::TEvWorkerStatus (id, NKikimrReplication::TEvWorkerStatus::STOPPED));
75+
76+ ActorIdToWorkerId.erase (it->second );
6077 Workers.erase (it);
78+ }
6179
62- ops->Send (ActorId, new TEvService::TEvWorkerStatus (id, NKikimrReplication::TEvWorkerStatus::STOPPED));
80+ void StopWorker (IActorOps* ops, const TActorId& id) {
81+ auto it = ActorIdToWorkerId.find (id);
82+ Y_ABORT_UNLESS (it != ActorIdToWorkerId.end ());
83+
84+ // actor already stopped
85+ ops->Send (ActorId, new TEvService::TEvWorkerStatus (it->second , NKikimrReplication::TEvWorkerStatus::STOPPED));
86+
87+ Workers.erase (it->second );
88+ ActorIdToWorkerId.erase (it);
6389 }
6490
6591 void SendStatus (IActorOps* ops) const {
@@ -83,6 +109,7 @@ class TSessionInfo {
83109 TActorId ActorId;
84110 ui64 Generation;
85111 THashMap<TWorkerId, TActorId> Workers;
112+ THashMap<TActorId, TWorkerId> ActorIdToWorkerId;
86113
87114}; // TSessionInfo
88115
@@ -252,7 +279,9 @@ class TReplicationService: public TActorBootstrapped<TReplicationService> {
252279 // TODO: validate settings
253280 const auto & readerSettings = cmd.GetRemoteTopicReader ();
254281 const auto & writerSettings = cmd.GetLocalTableWriter ();
255- session.RegisterWorker (this , id, CreateWorker (ReaderFn (readerSettings), WriterFn (writerSettings)));
282+ const auto actorId = session.RegisterWorker (this , id,
283+ CreateWorker (SelfId (), ReaderFn (readerSettings), WriterFn (writerSettings)));
284+ WorkerActorIdToSession[actorId] = controller.GetTabletId ();
256285 }
257286
258287 void Handle (TEvService::TEvStopWorker::TPtr& ev) {
@@ -284,10 +313,43 @@ class TReplicationService: public TActorBootstrapped<TReplicationService> {
284313 if (session.HasWorker (id)) {
285314 LOG_I (" Stop worker"
286315 << " : worker# " << id);
316+ WorkerActorIdToSession.erase (session.GetWorkerActorId (id));
287317 session.StopWorker (this , id);
288318 }
289319 }
290320
321+ void Handle (TEvWorker::TEvGone::TPtr& ev) {
322+ LOG_T (" Handle " << ev->Get ()->ToString ());
323+
324+ auto wit = WorkerActorIdToSession.find (ev->Sender );
325+ if (wit == WorkerActorIdToSession.end ()) {
326+ LOG_W (" Unknown worker has gone"
327+ << " : worker# " << ev->Sender );
328+ return ;
329+ }
330+
331+ auto it = Sessions.find (wit->second );
332+ if (it == Sessions.end ()) {
333+ LOG_E (" Cannot find session"
334+ << " : worker# " << ev->Sender
335+ << " , session# " << wit->second );
336+ return ;
337+ }
338+
339+ auto & session = it->second ;
340+ if (!session.HasWorker (ev->Sender )) {
341+ LOG_E (" Cannot find worker"
342+ << " : worker# " << ev->Sender
343+ << " , session# " << wit->second );
344+ return ;
345+ }
346+
347+ LOG_I (" Worker has gone"
348+ << " : worker# " << ev->Sender );
349+ WorkerActorIdToSession.erase (ev->Sender );
350+ session.StopWorker (this , ev->Sender );
351+ }
352+
291353 void PassAway () override {
292354 if (auto actorId = std::exchange (BoardPublisher, {})) {
293355 Send (actorId, new TEvents::TEvPoison ());
@@ -319,6 +381,7 @@ class TReplicationService: public TActorBootstrapped<TReplicationService> {
319381 hFunc (TEvService::TEvHandshake, Handle);
320382 hFunc (TEvService::TEvRunWorker, Handle);
321383 hFunc (TEvService::TEvStopWorker, Handle);
384+ hFunc (TEvWorker::TEvGone, Handle);
322385 sFunc (TEvents::TEvPoison, PassAway);
323386 }
324387 }
@@ -328,6 +391,7 @@ class TReplicationService: public TActorBootstrapped<TReplicationService> {
328391 TActorId BoardPublisher;
329392 THashMap<ui64, TSessionInfo> Sessions;
330393 THashMap<TCredentialsKey, TActorId> YdbProxies;
394+ THashMap<TActorId, ui64> WorkerActorIdToSession;
331395
332396}; // TReplicationService
333397
0 commit comments