Skip to content

Commit b846396

Browse files
committed
box: introduce box_localize_vclock
The function takes on the burden of explaining why this hack of setting the local component in a remote vclock is needed. It also creates a new vclock rather than altering an existing one. This is to signify that the vclock is no longer what was received from a remote host. Otherwise it is too easy to mistreat this mutant vclock as a remote vclock. That, by the way, did happen and is fixed in the following commits. In scope of tarantool#10047 NO_TEST=refactoring NO_CHANGELOG=refactoring NO_DOC=refactoring
1 parent e0c9b65 commit b846396

File tree

1 file changed

+30
-26
lines changed

1 file changed

+30
-26
lines changed

src/box/box.cc

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4329,6 +4329,26 @@ box_process_fetch_snapshot(struct iostream *io,
43294329
coio_write_xrow(io, &row);
43304330
}
43314331

4332+
/**
4333+
* Replica vclock is used in gc state and recovery initialization - need to
4334+
* replace the remote 0-th component with the own one. This doesn't break
4335+
* recovery: it finds the WAL with a vclock strictly less than replica clock in
4336+
* all components except the 0th one.
4337+
*
4338+
* Note that it would be bad to set the 0-th component to a smaller value (like
4339+
* zero) - it would unnecessarily require additional WALs, which may have
4340+
* already been deleted.
4341+
*
4342+
* Speaking of gc, remote instances' local vclock components are not used by
4343+
* consumers at all.
4344+
*/
4345+
static void
4346+
box_localize_vclock(const struct vclock *remote, struct vclock *local)
4347+
{
4348+
vclock_copy(local, remote);
4349+
vclock_reset(local, 0, vclock_get(&replicaset.vclock, 0));
4350+
}
4351+
43324352
void
43334353
box_process_register(struct iostream *io, const struct xrow_header *header)
43344354
{
@@ -4385,10 +4405,10 @@ box_process_register(struct iostream *io, const struct xrow_header *header)
43854405
"wal_mode = 'none'");
43864406
}
43874407

4388-
/* @sa box_process_subscribe(). */
4389-
vclock_reset(&req.vclock, 0, vclock_get(&replicaset.vclock, 0));
4408+
struct vclock start_vclock;
4409+
box_localize_vclock(&req.vclock, &start_vclock);
43904410
struct gc_consumer *gc = gc_consumer_register(
4391-
&req.vclock, "replica %s", tt_uuid_str(&req.instance_uuid));
4411+
&start_vclock, "replica %s", tt_uuid_str(&req.instance_uuid));
43924412
if (gc == NULL)
43934413
diag_raise();
43944414
auto gc_guard = make_scoped_guard([&] { gc_consumer_unregister(gc); });
@@ -4405,13 +4425,12 @@ box_process_register(struct iostream *io, const struct xrow_header *header)
44054425
/* Remember master's vclock after the last request */
44064426
struct vclock stop_vclock;
44074427
vclock_copy(&stop_vclock, &replicaset.vclock);
4408-
44094428
/*
4410-
* Feed replica with WALs in range
4411-
* (req.vclock, stop_vclock) so that it gets its
4412-
* registration.
4429+
* Feed replica with WALs up to the REGISTER itself so that it gets its own
4430+
* registration entry.
44134431
*/
4414-
relay_final_join(replica, io, header->sync, &req.vclock, &stop_vclock);
4432+
relay_final_join(replica, io, header->sync, &start_vclock,
4433+
&stop_vclock);
44154434
say_info("final data sent.");
44164435

44174436
RegionGuard region_guard(&fiber()->gc);
@@ -4692,6 +4711,8 @@ box_process_subscribe(struct iostream *io, const struct xrow_header *header)
46924711
tnt_raise(ClientError, ER_UNSUPPORTED, "Replication",
46934712
"wal_mode = 'none'");
46944713
}
4714+
struct vclock start_vclock;
4715+
box_localize_vclock(&req.vclock, &start_vclock);
46954716
/*
46964717
* Send a response to SUBSCRIBE request, tell
46974718
* the replica how many rows we have in stock for it,
@@ -4743,23 +4764,6 @@ box_process_subscribe(struct iostream *io, const struct xrow_header *header)
47434764
coio_write_xrow(io, &row);
47444765
sent_raft_term = req.term;
47454766
}
4746-
/*
4747-
* Replica vclock is used in gc state and recovery
4748-
* initialization, so we need to replace the remote 0-th
4749-
* component with our own one. This doesn't break
4750-
* recovery: it finds the WAL with a vclock strictly less
4751-
* than replia clock in all components except the 0th one.
4752-
* This leads to finding the correct WAL, if it exists,
4753-
* since we do not need to recover local rows (the ones,
4754-
* that contribute to the 0-th vclock component).
4755-
* Note, that it would be bad to set 0-th vclock component
4756-
* to a smaller value, since it would unnecessarily
4757-
* require additional WALs, which may have already been
4758-
* deleted.
4759-
* Speaking of gc, remote instances' local vclock
4760-
* components are not used by consumers at all.
4761-
*/
4762-
vclock_reset(&req.vclock, 0, vclock_get(&replicaset.vclock, 0));
47634767
/*
47644768
* Process SUBSCRIBE request via replication relay
47654769
* Send current recovery vector clock as a marker
@@ -4772,7 +4776,7 @@ box_process_subscribe(struct iostream *io, const struct xrow_header *header)
47724776
* a stall in updates (in this case replica may hang
47734777
* indefinitely).
47744778
*/
4775-
relay_subscribe(replica, io, header->sync, &req.vclock,
4779+
relay_subscribe(replica, io, header->sync, &start_vclock,
47764780
req.version_id, req.id_filter, sent_raft_term);
47774781
}
47784782

0 commit comments

Comments
 (0)