Skip to content

Commit a15e5b3

Browse files
committed
storage: improve safety of merge lock
Fix a known correctness bug in the merge locking scheme. See the comment updates within for details. As an added benefit, this fix frees store.RemoveReplica of conditionals to deal with uninitialized replicas. Acquiring the merge lock now always results in an initialized right-hand replica, so store.MergeRange is always removing an initialized right-hand replica. There are no tests for this because testing this particular edge case is quite difficult and I'm primarily interested in avoiding the obviously wrong call to batch.ClearRange(nil, nil) that occurs when calling store.RemoveReplica on an unintialized replica. This nonsensical ClearRange is, somehow, not currently an error, but it is preventing landing. Release note: None
1 parent 82b7f9b commit a15e5b3

File tree

2 files changed

+42
-27
lines changed

2 files changed

+42
-27
lines changed

pkg/storage/replica.go

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5181,33 +5181,38 @@ func (r *Replica) acquireMergeLock(
51815181
ctx context.Context, merge *roachpb.MergeTrigger,
51825182
) (func(storagebase.ReplicatedEvalResult), error) {
51835183
// The merge lock is the right-hand replica's raftMu. If the right-hand
5184-
// replica does not exist, we create an uninitialized replica. This is
5185-
// required for correctness: without the uninitialized replica, an incoming
5186-
// snapshot could create the right-hand replica before the merge trigger has a
5187-
// chance to widen the left-hand replica's end key. The merge trigger would
5188-
// then fatal the node upon realizing the right-hand replica already exists.
5189-
// With an uninitialized and locked right-hand replica in place, any snapshots
5190-
// for the right-hand range will block on raftMu, waiting for the merge to
5191-
// complete, after which the replica will realize it has been destroyed and
5192-
// reject the snapshot.
5193-
//
5194-
// TODO(benesch): if the right-hand replica splits, we could probably see a
5195-
// snapshot for the right-hand side of the right-hand side. The uninitialized
5196-
// replica this method installs will not block this snapshot from applying as
5197-
// the snapshot's range ID will be different. Add a placeholder to the
5198-
// replicasByKey map as well to be safe.
5199-
//
5200-
// TODO(benesch): make terminology for uninitialized replicas, which protect a
5201-
// range ID, and placeholder replicas, which protect a key range, consistent.
5202-
// Several code comments refer to "uninitialized placeholders," which is
5203-
// downright confusing.
5204-
rightRng, _, err := r.store.getOrCreateReplica(ctx, merge.RightDesc.RangeID, 0, nil)
5184+
// replica does not exist, we create one. This is required for correctness.
5185+
// Without a right-hand replica on this store, an incoming snapshot could
5186+
// create the right-hand replica before the merge trigger has a chance to
5187+
// widen the left-hand replica's end key. The merge trigger would then fatal
5188+
// the node upon realizing the right-hand replica already exists. With a
5189+
// right-hand replica in place, any snapshots for the right-hand range will
5190+
// block on raftMu, waiting for the merge to complete, after which the replica
5191+
// will realize it has been destroyed and reject the snapshot.
5192+
rightRepl, created, err := r.store.getOrCreateReplica(ctx, merge.RightDesc.RangeID, 0, nil)
52055193
if err != nil {
52065194
return nil, err
52075195
}
5196+
if created {
5197+
// rightRepl does not know its start and end keys. It can only block
5198+
// incoming snapshots with the same range ID. This is an insufficient lock:
5199+
// it's possible for this store to receive a snapshot with a different range
5200+
// ID that overlaps the right-hand keyspace while the merge is in progress.
5201+
// Consider the case where this replica is behind and the merged range
5202+
// resplits before this replica processes the merge. An up-to-date member of
5203+
// the new right-hand range could send this store a snapshot that would
5204+
// incorrectly sneak past rightRepl.
5205+
//
5206+
// So install a placeholder for the right range's keyspace in the
5207+
// replicasByKey map to block any intersecting snapshots.
5208+
placeholder := &ReplicaPlaceholder{rangeDesc: merge.RightDesc}
5209+
if err := r.store.addPlaceholder(placeholder); err != nil {
5210+
return nil, err
5211+
}
5212+
}
52085213

52095214
return func(storagebase.ReplicatedEvalResult) {
5210-
rightRng.raftMu.Unlock()
5215+
rightRepl.raftMu.Unlock()
52115216
}, nil
52125217
}
52135218

pkg/storage/store.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2370,8 +2370,14 @@ func (s *Store) addReplicaInternalLocked(repl *Replica) error {
23702370
return nil
23712371
}
23722372

2373+
func (s *Store) addPlaceholder(placeholder *ReplicaPlaceholder) error {
2374+
s.mu.Lock()
2375+
defer s.mu.Unlock()
2376+
return s.addPlaceholderLocked(placeholder)
2377+
}
2378+
23732379
// addPlaceholderLocked adds the specified placeholder. Requires that Store.mu
2374-
// and Replica.raftMu are held.
2380+
// is held.
23752381
func (s *Store) addPlaceholderLocked(placeholder *ReplicaPlaceholder) error {
23762382
rangeID := placeholder.Desc().RangeID
23772383
if exRng := s.mu.replicasByKey.ReplaceOrInsert(placeholder); exRng != nil {
@@ -2386,15 +2392,15 @@ func (s *Store) addPlaceholderLocked(placeholder *ReplicaPlaceholder) error {
23862392

23872393
// removePlaceholder removes a placeholder for the specified range if it
23882394
// exists, returning true if a placeholder was present and removed and false
2389-
// otherwise. Requires that Replica.raftMu is held.
2395+
// otherwise.
23902396
func (s *Store) removePlaceholder(ctx context.Context, rngID roachpb.RangeID) bool {
23912397
s.mu.Lock()
23922398
defer s.mu.Unlock()
23932399
return s.removePlaceholderLocked(ctx, rngID)
23942400
}
23952401

23962402
// removePlaceholderLocked removes the specified placeholder. Requires that
2397-
// Store.mu and Replica.raftMu are held.
2403+
// Store.mu is held.
23982404
func (s *Store) removePlaceholderLocked(ctx context.Context, rngID roachpb.RangeID) bool {
23992405
placeholder, ok := s.mu.replicaPlaceholders[rngID]
24002406
if !ok {
@@ -2527,8 +2533,12 @@ func (s *Store) removeReplicaImpl(
25272533
rep.mu.Unlock()
25282534
rep.readOnlyCmdMu.Unlock()
25292535

2530-
if err := rep.destroyRaftMuLocked(ctx, consistentDesc, opts.DestroyData); err != nil {
2531-
return err
2536+
// Uninitialized replicas don't know their start and keys. It is both
2537+
// incorrect and unnecessary to destroy their data.
2538+
if rep.IsInitialized() {
2539+
if err := rep.destroyRaftMuLocked(ctx, consistentDesc, opts.DestroyData); err != nil {
2540+
return err
2541+
}
25322542
}
25332543

25342544
s.mu.Lock()

0 commit comments

Comments
 (0)