Skip to content

Commit 5bec522

Browse files
committed
File-based with soft-deletes should send ops after checkpoint
Today a file-based recovery will replay all existing translog operations from the primary on a replica so that that replica can have a full history in translog as the primary. However, with soft-deletes enabled, we should not do it because: 1. All operations before the local checkpoint of the safe commit exist in the commit already. 2. The number of operations before the local checkpoint may be considerable and requires a significant amount of time to replay on a replica. Relates elastic#30522 Relates elastic#29530
1 parent 5b11df9 commit 5bec522

File tree

2 files changed

+12
-13
lines changed

2 files changed

+12
-13
lines changed

server/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,12 +162,13 @@ public RecoveryResponse recoverToTarget() throws IOException {
162162
} catch (final Exception e) {
163163
throw new RecoveryEngineException(shard.shardId(), 1, "snapshot failed", e);
164164
}
165-
// we set this to 0 to create a translog roughly according to the retention policy
166-
// on the target. Note that it will still filter out legacy operations with no sequence numbers
167-
startingSeqNo = 0; //TODO: A follow-up to send only ops above the local checkpoint if soft-deletes enabled.
168-
// but we must have everything above the local checkpoint in the commit
165+
// We must have everything above the local checkpoint in the commit
169166
requiredSeqNoRangeStart =
170167
Long.parseLong(phase1Snapshot.getIndexCommit().getUserData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY)) + 1;
168+
// If soft-deletes enabled, we need to transfer only operations after the local_checkpoint of the commit to have
169+
// the same history on the target. However, with translog, we need to set this to 0 to create a translog roughly
170+
// according to the retention policy on the target. Note that it will still filter out legacy operations without seqNo.
171+
startingSeqNo = shard.indexSettings().isSoftDeleteEnabled() ? requiredSeqNoRangeStart : 0;
171172
try {
172173
final int estimateNumOps = shard.estimateNumberOfHistoryOperations("peer-recovery", startingSeqNo);
173174
phase1(phase1Snapshot.getIndexCommit(), () -> estimateNumOps);

server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,7 @@ public void testRecoveryToReplicaThatReceivedExtraDocument() throws Exception {
219219

220220
@TestLogging("org.elasticsearch.index.shard:TRACE,org.elasticsearch.indices.recovery:TRACE")
221221
public void testRecoveryAfterPrimaryPromotion() throws Exception {
222-
Settings settings = Settings.builder().put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true).build();
223-
try (ReplicationGroup shards = createGroup(2, settings)) {
222+
try (ReplicationGroup shards = createGroup(2)) {
224223
shards.startAll();
225224
int totalDocs = shards.indexDocs(randomInt(10));
226225
int committedDocs = 0;
@@ -232,7 +231,6 @@ public void testRecoveryAfterPrimaryPromotion() throws Exception {
232231
final IndexShard oldPrimary = shards.getPrimary();
233232
final IndexShard newPrimary = shards.getReplicas().get(0);
234233
final IndexShard replica = shards.getReplicas().get(1);
235-
boolean softDeleteEnabled = replica.indexSettings().isSoftDeleteEnabled();
236234
if (randomBoolean()) {
237235
// simulate docs that were inflight when primary failed, these will be rolled back
238236
final int rollbackDocs = randomIntBetween(1, 5);
@@ -280,12 +278,13 @@ public void testRecoveryAfterPrimaryPromotion() throws Exception {
280278
assertThat(newPrimary.getLastSyncedGlobalCheckpoint(), equalTo(newPrimary.seqNoStats().getMaxSeqNo()));
281279
});
282280
newPrimary.flush(new FlushRequest().force(true));
283-
uncommittedOpsOnPrimary = shards.indexDocs(randomIntBetween(0, 10));
284-
totalDocs += uncommittedOpsOnPrimary;
285-
// we need an extra flush or refresh to advance the min_retained_seqno on the new primary so that ops-based won't happen
286-
if (softDeleteEnabled) {
281+
if (replica.indexSettings().isSoftDeleteEnabled()) {
282+
// We need an extra flush to advance the min_retained_seqno on the new primary so ops-based won't happen.
283+
// The min_retained_seqno only advances when a merge asks for the retention query.
287284
newPrimary.flush(new FlushRequest().force(true));
288285
}
286+
uncommittedOpsOnPrimary = shards.indexDocs(randomIntBetween(0, 10));
287+
totalDocs += uncommittedOpsOnPrimary;
289288
}
290289

291290
if (randomBoolean()) {
@@ -305,8 +304,7 @@ public void testRecoveryAfterPrimaryPromotion() throws Exception {
305304
assertThat(newReplica.recoveryState().getTranslog().recoveredOperations(), equalTo(totalDocs - committedDocs));
306305
} else {
307306
assertThat(newReplica.recoveryState().getIndex().fileDetails(), not(empty()));
308-
int expectOps = softDeleteEnabled ? totalDocs : uncommittedOpsOnPrimary;
309-
assertThat(newReplica.recoveryState().getTranslog().recoveredOperations(), equalTo(expectOps));
307+
assertThat(newReplica.recoveryState().getTranslog().recoveredOperations(), equalTo(uncommittedOpsOnPrimary));
310308
}
311309

312310
// roll back the extra ops in the replica

0 commit comments

Comments
 (0)