Skip to content

Commit 8474f8a

Browse files
authored
Validate source of an index in LuceneChangesSnapshot (#32288)
Today it's possible to encounter an Index operation in Lucene whose _source is disabled, and _recovery_source was pruned by the MergePolicy. If it's the case, we create a Translog#Index without source and let the caller validate it later. However, this approach is challenging for the caller. Deletes and No-Ops don't allow invoking "source()" method. The caller has to make sure to call "source()" only on index operations. The current implementation in CCR does not follow this and fail to replica deletes or no-ops. Moreover, it's easier to reason if a Translog#Index always has the source.
1 parent 90c5887 commit 8474f8a

File tree

6 files changed

+49
-34
lines changed

6 files changed

+49
-34
lines changed

server/src/main/java/org/elasticsearch/index/engine/LuceneChangesSnapshot.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,21 @@ private Translog.Operation readDocAsOp(int docIndex) throws IOException {
258258
assert assertDocSoftDeleted(leaf.reader(), segmentDocID) : "Delete op but soft_deletes field is not set [" + op + "]";
259259
} else {
260260
final BytesReference source = fields.source();
261+
if (source == null) {
262+
// TODO: Callers should ask for the range that source should be retained. Thus we should always
263+
// check for the existence source once we make peer-recovery to send ops after the local checkpoint.
264+
if (requiredFullRange) {
265+
throw new IllegalStateException("source not found for seqno=" + seqNo +
266+
" from_seqno=" + fromSeqNo + " to_seqno=" + toSeqNo);
267+
} else {
268+
skippedOperations++;
269+
return null;
270+
}
271+
}
261272
// TODO: pass the latest timestamp from engine.
262273
final long autoGeneratedIdTimestamp = -1;
263274
op = new Translog.Index(type, id, seqNo, primaryTerm, version,
264-
source == null ? null : source.toBytesRef().bytes, fields.routing(), autoGeneratedIdTimestamp);
275+
source.toBytesRef().bytes, fields.routing(), autoGeneratedIdTimestamp);
265276
}
266277
}
267278
assert fromSeqNo <= op.seqNo() && op.seqNo() <= toSeqNo && lastSeenSeqNo < op.seqNo() : "Unexpected operation; " +

server/src/main/java/org/elasticsearch/index/translog/Translog.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ public Index(String type, String id, long seqNo, long primaryTerm, long version,
10611061
byte[] source, String routing, long autoGeneratedIdTimestamp) {
10621062
this.type = type;
10631063
this.id = id;
1064-
this.source = source == null ? null : new BytesArray(source);
1064+
this.source = new BytesArray(source);
10651065
this.seqNo = seqNo;
10661066
this.primaryTerm = primaryTerm;
10671067
this.version = version;
@@ -1111,7 +1111,7 @@ public long version() {
11111111

11121112
@Override
11131113
public Source getSource() {
1114-
return source == null ? null : new Source(source, routing);
1114+
return new Source(source, routing);
11151115
}
11161116

11171117
private void write(final StreamOutput out) throws IOException {

server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,26 +1418,36 @@ public void testForceMergeWithSoftDeletesRetentionAndRecoverySource() throws Exc
14181418
final MapperService mapperService = createMapperService("test");
14191419
final boolean omitSourceAllTheTime = randomBoolean();
14201420
final Set<String> liveDocs = new HashSet<>();
1421+
final Set<String> liveDocsWithSource = new HashSet<>();
14211422
try (Store store = createStore();
14221423
InternalEngine engine = createEngine(config(indexSettings, store, createTempDir(), newMergePolicy(), null, null,
14231424
globalCheckpoint::get))) {
14241425
int numDocs = scaledRandomIntBetween(10, 100);
14251426
for (int i = 0; i < numDocs; i++) {
1426-
ParsedDocument doc = testParsedDocument(Integer.toString(i), null, testDocument(), B_1, null, randomBoolean()
1427-
|| omitSourceAllTheTime);
1427+
boolean useRecoverySource = randomBoolean() || omitSourceAllTheTime;
1428+
ParsedDocument doc = testParsedDocument(Integer.toString(i), null, testDocument(), B_1, null, useRecoverySource);
14281429
engine.index(indexForDoc(doc));
14291430
liveDocs.add(doc.id());
1431+
if (useRecoverySource == false) {
1432+
liveDocsWithSource.add(Integer.toString(i));
1433+
}
14301434
}
14311435
for (int i = 0; i < numDocs; i++) {
1432-
ParsedDocument doc = testParsedDocument(Integer.toString(i), null, testDocument(), B_1, null, randomBoolean()
1433-
|| omitSourceAllTheTime);
1436+
boolean useRecoverySource = randomBoolean() || omitSourceAllTheTime;
1437+
ParsedDocument doc = testParsedDocument(Integer.toString(i), null, testDocument(), B_1, null, useRecoverySource);
14341438
if (randomBoolean()) {
14351439
engine.delete(new Engine.Delete(doc.type(), doc.id(), newUid(doc.id()), primaryTerm.get()));
14361440
liveDocs.remove(doc.id());
1441+
liveDocsWithSource.remove(doc.id());
14371442
}
14381443
if (randomBoolean()) {
14391444
engine.index(indexForDoc(doc));
14401445
liveDocs.add(doc.id());
1446+
if (useRecoverySource == false) {
1447+
liveDocsWithSource.add(doc.id());
1448+
} else {
1449+
liveDocsWithSource.remove(doc.id());
1450+
}
14411451
}
14421452
if (randomBoolean()) {
14431453
engine.flush(randomBoolean(), true);
@@ -1453,12 +1463,7 @@ public void testForceMergeWithSoftDeletesRetentionAndRecoverySource() throws Exc
14531463
minSeqNoToRetain = Math.min(globalCheckpoint.get() + 1 - retainedExtraOps, safeCommitLocalCheckpoint + 1);
14541464
}
14551465
engine.forceMerge(true, 1, false, false, false);
1456-
assertConsistentHistoryBetweenTranslogAndLuceneIndex(engine, mapperService, (luceneOp, translogOp) -> {
1457-
if (luceneOp.seqNo() >= minSeqNoToRetain) {
1458-
assertNotNull(luceneOp.getSource());
1459-
assertThat(luceneOp.getSource().source, equalTo(translogOp.getSource().source));
1460-
}
1461-
});
1466+
assertConsistentHistoryBetweenTranslogAndLuceneIndex(engine, mapperService);
14621467
Map<Long, Translog.Operation> ops = readAllOperationsInLucene(engine, mapperService)
14631468
.stream().collect(Collectors.toMap(Translog.Operation::seqNo, Function.identity()));
14641469
for (long seqno = 0; seqno <= engine.getLocalCheckpoint(); seqno++) {
@@ -1483,10 +1488,8 @@ public void testForceMergeWithSoftDeletesRetentionAndRecoverySource() throws Exc
14831488
globalCheckpoint.set(engine.getLocalCheckpoint());
14841489
engine.syncTranslog();
14851490
engine.forceMerge(true, 1, false, false, false);
1486-
assertConsistentHistoryBetweenTranslogAndLuceneIndex(engine, mapperService, (luceneOp, translogOp) -> {
1487-
assertEquals(translogOp.getSource().source, B_1);
1488-
});
1489-
assertThat(readAllOperationsInLucene(engine, mapperService), hasSize(liveDocs.size()));
1491+
assertConsistentHistoryBetweenTranslogAndLuceneIndex(engine, mapperService);
1492+
assertThat(readAllOperationsInLucene(engine, mapperService), hasSize(liveDocsWithSource.size()));
14901493
}
14911494
}
14921495

test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@
104104
import java.util.concurrent.CountDownLatch;
105105
import java.util.concurrent.atomic.AtomicInteger;
106106
import java.util.concurrent.atomic.AtomicLong;
107-
import java.util.function.BiConsumer;
108107
import java.util.function.BiFunction;
109108
import java.util.function.Function;
110109
import java.util.function.LongSupplier;
@@ -822,14 +821,6 @@ public static List<Translog.Operation> readAllOperationsInLucene(Engine engine,
822821
* Asserts the provided engine has a consistent document history between translog and Lucene index.
823822
*/
824823
public static void assertConsistentHistoryBetweenTranslogAndLuceneIndex(Engine engine, MapperService mapper) throws IOException {
825-
assertConsistentHistoryBetweenTranslogAndLuceneIndex(engine, mapper, (luceneOp, translogOp) ->
826-
assertThat(luceneOp.getSource().source, equalTo(translogOp.getSource().source)));
827-
}
828-
829-
public static void assertConsistentHistoryBetweenTranslogAndLuceneIndex(Engine engine, MapperService mapper,
830-
BiConsumer<Translog.Operation, Translog.Operation> assertSource)
831-
throws IOException {
832-
833824
if (mapper.documentMapper() == null || engine.config().getIndexSettings().isSoftDeleteEnabled() == false) {
834825
return;
835826
}
@@ -867,7 +858,7 @@ public static void assertConsistentHistoryBetweenTranslogAndLuceneIndex(Engine e
867858
assertThat(luceneOp.toString(), luceneOp.primaryTerm(), equalTo(translogOp.primaryTerm()));
868859
assertThat(luceneOp.opType(), equalTo(translogOp.opType()));
869860
if (luceneOp.opType() == Translog.Operation.Type.INDEX) {
870-
assertSource.accept(luceneOp, translogOp);
861+
assertThat(luceneOp.getSource().source, equalTo(translogOp.getSource().source));
871862
}
872863
}
873864
}

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/ShardChangesAction.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,6 @@ static Translog.Operation[] getOperations(IndexShard indexShard, long globalChec
283283
try (Translog.Snapshot snapshot = indexShard.newLuceneChangesSnapshot("ccr", fromSeqNo, toSeqNo, true)) {
284284
Translog.Operation op;
285285
while ((op = snapshot.next()) != null) {
286-
if (op.getSource() == null) {
287-
throw new IllegalStateException("source not found for operation: " + op + " fromSeqNo: " + fromSeqNo +
288-
" maxOperationCount: " + maxOperationCount);
289-
}
290286
operations.add(op);
291287
seenBytes += op.estimateSize();
292288
if (seenBytes > maxOperationSizeInBytes) {

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowTaskReplicationTests.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
import org.elasticsearch.Version;
99
import org.elasticsearch.action.ActionListener;
10+
import org.elasticsearch.action.DocWriteResponse;
11+
import org.elasticsearch.action.bulk.BulkItemResponse;
12+
import org.elasticsearch.action.delete.DeleteRequest;
1013
import org.elasticsearch.action.support.replication.TransportWriteAction;
1114
import org.elasticsearch.cluster.metadata.IndexMetaData;
1215
import org.elasticsearch.cluster.routing.ShardRouting;
@@ -30,6 +33,7 @@
3033
import java.util.ArrayList;
3134
import java.util.Collections;
3235
import java.util.List;
36+
import java.util.Set;
3337
import java.util.concurrent.atomic.AtomicBoolean;
3438
import java.util.function.BiConsumer;
3539
import java.util.function.Consumer;
@@ -50,12 +54,22 @@ public void testSimpleCcrReplication() throws Exception {
5054
shardFollowTask.start(leaderGroup.getPrimary().getGlobalCheckpoint(), followerGroup.getPrimary().getGlobalCheckpoint());
5155
docCount += leaderGroup.appendDocs(randomInt(128));
5256
leaderGroup.syncGlobalCheckpoint();
53-
5457
leaderGroup.assertAllEqual(docCount);
55-
int expectedCount = docCount;
58+
Set<String> indexedDocIds = getShardDocUIDs(leaderGroup.getPrimary());
59+
assertBusy(() -> {
60+
assertThat(followerGroup.getPrimary().getGlobalCheckpoint(), equalTo(leaderGroup.getPrimary().getGlobalCheckpoint()));
61+
followerGroup.assertAllEqual(indexedDocIds.size());
62+
});
63+
// Deletes should be replicated to the follower
64+
List<String> deleteDocIds = randomSubsetOf(indexedDocIds);
65+
for (String deleteId : deleteDocIds) {
66+
BulkItemResponse resp = leaderGroup.delete(new DeleteRequest(index.getName(), "type", deleteId));
67+
assertThat(resp.getResponse().getResult(), equalTo(DocWriteResponse.Result.DELETED));
68+
}
69+
leaderGroup.syncGlobalCheckpoint();
5670
assertBusy(() -> {
5771
assertThat(followerGroup.getPrimary().getGlobalCheckpoint(), equalTo(leaderGroup.getPrimary().getGlobalCheckpoint()));
58-
followerGroup.assertAllEqual(expectedCount);
72+
followerGroup.assertAllEqual(indexedDocIds.size() - deleteDocIds.size());
5973
});
6074
shardFollowTask.markAsCompleted();
6175
}

0 commit comments

Comments
 (0)