
Commit 75719ac

Make _id terms optional in segment containing only noop (#30409)
Previously, only index and delete operations were indexed into Lucene, so every segment could be expected to contain both _id and _version, since those operations carry both. That guarantee no longer holds now that no-ops are also indexed into Lucene: a segment that contains only no-ops has neither _id nor _version, because a no-op carries neither. This change adds a dummy version to no-ops and makes the _id terms optional in PerThreadIDVersionAndSeqNoLookup. Relates #30226
1 parent 5d677a0 commit 75719ac
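In short, a reader over a no-op-only segment has no _id (_uid) terms dictionary, so the per-segment lookup must tolerate terms == null, while the engine now writes a dummy _version of 1L into no-op tombstones so the version doc-values stay dense. The sketch below condenses the constructor change from the PerThreadIDVersionAndSeqNoLookup diff further down; the surrounding class fields (uidField, termsEnum) and the imports are assumed from that diff, so treat it as an illustration rather than the exact committed code.

    // Condensed from the PerThreadIDVersionAndSeqNoLookup constructor changed in this commit.
    final Terms terms = reader.terms(uidField);
    if (terms == null) {
        // A segment holding only no-ops carries no _uid terms, but it must carry
        // both the _soft_deletes and _tombstone doc-values fields.
        if (reader.getNumericDocValues(Lucene.SOFT_DELETE_FIELD) == null
                || reader.getNumericDocValues(SeqNoFieldMapper.TOMBSTONE_NAME) == null) {
            throw new IllegalArgumentException("reader has no _uid terms but is not a no-op segment");
        }
        termsEnum = null;   // getDocID() checks for null and reports "not found" on this leaf
    } else {
        termsEnum = terms.iterator();
    }
    // Every segment is still required to have _version doc-values; no-op tombstones now
    // write a dummy version of 1L so this invariant keeps holding.
    if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) {
        throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field");
    }

The null termsEnum is then handled in getDocID, which treats a missing terms dictionary the same as an id that is simply not present in the segment.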

File tree

6 files changed: +83 -12 lines changed


server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java

Lines changed: 15 additions & 6 deletions
@@ -28,6 +28,7 @@
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndSeqNo;
 import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion;
 import org.elasticsearch.index.mapper.SeqNoFieldMapper;
@@ -66,15 +67,22 @@ final class PerThreadIDVersionAndSeqNoLookup {
      */
     PerThreadIDVersionAndSeqNoLookup(LeafReader reader, String uidField) throws IOException {
         this.uidField = uidField;
-        Terms terms = reader.terms(uidField);
+        final Terms terms = reader.terms(uidField);
         if (terms == null) {
-            throw new IllegalArgumentException("reader misses the [" + uidField + "] field");
+            // If a segment contains only no-ops, it does not have _uid but has both _soft_deletes and _tombstone fields.
+            final NumericDocValues softDeletesDV = reader.getNumericDocValues(Lucene.SOFT_DELETE_FIELD);
+            final NumericDocValues tombstoneDV = reader.getNumericDocValues(SeqNoFieldMapper.TOMBSTONE_NAME);
+            if (softDeletesDV == null || tombstoneDV == null) {
+                throw new IllegalArgumentException("reader does not have _uid terms but not a no-op segment; " +
+                    "_soft_deletes [" + softDeletesDV + "], _tombstone [" + tombstoneDV + "]");
+            }
+            termsEnum = null;
+        } else {
+            termsEnum = terms.iterator();
         }
-        termsEnum = terms.iterator();
         if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) {
-            throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field");
+            throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field; _uid terms [" + terms + "]");
         }
-
         Object readerKey = null;
         assert (readerKey = reader.getCoreCacheHelper().getKey()) != null;
         this.readerKey = readerKey;
@@ -111,7 +119,8 @@ public DocIdAndVersion lookupVersion(BytesRef id, LeafReaderContext context)
      * {@link DocIdSetIterator#NO_MORE_DOCS} is returned if not found
      * */
     private int getDocID(BytesRef id, Bits liveDocs) throws IOException {
-        if (termsEnum.seekExact(id)) {
+        // termsEnum can possibly be null here if this leaf contains only no-ops.
+        if (termsEnum != null && termsEnum.seekExact(id)) {
             int docID = DocIdSetIterator.NO_MORE_DOCS;
             // there may be more than one matching docID, in the case of nested docs, so we want the last one:
             docsEnum = termsEnum.postings(docsEnum, 0);

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 3 additions & 0 deletions
@@ -1347,6 +1347,9 @@ private NoOpResult innerNoOp(final NoOp noOp) throws IOException {
             try {
                 final ParsedDocument tombstone = engineConfig.getTombstoneDocSupplier().newNoopTombstoneDoc();
                 tombstone.updateSeqID(noOp.seqNo(), noOp.primaryTerm());
+                // A noop tombstone does not require a _version but it's added to have a fully dense docvalues for the version field.
+                // 1L is selected to optimize the compression because it might probably be the most common value in version field.
+                tombstone.version().setLongValue(1L);
                 assert tombstone.docs().size() == 1 : "Tombstone should have a single doc [" + tombstone + "]";
                 final ParseContext.Document doc = tombstone.docs().get(0);
                 assert doc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null

server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java

Lines changed: 2 additions & 2 deletions
@@ -178,8 +178,8 @@ public DocumentMapper(MapperService mapperService, Mapping mapping) {
             TypeFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME);
         this.deleteTombstoneMetadataFieldMappers = Stream.of(mapping.metadataMappers)
             .filter(field -> deleteTombstoneMetadataFields.contains(field.name())).toArray(MetadataFieldMapper[]::new);
-        final Collection<String> noopTombstoneMetadataFields =
-            Arrays.asList(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME);
+        final Collection<String> noopTombstoneMetadataFields = Arrays.asList(
+            VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME);
         this.noopTombstoneMetadataFieldMappers = Stream.of(mapping.metadataMappers)
             .filter(field -> noopTombstoneMetadataFields.contains(field.name())).toArray(MetadataFieldMapper[]::new);
     }

server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

Lines changed: 55 additions & 0 deletions
@@ -3781,6 +3781,61 @@ protected long doGenerateSeqNoForOperation(Operation operation) {
         }
     }
 
+    /**
+     * Verifies that a segment containing only no-ops can be used to look up _version and _seqno.
+     */
+    public void testSegmentContainsOnlyNoOps() throws Exception {
+        Engine.NoOpResult noOpResult = engine.noOp(new Engine.NoOp(1, primaryTerm.get(),
+            randomFrom(Engine.Operation.Origin.values()), randomNonNegativeLong(), "test"));
+        assertThat(noOpResult.getFailure(), nullValue());
+        engine.refresh("test");
+        Engine.DeleteResult deleteResult = engine.delete(replicaDeleteForDoc("id", 1, 2, randomNonNegativeLong()));
+        assertThat(deleteResult.getFailure(), nullValue());
+        engine.refresh("test");
+    }
+
+    /**
+     * A simple test to check that random combination of operations can coexist in segments and be lookup.
+     * This is needed as some fields in Lucene may not exist if a segment misses operation types and this code is to check for that.
+     * For example, a segment containing only no-ops does not have neither _uid or _version.
+     */
+    public void testRandomOperations() throws Exception {
+        int numOps = between(10, 100);
+        for (int i = 0; i < numOps; i++) {
+            String id = Integer.toString(randomIntBetween(1, 10));
+            ParsedDocument doc = createParsedDoc(id, null);
+            Engine.Operation.TYPE type = randomFrom(Engine.Operation.TYPE.values());
+            switch (type) {
+                case INDEX:
+                    Engine.IndexResult index = engine.index(replicaIndexForDoc(doc, between(1, 100), i, randomBoolean()));
+                    assertThat(index.getFailure(), nullValue());
+                    break;
+                case DELETE:
+                    Engine.DeleteResult delete = engine.delete(replicaDeleteForDoc(doc.id(), between(1, 100), i, randomNonNegativeLong()));
+                    assertThat(delete.getFailure(), nullValue());
+                    break;
+                case NO_OP:
+                    Engine.NoOpResult noOp = engine.noOp(new Engine.NoOp(i, primaryTerm.get(),
+                        randomFrom(Engine.Operation.Origin.values()), randomNonNegativeLong(), ""));
+                    assertThat(noOp.getFailure(), nullValue());
+                    break;
+                default:
+                    throw new IllegalStateException("Invalid op [" + type + "]");
+            }
+            if (randomBoolean()) {
+                engine.refresh("test");
+            }
+            if (randomBoolean()) {
+                engine.flush();
+            }
+            if (randomBoolean()) {
+                engine.forceMerge(randomBoolean(), between(1, 10), randomBoolean(), false, false);
+            }
+        }
+        List<Translog.Operation> operations = readAllOperationsInLucene(engine, createMapperService("test"));
+        assertThat(operations, hasSize(numOps));
+    }
+
     public void testMinGenerationForSeqNo() throws IOException, BrokenBarrierException, InterruptedException {
         engine.close();
         final int numberOfTriplets = randomIntBetween(1, 32);

server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 2 additions & 2 deletions
@@ -3113,8 +3113,8 @@ public void testSupplyTombstoneDoc() throws Exception {
         assertThat(noopTombstone.docs(), hasSize(1));
         ParseContext.Document noopDoc = noopTombstone.docs().get(0);
         assertThat(noopDoc.getFields().stream().map(IndexableField::name).collect(Collectors.toList()),
-            containsInAnyOrder(SeqNoFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME,
-                SeqNoFieldMapper.TOMBSTONE_NAME));
+            containsInAnyOrder(VersionFieldMapper.NAME, SeqNoFieldMapper.TOMBSTONE_NAME,
+                SeqNoFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME));
         assertThat(noopDoc.getField(SeqNoFieldMapper.TOMBSTONE_NAME).numericValue().longValue(), equalTo(1L));
 
         closeShards(shard);

test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java

Lines changed: 6 additions & 2 deletions
@@ -315,7 +315,10 @@ public ParsedDocument newNoopTombstoneDoc() {
                 doc.add(seqID.primaryTerm);
                 seqID.tombstoneField.setLongValue(1);
                 doc.add(seqID.tombstoneField);
-                return new ParsedDocument(null, seqID, null, null, null, Collections.singletonList(doc), null, XContentType.JSON, null);
+                Field versionField = new NumericDocValuesField(VersionFieldMapper.NAME, 0);
+                doc.add(versionField);
+                return new ParsedDocument(versionField, seqID, null, null, null,
+                    Collections.singletonList(doc), null, XContentType.JSON, null);
             }
         };
     }
@@ -734,6 +737,7 @@ private static Translog.Operation readOperationInLucene(IndexSearcher searcher,
         final int segmentDocID = docID - leaves.get(leafIndex).docBase;
         final long seqNo = readNumericDV(leaves.get(leafIndex), SeqNoFieldMapper.NAME, segmentDocID);
         final long primaryTerm = readNumericDV(leaves.get(leafIndex), SeqNoFieldMapper.PRIMARY_TERM_NAME, segmentDocID);
+        final long version = readNumericDV(leaves.get(leafIndex), VersionFieldMapper.NAME, segmentDocID);
         final FieldsVisitor fields = new FieldsVisitor(true);
         searcher.doc(docID, fields);
         fields.postProcess(mapper);
@@ -743,11 +747,11 @@
             op = new Translog.NoOp(seqNo, primaryTerm, "");
             assert readNumericDV(leaves.get(leafIndex), Lucene.SOFT_DELETE_FIELD, segmentDocID) == 1
                 : "Noop operation but soft_deletes field is not set";
+            assert version == 1 : "Noop tombstone should have version 1L; actual version [" + version + "]";
         } else {
             final String id = fields.uid().id();
             final String type = fields.uid().type();
             final Term uid = new Term(IdFieldMapper.NAME, Uid.encodeId(id));
-            final long version = readNumericDV(leaves.get(leafIndex), VersionFieldMapper.NAME, segmentDocID);
             if (isTombstone) {
                 op = new Translog.Delete(type, id, uid, seqNo, primaryTerm, version, VersionType.INTERNAL);
                 assert readNumericDV(leaves.get(leafIndex), Lucene.SOFT_DELETE_FIELD, segmentDocID) == 1
