Skip to content

Commit ee0394d

Browse files
authored
Sort field tiebreaker for PIT (point in time) readers (#66093) (#66588)
This commit introduces a new sort field called `_shard_doc` that can be used in conjunction with a PIT to consistently tiebreak identical sort values. The sort value is a numeric long that is composed of the ordinal of the shard (assigned by the coordinating node) and the internal Lucene document ID. These two values are consistent within a PIT so this sort criteria can be used as the tiebreaker of any search requests. Since this sort criteria is stable we'd like to add it automatically to any sorted search requests that use a PIT but we also need to expose it explicitly in order to be able to: * Reverse the order of the tiebreaking, useful to search "before" `search_after`. * Force the primary sort to use it in order to benefit from the `search_after` optimization when sorting by index order (to be released in Lucene 8.8. I plan to add the documentation and the automatic configuration for PIT in a follow up since this change is already big. Relates #56828
1 parent 0324892 commit ee0394d

File tree

53 files changed

+491
-105
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+491
-105
lines changed

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/DisableGraphQueryTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ public void setup() {
8585
indexService = createIndex("test", settings, "t",
8686
"text_shingle", "type=text,analyzer=text_shingle",
8787
"text_shingle_unigram", "type=text,analyzer=text_shingle_unigram");
88-
shardContext = indexService.newQueryShardContext(0, null, () -> 0L, null, emptyMap());
88+
shardContext = indexService.newQueryShardContext(0, 0, null, () -> 0L, null, emptyMap());
8989

9090
// parsed queries for "text_shingle_unigram:(foo bar baz)" with query parsers
9191
// that ignores position length attribute

modules/lang-painless/src/main/java/org/elasticsearch/painless/action/PainlessExecuteAction.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,8 +577,14 @@ private static Response prepareRamIndex(Request request,
577577
final IndexSearcher searcher = new IndexSearcher(indexReader);
578578
searcher.setQueryCache(null);
579579
final long absoluteStartMillis = System.currentTimeMillis();
580-
QueryShardContext context =
581-
indexService.newQueryShardContext(0, searcher, () -> absoluteStartMillis, null, emptyMap());
580+
QueryShardContext context = indexService.newQueryShardContext(
581+
0,
582+
0,
583+
searcher,
584+
() -> absoluteStartMillis,
585+
null,
586+
emptyMap()
587+
);
582588
return handler.apply(context, indexReader.leaves().get(0));
583589
}
584590
}

modules/lang-painless/src/test/java/org/elasticsearch/painless/NeedsScoreTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public void testNeedsScores() {
4747
contexts.put(NumberSortScript.CONTEXT, Whitelist.BASE_WHITELISTS);
4848
PainlessScriptEngine service = new PainlessScriptEngine(Settings.EMPTY, contexts);
4949

50-
QueryShardContext shardContext = index.newQueryShardContext(0, null, () -> 0, null, emptyMap());
50+
QueryShardContext shardContext = index.newQueryShardContext(0, 0, null, () -> 0, null, emptyMap());
5151

5252
NumberSortScript.Factory factory = service.compile(null, "1.2", NumberSortScript.CONTEXT, Collections.emptyMap());
5353
NumberSortScript.LeafFactory ss = factory.newFactory(Collections.emptyMap(), shardContext.lookup());

modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,7 @@ public void testQueryWithRewrite() throws Exception {
534534
XContentType.JSON));
535535
BytesRef qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue();
536536
QueryShardContext shardContext = indexService.newQueryShardContext(
537-
randomInt(20), null, () -> {
537+
randomInt(20), 0, null, () -> {
538538
throw new UnsupportedOperationException();
539539
}, null, emptyMap());
540540
PlainActionFuture<QueryBuilder> future = new PlainActionFuture<>();

modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ public void testRangeQueriesWithNow() throws Exception {
259259
try (Engine.Searcher searcher = indexService.getShard(0).acquireSearcher("test")) {
260260
long[] currentTime = new long[] {System.currentTimeMillis()};
261261
QueryShardContext queryShardContext =
262-
indexService.newQueryShardContext(0, searcher, () -> currentTime[0], null, emptyMap());
262+
indexService.newQueryShardContext(0, 0, searcher, () -> currentTime[0], null, emptyMap());
263263

264264
BytesReference source = BytesReference.bytes(jsonBuilder().startObject()
265265
.field("field1", "value")

rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,27 @@
228228
- match: {hits.hits.0._source.timestamp: "2019-10-21 00:30:04.828740" }
229229
- match: {hits.hits.0.sort: [1571617804828740000] }
230230

231+
232+
---
233+
"_shard_doc sort":
234+
- skip:
235+
version: " - 7.11.99"
236+
reason: _shard_doc sort was added in 7.12
237+
238+
- do:
239+
indices.create:
240+
index: test
241+
- do:
242+
index:
243+
index: test
244+
id: 1
245+
body: { id: 1, foo: bar, age: 18 }
246+
247+
- do:
248+
catch: /\[_shard_doc\] sort field cannot be used without \[point in time\]/
249+
search:
250+
index: test
251+
body:
252+
size: 1
253+
sort: ["_shard_doc"]
254+
search_after: [ 0L ]

server/src/main/java/org/elasticsearch/action/admin/indices/template/post/TransportSimulateIndexTemplateAction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ public static Template resolveTemplate(final String matchingTemplate, final Stri
182182
resolvedAliases, tempClusterState.metadata(), aliasValidator, xContentRegistry,
183183
// the context is only used for validation so it's fine to pass fake values for the
184184
// shard id and the current timestamp
185-
tempIndexService.newQueryShardContext(0, null, () -> 0L, null, emptyMap())));
185+
tempIndexService.newQueryShardContext(0, 0, null, () -> 0L, null, emptyMap())));
186186
Map<String, AliasMetadata> aliasesByName = aliases.stream().collect(
187187
Collectors.toMap(AliasMetadata::getAlias, Function.identity()));
188188

server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesIndexAction.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,8 @@ private FieldCapabilitiesIndexResponse shardOperation(final FieldCapabilitiesInd
117117
final IndexShard indexShard = indexService.getShard(request.shardId().getId());
118118
try (Engine.Searcher searcher = indexShard.acquireSearcher(Engine.CAN_MATCH_SEARCH_SOURCE)) {
119119

120-
final QueryShardContext queryShardContext = indexService.newQueryShardContext(shardId.id(), searcher,
121-
request::nowInMillis, null, Collections.emptyMap());
120+
final QueryShardContext queryShardContext = indexService.newQueryShardContext(shardId.id(), 0,
121+
searcher, request::nowInMillis, null, Collections.emptyMap());
122122

123123
if (canMatchShard(request, queryShardContext) == false) {
124124
return new FieldCapabilitiesIndexResponse(request.index(), Collections.emptyMap(), false);

server/src/main/java/org/elasticsearch/action/search/SearchRequest.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
import org.elasticsearch.search.builder.PointInTimeBuilder;
3535
import org.elasticsearch.search.builder.SearchSourceBuilder;
3636
import org.elasticsearch.search.internal.SearchContext;
37+
import org.elasticsearch.search.sort.FieldSortBuilder;
38+
import org.elasticsearch.search.sort.SortBuilder;
39+
import org.elasticsearch.search.sort.ShardDocSortField;
3740
import org.elasticsearch.tasks.TaskId;
3841

3942
import java.io.IOException;
@@ -300,6 +303,14 @@ public ActionRequestValidationException validate() {
300303
if (scroll) {
301304
validationException = addValidationError("using [point in time] is not allowed in a scroll context", validationException);
302305
}
306+
} else if (source != null && source.sorts() != null) {
307+
for (SortBuilder<?> sortBuilder : source.sorts()) {
308+
if (sortBuilder instanceof FieldSortBuilder
309+
&& ShardDocSortField.NAME.equals(((FieldSortBuilder) sortBuilder).getFieldName())) {
310+
validationException = addValidationError("[" + FieldSortBuilder.SHARD_DOC_FIELD_NAME
311+
+ "] sort field cannot be used without [point in time]", validationException);
312+
}
313+
}
303314
}
304315
return validationException;
305316
}

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ private ClusterState applyCreateIndexRequestWithV1Templates(final ClusterState c
496496
MetadataIndexTemplateService.resolveAliases(templates), currentState.metadata(), aliasValidator,
497497
// the context is only used for validation so it's fine to pass fake values for the
498498
// shard id and the current timestamp
499-
xContentRegistry, indexService.newQueryShardContext(0, null, () -> 0L, null, emptyMap())),
499+
xContentRegistry, indexService.newQueryShardContext(0, 0, null, () -> 0L, null, emptyMap())),
500500
templates.stream().map(IndexTemplateMetadata::getName).collect(toList()), metadataTransformer);
501501
}
502502

@@ -529,7 +529,7 @@ private ClusterState applyCreateIndexRequestWithV2Template(final ClusterState cu
529529
MetadataIndexTemplateService.resolveAliases(currentState.metadata(), templateName), currentState.metadata(), aliasValidator,
530530
// the context is only used for validation so it's fine to pass fake values for the
531531
// shard id and the current timestamp
532-
xContentRegistry, indexService.newQueryShardContext(0, null, () -> 0L, null, emptyMap())),
532+
xContentRegistry, indexService.newQueryShardContext(0, 0, null, () -> 0L, null, emptyMap())),
533533
Collections.singletonList(templateName), metadataTransformer);
534534
}
535535

@@ -580,7 +580,7 @@ private ClusterState applyCreateIndexRequestWithExistingMetadata(final ClusterSt
580580
currentState.metadata(), aliasValidator, xContentRegistry,
581581
// the context is only used for validation so it's fine to pass fake values for the
582582
// shard id and the current timestamp
583-
indexService.newQueryShardContext(0, null, () -> 0L, null, emptyMap())),
583+
indexService.newQueryShardContext(0, 0, null, () -> 0L, null, emptyMap())),
584584
org.elasticsearch.common.collect.List.of(), metadataTransformer);
585585
}
586586

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexAliasesService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,8 @@ public ClusterState applyAliasActions(ClusterState currentState, Iterable<AliasA
149149
}
150150
// the context is only used for validation so it's fine to pass fake values for the shard id,
151151
// but the current timestamp should be set to real value as we may use `now` in a filtered alias
152-
aliasValidator.validateAliasFilter(alias, filter, indexService.newQueryShardContext(0, null,
153-
() -> System.currentTimeMillis(), null, emptyMap()), xContentRegistry);
152+
aliasValidator.validateAliasFilter(alias, filter, indexService.newQueryShardContext(0, 0,
153+
null, () -> System.currentTimeMillis(), null, emptyMap()), xContentRegistry);
154154
}
155155
};
156156
if (action.apply(newAliasValidator, metadata, index)) {

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,7 @@ private static void validateCompositeTemplate(final ClusterState state,
10931093
new AliasValidator(),
10941094
// the context is only used for validation so it's fine to pass fake values for the
10951095
// shard id and the current timestamp
1096-
xContentRegistry, tempIndexService.newQueryShardContext(0, null, () -> 0L, null, emptyMap()));
1096+
xContentRegistry, tempIndexService.newQueryShardContext(0, 0, null, () -> 0L, null, emptyMap()));
10971097

10981098
// triggers inclusion of _timestamp field and its validation:
10991099
String indexName = DataStream.BACKING_INDEX_PREFIX + temporaryIndexName;

server/src/main/java/org/elasticsearch/common/lucene/Lucene.java

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
import org.elasticsearch.index.analysis.AnalyzerScope;
9494
import org.elasticsearch.index.analysis.NamedAnalyzer;
9595
import org.elasticsearch.index.fielddata.IndexFieldData;
96+
import org.elasticsearch.search.sort.ShardDocSortField;
9697

9798
import java.io.IOException;
9899
import java.math.BigInteger;
@@ -574,29 +575,35 @@ public static void writeSortType(StreamOutput out, SortField.Type sortType) thro
574575
out.writeVInt(sortType.ordinal());
575576
}
576577

577-
public static void writeSortField(StreamOutput out, SortField sortField) throws IOException {
578+
/**
579+
* Returns the generic version of the provided {@link SortField} that
580+
* can be used to merge documents coming from different shards.
581+
*/
582+
private static SortField rewriteMergeSortField(SortField sortField) {
578583
if (sortField.getClass() == GEO_DISTANCE_SORT_TYPE_CLASS) {
579-
// for geo sorting, we replace the SortField with a SortField that assumes a double field.
580-
// this works since the SortField is only used for merging top docs
581584
SortField newSortField = new SortField(sortField.getField(), SortField.Type.DOUBLE);
582585
newSortField.setMissingValue(sortField.getMissingValue());
583-
sortField = newSortField;
586+
return newSortField;
584587
} else if (sortField.getClass() == SortedSetSortField.class) {
585-
// for multi-valued sort field, we replace the SortedSetSortField with a simple SortField.
586-
// It works because the sort field is only used to merge results from different shards.
587588
SortField newSortField = new SortField(sortField.getField(), SortField.Type.STRING, sortField.getReverse());
588589
newSortField.setMissingValue(sortField.getMissingValue());
589-
sortField = newSortField;
590+
return newSortField;
590591
} else if (sortField.getClass() == SortedNumericSortField.class) {
591-
// for multi-valued sort field, we replace the SortedSetSortField with a simple SortField.
592-
// It works because the sort field is only used to merge results from different shards.
593592
SortField newSortField = new SortField(sortField.getField(),
594593
((SortedNumericSortField) sortField).getNumericType(),
595594
sortField.getReverse());
596595
newSortField.setMissingValue(sortField.getMissingValue());
597-
sortField = newSortField;
596+
return newSortField;
597+
} else if (sortField.getClass() == ShardDocSortField.class) {
598+
SortField newSortField = new SortField(sortField.getField(), SortField.Type.LONG, sortField.getReverse());
599+
return newSortField;
600+
} else {
601+
return sortField;
598602
}
603+
}
599604

605+
public static void writeSortField(StreamOutput out, SortField sortField) throws IOException {
606+
sortField = rewriteMergeSortField(sortField);
600607
if (sortField.getClass() != SortField.class) {
601608
throw new IllegalArgumentException("Cannot serialize SortField impl [" + sortField + "]");
602609
}

server/src/main/java/org/elasticsearch/index/IndexService.java

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ public IndexService(
198198
assert indexAnalyzers != null;
199199
this.mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry, similarityService, mapperRegistry,
200200
// we parse all percolator queries as they would be parsed on shard 0
201-
() -> newQueryShardContext(0, null, System::currentTimeMillis, null, emptyMap()), idFieldDataEnabled, scriptService);
201+
() -> newQueryShardContext(0, 0, null, System::currentTimeMillis, null, emptyMap()), idFieldDataEnabled, scriptService);
202202
this.indexFieldData = new IndexFieldDataService(indexSettings, indicesFieldDataCache, circuitBreakerService, mapperService);
203203
if (indexSettings.getIndexSortConfig().hasIndexSort()) {
204204
// we delay the actual creation of the sort order for this index because the mapping has not been merged yet.
@@ -598,6 +598,7 @@ public IndexSettings getIndexSettings() {
598598
*/
599599
public QueryShardContext newQueryShardContext(
600600
int shardId,
601+
int shardRequestIndex,
601602
IndexSearcher searcher,
602603
LongSupplier nowInMillis,
603604
String clusterAlias,
@@ -606,9 +607,26 @@ public QueryShardContext newQueryShardContext(
606607
final SearchIndexNameMatcher indexNameMatcher =
607608
new SearchIndexNameMatcher(index().getName(), clusterAlias, clusterService, expressionResolver);
608609
return new QueryShardContext(
609-
shardId, indexSettings, bigArrays, indexCache.bitsetFilterCache(), indexFieldData::getForField, mapperService(),
610-
similarityService(), scriptService, xContentRegistry, namedWriteableRegistry, client, searcher, nowInMillis, clusterAlias,
611-
indexNameMatcher, allowExpensiveQueries, valuesSourceRegistry, runtimeMappings);
610+
shardId,
611+
shardRequestIndex,
612+
indexSettings,
613+
bigArrays,
614+
indexCache.bitsetFilterCache(),
615+
indexFieldData::getForField,
616+
mapperService(),
617+
similarityService(),
618+
scriptService,
619+
xContentRegistry,
620+
namedWriteableRegistry,
621+
client,
622+
searcher,
623+
nowInMillis,
624+
clusterAlias,
625+
indexNameMatcher,
626+
allowExpensiveQueries,
627+
valuesSourceRegistry,
628+
runtimeMappings
629+
);
612630
}
613631

614632
/**

server/src/main/java/org/elasticsearch/index/query/QueryShardContext.java

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ public class QueryShardContext extends QueryRewriteContext {
103103
private final BitsetFilterCache bitsetFilterCache;
104104
private final TriFunction<MappedFieldType, String, Supplier<SearchLookup>, IndexFieldData<?>> indexFieldDataService;
105105
private final int shardId;
106+
private final int shardRequestIndex;
106107
private final IndexSearcher searcher;
107108
private String[] types = Strings.EMPTY_ARRAY;
108109
private boolean cacheable = true;
@@ -132,6 +133,7 @@ public String[] getTypes() {
132133
*/
133134
public QueryShardContext(
134135
int shardId,
136+
int shardRequestIndex,
135137
IndexSettings indexSettings,
136138
BigArrays bigArrays,
137139
BitsetFilterCache bitsetFilterCache,
@@ -152,6 +154,7 @@ public QueryShardContext(
152154
) {
153155
this(
154156
shardId,
157+
shardRequestIndex,
155158
indexSettings,
156159
bigArrays,
157160
bitsetFilterCache,
@@ -176,13 +179,30 @@ public QueryShardContext(
176179
}
177180

178181
public QueryShardContext(QueryShardContext source) {
179-
this(source.shardId, source.indexSettings, source.bigArrays, source.bitsetFilterCache, source.indexFieldDataService,
180-
source.mapperService, source.similarityService, source.scriptService, source.getXContentRegistry(),
181-
source.getWriteableRegistry(), source.client, source.searcher, source.nowInMillis, source.indexNameMatcher,
182-
source.fullyQualifiedIndex, source.allowExpensiveQueries, source.valuesSourceRegistry, source.runtimeMappings);
182+
this(
183+
source.shardId,
184+
source.shardRequestIndex,
185+
source.indexSettings,
186+
source.bigArrays,
187+
source.bitsetFilterCache,
188+
source.indexFieldDataService,
189+
source.mapperService,
190+
source.similarityService,
191+
source.scriptService,
192+
source.getXContentRegistry(),
193+
source.getWriteableRegistry(),
194+
source.client, source.searcher,
195+
source.nowInMillis,
196+
source.indexNameMatcher,
197+
source.fullyQualifiedIndex,
198+
source.allowExpensiveQueries,
199+
source.valuesSourceRegistry,
200+
source.runtimeMappings
201+
);
183202
}
184203

185204
private QueryShardContext(int shardId,
205+
int shardRequestIndex,
186206
IndexSettings indexSettings,
187207
BigArrays bigArrays,
188208
BitsetFilterCache bitsetFilterCache,
@@ -202,6 +222,7 @@ private QueryShardContext(int shardId,
202222
Map<String, MappedFieldType> runtimeMappings) {
203223
super(xContentRegistry, namedWriteableRegistry, client, nowInMillis);
204224
this.shardId = shardId;
225+
this.shardRequestIndex = shardRequestIndex;
205226
this.similarityService = similarityService;
206227
this.mapperService = mapperService;
207228
this.bigArrays = bigArrays;
@@ -562,6 +583,14 @@ public int getShardId() {
562583
return shardId;
563584
}
564585

586+
/**
587+
* Returns the shard request ordinal that is used by the main search request
588+
* to reference this shard.
589+
*/
590+
public int getShardRequestIndex() {
591+
return shardRequestIndex;
592+
}
593+
565594
@Override
566595
public final long nowInMillis() {
567596
failIfFrozen();

server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ final class DefaultSearchContext extends SearchContext {
172172
this.minNodeVersion = minNodeVersion;
173173
queryShardContext = indexService.newQueryShardContext(
174174
request.shardId().id(),
175-
this.searcher,
175+
request.shardRequestIndex(),
176+
searcher,
176177
request::nowInMillis,
177178
shardTarget.getClusterAlias(),
178179
request.getRuntimeMappings()

0 commit comments

Comments
 (0)