Skip to content

Commit

Permalink
Iterate over Elasticsearch documents in index order
Browse files Browse the repository at this point in the history
This can improve performance of scroll queries.

Cherry-pick of trinodb/trino@7f4e39d

Co-authored-by: Martin Traverso <mtraverso@gmail.com>
  • Loading branch information
zhenxiao committed Jun 10, 2020
1 parent 4c7acd9 commit 4d436fd
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -483,12 +483,14 @@ else if (value.has("properties")) {
return new IndexMetadata.ObjectType(result.build());
}

public SearchResponse beginSearch(String index, int shard, QueryBuilder query, Optional<List<String>> fields, List<String> documentFields)
public SearchResponse beginSearch(String index, int shard, QueryBuilder query, Optional<List<String>> fields, List<String> documentFields, Optional<String> sort)
{
SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource()
.query(query)
.size(scrollSize);

sort.ifPresent(sourceBuilder::sort);

fields.ifPresent(values -> {
if (values.isEmpty()) {
sourceBuilder.fetchSource(false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,23 @@ public ElasticsearchPageSource(
.filter(name -> !BuiltinColumns.NAMES.contains(name))
.collect(toList());

// sorting by _doc (index order) gets special treatment in Elasticsearch and is more efficient
Optional<String> sort = Optional.of("_doc");

if (table.getQuery().isPresent()) {
// However, if we're using a custom Elasticsearch query, use default sorting.
// Documents will be scored and returned based on relevance
sort = Optional.empty();
}

long start = System.nanoTime();
SearchResponse searchResponse = client.beginSearch(
split.getIndex(),
split.getShard(),
buildSearchQuery(session, split.getTupleDomain().transform(ElasticsearchColumnHandle.class::cast), table.getQuery()),
needAllFields ? Optional.empty() : Optional.of(requiredFields),
documentFields);
documentFields,
sort);
readTimeNanos += System.nanoTime() - start;
this.iterator = new SearchHitIterator(client, () -> searchResponse);
}
Expand Down

0 comments on commit 4d436fd

Please sign in to comment.