
FastVectorHighlighter should use ValueFetchers to load source data #87445

Merged: 2 commits merged on Jun 8, 2022
9 changes: 9 additions & 0 deletions docs/changelog/85815.yaml
@@ -0,0 +1,9 @@
pr: 85815
summary: '`FastVectorHighlighter` should use `ValueFetchers` to load source data'
area: Highlighting
type: bug
issues:
- 75011
- 84690
- 82458
- 80895
9 changes: 9 additions & 0 deletions docs/changelog/87445.yaml
@@ -0,0 +1,9 @@
pr: 87445
summary: '`FastVectorHighlighter` should use `ValueFetchers` to load source data'
area: Highlighting
type: bug
issues:
- 75011
- 84690
- 82458
- 80895
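
For context on the change itself: the source-backed fragments builders used to pull highlight text straight out of `_source` by path, which yields nothing for sub-fields such as `field.stemmed`, whereas they now go through the field type's `ValueFetcher`. Below is a minimal sketch of the two loading strategies, assuming a `SearchExecutionContext`, a `SourceLookup` already positioned on the current hit, and the highlighted field's `MappedFieldType`; the helper class and method names are illustrative and not part of the PR.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.lookup.SourceLookup;

class HighlightValueLoadingSketch {

    // Old approach: raw extraction by path. A sub-field like "field.stemmed" has no
    // "field.stemmed" key in _source, so this returns an empty list and the fvh
    // highlighter produced no fragments for it.
    static List<Object> oldWay(MappedFieldType fieldType, SourceLookup lookup) {
        return lookup.extractRawValues(fieldType.name());
    }

    // New approach: the field type's ValueFetcher resolves values through the same
    // mechanism the fields API uses, so a sub-field maps back to its parent's value
    // in _source.
    static List<Object> newWay(MappedFieldType fieldType, SearchExecutionContext context, SourceLookup lookup) throws IOException {
        ValueFetcher fetcher = fieldType.valueFetcher(context, null);
        return fetcher.fetchValues(lookup, new ArrayList<>());
    }
}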
@@ -27,6 +27,7 @@
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions;
@@ -98,6 +99,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier = fragmentsBuilderSupplier(
field,
fieldType,
fieldContext.context.getSearchExecutionContext(),
forceSource,
fixBrokenAnalysis
);
@@ -217,6 +219,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
SearchHighlightContext.Field field,
MappedFieldType fieldType,
SearchExecutionContext context,
boolean forceSource,
boolean fixBrokenAnalysis
) {
@@ -239,6 +242,7 @@ private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
supplier = lookup -> new SourceScoreOrderFragmentsBuilder(
fieldType,
context,
fixBrokenAnalysis,
lookup,
options.preTags(),
@@ -248,6 +252,7 @@ private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
} else {
supplier = lookup -> new SourceSimpleFragmentsBuilder(
fieldType,
context,
fixBrokenAnalysis,
lookup,
options.preTags(),
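
The changes in this file are plumbing: the `SearchExecutionContext` taken from `fieldContext.context.getSearchExecutionContext()` is threaded into `fragmentsBuilderSupplier` so that both source-backed fragments builders can create a `ValueFetcher`. A hedged sketch of the resulting wiring for the simple builder follows; the tag arrays and boundary scanner are placeholder parameters, since the diff does not show how the highlighter computes them.

import java.util.function.Function;

import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.fetch.subphase.highlight.SourceSimpleFragmentsBuilder;
import org.elasticsearch.search.lookup.SourceLookup;

class FragmentsBuilderWiringSketch {

    static Function<SourceLookup, FragmentsBuilder> simpleSupplier(
        MappedFieldType fieldType,
        SearchExecutionContext context,   // new: carried down so the builder can call fieldType.valueFetcher(context, null)
        boolean fixBrokenAnalysis,
        String[] preTags,
        String[] postTags,
        BoundaryScanner boundaryScanner
    ) {
        // The builder is created lazily per SourceLookup, as in the supplier lambdas
        // above, with the context passed through as the second argument.
        return lookup -> new SourceSimpleFragmentsBuilder(
            fieldType,
            context,
            fixBrokenAnalysis,
            lookup,
            preTags,
            postTags,
            boundaryScanner
        );
    }
}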
@@ -15,19 +15,24 @@
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {

private final MappedFieldType fieldType;
private final SourceLookup sourceLookup;
private final ValueFetcher valueFetcher;
private final boolean fixBrokenAnalysis;

public SourceScoreOrderFragmentsBuilder(
MappedFieldType fieldType,
SearchExecutionContext context,
boolean fixBrokenAnalysis,
SourceLookup sourceLookup,
String[] preTags,
@@ -37,13 +42,14 @@ public SourceScoreOrderFragmentsBuilder(
super(preTags, postTags, boundaryScanner);
this.fieldType = fieldType;
this.sourceLookup = sourceLookup;
this.valueFetcher = fieldType.valueFetcher(context, null);
this.fixBrokenAnalysis = fixBrokenAnalysis;
}

@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
// we know it's a low-level reader with a matching docId, since that's how we call the highlighter
List<Object> values = sourceLookup.extractRawValues(fieldType.name());
List<Object> values = valueFetcher.fetchValues(sourceLookup, new ArrayList<>());
Field[] fields = new Field[values.size()];
for (int i = 0; i < values.size(); i++) {
fields[i] = new Field(fieldType.name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
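
The hunk above ends mid-method; a hedged reconstruction of the full per-document flow in `getFields` after this change is sketched below (the trailing return is assumed, the rest comes from the diff). The `ValueFetcher` is built once in the constructor and reused for every document, and each fetched value is wrapped in a non-stored Lucene text field for the highlighter to fragment.

// Sketch of the complete method inside SourceScoreOrderFragmentsBuilder:
@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
    // The SourceLookup is already positioned on docId by the fetch phase, so the
    // fetcher created in the constructor reads this hit's values directly.
    List<Object> values = valueFetcher.fetchValues(sourceLookup, new ArrayList<>());
    Field[] fields = new Field[values.size()];
    for (int i = 0; i < values.size(); i++) {
        // Non-stored text fields are enough: the FVH only needs the text to build fragments.
        fields[i] = new Field(fieldType.name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
    }
    return fields; // assumed ending; the diff is cut off before this line
}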
@@ -12,17 +12,22 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {

private final SourceLookup sourceLookup;
private final ValueFetcher valueFetcher;

public SourceSimpleFragmentsBuilder(
MappedFieldType fieldType,
SearchExecutionContext context,
boolean fixBrokenAnalysis,
SourceLookup sourceLookup,
String[] preTags,
@@ -31,14 +36,15 @@ public SourceSimpleFragmentsBuilder(
) {
super(fieldType, fixBrokenAnalysis, preTags, postTags, boundaryScanner);
this.sourceLookup = sourceLookup;
this.valueFetcher = fieldType.valueFetcher(context, null);
}

public static final Field[] EMPTY_FIELDS = new Field[0];

@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
// we know it's a low-level reader with a matching docId, since that's how we call the highlighter
List<Object> values = sourceLookup.extractRawValues(fieldType.name());
List<Object> values = valueFetcher.fetchValues(sourceLookup, new ArrayList<>());
if (values.isEmpty()) {
return EMPTY_FIELDS;
}
@@ -172,8 +172,7 @@ public List<Object> extractRawValues(String path) {
/**
* For the provided path, return its value in the source.
*
* Note that in contrast with {@link SourceLookup#extractRawValues}, array and object values
* can be returned.
* Both array and object values can be returned.
*
* @param path the value's path in the source.
* @param nullValue a value to return if the path exists, but the value is 'null'. This helps
@@ -0,0 +1,63 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.fetch.subphase.highlight;

import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.HighlighterTestCase;

import java.io.IOException;

public class FastVectorHighlighterTests extends HighlighterTestCase {

public void testHighlightingMultiFields() throws IOException {

MapperService mapperService = createMapperService(fieldMapping(b -> {
b.field("type", "text");
b.startObject("fields");
b.startObject("stemmed");
b.field("type", "text");
b.field("term_vector", "with_positions_offsets");
b.endObject();
b.endObject();
}));

ParsedDocument doc = mapperService.documentMapper()
.parse(source(b -> b.field("field", "here is some text, which is followed by some more text")));

{
// test SimpleFragmentsBuilder case
SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("field.stemmed", "some"))
.highlighter(new HighlightBuilder().field("field.stemmed").highlighterType("fvh"));

assertHighlights(
highlight(mapperService, doc, search),
"field.stemmed",
"here is <em>some</em> text, which is followed by <em>some</em> more text"
);
}

{
// test ScoreOrderFragmentsBuilder case
SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("field.stemmed", "some"))
.highlighter(new HighlightBuilder().field("field.stemmed").highlighterType("fvh").numOfFragments(2).fragmentSize(18));

assertHighlights(
highlight(mapperService, doc, search),
"field.stemmed",
"here is <em>some</em> text, which",
"followed by <em>some</em> more text"
);
}

}

}
@@ -73,7 +73,7 @@ protected final Map<String, HighlightField> highlight(MapperService mapperServic
* Given a set of highlights, assert that any particular field has the expected fragments
*/
protected static void assertHighlights(Map<String, HighlightField> highlights, String field, String... fragments) {
assertNotNull(highlights.get(field));
assertNotNull("No highlights reported for field [" + field + "]", highlights.get(field));
Set<String> actualFragments = Arrays.stream(highlights.get(field).getFragments()).map(Text::toString).collect(Collectors.toSet());
Set<String> expectedFragments = new HashSet<>(Arrays.asList(fragments));
assertEquals(expectedFragments, actualFragments);