Skip to content

Commit a69cdd0

Browse files
authored
FastVectorHighlighter should use ValueFetchers to load source data (#85815)
FVH was relying on `SourceLookup.extractRawValues()` to load its data, but this no longer works for multifields. It should instead use value fetchers which will correctly locate the input for multifields and/or copy fields. Fixes #84690 Fixes #82458 Fixes #80895 Fixes #75011
1 parent cce3d92 commit a69cdd0

File tree

7 files changed

+99
-14
lines changed

7 files changed

+99
-14
lines changed

docs/changelog/85815.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
pr: 85815
2+
summary: '`FastVectorHighlighter` should use `ValueFetchers` to load source data'
3+
area: Highlighting
4+
type: bug
5+
issues:
6+
- 75011
7+
- 84690
8+
- 82458
9+
- 80895

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.common.util.CollectionUtils;
2727
import org.elasticsearch.index.mapper.MappedFieldType;
2828
import org.elasticsearch.index.mapper.TextSearchInfo;
29+
import org.elasticsearch.index.query.SearchExecutionContext;
2930
import org.elasticsearch.lucene.search.vectorhighlight.CustomFieldQuery;
3031
import org.elasticsearch.search.fetch.FetchSubPhase;
3132
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
@@ -98,6 +99,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
9899
Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier = fragmentsBuilderSupplier(
99100
field,
100101
fieldType,
102+
fieldContext.context.getSearchExecutionContext(),
101103
forceSource,
102104
fixBrokenAnalysis
103105
);
@@ -217,6 +219,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
217219
private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
218220
SearchHighlightContext.Field field,
219221
MappedFieldType fieldType,
222+
SearchExecutionContext context,
220223
boolean forceSource,
221224
boolean fixBrokenAnalysis
222225
) {
@@ -239,6 +242,7 @@ private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
239242
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
240243
supplier = lookup -> new SourceScoreOrderFragmentsBuilder(
241244
fieldType,
245+
context,
242246
fixBrokenAnalysis,
243247
lookup,
244248
options.preTags(),
@@ -248,6 +252,7 @@ private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
248252
} else {
249253
supplier = lookup -> new SourceSimpleFragmentsBuilder(
250254
fieldType,
255+
context,
251256
fixBrokenAnalysis,
252257
lookup,
253258
options.preTags(),

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceScoreOrderFragmentsBuilder.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,24 @@
1515
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
1616
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
1717
import org.elasticsearch.index.mapper.MappedFieldType;
18+
import org.elasticsearch.index.mapper.ValueFetcher;
19+
import org.elasticsearch.index.query.SearchExecutionContext;
1820
import org.elasticsearch.search.lookup.SourceLookup;
1921

2022
import java.io.IOException;
23+
import java.util.ArrayList;
2124
import java.util.List;
2225

2326
public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {
2427

2528
private final MappedFieldType fieldType;
2629
private final SourceLookup sourceLookup;
30+
private final ValueFetcher valueFetcher;
2731
private final boolean fixBrokenAnalysis;
2832

2933
public SourceScoreOrderFragmentsBuilder(
3034
MappedFieldType fieldType,
35+
SearchExecutionContext context,
3136
boolean fixBrokenAnalysis,
3237
SourceLookup sourceLookup,
3338
String[] preTags,
@@ -37,13 +42,14 @@ public SourceScoreOrderFragmentsBuilder(
3742
super(preTags, postTags, boundaryScanner);
3843
this.fieldType = fieldType;
3944
this.sourceLookup = sourceLookup;
45+
this.valueFetcher = fieldType.valueFetcher(context, null);
4046
this.fixBrokenAnalysis = fixBrokenAnalysis;
4147
}
4248

4349
@Override
4450
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
4551
// we know its low level reader, and matching docId, since that's how we call the highlighter with
46-
List<Object> values = sourceLookup.extractRawValues(fieldType.name());
52+
List<Object> values = valueFetcher.fetchValues(sourceLookup, new ArrayList<>());
4753
Field[] fields = new Field[values.size()];
4854
for (int i = 0; i < values.size(); i++) {
4955
fields[i] = new Field(fieldType.name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceSimpleFragmentsBuilder.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,22 @@
1212
import org.apache.lucene.index.IndexReader;
1313
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
1414
import org.elasticsearch.index.mapper.MappedFieldType;
15+
import org.elasticsearch.index.mapper.ValueFetcher;
16+
import org.elasticsearch.index.query.SearchExecutionContext;
1517
import org.elasticsearch.search.lookup.SourceLookup;
1618

1719
import java.io.IOException;
20+
import java.util.ArrayList;
1821
import java.util.List;
1922

2023
public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {
2124

2225
private final SourceLookup sourceLookup;
26+
private final ValueFetcher valueFetcher;
2327

2428
public SourceSimpleFragmentsBuilder(
2529
MappedFieldType fieldType,
30+
SearchExecutionContext context,
2631
boolean fixBrokenAnalysis,
2732
SourceLookup sourceLookup,
2833
String[] preTags,
@@ -31,14 +36,15 @@ public SourceSimpleFragmentsBuilder(
3136
) {
3237
super(fieldType, fixBrokenAnalysis, preTags, postTags, boundaryScanner);
3338
this.sourceLookup = sourceLookup;
39+
this.valueFetcher = fieldType.valueFetcher(context, null);
3440
}
3541

3642
public static final Field[] EMPTY_FIELDS = new Field[0];
3743

3844
@Override
3945
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
4046
// we know its low level reader, and matching docId, since that's how we call the highlighter with
41-
List<Object> values = sourceLookup.extractRawValues(fieldType.name());
47+
List<Object> values = valueFetcher.fetchValues(sourceLookup, new ArrayList<>());
4248
if (values.isEmpty()) {
4349
return EMPTY_FIELDS;
4450
}

server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -159,19 +159,11 @@ public boolean hasSourceAsMap() {
159159
return source != null;
160160
}
161161

162-
/**
163-
* Returns the values associated with the path. Those are "low" level values, and it can
164-
* handle path expression where an array/list is navigated within.
165-
*/
166-
public List<Object> extractRawValues(String path) {
167-
return XContentMapValues.extractRawValues(path, source());
168-
}
169-
170162
/**
171163
* Returns the values associated with the path. Those are "low" level values, and it can
172164
* handle path expression where an array/list is navigated within.
173165
*
174-
* The major difference with {@link SourceLookup#extractRawValues(String)} is that this version will:
166+
* This method will:
175167
*
176168
* - not cache source if it's not already parsed
177169
* - will only extract the desired values from the compressed source instead of deserializing the whole object
@@ -204,8 +196,7 @@ public List<Object> extractRawValuesWithoutCaching(String path) {
204196
/**
205197
* For the provided path, return its value in the source.
206198
*
207-
* Note that in contrast with {@link SourceLookup#extractRawValues}, array and object values
208-
* can be returned.
199+
* Both array and object values can be returned.
209200
*
210201
* @param path the value's path in the source.
211202
* @param nullValue a value to return if the path exists, but the value is 'null'. This helps
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.search.fetch.subphase.highlight;
10+
11+
import org.elasticsearch.index.mapper.MapperService;
12+
import org.elasticsearch.index.mapper.ParsedDocument;
13+
import org.elasticsearch.index.query.QueryBuilders;
14+
import org.elasticsearch.search.builder.SearchSourceBuilder;
15+
import org.elasticsearch.search.fetch.HighlighterTestCase;
16+
17+
import java.io.IOException;
18+
19+
public class FastVectorHighlighterTests extends HighlighterTestCase {
20+
21+
public void testHighlightingMultiFields() throws IOException {
22+
23+
MapperService mapperService = createMapperService("""
24+
{ "_doc" : { "properties" : {
25+
"field" : {
26+
"type" : "text",
27+
"fields" : {
28+
"stemmed" : {
29+
"type" : "text",
30+
"term_vector" : "with_positions_offsets"
31+
}
32+
}
33+
}
34+
}}}
35+
""");
36+
37+
ParsedDocument doc = mapperService.documentMapper().parse(source("""
38+
{ "field" : "here is some text, which is followed by some more text" }
39+
"""));
40+
41+
{
42+
// test SimpleFragmentsBuilder case
43+
SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("field.stemmed", "some"))
44+
.highlighter(new HighlightBuilder().field("field.stemmed").highlighterType("fvh"));
45+
46+
assertHighlights(
47+
highlight(mapperService, doc, search),
48+
"field.stemmed",
49+
"here is <em>some</em> text, which is followed by <em>some</em> more text"
50+
);
51+
}
52+
53+
{
54+
// test ScoreOrderFragmentsBuilder case
55+
SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("field.stemmed", "some"))
56+
.highlighter(new HighlightBuilder().field("field.stemmed").highlighterType("fvh").numOfFragments(2).fragmentSize(18));
57+
58+
assertHighlights(
59+
highlight(mapperService, doc, search),
60+
"field.stemmed",
61+
"here is <em>some</em> text, which",
62+
"followed by <em>some</em> more text"
63+
);
64+
}
65+
66+
}
67+
68+
}

test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ protected final Map<String, HighlightField> highlight(MapperService mapperServic
7171
* Given a set of highlights, assert that any particular field has the expected fragments
7272
*/
7373
protected static void assertHighlights(Map<String, HighlightField> highlights, String field, String... fragments) {
74-
assertNotNull(highlights.get(field));
74+
assertNotNull("No highlights reported for field [" + field + "]", highlights.get(field));
7575
Set<String> actualFragments = Arrays.stream(highlights.get(field).getFragments()).map(Text::toString).collect(Collectors.toSet());
7676
Set<String> expectedFragments = Set.of(fragments);
7777
assertEquals(expectedFragments, actualFragments);

0 commit comments

Comments
 (0)