Skip to content

Fix invalid break iterator highlighting on keyword field #49566

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();

List<Snippet> snippets = new ArrayList<>();
int numberOfFragments;
int numberOfFragments = field.fieldOptions().numberOfFragments();
try {

final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(), hitContext);
Expand All @@ -89,14 +89,16 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
"This maximum can be set by changing the [" + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() +
"] index level setting. " + "For large texts, indexing with offsets or term vectors is recommended!");
}
if (field.fieldOptions().numberOfFragments() == 0) {
if (numberOfFragments == 0
// non-tokenized fields should not use any break iterator (ignore boundaryScannerType)
|| fieldType.tokenized() == false) {
// we use a control char to separate values, which is the only char that the custom break iterator
// breaks the text on, so we don't lose the distinction between the different values of a field and we
// get back a snippet per value
CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource, passageFormatter,
field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue, field.fieldOptions().noMatchSize());
numberOfFragments = fieldValues.size(); // we are highlighting the whole content, one snippet per value
numberOfFragments = numberOfFragments == 0 ? fieldValues.size() : numberOfFragments;
} else {
//using paragraph separator we make sure that each field value holds a discrete passage for highlighting
BreakIterator bi = getBreakIterator(field);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,35 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockAnalysisPlugin.class);
}

/**
 * Checks highlighting of a multi-valued {@code keyword} field under every {@link BoundaryScannerType}.
 *
 * <p>Creates index {@code test} with a {@code keyword} field named {@code tags}, indexes two documents
 * whose {@code tags} arrays mix the values {@code "foo bar"} and {@code "foo baz"}, and then runs a
 * match query for {@code "foo bar"} requesting two fragments. For each boundary scanner type the
 * highlighted fragments must equal the whole value {@code <em>foo bar</em>}; the same assertions are
 * applied regardless of the scanner that was requested.
 *
 * @throws IOException if building the mapping or document source fails
 */
public void testHighlightingWithKeywordIgnoreBoundaryScanner() throws IOException {
    // Build the whole mapping in one fluent chain: { "type": { "properties": { "tags": { "type": "keyword" } } } }
    XContentBuilder mapping = jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("tags")
                        .field("type", "keyword")
                    .endObject()
                .endObject()
            .endObject()
        .endObject();
    assertAcked(prepareCreate("test").addMapping("type", mapping));

    // Document 1 carries "foo bar" three times, document 2 carries it once.
    client().prepareIndex("test")
        .setId("1")
        .setSource(jsonBuilder().startObject().array("tags", "foo bar", "foo bar", "foo bar", "foo baz").endObject())
        .get();
    client().prepareIndex("test")
        .setId("2")
        .setSource(jsonBuilder().startObject().array("tags", "foo baz", "foo baz", "foo baz", "foo bar").endObject())
        .get();
    refresh();

    final String expectedFragment = "<em>foo bar</em>";
    for (BoundaryScannerType scannerType : BoundaryScannerType.values()) {
        HighlightBuilder highlightBuilder = new HighlightBuilder()
            .field(new Field("tags"))
            .numOfFragments(2)
            .boundaryScannerType(scannerType);
        SearchResponse response = client().prepareSearch()
            .setQuery(matchQuery("tags", "foo bar"))
            .highlighter(highlightBuilder)
            .get();

        // NOTE(review): the int arguments are presumably (hit index, fragment index, total fragments) - confirm
        // against the assertHighlight helper.
        assertHighlight(response, 0, "tags", 0, 2, equalTo(expectedFragment));
        assertHighlight(response, 0, "tags", 1, 2, equalTo(expectedFragment));
        assertHighlight(response, 1, "tags", 0, 1, equalTo(expectedFragment));
    }
}

public void testHighlightingWithStoredKeyword() throws IOException {
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
Expand Down