Skip to content

Commit f0f8078

Browse files
ajleong623HUSTERGS
andauthored
Solving wildcard sorting issue for doc_values (#18568)
--------- Signed-off-by: Anthony Leong <aj.leong623@gmail.com> Co-authored-by: HUSTERGS <songge.huster@gmail.com>
1 parent 55ba84d commit f0f8078

File tree

7 files changed

+411
-2
lines changed

7 files changed

+411
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2323
- Replace centos:8 with almalinux:8 since centos docker images are deprecated ([#19154](https://github.com/opensearch-project/OpenSearch/pull/19154))
2424
- Add CompletionStage variants to IndicesAdminClient as an alternative to ActionListener ([#19161](https://github.com/opensearch-project/OpenSearch/pull/19161))
2525
- Remove cap on Java version used by forbidden APIs ([#19163](https://github.com/opensearch-project/OpenSearch/pull/19163))
26+
- Disable pruning for `doc_values` for the wildcard field mapper ([#18568](https://github.com/opensearch-project/OpenSearch/pull/18568))
2627

2728
### Fixed
2829
- Fix unnecessary refreshes on update preparation failures ([#15261](https://github.com/opensearch-project/OpenSearch/issues/15261))

rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,3 +393,23 @@ setup:
393393
terms: { my_field: [ "\\*" ] }
394394
- match: { hits.total.value: 1 }
395395
- match: { hits.hits.0._id: "9" }
396+
397+
---
398+
"sort on doc value enabled wildcard":
399+
- skip:
400+
version: " - 3.2.99"
401+
reason: "sorting on doc value enabled wildcard has bug before 3.3.0"
402+
- do:
403+
search:
404+
index: test
405+
body:
406+
query:
407+
wildcard:
408+
my_field:
409+
value: "*"
410+
sort:
411+
- my_field.doc_values:
412+
order: asc
413+
size: 4
414+
- match: { hits.total.value: 8 }
415+
- match: { hits.hits.0._id: "8" }

server/src/main/java/org/opensearch/common/lucene/Lucene.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
import org.opensearch.index.analysis.AnalyzerScope;
9393
import org.opensearch.index.analysis.NamedAnalyzer;
9494
import org.opensearch.index.fielddata.IndexFieldData;
95+
import org.opensearch.index.fielddata.plain.NonPruningSortedSetOrdinalsIndexFieldData.NonPruningSortField;
9596
import org.opensearch.search.sort.SortedWiderNumericSortField;
9697

9798
import java.io.IOException;
@@ -576,6 +577,24 @@ public static void writeSortField(StreamOutput out, SortField sortField) throws
576577
);
577578
newSortField.setMissingValue(sortField.getMissingValue());
578579
sortField = newSortField;
580+
} else if (sortField instanceof NonPruningSortField) {
581+
// There are 2 cases of how NonPruningSortField wraps around its underlying sort field.
582+
// Which are through the SortField class or SortedSetSortField class
583+
// We will serialize the sort field based on the type of underlying sort field
584+
// Here the underlying sort field is SortedSetSortField, therefore, we will follow the
585+
// logic in serializing SortedSetSortField and also unwrap the SortField case.
586+
NonPruningSortField nonPruningSortField = (NonPruningSortField) sortField;
587+
if (nonPruningSortField.getDelegate().getClass() == SortedSetSortField.class) {
588+
SortField newSortField = new SortField(
589+
nonPruningSortField.getField(),
590+
SortField.Type.STRING,
591+
nonPruningSortField.getReverse()
592+
);
593+
newSortField.setMissingValue(nonPruningSortField.getMissingValue());
594+
sortField = newSortField;
595+
} else if (nonPruningSortField.getDelegate().getClass() == SortField.class) {
596+
sortField = nonPruningSortField.getDelegate();
597+
}
579598
}
580599

581600
if (sortField.getClass() != SortField.class) {
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.fielddata.plain;
10+
11+
import org.apache.lucene.index.IndexSorter;
12+
import org.apache.lucene.index.SortedSetDocValues;
13+
import org.apache.lucene.search.FieldComparator;
14+
import org.apache.lucene.search.FieldComparatorSource;
15+
import org.apache.lucene.search.IndexSearcher;
16+
import org.apache.lucene.search.Pruning;
17+
import org.apache.lucene.search.SortField;
18+
import org.apache.lucene.search.SortedSetSelector;
19+
import org.apache.lucene.search.SortedSetSortField;
20+
import org.apache.lucene.store.DataInput;
21+
import org.apache.lucene.util.BytesRef;
22+
import org.opensearch.common.Nullable;
23+
import org.opensearch.core.indices.breaker.CircuitBreakerService;
24+
import org.opensearch.index.fielddata.IndexFieldData;
25+
import org.opensearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
26+
import org.opensearch.index.fielddata.IndexFieldDataCache;
27+
import org.opensearch.index.fielddata.ScriptDocValues;
28+
import org.opensearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
29+
import org.opensearch.index.mapper.WildcardFieldMapper;
30+
import org.opensearch.search.MultiValueMode;
31+
import org.opensearch.search.aggregations.support.ValuesSourceType;
32+
33+
import java.io.IOException;
34+
import java.util.Comparator;
35+
import java.util.function.Function;
36+
37+
/**
38+
* Wrapper for {@link SortedSetOrdinalsIndexFieldData} which disables pruning optimization for
39+
* sorting. Used in {@link WildcardFieldMapper}.
40+
*
41+
* @opensearch.internal
42+
*/
43+
public class NonPruningSortedSetOrdinalsIndexFieldData extends SortedSetOrdinalsIndexFieldData {
44+
45+
/**
46+
* Builder for non-pruning sorted set ordinals
47+
*
48+
* @opensearch.internal
49+
*/
50+
public static class Builder implements IndexFieldData.Builder {
51+
private final String name;
52+
private final Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction;
53+
private final ValuesSourceType valuesSourceType;
54+
55+
public Builder(String name, ValuesSourceType valuesSourceType) {
56+
this(name, AbstractLeafOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION, valuesSourceType);
57+
}
58+
59+
public Builder(String name, Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction, ValuesSourceType valuesSourceType) {
60+
this.name = name;
61+
this.scriptFunction = scriptFunction;
62+
this.valuesSourceType = valuesSourceType;
63+
}
64+
65+
@Override
66+
public NonPruningSortedSetOrdinalsIndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
67+
return new NonPruningSortedSetOrdinalsIndexFieldData(cache, name, valuesSourceType, breakerService, scriptFunction);
68+
}
69+
}
70+
71+
public NonPruningSortedSetOrdinalsIndexFieldData(
72+
IndexFieldDataCache cache,
73+
String fieldName,
74+
ValuesSourceType valuesSourceType,
75+
CircuitBreakerService breakerService,
76+
Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction
77+
) {
78+
super(cache, fieldName, valuesSourceType, breakerService, scriptFunction);
79+
}
80+
81+
@Override
82+
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
83+
XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
84+
source.disableSkipping();
85+
/*
86+
Check if we can use a simple {@link SortedSetSortField} compatible with index sorting and
87+
returns a custom sort field otherwise.
88+
*/
89+
if (nested != null
90+
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
91+
|| (source.sortMissingLast(missingValue) == false && source.sortMissingFirst(missingValue) == false)) {
92+
return new NonPruningSortField(new SortField(getFieldName(), source, reverse));
93+
}
94+
SortField sortField = new NonPruningSortField(
95+
new SortedSetSortField(
96+
getFieldName(),
97+
reverse,
98+
sortMode == MultiValueMode.MAX ? SortedSetSelector.Type.MAX : SortedSetSelector.Type.MIN
99+
)
100+
);
101+
sortField.setMissingValue(
102+
source.sortMissingLast(missingValue) ^ reverse ? SortedSetSortField.STRING_LAST : SortedSetSortField.STRING_FIRST
103+
);
104+
return sortField;
105+
}
106+
107+
/**
108+
* {@link SortField} implementation which delegates calls to another {@link SortField}.
109+
*
110+
*/
111+
public abstract class FilteredSortField extends SortField {
112+
protected final SortField delegate;
113+
114+
protected FilteredSortField(SortField sortField) {
115+
super(sortField.getField(), sortField.getType());
116+
this.delegate = sortField;
117+
}
118+
119+
@Override
120+
public Object getMissingValue() {
121+
return delegate.getMissingValue();
122+
}
123+
124+
@Override
125+
public void setMissingValue(Object missingValue) {
126+
delegate.setMissingValue(missingValue);
127+
}
128+
129+
@Override
130+
public String getField() {
131+
return delegate.getField();
132+
}
133+
134+
@Override
135+
public Type getType() {
136+
return delegate.getType();
137+
}
138+
139+
@Override
140+
public boolean getReverse() {
141+
return delegate.getReverse();
142+
}
143+
144+
@Override
145+
public FieldComparatorSource getComparatorSource() {
146+
return delegate.getComparatorSource();
147+
}
148+
149+
@Override
150+
public String toString() {
151+
return delegate.toString();
152+
}
153+
154+
@Override
155+
public boolean equals(Object o) {
156+
return delegate.equals(o);
157+
}
158+
159+
@Override
160+
public int hashCode() {
161+
return delegate.hashCode();
162+
}
163+
164+
@Override
165+
public void setBytesComparator(Comparator<BytesRef> b) {
166+
delegate.setBytesComparator(b);
167+
}
168+
169+
@Override
170+
public Comparator<BytesRef> getBytesComparator() {
171+
return delegate.getBytesComparator();
172+
}
173+
174+
@Override
175+
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
176+
return delegate.getComparator(numHits, pruning);
177+
}
178+
179+
@Override
180+
public SortField rewrite(IndexSearcher searcher) throws IOException {
181+
return delegate.rewrite(searcher);
182+
}
183+
184+
@Override
185+
public boolean needsScores() {
186+
return delegate.needsScores();
187+
}
188+
189+
@Override
190+
public IndexSorter getIndexSorter() {
191+
return delegate.getIndexSorter();
192+
}
193+
194+
@Deprecated
195+
@Override
196+
public void setOptimizeSortWithIndexedData(boolean optimizeSortWithIndexedData) {
197+
delegate.setOptimizeSortWithIndexedData(optimizeSortWithIndexedData);
198+
}
199+
200+
@Deprecated
201+
@Override
202+
public boolean getOptimizeSortWithIndexedData() {
203+
return delegate.getOptimizeSortWithIndexedData();
204+
}
205+
206+
@Deprecated
207+
@Override
208+
public void setOptimizeSortWithPoints(boolean optimizeSortWithPoints) {
209+
delegate.setOptimizeSortWithPoints(optimizeSortWithPoints);
210+
}
211+
212+
@Deprecated
213+
@Override
214+
public boolean getOptimizeSortWithPoints() {
215+
return delegate.getOptimizeSortWithPoints();
216+
}
217+
}
218+
219+
/**
220+
* {@link SortField} extension which disables pruning in the comparator.
221+
*
222+
* @opensearch.internal
223+
*/
224+
public final class NonPruningSortField extends FilteredSortField {
225+
226+
private NonPruningSortField(SortField sortField) {
227+
super(sortField);
228+
}
229+
230+
public static Type readType(DataInput in) throws IOException {
231+
return SortField.readType(in);
232+
}
233+
234+
@Override
235+
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
236+
// explictly disable pruning
237+
return delegate.getComparator(numHits, Pruning.NONE);
238+
}
239+
240+
public SortField getDelegate() {
241+
return delegate;
242+
}
243+
}
244+
}

server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
import org.opensearch.index.analysis.IndexAnalyzers;
4848
import org.opensearch.index.analysis.NamedAnalyzer;
4949
import org.opensearch.index.fielddata.IndexFieldData;
50-
import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
50+
import org.opensearch.index.fielddata.plain.NonPruningSortedSetOrdinalsIndexFieldData;
5151
import org.opensearch.index.query.QueryShardContext;
5252
import org.opensearch.search.DocValueFormat;
5353
import org.opensearch.search.aggregations.support.CoreValuesSourceType;
@@ -366,7 +366,7 @@ NamedAnalyzer normalizer() {
366366
@Override
367367
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier<SearchLookup> searchLookup) {
368368
failIfNoDocValues();
369-
return new SortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES);
369+
return new NonPruningSortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES);
370370
}
371371

372372
@Override

server/src/test/java/org/opensearch/common/lucene/LuceneTests.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,13 @@
7777
import org.opensearch.common.util.io.IOUtils;
7878
import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
7979
import org.opensearch.index.fielddata.IndexFieldData;
80+
import org.opensearch.index.fielddata.IndexFieldData.XFieldComparatorSource;
8081
import org.opensearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
8182
import org.opensearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
8283
import org.opensearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
8384
import org.opensearch.index.fielddata.fieldcomparator.IntValuesComparatorSource;
8485
import org.opensearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
86+
import org.opensearch.index.fielddata.plain.NonPruningSortedSetOrdinalsIndexFieldData;
8587
import org.opensearch.search.MultiValueMode;
8688
import org.opensearch.test.OpenSearchTestCase;
8789
import org.opensearch.test.VersionUtils;
@@ -582,6 +584,44 @@ public void testSortFieldSerialization() throws IOException {
582584
assertEquals(sortFieldTuple.v2(), deserialized);
583585
}
584586

587+
public void testNonpruningSortFieldSerialization() throws IOException {
588+
NonPruningSortedSetOrdinalsIndexFieldData fieldData = new NonPruningSortedSetOrdinalsIndexFieldData(
589+
null,
590+
"field",
591+
null,
592+
null,
593+
null
594+
);
595+
596+
SortField nonPruningSortedSetField = fieldData.sortField(null, MultiValueMode.MAX, null, true);
597+
SortField expected = new SortField(
598+
nonPruningSortedSetField.getField(),
599+
SortField.Type.STRING,
600+
nonPruningSortedSetField.getReverse()
601+
);
602+
expected.setMissingValue(SortField.STRING_FIRST);
603+
SortField deserialized1 = copyInstance(
604+
nonPruningSortedSetField,
605+
EMPTY_REGISTRY,
606+
Lucene::writeSortField,
607+
Lucene::readSortField,
608+
VersionUtils.randomVersion(random())
609+
);
610+
assertEquals(expected, deserialized1);
611+
612+
SortField nonPruningSortField = fieldData.sortField(SortField.STRING_FIRST, MultiValueMode.SUM, null, true);
613+
XFieldComparatorSource source = new BytesRefFieldComparatorSource(null, SortField.STRING_FIRST, MultiValueMode.SUM, null);
614+
expected = new SortField(nonPruningSortField.getField(), source.reducedType(), nonPruningSortField.getReverse());
615+
SortField deserialized2 = copyInstance(
616+
nonPruningSortField,
617+
EMPTY_REGISTRY,
618+
Lucene::writeSortField,
619+
Lucene::readSortField,
620+
VersionUtils.randomVersion(random())
621+
);
622+
assertEquals(expected, deserialized2);
623+
}
624+
585625
public void testSortValueSerialization() throws IOException {
586626
Object sortValue = randomSortValue();
587627
Object deserialized = copyInstance(

0 commit comments

Comments
 (0)