Skip to content

Commit c99a57f

Browse files
jtibshirani and Christoph Büscher authored
Fix CombinedFieldQuery (Lucene 9999) (#74857)
This commit brings in a fix for a bug in Lucene's MultiNormsLeafSimScorer that caused an error in the new CombinedFieldQuery for missing values. It is based on a PR for LUCENE-9999 (apache/lucene#185). This is a temporary copy of the affected query and its updated dependencies, which should be removed again once we are able to use the original fix from Lucene. Relates to apache/lucene#185 Co-authored-by: Christoph Büscher <cbuescher@posteo.de>
1 parent e0adbb8 commit c99a57f

File tree

2 files changed

+184
-347
lines changed

2 files changed

+184
-347
lines changed

server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java

Lines changed: 128 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@
1313
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1414
* See the License for the specific language governing permissions and
1515
* limitations under the License.
16+
*
17+
* Modifications copyright (C) 2020 Elasticsearch B.V.
1618
*/
17-
1819
package org.apache.lucene.search;
1920

2021
import org.apache.lucene.index.LeafReader;
2122
import org.apache.lucene.index.NumericDocValues;
23+
import org.apache.lucene.search.XCombinedFieldQuery.FieldAndWeight;
2224
import org.apache.lucene.search.similarities.Similarity.SimScorer;
2325
import org.apache.lucene.util.SmallFloat;
2426

@@ -28,134 +30,148 @@
2830
import java.util.List;
2931
import java.util.Objects;
3032

31-
import static org.apache.lucene.search.XCombinedFieldQuery.FieldAndWeight;
32-
3333
/**
34-
* Copy of {@link LeafSimScorer} that sums document's norms from multiple fields.
34+
* Copy of {@link MultiNormsLeafSimScorer} that contains a fix for LUCENE-9999.
35+
* TODO: remove once LUCENE-9999 is fixed and integrated
3536
*
36-
* TODO: this is temporarily copied from Lucene, remove once we update to Lucene 8.9.
37+
* <p>This scorer requires that either all fields or no fields have norms enabled. It will throw an
38+
* error if some fields have norms enabled, while others have norms disabled.
3739
*/
3840
final class XMultiNormsLeafSimScorer {
39-
/**
40-
* Cache of decoded norms.
41-
*/
42-
private static final float[] LENGTH_TABLE = new float[256];
43-
44-
static {
45-
for (int i = 0; i < 256; i++) {
46-
LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
47-
}
48-
}
41+
/** Cache of decoded norms. */
42+
private static final float[] LENGTH_TABLE = new float[256];
4943

50-
private final SimScorer scorer;
51-
private final NumericDocValues norms;
52-
53-
/**
54-
* Sole constructor: Score documents of {@code reader} with {@code scorer}.
55-
*
56-
*/
57-
XMultiNormsLeafSimScorer(SimScorer scorer,
58-
LeafReader reader,
59-
Collection<FieldAndWeight> normFields,
60-
boolean needsScores) throws IOException {
61-
this.scorer = Objects.requireNonNull(scorer);
62-
if (needsScores) {
63-
final List<NumericDocValues> normsList = new ArrayList<>();
64-
final List<Float> weightList = new ArrayList<>();
65-
for (FieldAndWeight field : normFields) {
66-
NumericDocValues norms = reader.getNormValues(field.field);
67-
if (norms != null) {
68-
normsList.add(norms);
69-
weightList.add(field.weight);
70-
}
71-
}
72-
if (normsList.isEmpty()) {
73-
norms = null;
74-
} else if (normsList.size() == 1) {
75-
norms = normsList.get(0);
76-
} else {
77-
final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]);
78-
final float[] weightArr = new float[normsList.size()];
79-
for (int i = 0; i < weightList.size(); i++) {
80-
weightArr[i] = weightList.get(i);
81-
}
82-
norms = new XMultiNormsLeafSimScorer.MultiFieldNormValues(normsArr, weightArr);
83-
}
84-
} else {
85-
norms = null;
86-
}
44+
static {
45+
for (int i = 0; i < 256; i++) {
46+
LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
8747
}
88-
89-
private long getNormValue(int doc) throws IOException {
48+
}
49+
50+
private final SimScorer scorer;
51+
private final NumericDocValues norms;
52+
53+
/** Sole constructor: Score documents of {@code reader} with {@code scorer}. */
54+
XMultiNormsLeafSimScorer(
55+
SimScorer scorer,
56+
LeafReader reader,
57+
Collection<FieldAndWeight> normFields,
58+
boolean needsScores)
59+
throws IOException {
60+
this.scorer = Objects.requireNonNull(scorer);
61+
if (needsScores) {
62+
final List<NumericDocValues> normsList = new ArrayList<>();
63+
final List<Float> weightList = new ArrayList<>();
64+
for (FieldAndWeight field : normFields) {
65+
NumericDocValues norms = reader.getNormValues(field.field);
9066
if (norms != null) {
91-
boolean found = norms.advanceExact(doc);
92-
assert found;
93-
return norms.longValue();
94-
} else {
95-
return 1L; // default norm
67+
normsList.add(norms);
68+
weightList.add(field.weight);
69+
}
70+
}
71+
72+
if (normsList.isEmpty() == false && normsList.size() != normFields.size()) {
73+
throw new IllegalArgumentException(
74+
getClass().getSimpleName()
75+
+ " requires norms to be consistent across fields: some fields cannot"
76+
+ " have norms enabled, while others have norms disabled");
77+
}
78+
79+
if (normsList.isEmpty()) {
80+
norms = null;
81+
} else if (normsList.size() == 1) {
82+
norms = normsList.get(0);
83+
} else {
84+
final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]);
85+
final float[] weightArr = new float[normsList.size()];
86+
for (int i = 0; i < weightList.size(); i++) {
87+
weightArr[i] = weightList.get(i);
9688
}
89+
norms = new MultiFieldNormValues(normsArr, weightArr);
90+
}
91+
} else {
92+
norms = null;
9793
}
98-
99-
/** Score the provided document assuming the given term document frequency.
100-
* This method must be called on non-decreasing sequences of doc ids.
101-
* @see SimScorer#score(float, long) */
102-
public float score(int doc, float freq) throws IOException {
103-
return scorer.score(freq, getNormValue(doc));
94+
}
95+
96+
private long getNormValue(int doc) throws IOException {
97+
if (norms != null) {
98+
boolean found = norms.advanceExact(doc);
99+
assert found;
100+
return norms.longValue();
101+
} else {
102+
return 1L; // default norm
104103
}
105-
106-
/** Explain the score for the provided document assuming the given term document frequency.
107-
* This method must be called on non-decreasing sequences of doc ids.
108-
* @see SimScorer#explain(Explanation, long) */
109-
public Explanation explain(int doc, Explanation freqExpl) throws IOException {
110-
return scorer.explain(freqExpl, getNormValue(doc));
104+
}
105+
106+
/**
107+
* Score the provided document assuming the given term document frequency. This method must be
108+
* called on non-decreasing sequences of doc ids.
109+
*
110+
* @see SimScorer#score(float, long)
111+
*/
112+
public float score(int doc, float freq) throws IOException {
113+
return scorer.score(freq, getNormValue(doc));
114+
}
115+
116+
/**
117+
* Explain the score for the provided document assuming the given term document frequency. This
118+
* method must be called on non-decreasing sequences of doc ids.
119+
*
120+
* @see SimScorer#explain(Explanation, long)
121+
*/
122+
public Explanation explain(int doc, Explanation freqExpl) throws IOException {
123+
return scorer.explain(freqExpl, getNormValue(doc));
124+
}
125+
126+
private static class MultiFieldNormValues extends NumericDocValues {
127+
private final NumericDocValues[] normsArr;
128+
private final float[] weightArr;
129+
private long current;
130+
private int docID = -1;
131+
132+
MultiFieldNormValues(NumericDocValues[] normsArr, float[] weightArr) {
133+
this.normsArr = normsArr;
134+
this.weightArr = weightArr;
111135
}
112136

113-
private static class MultiFieldNormValues extends NumericDocValues {
114-
private final NumericDocValues[] normsArr;
115-
private final float[] weightArr;
116-
private long current;
117-
private int docID = -1;
118-
119-
MultiFieldNormValues(NumericDocValues[] normsArr, float[] weightArr) {
120-
this.normsArr = normsArr;
121-
this.weightArr = weightArr;
122-
}
123-
124-
@Override
125-
public long longValue() {
126-
return current;
127-
}
137+
@Override
138+
public long longValue() {
139+
return current;
140+
}
128141

129-
@Override
130-
public boolean advanceExact(int target) throws IOException {
131-
float normValue = 0;
132-
for (int i = 0; i < normsArr.length; i++) {
133-
boolean found = normsArr[i].advanceExact(target);
134-
assert found;
135-
normValue += weightArr[i] * LENGTH_TABLE[Byte.toUnsignedInt((byte) normsArr[i].longValue())];
136-
}
137-
current = SmallFloat.intToByte4(Math.round(normValue));
138-
return true;
142+
@Override
143+
public boolean advanceExact(int target) throws IOException {
144+
float normValue = 0;
145+
boolean found = false;
146+
for (int i = 0; i < normsArr.length; i++) {
147+
if (normsArr[i].advanceExact(target)) {
148+
normValue +=
149+
weightArr[i] * LENGTH_TABLE[Byte.toUnsignedInt((byte) normsArr[i].longValue())];
150+
found = true;
139151
}
152+
}
153+
current = SmallFloat.intToByte4(Math.round(normValue));
154+
return found;
155+
}
140156

141-
@Override
142-
public int docID() {
143-
return docID;
144-
}
157+
@Override
158+
public int docID() {
159+
return docID;
160+
}
145161

146-
@Override
147-
public int nextDoc() {
148-
throw new UnsupportedOperationException();
149-
}
162+
@Override
163+
public int nextDoc() {
164+
throw new UnsupportedOperationException();
165+
}
150166

151-
@Override
152-
public int advance(int target) {
153-
throw new UnsupportedOperationException();
154-
}
167+
@Override
168+
public int advance(int target) {
169+
throw new UnsupportedOperationException();
170+
}
155171

156-
@Override
157-
public long cost() {
158-
throw new UnsupportedOperationException();
159-
}
172+
@Override
173+
public long cost() {
174+
throw new UnsupportedOperationException();
160175
}
176+
}
161177
}

0 commit comments

Comments
 (0)