Skip to content

Commit 65820e5

Browse files
gsmillerGreg Miller
andauthored
LUCENE-9953: Make FacetResult#value accurate for LongValueFacetCounts multi-value doc cases (#131)
Co-authored-by: Greg Miller <gmiller@amazon.com>
1 parent ade50f0 commit 65820e5

File tree

4 files changed

+19
-8
lines changed

4 files changed

+19
-8
lines changed

lucene/CHANGES.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,10 @@ Bug Fixes
390390

391391
* LUCENE-9958: Fixed performance regression for boolean queries that configure a
392392
minimum number of matching clauses. (Adrien Grand, Matt Weber)
393+
394+
* LUCENE-9953: LongValueFacetCounts should count each document at most once when determining
395+
the total count for a dimension. Prior to this fix, multi-value docs could contribute a > 1
396+
count to the dimension count. (Greg Miller)
393397

394398
Other
395399
---------------------

lucene/facet/src/java/org/apache/lucene/facet/FacetResult.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,12 @@ public final class FacetResult {
2828
public final String[] path;
2929

3030
/**
31-
* Total value for this path (sum of all child counts, or sum of all child values), even those not
32-
* included in the topN.
31+
* Total number of documents containing a value for this path, even those not included in the
32+
* topN. If a document contains multiple values for the same path, it will only be counted once in
33+
* this value.
3334
*/
35+
// TODO: This may not hold true for SSDV faceting, where docs can be counted more than
36+
// once. We should fix this. See LUCENE-9952
3437
public final Number value;
3538

3639
/** How many child labels were encountered. */

lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,9 @@ private void count(String field, List<MatchingDocs> matchingDocs) throws IOExcep
160160

161161
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
162162
int limit = multiValues.docValueCount();
163-
totCount += limit;
163+
if (limit > 0) {
164+
totCount++;
165+
}
164166
for (int i = 0; i < limit; i++) {
165167
increment(multiValues.nextValue());
166168
}
@@ -204,7 +206,9 @@ private void countAll(IndexReader reader, String field) throws IOException {
204206

205207
while (multiValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
206208
int limit = multiValues.docValueCount();
207-
totCount += limit;
209+
if (limit > 0) {
210+
totCount++;
211+
}
208212
for (int i = 0; i < limit; i++) {
209213
increment(multiValues.nextValue());
210214
}

lucene/facet/src/test/org/apache/lucene/facet/TestLongValueFacetCounts.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,15 +429,15 @@ public void testRandomMultiValued() throws Exception {
429429
int expectedChildCount = 0;
430430
int expectedTotalCount = 0;
431431
for (int i = 0; i < valueCount; i++) {
432-
if (values[i] != null) {
432+
if (values[i] != null && values[i].length > 0) {
433+
expectedTotalCount++;
433434
for (long value : values[i]) {
434435
Integer curCount = expected.get(value);
435436
if (curCount == null) {
436437
curCount = 0;
437438
expectedChildCount++;
438439
}
439440
expected.put(value, curCount + 1);
440-
expectedTotalCount++;
441441
}
442442
}
443443
}
@@ -520,9 +520,9 @@ public void testRandomMultiValued() throws Exception {
520520
expectedChildCount = 0;
521521
int totCount = 0;
522522
for (int i = minId; i <= maxId; i++) {
523-
if (values[i] != null) {
523+
if (values[i] != null && values[i].length > 0) {
524+
totCount++;
524525
for (long value : values[i]) {
525-
totCount++;
526526
Integer curCount = expected.get(value);
527527
if (curCount == null) {
528528
expectedChildCount++;

0 commit comments

Comments
 (0)