-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Not maintain docBufferUpTo when only docs needed #14164
Merged
Merged
Changes from 3 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -345,7 +345,7 @@ private enum DeltaEncoding { | |
private int prevDocID; // last doc ID of the previous block | ||
|
||
private int docBufferSize; | ||
private int docBufferUpto; | ||
private int docBufferUpto; // only makes sense for packed encoding | ||
|
||
private IndexInput docIn; | ||
private PostingDecodingUtil docInUtil; | ||
|
@@ -388,6 +388,7 @@ private enum DeltaEncoding { | |
final boolean needsOffsetsOrPayloads; | ||
final boolean needsImpacts; | ||
final boolean needsDocsAndFreqsOnly; | ||
final boolean needsDocsOnly; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe that we do not need to track a separate variable as it should always be the same as |
||
|
||
private long freqFP; // offset of the freq block | ||
|
||
|
@@ -443,6 +444,7 @@ public BlockPostingsEnum(FieldInfo fieldInfo, int flags, boolean needsImpacts) | |
needsOffsetsOrPayloads = needsOffsets || needsPayloads; | ||
this.needsImpacts = needsImpacts; | ||
needsDocsAndFreqsOnly = needsPos == false && needsImpacts == false; | ||
needsDocsOnly = needsDocsAndFreqsOnly && needsFreq == false; | ||
|
||
if (needsFreq == false) { | ||
Arrays.fill(freqBuffer, 1); | ||
|
@@ -615,19 +617,21 @@ private void refillFullBlock() throws IOException { | |
numLongs = -bitsPerValue; | ||
docIn.readLongs(docBitSet.getBits(), 0, numLongs); | ||
} | ||
// Note: we know that BLOCK_SIZE bits are set, so no need to compute the cumulative pop | ||
// count at the last index, it will be BLOCK_SIZE. | ||
// Note: this for loop auto-vectorizes | ||
for (int i = 0; i < numLongs - 1; ++i) { | ||
docCumulativeWordPopCounts[i] = Long.bitCount(docBitSet.getBits()[i]); | ||
} | ||
for (int i = 1; i < numLongs - 1; ++i) { | ||
docCumulativeWordPopCounts[i] += docCumulativeWordPopCounts[i - 1]; | ||
if (!needsDocsOnly) { | ||
// Note: we know that BLOCK_SIZE bits are set, so no need to compute the cumulative pop | ||
// count at the last index, it will be BLOCK_SIZE. | ||
// Note: this for loop auto-vectorizes | ||
for (int i = 0; i < numLongs - 1; ++i) { | ||
docCumulativeWordPopCounts[i] = Long.bitCount(docBitSet.getBits()[i]); | ||
} | ||
for (int i = 1; i < numLongs - 1; ++i) { | ||
docCumulativeWordPopCounts[i] += docCumulativeWordPopCounts[i - 1]; | ||
} | ||
docCumulativeWordPopCounts[numLongs - 1] = BLOCK_SIZE; | ||
assert docCumulativeWordPopCounts[numLongs - 2] | ||
+ Long.bitCount(docBitSet.getBits()[numLongs - 1]) | ||
== BLOCK_SIZE; | ||
} | ||
docCumulativeWordPopCounts[numLongs - 1] = BLOCK_SIZE; | ||
assert docCumulativeWordPopCounts[numLongs - 2] | ||
+ Long.bitCount(docBitSet.getBits()[numLongs - 1]) | ||
== BLOCK_SIZE; | ||
encoding = DeltaEncoding.UNARY; | ||
} | ||
if (indexHasFreq) { | ||
|
@@ -726,7 +730,7 @@ private void skipLevel1To(int target) throws IOException { | |
} | ||
|
||
private void doMoveToNextLevel0Block() throws IOException { | ||
assert docBufferUpto == BLOCK_SIZE; | ||
assert doc == level0LastDocID; | ||
if (posIn != null) { | ||
if (level0PosEndFP >= posIn.getFilePointer()) { | ||
posIn.seek(level0PosEndFP); | ||
|
@@ -912,7 +916,7 @@ private void doAdvanceShallow(int target) throws IOException { | |
|
||
@Override | ||
public int nextDoc() throws IOException { | ||
if (docBufferUpto == BLOCK_SIZE) { | ||
if (doc == level0LastDocID) { | ||
moveToNextLevel0Block(); | ||
} | ||
|
||
|
@@ -954,13 +958,17 @@ public int advance(int target) throws IOException { | |
int next = docBitSet.nextSetBit(target - docBitSetBase); | ||
assert next != NO_MORE_DOCS; | ||
this.doc = docBitSetBase + next; | ||
int wordIndex = next >> 6; | ||
// Take the cumulative pop count for the given word, and subtract bits on the left of | ||
// the current doc. | ||
docBufferUpto = | ||
1 | ||
+ docCumulativeWordPopCounts[wordIndex] | ||
- Long.bitCount(docBitSet.getBits()[wordIndex] >>> next); | ||
if (!needsDocsOnly) { | ||
int wordIndex = next >> 6; | ||
// Take the cumulative pop count for the given word, and subtract bits on the left of | ||
// the current doc. | ||
docBufferUpto = | ||
1 | ||
+ docCumulativeWordPopCounts[wordIndex] | ||
- Long.bitCount(docBitSet.getBits()[wordIndex] >>> next); | ||
} else { | ||
docBufferUpto = 1; // Just make it not 0 | ||
} | ||
} | ||
break; | ||
} | ||
|
@@ -978,7 +986,7 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept | |
bitSet.set(doc - offset); | ||
|
||
for (; ; ) { | ||
if (docBufferUpto == BLOCK_SIZE) { | ||
if (doc == level0LastDocID) { | ||
// refill | ||
moveToNextLevel0Block(); | ||
} | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment is a bit misleading: it makes sense for the bitset encoding, but in this case it's only the index into the freq buffer, not into the doc buffer.