Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not maintain docBufferUpto when only docs are needed #14164

Merged
merged 7 commits into from
Jan 25, 2025
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ private enum DeltaEncoding {
private int prevDocID; // last doc ID of the previous block

private int docBufferSize;
private int docBufferUpto;
private int docBufferUpto; // only makes sense for packed encoding
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment is a bit misleading: it makes sense for the bitset encoding, but in this case it's only the index into the freq buffer, not into the doc buffer.


private IndexInput docIn;
private PostingDecodingUtil docInUtil;
Expand Down Expand Up @@ -388,6 +388,7 @@ private enum DeltaEncoding {
final boolean needsOffsetsOrPayloads;
final boolean needsImpacts;
final boolean needsDocsAndFreqsOnly;
final boolean needsDocsOnly;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we do not need to track a separate variable: it should always equal needsFreq == false (needsFreq should always be true whenever positions or impacts are needed).


private long freqFP; // offset of the freq block

Expand Down Expand Up @@ -443,6 +444,7 @@ public BlockPostingsEnum(FieldInfo fieldInfo, int flags, boolean needsImpacts)
needsOffsetsOrPayloads = needsOffsets || needsPayloads;
this.needsImpacts = needsImpacts;
needsDocsAndFreqsOnly = needsPos == false && needsImpacts == false;
needsDocsOnly = needsDocsAndFreqsOnly && needsFreq == false;

if (needsFreq == false) {
Arrays.fill(freqBuffer, 1);
Expand Down Expand Up @@ -615,19 +617,21 @@ private void refillFullBlock() throws IOException {
numLongs = -bitsPerValue;
docIn.readLongs(docBitSet.getBits(), 0, numLongs);
}
// Note: we know that BLOCK_SIZE bits are set, so no need to compute the cumulative pop
// count at the last index, it will be BLOCK_SIZE.
// Note: this for loop auto-vectorizes
for (int i = 0; i < numLongs - 1; ++i) {
docCumulativeWordPopCounts[i] = Long.bitCount(docBitSet.getBits()[i]);
}
for (int i = 1; i < numLongs - 1; ++i) {
docCumulativeWordPopCounts[i] += docCumulativeWordPopCounts[i - 1];
if (!needsDocsOnly) {
// Note: we know that BLOCK_SIZE bits are set, so no need to compute the cumulative pop
// count at the last index, it will be BLOCK_SIZE.
// Note: this for loop auto-vectorizes
for (int i = 0; i < numLongs - 1; ++i) {
docCumulativeWordPopCounts[i] = Long.bitCount(docBitSet.getBits()[i]);
}
for (int i = 1; i < numLongs - 1; ++i) {
docCumulativeWordPopCounts[i] += docCumulativeWordPopCounts[i - 1];
}
docCumulativeWordPopCounts[numLongs - 1] = BLOCK_SIZE;
assert docCumulativeWordPopCounts[numLongs - 2]
+ Long.bitCount(docBitSet.getBits()[numLongs - 1])
== BLOCK_SIZE;
}
docCumulativeWordPopCounts[numLongs - 1] = BLOCK_SIZE;
assert docCumulativeWordPopCounts[numLongs - 2]
+ Long.bitCount(docBitSet.getBits()[numLongs - 1])
== BLOCK_SIZE;
encoding = DeltaEncoding.UNARY;
}
if (indexHasFreq) {
Expand Down Expand Up @@ -726,7 +730,7 @@ private void skipLevel1To(int target) throws IOException {
}

private void doMoveToNextLevel0Block() throws IOException {
assert docBufferUpto == BLOCK_SIZE;
assert doc == level0LastDocID;
if (posIn != null) {
if (level0PosEndFP >= posIn.getFilePointer()) {
posIn.seek(level0PosEndFP);
Expand Down Expand Up @@ -912,7 +916,7 @@ private void doAdvanceShallow(int target) throws IOException {

@Override
public int nextDoc() throws IOException {
if (docBufferUpto == BLOCK_SIZE) {
if (doc == level0LastDocID) {
moveToNextLevel0Block();
}

Expand Down Expand Up @@ -954,13 +958,17 @@ public int advance(int target) throws IOException {
int next = docBitSet.nextSetBit(target - docBitSetBase);
assert next != NO_MORE_DOCS;
this.doc = docBitSetBase + next;
int wordIndex = next >> 6;
// Take the cumulative pop count for the given word, and subtract bits on the left of
// the current doc.
docBufferUpto =
1
+ docCumulativeWordPopCounts[wordIndex]
- Long.bitCount(docBitSet.getBits()[wordIndex] >>> next);
if (!needsDocsOnly) {
int wordIndex = next >> 6;
// Take the cumulative pop count for the given word, and subtract bits on the left of
// the current doc.
docBufferUpto =
1
+ docCumulativeWordPopCounts[wordIndex]
- Long.bitCount(docBitSet.getBits()[wordIndex] >>> next);
} else {
docBufferUpto = 1; // Just make it not 0
}
}
break;
}
Expand All @@ -978,7 +986,7 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
bitSet.set(doc - offset);

for (; ; ) {
if (docBufferUpto == BLOCK_SIZE) {
if (doc == level0LastDocID) {
// refill
moveToNextLevel0Block();
}
Expand Down
Loading