Skip to content

Commit e7b8ea0

Browse files
committed
Revert "Speed up advancing within a block. (#13692)"
This reverts commit 36fd689.
1 parent a774a99 commit e7b8ea0

File tree

2 files changed: 33 additions and 110 deletions.

lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsReader.java

Lines changed: 33 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,6 @@
6767
public final class Lucene912PostingsReader extends PostingsReaderBase {
6868

6969
static final VectorizationProvider VECTORIZATION_PROVIDER = VectorizationProvider.getInstance();
70-
static int BINARY_SEARCH_WINDOW_SIZE = 4;
7170

7271
private final IndexInput docIn;
7372
private final IndexInput posIn;
@@ -213,74 +212,13 @@ static void prefixSum(long[] buffer, int count, long base) {
213212
}
214213
}
215214

216-
private static boolean assertDocBuffer(long[] values, int start) {
217-
assert values.length == BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE;
218-
assert values[BLOCK_SIZE] == DocIdSetIterator.NO_MORE_DOCS;
219-
assert start < BLOCK_SIZE;
220-
221-
int endOffset;
222-
if (values[0] == DocIdSetIterator.NO_MORE_DOCS) {
223-
endOffset = 0;
224-
} else {
225-
endOffset = -1;
226-
for (int i = 1; i < values.length; ++i) {
227-
assert values[i] > values[i - 1] : Arrays.toString(values);
228-
if (values[i] == DocIdSetIterator.NO_MORE_DOCS) {
229-
endOffset = i;
230-
break;
231-
}
215+
static int findFirstGreater(long[] buffer, int target, int from) {
216+
for (int i = from; i < BLOCK_SIZE; ++i) {
217+
if (buffer[i] >= target) {
218+
return i;
232219
}
233220
}
234-
for (int i = 0; i < BINARY_SEARCH_WINDOW_SIZE; ++i) {
235-
assert values[endOffset + i] == DocIdSetIterator.NO_MORE_DOCS;
236-
}
237-
return true;
238-
}
239-
240-
/**
241-
* Return the first index in sorted array {@code values} whose value is greater than or equal to
242-
* {@code target}. For correctness, it requires the last 4 values to be set to {@code
243-
* NO_MORE_DOCS}.
244-
*/
245-
static int findNextGEQ(long[] values, long target, int start) {
246-
assert assertDocBuffer(values, start);
247-
248-
if (values[start] >= target) {
249-
// Surprisingly this is a likely condition in practice, so optimizing for it helps.
250-
return start;
251-
}
252-
253-
// We just ruled out that our target index is at `start`.
254-
start += 1;
255-
256-
// Now find the first interval of 4 values that contains our target.
257-
for (int i = start;
258-
i + BINARY_SEARCH_WINDOW_SIZE <= values.length;
259-
i += BINARY_SEARCH_WINDOW_SIZE) {
260-
if (values[i + BINARY_SEARCH_WINDOW_SIZE - 1] >= target) {
261-
start = i;
262-
break;
263-
}
264-
}
265-
266-
// Binary search in this interval of 4 values.
267-
return binarySearch4(values, target, start);
268-
}
269-
270-
/**
271-
* Return the first index whose value is greater than or equal to {@code target} among the 4
272-
* values starting at {@code start}. If none of the values is greater than or equal to {@code
273-
* target}, this returns {@code start+3}.
274-
*/
275-
private static int binarySearch4(long[] values, long target, int start) {
276-
// This code is organized in a way that compiles to a branchless binary search.
277-
if (values[start + 1] < target) {
278-
start += 2;
279-
}
280-
if (values[start] < target) {
281-
start += 1;
282-
}
283-
return start;
221+
return BLOCK_SIZE;
284222
}
285223

286224
@Override
@@ -410,7 +348,7 @@ final class BlockDocsEnum extends PostingsEnum {
410348
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
411349
final PForUtil pforUtil = new PForUtil(forUtil);
412350

413-
private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
351+
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
414352
private final long[] freqBuffer = new long[BLOCK_SIZE];
415353

416354
private int docBufferUpto;
@@ -452,9 +390,9 @@ public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
452390
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
453391
>= 0
454392
|| fieldInfo.hasPayloads();
455-
// We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
393+
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
456394
// advance()
457-
Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
395+
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
458396
}
459397

460398
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@@ -557,7 +495,7 @@ private void refillFullBlock() throws IOException {
557495
docCountUpto += BLOCK_SIZE;
558496
prevDocID = docBuffer[BLOCK_SIZE - 1];
559497
docBufferUpto = 0;
560-
assert assertDocBuffer(docBuffer, 0);
498+
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
561499
}
562500

563501
private void refillRemainder() throws IOException {
@@ -568,14 +506,15 @@ private void refillRemainder() throws IOException {
568506
if (docFreq == 1) {
569507
docBuffer[0] = singletonDocID;
570508
freqBuffer[0] = totalTermFreq;
509+
docBuffer[1] = NO_MORE_DOCS;
571510
docCountUpto++;
572511
} else {
573512
// Read vInts:
574513
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq);
575514
prefixSum(docBuffer, left, prevDocID);
515+
docBuffer[left] = NO_MORE_DOCS;
576516
docCountUpto += left;
577517
}
578-
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
579518
docBufferUpto = 0;
580519
freqFP = -1;
581520
}
@@ -673,7 +612,7 @@ public int advance(int target) throws IOException {
673612
}
674613
}
675614

676-
int next = findNextGEQ(docBuffer, target, docBufferUpto);
615+
int next = findFirstGreater(docBuffer, target, docBufferUpto);
677616
this.doc = (int) docBuffer[next];
678617
docBufferUpto = next + 1;
679618
return doc;
@@ -691,8 +630,8 @@ final class EverythingEnum extends PostingsEnum {
691630
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
692631
final PForUtil pforUtil = new PForUtil(forUtil);
693632

694-
private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
695-
private final long[] freqBuffer = new long[BLOCK_SIZE];
633+
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
634+
private final long[] freqBuffer = new long[BLOCK_SIZE + 1];
696635
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
697636

698637
private final long[] payloadLengthBuffer;
@@ -813,9 +752,9 @@ public EverythingEnum(FieldInfo fieldInfo) throws IOException {
813752
payload = null;
814753
}
815754

816-
// We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
755+
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
817756
// advance()
818-
Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
757+
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
819758
}
820759

821760
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@@ -904,18 +843,18 @@ private void refillDocs() throws IOException {
904843
} else if (docFreq == 1) {
905844
docBuffer[0] = singletonDocID;
906845
freqBuffer[0] = totalTermFreq;
907-
Arrays.fill(docBuffer, 1, 1 + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
846+
docBuffer[1] = NO_MORE_DOCS;
908847
docCountUpto++;
909848
} else {
910849
// Read vInts:
911850
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
912851
prefixSum(docBuffer, left, prevDocID);
913-
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
852+
docBuffer[left] = NO_MORE_DOCS;
914853
docCountUpto += left;
915854
}
916855
prevDocID = docBuffer[BLOCK_SIZE - 1];
917856
docBufferUpto = 0;
918-
assert assertDocBuffer(docBuffer, 0);
857+
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
919858
}
920859

921860
private void skipLevel1To(int target) throws IOException {
@@ -1077,7 +1016,7 @@ public int advance(int target) throws IOException {
10771016
refillDocs();
10781017
}
10791018

1080-
int next = findNextGEQ(docBuffer, target, docBufferUpto);
1019+
int next = findFirstGreater(docBuffer, target, docBufferUpto);
10811020
for (int i = docBufferUpto; i <= next; ++i) {
10821021
posPendingCount += freqBuffer[i];
10831022
}
@@ -1278,7 +1217,7 @@ final class BlockImpactsDocsEnum extends ImpactsEnum {
12781217
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
12791218
final PForUtil pforUtil = new PForUtil(forUtil);
12801219

1281-
private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
1220+
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
12821221
private final long[] freqBuffer = new long[BLOCK_SIZE];
12831222

12841223
private int docBufferUpto;
@@ -1326,9 +1265,9 @@ public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
13261265
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
13271266
>= 0
13281267
|| fieldInfo.hasPayloads();
1329-
// We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
1268+
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
13301269
// advance()
1331-
Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
1270+
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
13321271

13331272
docFreq = termState.docFreq;
13341273
if (docFreq > 1) {
@@ -1418,13 +1357,13 @@ private void refillDocs() throws IOException {
14181357
// Read vInts:
14191358
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
14201359
prefixSum(docBuffer, left, prevDocID);
1421-
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
1360+
docBuffer[left] = NO_MORE_DOCS;
14221361
freqFP = -1;
14231362
docCountUpto += left;
14241363
}
14251364
prevDocID = docBuffer[BLOCK_SIZE - 1];
14261365
docBufferUpto = 0;
1427-
assert assertDocBuffer(docBuffer, 0);
1366+
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
14281367
}
14291368

14301369
private void skipLevel1To(int target) throws IOException {
@@ -1550,7 +1489,7 @@ public int advance(int target) throws IOException {
15501489
needsRefilling = false;
15511490
}
15521491

1553-
int next = findNextGEQ(docBuffer, target, docBufferUpto);
1492+
int next = findFirstGreater(docBuffer, target, docBufferUpto);
15541493
this.doc = (int) docBuffer[next];
15551494
docBufferUpto = next + 1;
15561495
return doc;
@@ -1633,7 +1572,7 @@ final class BlockImpactsPostingsEnum extends ImpactsEnum {
16331572
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
16341573
final PForUtil pforUtil = new PForUtil(forUtil);
16351574

1636-
private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
1575+
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
16371576
private final long[] freqBuffer = new long[BLOCK_SIZE];
16381577
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
16391578

@@ -1713,9 +1652,9 @@ public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState
17131652
this.posIn = Lucene912PostingsReader.this.posIn.clone();
17141653
posInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(posIn);
17151654

1716-
// We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
1655+
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
17171656
// advance()
1718-
Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
1657+
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
17191658

17201659
docFreq = termState.docFreq;
17211660
posTermStartFP = termState.posStartFP;
@@ -1785,18 +1724,18 @@ private void refillDocs() throws IOException {
17851724
} else if (docFreq == 1) {
17861725
docBuffer[0] = singletonDocID;
17871726
freqBuffer[0] = totalTermFreq;
1788-
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
1727+
docBuffer[1] = NO_MORE_DOCS;
17891728
docCountUpto++;
17901729
} else {
17911730
// Read vInts:
17921731
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
17931732
prefixSum(docBuffer, left, prevDocID);
1794-
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
1733+
docBuffer[left] = NO_MORE_DOCS;
17951734
docCountUpto += left;
17961735
}
17971736
prevDocID = docBuffer[BLOCK_SIZE - 1];
17981737
docBufferUpto = 0;
1799-
assert assertDocBuffer(docBuffer, 0);
1738+
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
18001739
}
18011740

18021741
private void skipLevel1To(int target) throws IOException {
@@ -1993,7 +1932,7 @@ public int advance(int target) throws IOException {
19931932
needsRefilling = false;
19941933
}
19951934

1996-
int next = findNextGEQ(docBuffer, target, docBufferUpto);
1935+
int next = findFirstGreater(docBuffer, target, docBufferUpto);
19971936
for (int i = docBufferUpto; i <= next; ++i) {
19981937
posPendingCount += freqBuffer[i];
19991938
}

lucene/core/src/test/org/apache/lucene/codecs/lucene912/TestLucene912PostingsFormat.java

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131
import org.apache.lucene.index.Impact;
3232
import org.apache.lucene.index.IndexWriter;
3333
import org.apache.lucene.index.IndexWriterConfig;
34-
import org.apache.lucene.search.DocIdSetIterator;
3534
import org.apache.lucene.store.ByteArrayDataInput;
3635
import org.apache.lucene.store.ByteArrayDataOutput;
3736
import org.apache.lucene.store.Directory;
@@ -155,19 +154,4 @@ private void doTestImpactSerialization(List<Impact> impacts) throws IOException
155154
}
156155
}
157156
}
158-
159-
public void testFindNextGEQ() {
160-
long[] values =
161-
new long[ForUtil.BLOCK_SIZE + Lucene912PostingsReader.BINARY_SEARCH_WINDOW_SIZE];
162-
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
163-
values[i] = i * 2;
164-
}
165-
Arrays.fill(values, ForUtil.BLOCK_SIZE, values.length, DocIdSetIterator.NO_MORE_DOCS);
166-
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
167-
for (int start = 0; start <= i; ++start) {
168-
assertEquals(i, Lucene912PostingsReader.findNextGEQ(values, i * 2, start));
169-
assertEquals(i + 1, Lucene912PostingsReader.findNextGEQ(values, i * 2 + 1, start));
170-
}
171-
}
172-
}
173157
}

0 commit comments

Comments
 (0)