
Commit 36fd689

Speed up advancing within a block. (#13692)
Advancing within a block consists of finding the first index within an array of 128 values whose value is greater than or equal to a target. Given the small size, it's not obvious whether it's better to perform a linear search, a binary search or something else... It is surprisingly hard to beat the linear search that we are using today. Experiments suggested that the following approach works in practice:

- First check if the next item in the array is greater than or equal to the target.
- Then find the first 4-value interval that contains our target.
- Then perform a branchless binary search within this interval of 4 values.

This approach still biases heavily towards the case when the target is very close to the current index, only a bit less so than a linear search.
1 parent 256d6a2 commit 36fd689
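
To make the strategy concrete before reading the diff, here is a minimal, self-contained Java sketch of the idea behind the new findNextGEQ method. It is an illustration, not the patch itself: the class name FindNextGEQSketch, the WINDOW constant and the Long.MAX_VALUE sentinel are stand-ins, whereas the actual change uses BINARY_SEARCH_WINDOW_SIZE = 4 and pads docBuffer with DocIdSetIterator.NO_MORE_DOCS so that the window scan always terminates.

/** Standalone sketch of the search strategy described in this commit. */
final class FindNextGEQSketch {
  static final int WINDOW = 4; // mirrors BINARY_SEARCH_WINDOW_SIZE in the patch

  /**
   * Returns the first index >= start whose value is >= target. Assumes values is sorted and its
   * last WINDOW entries are a sentinel (here Long.MAX_VALUE) that is >= any possible target.
   */
  static int findNextGEQ(long[] values, long target, int start) {
    // 1. Common case: the very next value is already large enough.
    if (values[start] >= target) {
      return start;
    }
    start += 1;

    // 2. Scan by windows of 4: stop at the first window whose last value reaches the target.
    //    The sentinel padding guarantees that some window triggers the break.
    for (int i = start; i + WINDOW <= values.length; i += WINDOW) {
      if (values[i + WINDOW - 1] >= target) {
        start = i;
        break;
      }
    }

    // 3. Binary search within the 4-value window, written so it can compile branchlessly.
    if (values[start + 1] < target) {
      start += 2;
    }
    if (values[start] < target) {
      start += 1;
    }
    return start;
  }

  public static void main(String[] args) {
    long[] values = new long[128 + WINDOW];
    for (int i = 0; i < 128; i++) {
      values[i] = 3L * i; // sorted values: 0, 3, 6, ...
    }
    java.util.Arrays.fill(values, 128, values.length, Long.MAX_VALUE); // sentinel padding
    System.out.println(findNextGEQ(values, 10, 0)); // prints 4: values[4] = 12 is the first >= 10
  }
}

The last two if statements are the 4-value binary search; because each one only conditionally bumps an index, the JIT can turn them into conditional moves rather than branches, which is what the patch's comment about a branchless binary search refers to.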

2 files changed: +110 -33 lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsReader.java

Lines changed: 94 additions & 33 deletions
@@ -67,6 +67,7 @@
 public final class Lucene912PostingsReader extends PostingsReaderBase {
 
   static final VectorizationProvider VECTORIZATION_PROVIDER = VectorizationProvider.getInstance();
+  static int BINARY_SEARCH_WINDOW_SIZE = 4;
 
   private final IndexInput docIn;
   private final IndexInput posIn;
@@ -212,13 +213,74 @@ static void prefixSum(long[] buffer, int count, long base) {
     }
   }
 
-  static int findFirstGreater(long[] buffer, int target, int from) {
-    for (int i = from; i < BLOCK_SIZE; ++i) {
-      if (buffer[i] >= target) {
-        return i;
+  private static boolean assertDocBuffer(long[] values, int start) {
+    assert values.length == BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE;
+    assert values[BLOCK_SIZE] == DocIdSetIterator.NO_MORE_DOCS;
+    assert start < BLOCK_SIZE;
+
+    int endOffset;
+    if (values[0] == DocIdSetIterator.NO_MORE_DOCS) {
+      endOffset = 0;
+    } else {
+      endOffset = -1;
+      for (int i = 1; i < values.length; ++i) {
+        assert values[i] > values[i - 1] : Arrays.toString(values);
+        if (values[i] == DocIdSetIterator.NO_MORE_DOCS) {
+          endOffset = i;
+          break;
+        }
       }
     }
-    return BLOCK_SIZE;
+    for (int i = 0; i < BINARY_SEARCH_WINDOW_SIZE; ++i) {
+      assert values[endOffset + i] == DocIdSetIterator.NO_MORE_DOCS;
+    }
+    return true;
+  }
+
+  /**
+   * Return the first index in sorted array {@code values} whose value is greater than or equal to
+   * {@code target}. For correctness, it requires the last 4 values to be set to {@code
+   * NO_MORE_DOCS}.
+   */
+  static int findNextGEQ(long[] values, long target, int start) {
+    assert assertDocBuffer(values, start);
+
+    if (values[start] >= target) {
+      // Surprisingly this is a likely condition in practice, so optimizing for it helps.
+      return start;
+    }
+
+    // We just ruled out that our target index is at `start`.
+    start += 1;
+
+    // Now find the first interval of 4 values that contains our target.
+    for (int i = start;
+        i + BINARY_SEARCH_WINDOW_SIZE <= values.length;
+        i += BINARY_SEARCH_WINDOW_SIZE) {
+      if (values[i + BINARY_SEARCH_WINDOW_SIZE - 1] >= target) {
+        start = i;
+        break;
+      }
+    }
+
+    // Binary search in this interval of 4 values.
+    return binarySearch4(values, target, start);
+  }
+
+  /**
+   * Return the first index whose value is greater than or equal to {@code target} among the 4
+   * values starting at {@code start}. If none of the values is greater than or equal to {@code
+   * target}, this returns {@code start+3}.
+   */
+  private static int binarySearch4(long[] values, long target, int start) {
+    // This code is organized in a way that compiles to a branchless binary search.
+    if (values[start + 1] < target) {
+      start += 2;
+    }
+    if (values[start] < target) {
+      start += 1;
+    }
+    return start;
   }
 
   @Override
@@ -348,7 +410,7 @@ final class BlockDocsEnum extends PostingsEnum {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
     final PForUtil pforUtil = new PForUtil(forUtil);
 
-    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
+    private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
     private final long[] freqBuffer = new long[BLOCK_SIZE];
 
     private int docBufferUpto;
@@ -390,9 +452,9 @@ public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
                   .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
               >= 0
           || fieldInfo.hasPayloads();
-      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
-      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
+      Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
     }
 
     public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@@ -495,7 +557,7 @@ private void refillFullBlock() throws IOException {
       docCountUpto += BLOCK_SIZE;
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
-      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
+      assert assertDocBuffer(docBuffer, 0);
     }
 
     private void refillRemainder() throws IOException {
@@ -506,15 +568,14 @@ private void refillRemainder() throws IOException {
       if (docFreq == 1) {
         docBuffer[0] = singletonDocID;
         freqBuffer[0] = totalTermFreq;
-        docBuffer[1] = NO_MORE_DOCS;
         docCountUpto++;
       } else {
         // Read vInts:
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq);
         prefixSum(docBuffer, left, prevDocID);
-        docBuffer[left] = NO_MORE_DOCS;
         docCountUpto += left;
       }
+      Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
       docBufferUpto = 0;
       freqFP = -1;
     }
@@ -612,7 +673,7 @@ public int advance(int target) throws IOException {
         }
       }
 
-      int next = findFirstGreater(docBuffer, target, docBufferUpto);
+      int next = findNextGEQ(docBuffer, target, docBufferUpto);
       this.doc = (int) docBuffer[next];
       docBufferUpto = next + 1;
       return doc;
@@ -630,8 +691,8 @@ final class EverythingEnum extends PostingsEnum {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
     final PForUtil pforUtil = new PForUtil(forUtil);
 
-    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
-    private final long[] freqBuffer = new long[BLOCK_SIZE + 1];
+    private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
+    private final long[] freqBuffer = new long[BLOCK_SIZE];
     private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
 
     private final long[] payloadLengthBuffer;
@@ -752,9 +813,9 @@ public EverythingEnum(FieldInfo fieldInfo) throws IOException {
         payload = null;
       }
 
-      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
-      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
+      Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
     }
 
     public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@@ -843,18 +904,18 @@ private void refillDocs() throws IOException {
       } else if (docFreq == 1) {
         docBuffer[0] = singletonDocID;
         freqBuffer[0] = totalTermFreq;
-        docBuffer[1] = NO_MORE_DOCS;
+        Arrays.fill(docBuffer, 1, 1 + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
         docCountUpto++;
       } else {
         // Read vInts:
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
         prefixSum(docBuffer, left, prevDocID);
-        docBuffer[left] = NO_MORE_DOCS;
+        Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
         docCountUpto += left;
       }
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
-      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
+      assert assertDocBuffer(docBuffer, 0);
     }
 
     private void skipLevel1To(int target) throws IOException {
@@ -1016,7 +1077,7 @@ public int advance(int target) throws IOException {
         refillDocs();
       }
 
-      int next = findFirstGreater(docBuffer, target, docBufferUpto);
+      int next = findNextGEQ(docBuffer, target, docBufferUpto);
       for (int i = docBufferUpto; i <= next; ++i) {
         posPendingCount += freqBuffer[i];
       }
@@ -1217,7 +1278,7 @@ final class BlockImpactsDocsEnum extends ImpactsEnum {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
     final PForUtil pforUtil = new PForUtil(forUtil);
 
-    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
+    private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
     private final long[] freqBuffer = new long[BLOCK_SIZE];
 
     private int docBufferUpto;
@@ -1265,9 +1326,9 @@ public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
                   .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
               >= 0
           || fieldInfo.hasPayloads();
-      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
-      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
+      Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
 
       docFreq = termState.docFreq;
       if (docFreq > 1) {
@@ -1357,13 +1418,13 @@ private void refillDocs() throws IOException {
         // Read vInts:
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
         prefixSum(docBuffer, left, prevDocID);
-        docBuffer[left] = NO_MORE_DOCS;
+        Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
         freqFP = -1;
         docCountUpto += left;
       }
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
-      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
+      assert assertDocBuffer(docBuffer, 0);
     }
 
     private void skipLevel1To(int target) throws IOException {
@@ -1489,7 +1550,7 @@ public int advance(int target) throws IOException {
         needsRefilling = false;
       }
 
-      int next = findFirstGreater(docBuffer, target, docBufferUpto);
+      int next = findNextGEQ(docBuffer, target, docBufferUpto);
       this.doc = (int) docBuffer[next];
       docBufferUpto = next + 1;
       return doc;
@@ -1572,7 +1633,7 @@ final class BlockImpactsPostingsEnum extends ImpactsEnum {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
     final PForUtil pforUtil = new PForUtil(forUtil);
 
-    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
+    private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
     private final long[] freqBuffer = new long[BLOCK_SIZE];
     private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
 
@@ -1652,9 +1713,9 @@ public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState
       this.posIn = Lucene912PostingsReader.this.posIn.clone();
       posInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(posIn);
 
-      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
-      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
+      Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
 
       docFreq = termState.docFreq;
       posTermStartFP = termState.posStartFP;
@@ -1724,18 +1785,18 @@ private void refillDocs() throws IOException {
       } else if (docFreq == 1) {
         docBuffer[0] = singletonDocID;
         freqBuffer[0] = totalTermFreq;
-        docBuffer[1] = NO_MORE_DOCS;
+        Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
         docCountUpto++;
       } else {
         // Read vInts:
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
         prefixSum(docBuffer, left, prevDocID);
-        docBuffer[left] = NO_MORE_DOCS;
+        Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
         docCountUpto += left;
       }
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
-      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
+      assert assertDocBuffer(docBuffer, 0);
     }
 
     private void skipLevel1To(int target) throws IOException {
@@ -1932,7 +1993,7 @@ public int advance(int target) throws IOException {
         needsRefilling = false;
       }
 
-      int next = findFirstGreater(docBuffer, target, docBufferUpto);
+      int next = findNextGEQ(docBuffer, target, docBufferUpto);
      for (int i = docBufferUpto; i <= next; ++i) {
         posPendingCount += freqBuffer[i];
       }

lucene/core/src/test/org/apache/lucene/codecs/lucene912/TestLucene912PostingsFormat.java

Lines changed: 16 additions & 0 deletions
@@ -31,6 +31,7 @@
 import org.apache.lucene.index.Impact;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.Directory;
@@ -154,4 +155,19 @@ private void doTestImpactSerialization(List<Impact> impacts) throws IOException
       }
     }
   }
+
+  public void testFindNextGEQ() {
+    long[] values =
+        new long[ForUtil.BLOCK_SIZE + Lucene912PostingsReader.BINARY_SEARCH_WINDOW_SIZE];
+    for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
+      values[i] = i * 2;
+    }
+    Arrays.fill(values, ForUtil.BLOCK_SIZE, values.length, DocIdSetIterator.NO_MORE_DOCS);
+    for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
+      for (int start = 0; start <= i; ++start) {
+        assertEquals(i, Lucene912PostingsReader.findNextGEQ(values, i * 2, start));
+        assertEquals(i + 1, Lucene912PostingsReader.findNextGEQ(values, i * 2 + 1, start));
+      }
+    }
+  }
 }
