6767public final class Lucene912PostingsReader extends PostingsReaderBase {
6868
6969 static final VectorizationProvider VECTORIZATION_PROVIDER = VectorizationProvider .getInstance ();
70+ static int BINARY_SEARCH_WINDOW_SIZE = 4 ;
7071
7172 private final IndexInput docIn ;
7273 private final IndexInput posIn ;
@@ -212,13 +213,74 @@ static void prefixSum(long[] buffer, int count, long base) {
212213 }
213214 }
214215
215- static int findFirstGreater (long [] buffer , int target , int from ) {
216- for (int i = from ; i < BLOCK_SIZE ; ++i ) {
217- if (buffer [i ] >= target ) {
218- return i ;
216+ private static boolean assertDocBuffer (long [] values , int start ) {
217+ assert values .length == BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE ;
218+ assert values [BLOCK_SIZE ] == DocIdSetIterator .NO_MORE_DOCS ;
219+ assert start < BLOCK_SIZE ;
220+
221+ int endOffset ;
222+ if (values [0 ] == DocIdSetIterator .NO_MORE_DOCS ) {
223+ endOffset = 0 ;
224+ } else {
225+ endOffset = -1 ;
226+ for (int i = 1 ; i < values .length ; ++i ) {
227+ assert values [i ] > values [i - 1 ] : Arrays .toString (values );
228+ if (values [i ] == DocIdSetIterator .NO_MORE_DOCS ) {
229+ endOffset = i ;
230+ break ;
231+ }
219232 }
220233 }
221- return BLOCK_SIZE ;
234+ for (int i = 0 ; i < BINARY_SEARCH_WINDOW_SIZE ; ++i ) {
235+ assert values [endOffset + i ] == DocIdSetIterator .NO_MORE_DOCS ;
236+ }
237+ return true ;
238+ }
239+
240+ /**
241+ * Return the first index in sorted array {@code values} whose value is greater than or equal to
242+ * {@code target}. For correctness, it requires the last 4 values to be set to {@code
243+ * NO_MORE_DOCS}.
244+ */
245+ static int findNextGEQ (long [] values , long target , int start ) {
246+ assert assertDocBuffer (values , start );
247+
248+ if (values [start ] >= target ) {
249+ // Surprisingly this is a likely condition in practice, so optimizing for it helps.
250+ return start ;
251+ }
252+
253+ // We just ruled out that our target index is at `start`.
254+ start += 1 ;
255+
256+ // Now find the first interval of 4 values that contains our target.
257+ for (int i = start ;
258+ i + BINARY_SEARCH_WINDOW_SIZE <= values .length ;
259+ i += BINARY_SEARCH_WINDOW_SIZE ) {
260+ if (values [i + BINARY_SEARCH_WINDOW_SIZE - 1 ] >= target ) {
261+ start = i ;
262+ break ;
263+ }
264+ }
265+
266+ // Binary search in this interval of 4 values.
267+ return binarySearch4 (values , target , start );
268+ }
269+
270+ /**
271+ * Return the first index whose value is greater than or equal to {@code target} among the 4
272+ * values starting at {@code start}. If none of the values is greater than or equal to {@code
273+ * target}, this returns {@code start+3}.
274+ */
275+ private static int binarySearch4 (long [] values , long target , int start ) {
276+ // This code is organized in a way that compiles to a branchless binary search.
277+ if (values [start + 1 ] < target ) {
278+ start += 2 ;
279+ }
280+ if (values [start ] < target ) {
281+ start += 1 ;
282+ }
283+ return start ;
222284 }
223285
224286 @ Override
@@ -348,7 +410,7 @@ final class BlockDocsEnum extends PostingsEnum {
348410 final ForDeltaUtil forDeltaUtil = new ForDeltaUtil ();
349411 final PForUtil pforUtil = new PForUtil (forUtil );
350412
351- private final long [] docBuffer = new long [BLOCK_SIZE + 1 ];
413+ private final long [] docBuffer = new long [BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE ];
352414 private final long [] freqBuffer = new long [BLOCK_SIZE ];
353415
354416 private int docBufferUpto ;
@@ -390,9 +452,9 @@ public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
390452 .compareTo (IndexOptions .DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS )
391453 >= 0
392454 || fieldInfo .hasPayloads ();
393- // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
455+ // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
394456 // advance()
395- docBuffer [ BLOCK_SIZE ] = NO_MORE_DOCS ;
457+ Arrays . fill ( docBuffer , BLOCK_SIZE , BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS ) ;
396458 }
397459
398460 public boolean canReuse (IndexInput docIn , FieldInfo fieldInfo ) {
@@ -495,7 +557,7 @@ private void refillFullBlock() throws IOException {
495557 docCountUpto += BLOCK_SIZE ;
496558 prevDocID = docBuffer [BLOCK_SIZE - 1 ];
497559 docBufferUpto = 0 ;
498- assert docBuffer [ BLOCK_SIZE ] == NO_MORE_DOCS ;
560+ assert assertDocBuffer ( docBuffer , 0 ) ;
499561 }
500562
501563 private void refillRemainder () throws IOException {
@@ -506,15 +568,14 @@ private void refillRemainder() throws IOException {
506568 if (docFreq == 1 ) {
507569 docBuffer [0 ] = singletonDocID ;
508570 freqBuffer [0 ] = totalTermFreq ;
509- docBuffer [1 ] = NO_MORE_DOCS ;
510571 docCountUpto ++;
511572 } else {
512573 // Read vInts:
513574 PostingsUtil .readVIntBlock (docIn , docBuffer , freqBuffer , left , indexHasFreq , needsFreq );
514575 prefixSum (docBuffer , left , prevDocID );
515- docBuffer [left ] = NO_MORE_DOCS ;
516576 docCountUpto += left ;
517577 }
578+ Arrays .fill (docBuffer , left , left + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS );
518579 docBufferUpto = 0 ;
519580 freqFP = -1 ;
520581 }
@@ -612,7 +673,7 @@ public int advance(int target) throws IOException {
612673 }
613674 }
614675
615- int next = findFirstGreater (docBuffer , target , docBufferUpto );
676+ int next = findNextGEQ (docBuffer , target , docBufferUpto );
616677 this .doc = (int ) docBuffer [next ];
617678 docBufferUpto = next + 1 ;
618679 return doc ;
@@ -630,8 +691,8 @@ final class EverythingEnum extends PostingsEnum {
630691 final ForDeltaUtil forDeltaUtil = new ForDeltaUtil ();
631692 final PForUtil pforUtil = new PForUtil (forUtil );
632693
633- private final long [] docBuffer = new long [BLOCK_SIZE + 1 ];
634- private final long [] freqBuffer = new long [BLOCK_SIZE + 1 ];
694+ private final long [] docBuffer = new long [BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE ];
695+ private final long [] freqBuffer = new long [BLOCK_SIZE ];
635696 private final long [] posDeltaBuffer = new long [BLOCK_SIZE ];
636697
637698 private final long [] payloadLengthBuffer ;
@@ -752,9 +813,9 @@ public EverythingEnum(FieldInfo fieldInfo) throws IOException {
752813 payload = null ;
753814 }
754815
755- // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
816+ // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
756817 // advance()
757- docBuffer [ BLOCK_SIZE ] = NO_MORE_DOCS ;
818+ Arrays . fill ( docBuffer , BLOCK_SIZE , BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS ) ;
758819 }
759820
760821 public boolean canReuse (IndexInput docIn , FieldInfo fieldInfo ) {
@@ -843,18 +904,18 @@ private void refillDocs() throws IOException {
843904 } else if (docFreq == 1 ) {
844905 docBuffer [0 ] = singletonDocID ;
845906 freqBuffer [0 ] = totalTermFreq ;
846- docBuffer [ 1 ] = NO_MORE_DOCS ;
907+ Arrays . fill ( docBuffer , 1 , 1 + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS ) ;
847908 docCountUpto ++;
848909 } else {
849910 // Read vInts:
850911 PostingsUtil .readVIntBlock (docIn , docBuffer , freqBuffer , left , indexHasFreq , true );
851912 prefixSum (docBuffer , left , prevDocID );
852- docBuffer [ left ] = NO_MORE_DOCS ;
913+ Arrays . fill ( docBuffer , left , left + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS ) ;
853914 docCountUpto += left ;
854915 }
855916 prevDocID = docBuffer [BLOCK_SIZE - 1 ];
856917 docBufferUpto = 0 ;
857- assert docBuffer [ BLOCK_SIZE ] == NO_MORE_DOCS ;
918+ assert assertDocBuffer ( docBuffer , 0 ) ;
858919 }
859920
860921 private void skipLevel1To (int target ) throws IOException {
@@ -1016,7 +1077,7 @@ public int advance(int target) throws IOException {
10161077 refillDocs ();
10171078 }
10181079
1019- int next = findFirstGreater (docBuffer , target , docBufferUpto );
1080+ int next = findNextGEQ (docBuffer , target , docBufferUpto );
10201081 for (int i = docBufferUpto ; i <= next ; ++i ) {
10211082 posPendingCount += freqBuffer [i ];
10221083 }
@@ -1217,7 +1278,7 @@ final class BlockImpactsDocsEnum extends ImpactsEnum {
12171278 final ForDeltaUtil forDeltaUtil = new ForDeltaUtil ();
12181279 final PForUtil pforUtil = new PForUtil (forUtil );
12191280
1220- private final long [] docBuffer = new long [BLOCK_SIZE + 1 ];
1281+ private final long [] docBuffer = new long [BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE ];
12211282 private final long [] freqBuffer = new long [BLOCK_SIZE ];
12221283
12231284 private int docBufferUpto ;
@@ -1265,9 +1326,9 @@ public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
12651326 .compareTo (IndexOptions .DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS )
12661327 >= 0
12671328 || fieldInfo .hasPayloads ();
1268- // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
1329+ // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
12691330 // advance()
1270- docBuffer [ BLOCK_SIZE ] = NO_MORE_DOCS ;
1331+ Arrays . fill ( docBuffer , BLOCK_SIZE , BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS ) ;
12711332
12721333 docFreq = termState .docFreq ;
12731334 if (docFreq > 1 ) {
@@ -1357,13 +1418,13 @@ private void refillDocs() throws IOException {
13571418 // Read vInts:
13581419 PostingsUtil .readVIntBlock (docIn , docBuffer , freqBuffer , left , indexHasFreq , true );
13591420 prefixSum (docBuffer , left , prevDocID );
1360- docBuffer [ left ] = NO_MORE_DOCS ;
1421+ Arrays . fill ( docBuffer , left , left + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS ) ;
13611422 freqFP = -1 ;
13621423 docCountUpto += left ;
13631424 }
13641425 prevDocID = docBuffer [BLOCK_SIZE - 1 ];
13651426 docBufferUpto = 0 ;
1366- assert docBuffer [ BLOCK_SIZE ] == NO_MORE_DOCS ;
1427+ assert assertDocBuffer ( docBuffer , 0 ) ;
13671428 }
13681429
13691430 private void skipLevel1To (int target ) throws IOException {
@@ -1489,7 +1550,7 @@ public int advance(int target) throws IOException {
14891550 needsRefilling = false ;
14901551 }
14911552
1492- int next = findFirstGreater (docBuffer , target , docBufferUpto );
1553+ int next = findNextGEQ (docBuffer , target , docBufferUpto );
14931554 this .doc = (int ) docBuffer [next ];
14941555 docBufferUpto = next + 1 ;
14951556 return doc ;
@@ -1572,7 +1633,7 @@ final class BlockImpactsPostingsEnum extends ImpactsEnum {
15721633 final ForDeltaUtil forDeltaUtil = new ForDeltaUtil ();
15731634 final PForUtil pforUtil = new PForUtil (forUtil );
15741635
1575- private final long [] docBuffer = new long [BLOCK_SIZE + 1 ];
1636+ private final long [] docBuffer = new long [BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE ];
15761637 private final long [] freqBuffer = new long [BLOCK_SIZE ];
15771638 private final long [] posDeltaBuffer = new long [BLOCK_SIZE ];
15781639
@@ -1652,9 +1713,9 @@ public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState
16521713 this .posIn = Lucene912PostingsReader .this .posIn .clone ();
16531714 posInUtil = VECTORIZATION_PROVIDER .newPostingDecodingUtil (posIn );
16541715
1655- // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
1716+ // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
16561717 // advance()
1657- docBuffer [ BLOCK_SIZE ] = NO_MORE_DOCS ;
1718+ Arrays . fill ( docBuffer , BLOCK_SIZE , BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS ) ;
16581719
16591720 docFreq = termState .docFreq ;
16601721 posTermStartFP = termState .posStartFP ;
@@ -1724,18 +1785,18 @@ private void refillDocs() throws IOException {
17241785 } else if (docFreq == 1 ) {
17251786 docBuffer [0 ] = singletonDocID ;
17261787 freqBuffer [0 ] = totalTermFreq ;
1727- docBuffer [ 1 ] = NO_MORE_DOCS ;
1788+ Arrays . fill ( docBuffer , left , left + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS ) ;
17281789 docCountUpto ++;
17291790 } else {
17301791 // Read vInts:
17311792 PostingsUtil .readVIntBlock (docIn , docBuffer , freqBuffer , left , indexHasFreq , true );
17321793 prefixSum (docBuffer , left , prevDocID );
1733- docBuffer [ left ] = NO_MORE_DOCS ;
1794+ Arrays . fill ( docBuffer , left , left + BINARY_SEARCH_WINDOW_SIZE , NO_MORE_DOCS ) ;
17341795 docCountUpto += left ;
17351796 }
17361797 prevDocID = docBuffer [BLOCK_SIZE - 1 ];
17371798 docBufferUpto = 0 ;
1738- assert docBuffer [ BLOCK_SIZE ] == NO_MORE_DOCS ;
1799+ assert assertDocBuffer ( docBuffer , 0 ) ;
17391800 }
17401801
17411802 private void skipLevel1To (int target ) throws IOException {
@@ -1932,7 +1993,7 @@ public int advance(int target) throws IOException {
19321993 needsRefilling = false ;
19331994 }
19341995
1935- int next = findFirstGreater (docBuffer , target , docBufferUpto );
1996+ int next = findNextGEQ (docBuffer , target , docBufferUpto );
19361997 for (int i = docBufferUpto ; i <= next ; ++i ) {
19371998 posPendingCount += freqBuffer [i ];
19381999 }
0 commit comments