Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,9 @@ API Changes
* GITHUB#15803: Add ReaderUtil#partitionByLeaf to partition doc IDs from
ScoreDoc hits by leaf reader. (Zihan Xu)

* GITHUB#15584: Add support for termdoc fields that use custom term freqs (via IndexOptions.DOCS_AND_CUSTOM_FREQS).
IndexWriter counts their terms rather than summing their freqs.

New Features
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,11 @@ public void decodeTerm(
termState.singletonDocID += BitUtil.zigZagDecode(l >>> 1);
}

if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
if (fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)) {
termState.posStartFP += in.readVLong();
if (fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
|| fieldInfo.hasPayloads()) {
termState.payStartFP += in.readVLong();
}
Expand Down Expand Up @@ -403,10 +402,9 @@ private enum DeltaEncoding {
public BlockPostingsEnum(FieldInfo fieldInfo, int flags, boolean needsImpacts)
throws IOException {
options = fieldInfo.getIndexOptions();
indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets =
options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasFreq = options.subsumes(IndexOptions.DOCS_AND_FREQS);
indexHasPos = options.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
indexHasOffsets = options.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
indexHasPayloads = fieldInfo.hasPayloads();
indexHasOffsetsOrPayloads = indexHasOffsets || indexHasPayloads;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,12 +229,11 @@ public void decodeTerm(
termState.singletonDocID += BitUtil.zigZagDecode(l >>> 1);
}

if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
if (fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)) {
termState.posStartFP += in.readVLong();
if (fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
|| fieldInfo.hasPayloads()) {
termState.payStartFP += in.readVLong();
}
Expand Down Expand Up @@ -405,10 +404,9 @@ private enum DeltaEncoding {
public BlockPostingsEnum(FieldInfo fieldInfo, int flags, boolean needsImpacts)
throws IOException {
options = fieldInfo.getIndexOptions();
indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets =
options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasFreq = options.subsumes(IndexOptions.DOCS_AND_FREQS);
indexHasPos = options.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
indexHasOffsets = options.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
indexHasPayloads = fieldInfo.hasPayloads();
indexHasOffsetsOrPayloads = indexHasOffsets || indexHasPayloads;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ private static VectorSimilarityFunction getDistFunc(IndexInput input, byte b) th
// We "mirror" the IndexOptions enum values with the constants below. If a new IndexOptions value
// is added while this format is still used for writing, the assertion below fails as a reminder
// to update this encoding:
assert IndexOptions.values().length == 5;
assert IndexOptions.values().length == 6;
}

private static byte indexOptionsByte(IndexOptions indexOptions) {
Expand All @@ -285,6 +285,8 @@ private static byte indexOptionsByte(IndexOptions indexOptions) {
return 3;
case DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS:
return 4;
case DOCS_AND_CUSTOM_FREQS:
return 5;
default:
// BUG:
throw new AssertionError("unhandled IndexOptions: " + indexOptions);
Expand All @@ -303,6 +305,8 @@ private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOE
return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
case 4:
return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
case 5:
return IndexOptions.DOCS_AND_CUSTOM_FREQS;
default:
// BUG
throw new CorruptIndexException("invalid IndexOptions byte: " + b, input);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,9 @@ public void decodeTerm(
throws IOException {
final IntBlockTermState termState = (IntBlockTermState) _termState;
final boolean fieldHasPositions =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
final boolean fieldHasOffsets =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
final boolean fieldHasPayloads = fieldInfo.hasPayloads();

if (absolute) {
Expand Down Expand Up @@ -244,7 +243,7 @@ public PostingsEnum postings(
throws IOException {

boolean indexHasPositions =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

if (indexHasPositions == false
|| PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) {
Expand Down Expand Up @@ -281,10 +280,9 @@ public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags)
}

final boolean indexHasPositions =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
final boolean indexHasOffsets =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
final boolean indexHasPayloads = fieldInfo.hasPayloads();

if (indexHasPositions == false
Expand Down Expand Up @@ -352,14 +350,12 @@ final class BlockDocsEnum extends PostingsEnum {
public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
this.startDocIn = Lucene90PostingsReader.this.docIn;
this.docIn = null;
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasFreq = fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS);
indexHasPos = fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
indexHasOffsets =
fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
indexHasPayloads = fieldInfo.hasPayloads();
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
// advance()
Expand All @@ -368,11 +364,9 @@ public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {

public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
return docIn == startDocIn
&& indexHasFreq
== (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0)
&& indexHasFreq == (fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS))
&& indexHasPos
== (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
>= 0)
== (fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS))
&& indexHasPayloads == fieldInfo.hasPayloads();
}

Expand Down Expand Up @@ -646,9 +640,8 @@ final class EverythingEnum extends PostingsEnum {
public EverythingEnum(FieldInfo fieldInfo) throws IOException {
indexHasOffsets =
fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
indexHasPayloads = fieldInfo.hasPayloads();

this.startDocIn = Lucene90PostingsReader.this.docIn;
Expand Down Expand Up @@ -688,9 +681,8 @@ public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
return docIn == startDocIn
&& indexHasOffsets
== (fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0)
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS))
&& indexHasPayloads == fieldInfo.hasPayloads();
}

Expand Down Expand Up @@ -1076,14 +1068,13 @@ final class BlockImpactsDocsEnum extends ImpactsEnum {

public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
throws IOException {
indexHasFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasFreqs = fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS);
final boolean indexHasPositions =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
final boolean indexHasOffsets =
fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
final boolean indexHasPayloads = fieldInfo.hasPayloads();

this.docIn = Lucene90PostingsReader.this.docIn.clone();
Expand Down Expand Up @@ -1306,9 +1297,8 @@ public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState
throws IOException {
indexHasOffsets =
fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
indexHasPayloads = fieldInfo.hasPayloads();

this.docIn = Lucene90PostingsReader.this.docIn.clone();
Expand Down Expand Up @@ -1623,14 +1613,12 @@ final class BlockImpactsEverythingEnum extends ImpactsEnum {

public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, int flags)
throws IOException {
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasFreq = fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS);
indexHasPos = fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
indexHasOffsets =
fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
indexHasPayloads = fieldInfo.hasPayloads();

needsPositions = PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,20 +163,19 @@ public Stats getStats() throws IOException {

@Override
public boolean hasFreqs() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
return fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS);
}

@Override
public boolean hasOffsets() {
return fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
}

@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
return fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,12 +237,11 @@ public void decodeTerm(
termState.singletonDocID += BitUtil.zigZagDecode(l >>> 1);
}

if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
if (fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)) {
termState.posStartFP += in.readVLong();
if (fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
|| fieldInfo.hasPayloads()) {
termState.payStartFP += in.readVLong();
}
Expand All @@ -258,7 +257,7 @@ public void decodeTerm(
public PostingsEnum postings(
FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags)
throws IOException {
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0
if (!fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
|| PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) {
return (reuse instanceof BlockDocsEnum blockDocsEnum
&& blockDocsEnum.canReuse(docIn, fieldInfo)
Expand All @@ -278,18 +277,17 @@ public PostingsEnum postings(
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags)
throws IOException {
final IndexOptions options = fieldInfo.getIndexOptions();
final boolean indexHasPositions =
options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
final boolean indexHasPositions = options.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

if (state.docFreq >= BLOCK_SIZE) {
if (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
if (options.subsumes(IndexOptions.DOCS_AND_FREQS)
&& (indexHasPositions == false
|| PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false)) {
return new BlockImpactsDocsEnum(indexHasPositions, (IntBlockTermState) state);
}

if (indexHasPositions
&& (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0
&& (!options.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
|| PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false)
&& (fieldInfo.hasPayloads() == false
|| PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
Expand Down Expand Up @@ -341,7 +339,7 @@ private abstract class AbstractPostingsEnum extends PostingsEnum {
protected IndexInput docIn;

protected AbstractPostingsEnum(FieldInfo fieldInfo) {
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasFreq = fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS);
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
// advance()
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
Expand Down Expand Up @@ -399,7 +397,7 @@ public BlockDocsEnum(FieldInfo fieldInfo) {
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
final IndexOptions options = fieldInfo.getIndexOptions();
return docIn == Lucene912PostingsReader.this.docIn
&& indexHasFreq == (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0);
&& indexHasFreq == (options.subsumes(IndexOptions.DOCS_AND_FREQS));
}

public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
Expand Down Expand Up @@ -651,9 +649,8 @@ public EverythingEnum(FieldInfo fieldInfo) throws IOException {
super(fieldInfo);
indexHasOffsets =
fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
indexHasPayloads = fieldInfo.hasPayloads();
indexHasOffsetsOrPayloads = indexHasOffsets || indexHasPayloads;

Expand Down Expand Up @@ -688,9 +685,8 @@ public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
return docIn == Lucene912PostingsReader.this.docIn
&& indexHasOffsets
== (fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0)
.getIndexOptions()
.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS))
&& indexHasPayloads == fieldInfo.hasPayloads();
}

Expand Down Expand Up @@ -1465,9 +1461,8 @@ public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState
throws IOException {
super(termState);
final IndexOptions options = fieldInfo.getIndexOptions();
indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasOffsets =
options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasFreq = options.subsumes(IndexOptions.DOCS_AND_FREQS);
indexHasOffsets = options.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
indexHasPayloads = fieldInfo.hasPayloads();
indexHasOffsetsOrPayloads = indexHasOffsets || indexHasPayloads;

Expand Down
Loading
Loading