Skip to content

Commit 58ab5b7

Browse files
authored
Merge related HashMaps in FieldInfos#FieldNumbers into one map (#13460)
Merges all immutable attributes in FieldInfos.FieldNumbers into one HashMap, saving memory when writing big indices. Fixes an exotic bug when calling clear(), where not all attributes were cleared.
1 parent 61a6abd commit 58ab5b7

File tree

2 files changed

+68
-91
lines changed

2 files changed

+68
-91
lines changed

lucene/CHANGES.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,9 @@ Bug Fixes
265265

266266
Other
267267
---------------------
268-
(No changes)
268+
* GITHUB#13459: Merges all immutable attributes in FieldInfos.FieldNumbers into one HashMap saving
269+
memory when writing big indices. Fixes an exotic bug when calling clear where not all attributes
270+
were cleared. (Ignacio Vera)
269271

270272
======================== Lucene 9.11.0 =======================
271273

lucene/core/src/java/org/apache/lucene/index/FieldInfos.java

+65-90
Original file line numberDiff line numberDiff line change
@@ -346,48 +346,31 @@ public FieldInfo fieldInfo(int fieldNumber) {
346346
return fieldNumber >= byNumber.length ? null : byNumber[fieldNumber];
347347
}
348348

349-
static final class FieldDimensions {
350-
public final int dimensionCount;
351-
public final int indexDimensionCount;
352-
public final int dimensionNumBytes;
353-
354-
public FieldDimensions(int dimensionCount, int indexDimensionCount, int dimensionNumBytes) {
355-
this.dimensionCount = dimensionCount;
356-
this.indexDimensionCount = indexDimensionCount;
357-
this.dimensionNumBytes = dimensionNumBytes;
358-
}
359-
}
360-
361-
static final class FieldVectorProperties {
362-
final int numDimensions;
363-
final VectorEncoding vectorEncoding;
364-
final VectorSimilarityFunction similarityFunction;
365-
366-
FieldVectorProperties(
367-
int numDimensions,
368-
VectorEncoding vectorEncoding,
369-
VectorSimilarityFunction similarityFunction) {
370-
this.numDimensions = numDimensions;
371-
this.vectorEncoding = vectorEncoding;
372-
this.similarityFunction = similarityFunction;
373-
}
374-
}
349+
private record FieldDimensions(
350+
int dimensionCount, int indexDimensionCount, int dimensionNumBytes) {}
351+
352+
private record FieldVectorProperties(
353+
int numDimensions,
354+
VectorEncoding vectorEncoding,
355+
VectorSimilarityFunction similarityFunction) {}
356+
357+
private record IndexOptionsProperties(boolean storeTermVectors, boolean omitNorms) {}
358+
359+
// We use this to enforce that a given field never
360+
// changes DV type, even across segments / IndexWriter
361+
// sessions:
362+
private record FieldProperties(
363+
int number,
364+
IndexOptions indexOptions,
365+
IndexOptionsProperties indexOptionsProperties,
366+
DocValuesType docValuesType,
367+
FieldDimensions fieldDimensions,
368+
FieldVectorProperties fieldVectorProperties) {}
375369

376370
static final class FieldNumbers {
377371

378372
private final IntObjectHashMap<String> numberToName;
379-
private final Map<String, Integer> nameToNumber;
380-
private final Map<String, IndexOptions> indexOptions;
381-
// We use this to enforce that a given field never
382-
// changes DV type, even across segments / IndexWriter
383-
// sessions:
384-
private final Map<String, DocValuesType> docValuesType;
385-
386-
private final Map<String, FieldDimensions> dimensions;
387-
388-
private final Map<String, FieldVectorProperties> vectorProps;
389-
private final Map<String, Boolean> omitNorms;
390-
private final Map<String, Boolean> storeTermVectors;
373+
private final Map<String, FieldProperties> fieldProperties;
391374

392375
// TODO: we should similarly catch an attempt to turn
393376
// norms back on after they were already committed; today
@@ -401,14 +384,8 @@ static final class FieldNumbers {
401384
private final String parentFieldName;
402385

403386
FieldNumbers(String softDeletesFieldName, String parentFieldName) {
404-
this.nameToNumber = new HashMap<>();
405387
this.numberToName = new IntObjectHashMap<>();
406-
this.indexOptions = new HashMap<>();
407-
this.docValuesType = new HashMap<>();
408-
this.dimensions = new HashMap<>();
409-
this.vectorProps = new HashMap<>();
410-
this.omitNorms = new HashMap<>();
411-
this.storeTermVectors = new HashMap<>();
388+
this.fieldProperties = new HashMap<>();
412389
this.softDeletesFieldName = softDeletesFieldName;
413390
this.parentFieldName = parentFieldName;
414391
if (softDeletesFieldName != null
@@ -425,7 +402,7 @@ synchronized void verifyFieldInfo(FieldInfo fi) {
425402
String fieldName = fi.getName();
426403
verifySoftDeletedFieldName(fieldName, fi.isSoftDeletesField());
427404
verifyParentFieldName(fieldName, fi.isParentField());
428-
if (nameToNumber.containsKey(fieldName)) {
405+
if (fieldProperties.containsKey(fieldName)) {
429406
verifySameSchema(fi);
430407
}
431408
}
@@ -439,15 +416,15 @@ synchronized int addOrGet(FieldInfo fi) {
439416
String fieldName = fi.getName();
440417
verifySoftDeletedFieldName(fieldName, fi.isSoftDeletesField());
441418
verifyParentFieldName(fieldName, fi.isParentField());
442-
Integer fieldNumber = nameToNumber.get(fieldName);
419+
var fieldProperties = this.fieldProperties.get(fieldName);
443420

444-
if (fieldNumber != null) {
421+
if (fieldProperties != null) {
445422
verifySameSchema(fi);
446423
} else { // first time we see this field in this index
447-
final Integer preferredBoxed = Integer.valueOf(fi.number);
448-
if (fi.number != -1 && !numberToName.containsKey(preferredBoxed)) {
424+
int fieldNumber;
425+
if (fi.number != -1 && numberToName.containsKey(fi.number) == false) {
449426
// cool - we can use this number globally
450-
fieldNumber = preferredBoxed;
427+
fieldNumber = fi.number;
451428
} else {
452429
// find a new FieldNumber
453430
while (numberToName.containsKey(++lowestUnassignedFieldNumber)) {
@@ -457,25 +434,25 @@ synchronized int addOrGet(FieldInfo fi) {
457434
}
458435
assert fieldNumber >= 0;
459436
numberToName.put(fieldNumber, fieldName);
460-
nameToNumber.put(fieldName, fieldNumber);
461-
this.indexOptions.put(fieldName, fi.getIndexOptions());
462-
if (fi.getIndexOptions() != IndexOptions.NONE) {
463-
this.storeTermVectors.put(fieldName, fi.hasVectors());
464-
this.omitNorms.put(fieldName, fi.omitsNorms());
465-
}
466-
docValuesType.put(fieldName, fi.getDocValuesType());
467-
dimensions.put(
468-
fieldName,
469-
new FieldDimensions(
470-
fi.getPointDimensionCount(),
471-
fi.getPointIndexDimensionCount(),
472-
fi.getPointNumBytes()));
473-
vectorProps.put(
474-
fieldName,
475-
new FieldVectorProperties(
476-
fi.getVectorDimension(), fi.getVectorEncoding(), fi.getVectorSimilarityFunction()));
437+
fieldProperties =
438+
new FieldProperties(
439+
fieldNumber,
440+
fi.getIndexOptions(),
441+
fi.getIndexOptions() != IndexOptions.NONE
442+
? new IndexOptionsProperties(fi.hasVectors(), fi.omitsNorms())
443+
: null,
444+
fi.getDocValuesType(),
445+
new FieldDimensions(
446+
fi.getPointDimensionCount(),
447+
fi.getPointIndexDimensionCount(),
448+
fi.getPointNumBytes()),
449+
new FieldVectorProperties(
450+
fi.getVectorDimension(),
451+
fi.getVectorEncoding(),
452+
fi.getVectorSimilarityFunction()));
453+
this.fieldProperties.put(fieldName, fieldProperties);
477454
}
478-
return fieldNumber.intValue();
455+
return fieldProperties.number;
479456
}
480457

481458
private void verifySoftDeletedFieldName(String fieldName, boolean isSoftDeletesField) {
@@ -532,19 +509,20 @@ private void verifyParentFieldName(String fieldName, boolean isParentField) {
532509

533510
private void verifySameSchema(FieldInfo fi) {
534511
String fieldName = fi.getName();
535-
IndexOptions currentOpts = this.indexOptions.get(fieldName);
512+
FieldProperties fieldProperties = this.fieldProperties.get(fieldName);
513+
IndexOptions currentOpts = fieldProperties.indexOptions;
536514
verifySameIndexOptions(fieldName, currentOpts, fi.getIndexOptions());
537515
if (currentOpts != IndexOptions.NONE) {
538-
boolean curStoreTermVector = this.storeTermVectors.get(fieldName);
516+
boolean curStoreTermVector = fieldProperties.indexOptionsProperties.storeTermVectors;
539517
verifySameStoreTermVectors(fieldName, curStoreTermVector, fi.hasVectors());
540-
boolean curOmitNorms = this.omitNorms.get(fieldName);
518+
boolean curOmitNorms = fieldProperties.indexOptionsProperties.omitNorms;
541519
verifySameOmitNorms(fieldName, curOmitNorms, fi.omitsNorms());
542520
}
543521

544-
DocValuesType currentDVType = docValuesType.get(fieldName);
522+
DocValuesType currentDVType = fieldProperties.docValuesType;
545523
verifySameDocValuesType(fieldName, currentDVType, fi.getDocValuesType());
546524

547-
FieldDimensions dims = dimensions.get(fieldName);
525+
FieldDimensions dims = fieldProperties.fieldDimensions;
548526
verifySamePointsOptions(
549527
fieldName,
550528
dims.dimensionCount,
@@ -554,7 +532,7 @@ private void verifySameSchema(FieldInfo fi) {
554532
fi.getPointIndexDimensionCount(),
555533
fi.getPointNumBytes());
556534

557-
FieldVectorProperties props = vectorProps.get(fieldName);
535+
FieldVectorProperties props = fieldProperties.fieldVectorProperties;
558536
verifySameVectorOptions(
559537
fieldName,
560538
props.numDimensions,
@@ -579,7 +557,7 @@ private void verifySameSchema(FieldInfo fi) {
579557
*/
580558
synchronized void verifyOrCreateDvOnlyField(
581559
String fieldName, DocValuesType dvType, boolean fieldMustExist) {
582-
if (nameToNumber.containsKey(fieldName) == false) {
560+
if (fieldProperties.containsKey(fieldName) == false) {
583561
if (fieldMustExist) {
584562
throw new IllegalArgumentException(
585563
"Can't update ["
@@ -612,7 +590,8 @@ synchronized void verifyOrCreateDvOnlyField(
612590
}
613591
} else {
614592
// verify that field is doc values only field with the given doc values type
615-
DocValuesType fieldDvType = docValuesType.get(fieldName);
593+
FieldProperties fieldProperties = this.fieldProperties.get(fieldName);
594+
DocValuesType fieldDvType = fieldProperties.docValuesType;
616595
if (dvType != fieldDvType) {
617596
throw new IllegalArgumentException(
618597
"Can't update ["
@@ -623,7 +602,7 @@ synchronized void verifyOrCreateDvOnlyField(
623602
+ fieldDvType
624603
+ "].");
625604
}
626-
FieldDimensions fdimensions = dimensions.get(fieldName);
605+
FieldDimensions fdimensions = fieldProperties.fieldDimensions;
627606
if (fdimensions != null && fdimensions.dimensionCount != 0) {
628607
throw new IllegalArgumentException(
629608
"Can't update ["
@@ -632,7 +611,7 @@ synchronized void verifyOrCreateDvOnlyField(
632611
+ fieldName
633612
+ "] must be doc values only field, but is also indexed with points.");
634613
}
635-
IndexOptions ioptions = indexOptions.get(fieldName);
614+
IndexOptions ioptions = fieldProperties.indexOptions;
636615
if (ioptions != null && ioptions != IndexOptions.NONE) {
637616
throw new IllegalArgumentException(
638617
"Can't update ["
@@ -641,7 +620,7 @@ synchronized void verifyOrCreateDvOnlyField(
641620
+ fieldName
642621
+ "] must be doc values only field, but is also indexed with postings.");
643622
}
644-
FieldVectorProperties fvp = vectorProps.get(fieldName);
623+
FieldVectorProperties fvp = fieldProperties.fieldVectorProperties;
645624
if (fvp != null && fvp.numDimensions != 0) {
646625
throw new IllegalArgumentException(
647626
"Can't update ["
@@ -664,14 +643,13 @@ synchronized void verifyOrCreateDvOnlyField(
664643
* {@code dvType} returns a new FieldInfo based on the options in global field numbers
665644
*/
666645
FieldInfo constructFieldInfo(String fieldName, DocValuesType dvType, int newFieldNumber) {
667-
Integer fieldNumber;
646+
FieldProperties fieldProperties;
668647
synchronized (this) {
669-
fieldNumber = nameToNumber.get(fieldName);
648+
fieldProperties = this.fieldProperties.get(fieldName);
670649
}
671-
if (fieldNumber == null) return null;
672-
DocValuesType dvType0 = docValuesType.get(fieldName);
650+
if (fieldProperties == null) return null;
651+
DocValuesType dvType0 = fieldProperties.docValuesType;
673652
if (dvType != dvType0) return null;
674-
675653
boolean isSoftDeletesField = fieldName.equals(softDeletesFieldName);
676654
boolean isParentField = fieldName.equals(parentFieldName);
677655
return new FieldInfo(
@@ -695,15 +673,12 @@ FieldInfo constructFieldInfo(String fieldName, DocValuesType dvType, int newFiel
695673
}
696674

697675
synchronized Set<String> getFieldNames() {
698-
return Set.copyOf(nameToNumber.keySet());
676+
return Set.copyOf(fieldProperties.keySet());
699677
}
700678

701679
synchronized void clear() {
702680
numberToName.clear();
703-
nameToNumber.clear();
704-
indexOptions.clear();
705-
docValuesType.clear();
706-
dimensions.clear();
681+
fieldProperties.clear();
707682
lowestUnassignedFieldNumber = -1;
708683
}
709684
}

0 commit comments

Comments
 (0)