Skip to content

Commit

Permalink
Handle delete cases for star tree (#16380)
Browse files Browse the repository at this point in the history
Signed-off-by: Bharathwaj G <bharath78910@gmail.com>
  • Loading branch information
bharath-techie authored Oct 21, 2024
1 parent c4a9cc1 commit ad7f9e7
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.codecs.DocValuesProducer;

import java.util.Collections;
import java.util.Set;

/**
* Utility class for DocValuesProducers
* @opensearch.internal
*/
public class DocValuesProducerUtil {
/**
* Returns the segment doc values producers for the given doc values producer.
* If the given doc values producer is not a segment doc values producer, an empty set is returned.
* @param docValuesProducer the doc values producer
* @return the segment doc values producers
*/
public static Set<DocValuesProducer> getSegmentDocValuesProducers(DocValuesProducer docValuesProducer) {
if (docValuesProducer instanceof SegmentDocValuesProducer) {
return (((SegmentDocValuesProducer) docValuesProducer).dvProducers);
}
return Collections.emptySet();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,13 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
this.readState = new SegmentReadState(readState.directory, readState.segmentInfo, fieldInfos, readState.context);
this.readState = new SegmentReadState(
readState.directory,
readState.segmentInfo,
fieldInfos,
readState.context,
readState.segmentSuffix
);

// initialize star-tree doc values producer

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesProducerUtil;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
Expand All @@ -35,7 +36,6 @@
import org.opensearch.index.mapper.CompositeMappedFieldType;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.StarTreeMapper;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -221,12 +221,8 @@ private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer,
}
// we have all the required fields to build composite fields
if (compositeFieldSet.isEmpty()) {
for (CompositeMappedFieldType mappedType : compositeMappedFieldTypes) {
if (mappedType instanceof StarTreeMapper.StarTreeFieldType) {
try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) {
starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, compositeDocValuesConsumer);
}
}
try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) {
starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, compositeDocValuesConsumer);
}
}
}
Expand Down Expand Up @@ -285,27 +281,27 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException {
if (mergeState.docValuesProducers[i] instanceof CompositeIndexReader) {
reader = (CompositeIndexReader) mergeState.docValuesProducers[i];
} else {
continue;
Set<DocValuesProducer> docValuesProducers = DocValuesProducerUtil.getSegmentDocValuesProducers(
mergeState.docValuesProducers[i]
);
for (DocValuesProducer docValuesProducer : docValuesProducers) {
if (docValuesProducer instanceof CompositeIndexReader) {
reader = (CompositeIndexReader) docValuesProducer;
List<CompositeIndexFieldInfo> compositeFieldInfo = reader.getCompositeIndexFields();
if (compositeFieldInfo.isEmpty() == false) {
break;
}
}
}
}

if (reader == null) continue;
List<CompositeIndexFieldInfo> compositeFieldInfo = reader.getCompositeIndexFields();
for (CompositeIndexFieldInfo fieldInfo : compositeFieldInfo) {
if (fieldInfo.getType().equals(CompositeMappedFieldType.CompositeFieldType.STAR_TREE)) {
CompositeIndexValues compositeIndexValues = reader.getCompositeIndexValues(fieldInfo);
if (compositeIndexValues instanceof StarTreeValues) {
StarTreeValues starTreeValues = (StarTreeValues) compositeIndexValues;
List<StarTreeValues> fieldsList = starTreeSubsPerField.getOrDefault(fieldInfo.getField(), new ArrayList<>());
if (starTreeField == null) {
starTreeField = starTreeValues.getStarTreeField();
}
// assert star tree configuration is same across segments
else {
if (starTreeField.equals(starTreeValues.getStarTreeField()) == false) {
throw new IllegalArgumentException(
"star tree field configuration must match the configuration of the field being merged"
);
}
}
fieldsList.add(starTreeValues);
starTreeSubsPerField.put(fieldInfo.getField(), fieldsList);
}
Expand Down Expand Up @@ -340,7 +336,8 @@ private static SegmentWriteState getSegmentWriteState(SegmentWriteState segmentW
segmentInfo,
segmentWriteState.fieldInfos,
segmentWriteState.segUpdates,
segmentWriteState.context
segmentWriteState.context,
segmentWriteState.segmentSuffix
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,6 @@ private SequentialDocValuesIterator getIteratorForNumericField(
* @throws IOException throws an exception if we are unable to add the doc
*/
private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOException {

appendStarTreeDocument(starTreeDocument);
numStarTreeDocs++;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase;
import org.apache.lucene.tests.index.RandomIndexWriter;
Expand Down Expand Up @@ -58,9 +61,12 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX;
import static org.opensearch.index.compositeindex.CompositeIndexConstants.STAR_TREE_DOCS_COUNT;
import static org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils.assertStarTreeDocuments;

/**
Expand Down Expand Up @@ -207,6 +213,100 @@ public void testStarTreeDocValues() throws IOException {
directory.close();
}

public void testStarTreeDocValuesWithDeletions() throws IOException {
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);

int iterations = 3;
Map<String, Integer> map = new HashMap<>();
List<String> allIds = new ArrayList<>();
for (int iter = 0; iter < iterations; iter++) {
// Add 10 documents
for (int i = 0; i < 10; i++) {
String id = String.valueOf(random().nextInt() + i);
allIds.add(id);
Document doc = new Document();
doc.add(new StringField("_id", id, Field.Store.YES));
int fieldValue = random().nextInt(5) + 1;
doc.add(new SortedNumericDocValuesField("field", fieldValue));

int sndvValue = random().nextInt(3);

doc.add(new SortedNumericDocValuesField("sndv", sndvValue));
int dvValue = random().nextInt(3);

doc.add(new SortedNumericDocValuesField("dv", dvValue));
map.put(sndvValue + "-" + dvValue, fieldValue + map.getOrDefault(sndvValue + "-" + dvValue, 0));
iw.addDocument(doc);
}
iw.flush();
}
iw.commit();
// Delete random number of documents
int docsToDelete = random().nextInt(9); // Delete up to 9 documents
for (int i = 0; i < docsToDelete; i++) {
if (!allIds.isEmpty()) {
String idToDelete = allIds.remove(random().nextInt(allIds.size() - 1));
iw.deleteDocuments(new Term("_id", idToDelete));
allIds.remove(idToDelete);
}
}
iw.flush();
iw.commit();
iw.forceMerge(1);
iw.close();

DirectoryReader ir = DirectoryReader.open(directory);
TestUtil.checkReader(ir);
assertEquals(1, ir.leaves().size());

// Assert star tree documents
for (LeafReaderContext context : ir.leaves()) {
SegmentReader reader = Lucene.segmentReader(context.reader());
CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader();
List<CompositeIndexFieldInfo> compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields();

for (CompositeIndexFieldInfo compositeIndexFieldInfo : compositeIndexFields) {
StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo);
StarTreeDocument[] actualStarTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments(
List.of(starTreeValues),
List.of(
NumberFieldMapper.NumberType.DOUBLE,
NumberFieldMapper.NumberType.LONG,
NumberFieldMapper.NumberType.DOUBLE,
NumberFieldMapper.NumberType.DOUBLE,
NumberFieldMapper.NumberType.DOUBLE,
NumberFieldMapper.NumberType.LONG,
NumberFieldMapper.NumberType.DOUBLE,
NumberFieldMapper.NumberType.DOUBLE,
NumberFieldMapper.NumberType.LONG
),
Integer.parseInt(starTreeValues.getAttributes().get(STAR_TREE_DOCS_COUNT))
);
for (StarTreeDocument starDoc : actualStarTreeDocuments) {
Long sndvVal = null;
if (starDoc.dimensions[0] != null) {
sndvVal = starDoc.dimensions[0];
}
Long dvVal = null;
if (starDoc.dimensions[1] != null) {
dvVal = starDoc.dimensions[1];
}
if (starDoc.metrics[0] != null) {
double metric = (double) starDoc.metrics[0];
if (map.containsKey(sndvVal + "-" + dvVal)) {
assertEquals((long) map.get(sndvVal + "-" + dvVal), (long) metric);
}
}
}
}
}
ir.close();
directory.close();
}

private XContentBuilder getExpandedMapping() throws IOException {
return topMapping(b -> {
b.startObject("composite");
Expand Down

0 comments on commit ad7f9e7

Please sign in to comment.