Skip to content

Commit 0989a9c

Browse files
Use dafault BM25Similarity
Signed-off-by: Prudhvi Godithi <pgodithi@amazon.com>
1 parent 1673e74 commit 0989a9c

File tree

4 files changed

+46
-12
lines changed

4 files changed

+46
-12
lines changed

server/src/main/java/org/opensearch/index/similarity/SimilarityProviders.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.apache.lucene.search.similarities.AfterEffect;
3636
import org.apache.lucene.search.similarities.AfterEffectB;
3737
import org.apache.lucene.search.similarities.AfterEffectL;
38+
import org.apache.lucene.search.similarities.BM25Similarity;
3839
import org.apache.lucene.search.similarities.BasicModel;
3940
import org.apache.lucene.search.similarities.BasicModelG;
4041
import org.apache.lucene.search.similarities.BasicModelIF;
@@ -62,6 +63,7 @@
6263
import org.apache.lucene.search.similarities.NormalizationH2;
6364
import org.apache.lucene.search.similarities.NormalizationH3;
6465
import org.apache.lucene.search.similarities.NormalizationZ;
66+
import org.apache.lucene.search.similarities.Similarity;
6567
import org.opensearch.Version;
6668
import org.opensearch.common.logging.DeprecationLogger;
6769
import org.opensearch.common.settings.Settings;
@@ -271,13 +273,26 @@ static void assertSettingsIsSubsetOf(String type, Version version, Settings sett
271273
}
272274
}
273275

274-
public static LegacyBM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
276+
public static Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
275277
assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
276278

277279
float k1 = settings.getAsFloat("k1", 1.2f);
278280
float b = settings.getAsFloat("b", 0.75f);
279281
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
280282

283+
return new BM25Similarity(k1, b, discountOverlaps);
284+
}
285+
286+
public static Similarity createLegacyBM25Similarity(Settings settings, Version indexCreatedVersion) {
287+
deprecationLogger.deprecate(
288+
"LegacyBM25Similarity",
289+
"The LegacyBM25 similarity is deprecated and will be removed in a future version. " + "Please use BM25Similarity instead."
290+
);
291+
// Validate settings and then create the similarity
292+
assertSettingsIsSubsetOf("LegacyBM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
293+
float k1 = settings.getAsFloat("k1", 1.2f);
294+
float b = settings.getAsFloat("b", 0.75f);
295+
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
281296
return new LegacyBM25Similarity(k1, b, discountOverlaps);
282297
}
283298

server/src/main/java/org/opensearch/index/similarity/SimilarityService.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.lucene.search.CollectionStatistics;
3838
import org.apache.lucene.search.Explanation;
3939
import org.apache.lucene.search.TermStatistics;
40+
import org.apache.lucene.search.similarities.BM25Similarity;
4041
import org.apache.lucene.search.similarities.BooleanSimilarity;
4142
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
4243
import org.apache.lucene.search.similarities.Similarity;
@@ -52,7 +53,6 @@
5253
import org.opensearch.index.IndexSettings;
5354
import org.opensearch.index.mapper.MappedFieldType;
5455
import org.opensearch.index.mapper.MapperService;
55-
import org.opensearch.lucene.similarity.LegacyBM25Similarity;
5656
import org.opensearch.script.ScriptService;
5757

5858
import java.util.Collections;
@@ -84,7 +84,8 @@ public final class SimilarityService extends AbstractIndexComponent {
8484
};
8585
});
8686
defaults.put("BM25", version -> {
87-
final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
87+
// final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
88+
final Similarity similarity = new BM25Similarity();
8889
return () -> similarity;
8990
});
9091
defaults.put("boolean", version -> {
@@ -100,6 +101,7 @@ public final class SimilarityService extends AbstractIndexComponent {
100101
);
101102
});
102103
builtIn.put("BM25", (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version));
104+
builtIn.put("LegacyBM25", (settings, version, scriptService) -> SimilarityProviders.createLegacyBM25Similarity(settings, version));
103105
builtIn.put("boolean", (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version));
104106
builtIn.put("DFR", (settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version));
105107
builtIn.put("IB", (settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version));

server/src/test/java/org/opensearch/index/similarity/SimilarityServiceTests.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.lucene.index.FieldInvertState;
3535
import org.apache.lucene.search.CollectionStatistics;
3636
import org.apache.lucene.search.TermStatistics;
37+
import org.apache.lucene.search.similarities.BM25Similarity;
3738
import org.apache.lucene.search.similarities.BooleanSimilarity;
3839
import org.apache.lucene.search.similarities.Similarity;
3940
import org.opensearch.Version;
@@ -53,6 +54,13 @@ public void testDefaultSimilarity() {
5354
Settings settings = Settings.builder().build();
5455
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
5556
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
57+
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
58+
}
59+
60+
public void testLegacySimilarity() {
61+
Settings settings = Settings.builder().put("index.similarity.default.type", "LegacyBM25").build();
62+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
63+
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
5664
assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class));
5765
}
5866

@@ -86,12 +94,10 @@ public long computeNorm(FieldInvertState state) {
8694
@Override
8795
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
8896
return new SimScorer() {
89-
9097
@Override
9198
public float score(float freq, long norm) {
9299
return -1;
93100
}
94-
95101
};
96102
}
97103
};
@@ -111,12 +117,10 @@ public long computeNorm(FieldInvertState state) {
111117
@Override
112118
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
113119
return new SimScorer() {
114-
115120
@Override
116121
public float score(float freq, long norm) {
117122
return 1 / (freq + norm);
118123
}
119-
120124
};
121125
}
122126
};
@@ -136,12 +140,10 @@ public long computeNorm(FieldInvertState state) {
136140
@Override
137141
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
138142
return new SimScorer() {
139-
140143
@Override
141144
public float score(float freq, long norm) {
142145
return freq + norm;
143146
}
144-
145147
};
146148
}
147149
};

server/src/test/java/org/opensearch/index/similarity/SimilarityTests.java

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.index.similarity;
3434

3535
import org.apache.lucene.search.similarities.AfterEffectL;
36+
import org.apache.lucene.search.similarities.BM25Similarity;
3637
import org.apache.lucene.search.similarities.BasicModelG;
3738
import org.apache.lucene.search.similarities.BooleanSimilarity;
3839
import org.apache.lucene.search.similarities.DFISimilarity;
@@ -72,7 +73,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {
7273

7374
public void testResolveDefaultSimilarities() {
7475
SimilarityService similarityService = createIndex("foo").similarityService();
75-
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class));
76+
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(org.apache.lucene.search.similarities.BM25Similarity.class));
7677
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
7778
assertThat(similarityService.getSimilarity("default"), equalTo(null));
7879
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> similarityService.getSimilarity("classic"));
@@ -83,6 +84,17 @@ public void testResolveDefaultSimilarities() {
8384
);
8485
}
8586

87+
public void testResolveLegacySimilarity() {
88+
Settings settings = Settings.builder()
89+
.put("index.similarity.my_similarity.type", "LegacyBM25")
90+
.put("index.similarity.my_similarity.k1", 1.2f)
91+
.put("index.similarity.my_similarity.b", 0.75f)
92+
.put("index.similarity.my_similarity.discount_overlaps", false)
93+
.build();
94+
MapperService mapperService = createIndex("foo", settings, "type", "{}").mapperService();
95+
assertThat(mapperService.fieldType("type").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
96+
}
97+
8698
public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException {
8799
Settings indexSettings = Settings.builder()
88100
.put("index.similarity.my_similarity.type", "classic")
@@ -114,9 +126,12 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
114126
.put("index.similarity.my_similarity.discount_overlaps", false)
115127
.build();
116128
MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService();
117-
assertThat(mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
129+
assertThat(
130+
mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(),
131+
instanceOf(org.apache.lucene.search.similarities.BM25Similarity.class)
132+
);
118133

119-
LegacyBM25Similarity similarity = (LegacyBM25Similarity) mapperService.fieldType("field1")
134+
BM25Similarity similarity = (org.apache.lucene.search.similarities.BM25Similarity) mapperService.fieldType("field1")
120135
.getTextSearchInfo()
121136
.getSimilarity()
122137
.get();

0 commit comments

Comments
 (0)