Skip to content

Commit 203d685

Browse files
Use dafault BM25Similarity
Signed-off-by: Prudhvi Godithi <pgodithi@amazon.com>
1 parent 1673e74 commit 203d685

File tree

6 files changed

+101
-12
lines changed

6 files changed

+101
-12
lines changed

modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,11 @@ teardown:
7979

8080

8181
- match: { hits.total.value: 2 }
82-
- match: { hits.hits.0._id: "3" }
83-
- match: { hits.hits.0.inner_hits.question.hits.total.value: 0}
84-
- match: { hits.hits.1._id: "2" }
85-
- match: { hits.hits.1.inner_hits.question.hits.total.value: 1}
86-
- match: { hits.hits.1.inner_hits.question.hits.hits.0._id: "1"}
82+
- match: { hits.hits.0._id: "2" }
83+
- match: { hits.hits.0.inner_hits.question.hits.total.value: 1 }
84+
- match: { hits.hits.0.inner_hits.question.hits.hits.0._id: "1" }
85+
- match: { hits.hits.1._id: "3" }
86+
- match: { hits.hits.1.inner_hits.question.hits.total.value: 0 }
8787

8888
---
8989
"HasParent disallow expensive queries":
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
---
2+
setup:
3+
- do:
4+
indices.create:
5+
index: legacy_bm25_test
6+
body:
7+
settings:
8+
number_of_shards: 1
9+
number_of_replicas: 0
10+
similarity:
11+
default:
12+
type: LegacyBM25
13+
k1: 1.2
14+
b: 0.75
15+
mappings:
16+
properties:
17+
content:
18+
type: text
19+
- do:
20+
index:
21+
index: legacy_bm25_test
22+
id: "1"
23+
body: { "content": "This is a test document for legacy BM25 scoring" }
24+
- do:
25+
indices.refresh:
26+
index: legacy_bm25_test
27+
28+
---
29+
"Legacy BM25 search":
30+
- do:
31+
search:
32+
index: legacy_bm25_test
33+
body:
34+
query:
35+
match:
36+
content: "legacy"
37+
- match: { hits.total.value: 1 }
38+
- match: { hits.hits.0._id: "1" }
39+
40+
---
41+
teardown:
42+
- do:
43+
indices.delete:
44+
index: legacy_bm25_test

server/src/main/java/org/opensearch/index/similarity/SimilarityProviders.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.apache.lucene.search.similarities.AfterEffect;
3636
import org.apache.lucene.search.similarities.AfterEffectB;
3737
import org.apache.lucene.search.similarities.AfterEffectL;
38+
import org.apache.lucene.search.similarities.BM25Similarity;
3839
import org.apache.lucene.search.similarities.BasicModel;
3940
import org.apache.lucene.search.similarities.BasicModelG;
4041
import org.apache.lucene.search.similarities.BasicModelIF;
@@ -62,6 +63,7 @@
6263
import org.apache.lucene.search.similarities.NormalizationH2;
6364
import org.apache.lucene.search.similarities.NormalizationH3;
6465
import org.apache.lucene.search.similarities.NormalizationZ;
66+
import org.apache.lucene.search.similarities.Similarity;
6567
import org.opensearch.Version;
6668
import org.opensearch.common.logging.DeprecationLogger;
6769
import org.opensearch.common.settings.Settings;
@@ -271,13 +273,21 @@ static void assertSettingsIsSubsetOf(String type, Version version, Settings sett
271273
}
272274
}
273275

274-
public static LegacyBM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
276+
public static Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
275277
assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
276278

277279
float k1 = settings.getAsFloat("k1", 1.2f);
278280
float b = settings.getAsFloat("b", 0.75f);
279281
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
280282

283+
return new BM25Similarity(k1, b, discountOverlaps);
284+
}
285+
286+
public static Similarity createLegacyBM25Similarity(Settings settings, Version indexCreatedVersion) {
287+
assertSettingsIsSubsetOf("LegacyBM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
288+
float k1 = settings.getAsFloat("k1", 1.2f);
289+
float b = settings.getAsFloat("b", 0.75f);
290+
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
281291
return new LegacyBM25Similarity(k1, b, discountOverlaps);
282292
}
283293

server/src/main/java/org/opensearch/index/similarity/SimilarityService.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.lucene.search.CollectionStatistics;
3838
import org.apache.lucene.search.Explanation;
3939
import org.apache.lucene.search.TermStatistics;
40+
import org.apache.lucene.search.similarities.BM25Similarity;
4041
import org.apache.lucene.search.similarities.BooleanSimilarity;
4142
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
4243
import org.apache.lucene.search.similarities.Similarity;
@@ -52,7 +53,6 @@
5253
import org.opensearch.index.IndexSettings;
5354
import org.opensearch.index.mapper.MappedFieldType;
5455
import org.opensearch.index.mapper.MapperService;
55-
import org.opensearch.lucene.similarity.LegacyBM25Similarity;
5656
import org.opensearch.script.ScriptService;
5757

5858
import java.util.Collections;
@@ -84,7 +84,7 @@ public final class SimilarityService extends AbstractIndexComponent {
8484
};
8585
});
8686
defaults.put("BM25", version -> {
87-
final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
87+
final Similarity similarity = new BM25Similarity();
8888
return () -> similarity;
8989
});
9090
defaults.put("boolean", version -> {
@@ -100,6 +100,7 @@ public final class SimilarityService extends AbstractIndexComponent {
100100
);
101101
});
102102
builtIn.put("BM25", (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version));
103+
builtIn.put("LegacyBM25", (settings, version, scriptService) -> SimilarityProviders.createLegacyBM25Similarity(settings, version));
103104
builtIn.put("boolean", (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version));
104105
builtIn.put("DFR", (settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version));
105106
builtIn.put("IB", (settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version));

server/src/test/java/org/opensearch/index/similarity/SimilarityServiceTests.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.lucene.index.FieldInvertState;
3535
import org.apache.lucene.search.CollectionStatistics;
3636
import org.apache.lucene.search.TermStatistics;
37+
import org.apache.lucene.search.similarities.BM25Similarity;
3738
import org.apache.lucene.search.similarities.BooleanSimilarity;
3839
import org.apache.lucene.search.similarities.Similarity;
3940
import org.opensearch.Version;
@@ -53,6 +54,13 @@ public void testDefaultSimilarity() {
5354
Settings settings = Settings.builder().build();
5455
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
5556
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
57+
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
58+
}
59+
60+
public void testLegacySimilarity() {
61+
Settings settings = Settings.builder().put("index.similarity.default.type", "LegacyBM25").build();
62+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
63+
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
5664
assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class));
5765
}
5866

server/src/test/java/org/opensearch/index/similarity/SimilarityTests.java

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.index.similarity;
3434

3535
import org.apache.lucene.search.similarities.AfterEffectL;
36+
import org.apache.lucene.search.similarities.BM25Similarity;
3637
import org.apache.lucene.search.similarities.BasicModelG;
3738
import org.apache.lucene.search.similarities.BooleanSimilarity;
3839
import org.apache.lucene.search.similarities.DFISimilarity;
@@ -72,7 +73,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {
7273

7374
public void testResolveDefaultSimilarities() {
7475
SimilarityService similarityService = createIndex("foo").similarityService();
75-
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class));
76+
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
7677
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
7778
assertThat(similarityService.getSimilarity("default"), equalTo(null));
7879
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> similarityService.getSimilarity("classic"));
@@ -83,7 +84,29 @@ public void testResolveDefaultSimilarities() {
8384
);
8485
}
8586

86-
public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException {
87+
public void testResolveLegacySimilarity() throws IOException {
88+
Settings settings = Settings.builder()
89+
.put("index.similarity.my_similarity.type", "LegacyBM25")
90+
.put("index.similarity.my_similarity.k1", 1.2f)
91+
.put("index.similarity.my_similarity.b", 0.75f)
92+
.put("index.similarity.my_similarity.discount_overlaps", false)
93+
.build();
94+
95+
XContentBuilder mapping = XContentFactory.jsonBuilder()
96+
.startObject()
97+
.startObject("properties")
98+
.startObject("dummy")
99+
.field("type", "text")
100+
.field("similarity", "my_similarity")
101+
.endObject()
102+
.endObject()
103+
.endObject();
104+
105+
MapperService mapperService = createIndex("foo", settings, "type", mapping).mapperService();
106+
assertThat(mapperService.fieldType("dummy").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
107+
}
108+
109+
public void testResolveSimilaritiesFromMapping_classicIsForbidden() {
87110
Settings indexSettings = Settings.builder()
88111
.put("index.similarity.my_similarity.type", "classic")
89112
.put("index.similarity.my_similarity.discount_overlaps", false)
@@ -114,9 +137,12 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
114137
.put("index.similarity.my_similarity.discount_overlaps", false)
115138
.build();
116139
MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService();
117-
assertThat(mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
140+
assertThat(
141+
mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(),
142+
instanceOf(BM25Similarity.class)
143+
);
118144

119-
LegacyBM25Similarity similarity = (LegacyBM25Similarity) mapperService.fieldType("field1")
145+
BM25Similarity similarity = (BM25Similarity) mapperService.fieldType("field1")
120146
.getTextSearchInfo()
121147
.getSimilarity()
122148
.get();

0 commit comments

Comments
 (0)