Skip to content

Commit

Permalink
Enabling sort optimization back for half_float with custom comparators (
Browse files Browse the repository at this point in the history
#11024)

* Enabling sort optimizatin back for half_float with custom comparators

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

* Fixing tests

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

* Adding test for Indecx sort half_float

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

* Making indexFieldData provate in FloatValuesComparatorSource

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

* Update server/src/main/java/org/opensearch/index/search/comparators/HalfFloatComparator.java

Co-authored-by: Andriy Redko <drreta@gmail.com>
Signed-off-by: Chaitanya Gohel <104654647+gashutos@users.noreply.github.com>

* Adding missing value instead null

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

* Adding more tests for desc order sort

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

* Update rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml

Co-authored-by: Prabhakar Sithanandam <prabhakar.s87@gmail.com>
Signed-off-by: Chaitanya Gohel <104654647+gashutos@users.noreply.github.com>

* Adding tests in case missing values are competitive

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

* chanheing newly added test supported version 3.0.0

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

* Assing missing float tests

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

* Remove missing value change to be part of another PR

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>

---------

Signed-off-by: Chaitanya Gohel <gashutos@amazon.com>
Signed-off-by: Chaitanya Gohel <104654647+gashutos@users.noreply.github.com>
Co-authored-by: Andriy Redko <drreta@gmail.com>
Co-authored-by: Prabhakar Sithanandam <prabhakar.s87@gmail.com>
  • Loading branch information
3 people authored Nov 14, 2023
1 parent 6c980bc commit 5143198
Show file tree
Hide file tree
Showing 8 changed files with 483 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [AdmissionControl] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting ([#9286](https://github.com/opensearch-project/OpenSearch/pull/9286))
- GHA to verify checklist items completion in PR descriptions ([#10800](https://github.com/opensearch-project/OpenSearch/pull/10800))
- [Remote cluster state] Restore cluster state version during remote state auto restore ([#10853](https://github.com/opensearch-project/OpenSearch/pull/10853))
- Add back half_float BKD based sort query optimization ([#11024](https://github.com/opensearch-project/OpenSearch/pull/11024))

### Dependencies
- Bump `log4j-core` from 2.18.0 to 2.19.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,23 @@
query: {"range": { "rank": { "from": 0 } } }
track_total_hits: false
size: 3

---
"Index Sort half float":
- do:
catch: bad_request
indices.create:
index: test
body:
settings:
number_of_shards: 1
number_of_replicas: 0
index.sort.field: rank
mappings:
properties:
rank:
type: half_float

# This should failed with 400 as half_float is not supported for index sort
- match: { status: 400 }
- match: { error.type: illegal_argument_exception }
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
properties:
counter:
type: double

- do:
bulk:
refresh: true
Expand Down Expand Up @@ -119,3 +120,87 @@
- match: { status: 400 }
- match: { error.type: search_phase_execution_exception }
- match: { error.caused_by.reason: "Can't do sort across indices, as a field has [unsigned_long] type in one index, and different type in another index!" }

---
"search across indices with mixed long and double and float numeric types":
- skip:
version: " - 2.10.99"
reason: half float was broken before 2.11

- do:
indices.create:
index: test_1
body:
mappings:
properties:
counter:
type: long

- do:
indices.create:
index: test_2
body:
mappings:
properties:
counter:
type: double

- do:
indices.create:
index: test_3
body:
mappings:
properties:
counter:
type: half_float

- do:
bulk:
refresh: true
body:
- index:
_index: test_1
- counter: 223372036854775800
- index:
_index: test_2
- counter: 1223372036854775800.23
- index:
_index: test_2
- counter: 184.4
- index:
_index: test_3
- counter: 187.4
- index:
_index: test_3
- counter: 194.4

- do:
search:
index: test_*
rest_total_hits_as_int: true
body:
sort: [{ counter: desc }]
- match: { hits.total: 5 }
- length: { hits.hits: 5 }
- match: { hits.hits.0._index: test_2 }
- match: { hits.hits.0._source.counter: 1223372036854775800.23 }
- match: { hits.hits.0.sort.0: 1223372036854775800.23 }
- match: { hits.hits.1._index: test_1 }
- match: { hits.hits.1._source.counter: 223372036854775800 }
- match: { hits.hits.1.sort.0: 223372036854775800 }
- match: { hits.hits.2._index: test_3 }
- match: { hits.hits.2._source.counter: 194.4 }

- do:
search:
index: test_*
rest_total_hits_as_int: true
body:
sort: [{ counter: asc }]
- match: { hits.total: 5 }
- length: { hits.hits: 5 }
- match: { hits.hits.0._index: test_2 }
- match: { hits.hits.0._source.counter: 184.4 }
- match: { hits.hits.0.sort.0: 184.4 }
- match: { hits.hits.1._index: test_3 }
- match: { hits.hits.1._source.counter: 187.4 }
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,130 @@
- length: { hits.hits: 1 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.population: null }

---
"half float":
- skip:
version: " - 2.10.99"
reason: half_float was broken for 2.10 and earlier

- do:
indices.create:
index: test
body:
mappings:
properties:
population:
type: half_float
- do:
bulk:
refresh: true
index: test
body: |
{"index":{}}
{"population": 184.4}
{"index":{}}
{"population": 194.4}
{"index":{}}
{"population": 144.4}
{"index":{}}
{"population": 174.4}
{"index":{}}
{"population": 164.4}
- do:
search:
index: test
rest_total_hits_as_int: true
body:
size: 3
sort: [ { population: desc } ]
- match: { hits.total: 5 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.population: 194.4 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.population: 184.4 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.population: 174.4 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
size: 3
sort: [ { population: asc } ]
- match: { hits.total: 5 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.population: 144.4 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.population: 164.4 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.population: 174.4 }

# search_after with the asc sort
- do:
search:
index: test
rest_total_hits_as_int: true
body:
size: 1
sort: [ { population: asc } ]
search_after: [ 184.375 ] # this is rounded sort value in sort result
- match: { hits.total: 5 }
- length: { hits.hits: 1 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.population: 194.4 }

# search_after with the desc sort
- do:
search:
index: test
rest_total_hits_as_int: true
body:
size: 1
sort: [ { population: desc } ]
search_after: [ 164.375 ] # this is rounded sort value in sort result
- match: { hits.total: 5 }
- length: { hits.hits: 1 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.population: 144.4 }

# search_after with the asc sort with missing
- do:
bulk:
refresh: true
index: test
body: |
{"index":{}}
{"population": null}
- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 5
"sort": [ { "population": { "order": "asc", "missing": "_last" } } ]
search_after: [ 200 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 6 }
- length: { hits.hits: 1 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.population: null }

# search_after with the desc sort with missing
- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 5
"sort": [ { "population": { "order": "desc", "missing": "_last" } } ]
search_after: [ 100 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 6 }
- length: { hits.hits: 1 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.population: null }
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,9 @@ public void testSimpleSorts() throws Exception {
.startObject("float_value")
.field("type", "float")
.endObject()
.startObject("half_float_value")
.field("type", "half_float")
.endObject()
.startObject("double_value")
.field("type", "double")
.endObject()
Expand All @@ -628,6 +631,7 @@ public void testSimpleSorts() throws Exception {
.field("long_value", i)
.field("unsigned_long_value", UNSIGNED_LONG_BASE.add(BigInteger.valueOf(10000 * i)))
.field("float_value", 0.1 * i)
.field("half_float_value", 0.1 * i)
.field("double_value", 0.1 * i)
.endObject()
);
Expand Down Expand Up @@ -794,6 +798,28 @@ public void testSimpleSorts() throws Exception {

assertThat(searchResponse.toString(), not(containsString("error")));

// HALF_FLOAT
size = 1 + random.nextInt(10);
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("half_float_value", SortOrder.ASC).get();

assertHitCount(searchResponse, 10L);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));
size = 1 + random.nextInt(10);
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("half_float_value", SortOrder.DESC).get();

assertHitCount(searchResponse, 10);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(9 - i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));

// DOUBLE
size = 1 + random.nextInt(10);
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("double_value", SortOrder.ASC).get();
Expand Down Expand Up @@ -1330,6 +1356,9 @@ public void testSortMVField() throws Exception {
.startObject("float_values")
.field("type", "float")
.endObject()
.startObject("half_float_values")
.field("type", "float")
.endObject()
.startObject("double_values")
.field("type", "double")
.endObject()
Expand All @@ -1351,6 +1380,7 @@ public void testSortMVField() throws Exception {
.array("short_values", 1, 5, 10, 8)
.array("byte_values", 1, 5, 10, 8)
.array("float_values", 1f, 5f, 10f, 8f)
.array("half_float_values", 1f, 5f, 10f, 8f)
.array("double_values", 1d, 5d, 10d, 8d)
.array("string_values", "01", "05", "10", "08")
.endObject()
Expand All @@ -1365,6 +1395,7 @@ public void testSortMVField() throws Exception {
.array("short_values", 11, 15, 20, 7)
.array("byte_values", 11, 15, 20, 7)
.array("float_values", 11f, 15f, 20f, 7f)
.array("half_float_values", 11f, 15f, 20f, 7f)
.array("double_values", 11d, 15d, 20d, 7d)
.array("string_values", "11", "15", "20", "07")
.endObject()
Expand All @@ -1379,6 +1410,7 @@ public void testSortMVField() throws Exception {
.array("short_values", 2, 1, 3, -4)
.array("byte_values", 2, 1, 3, -4)
.array("float_values", 2f, 1f, 3f, -4f)
.array("half_float_values", 2f, 1f, 3f, -4f)
.array("double_values", 2d, 1d, 3d, -4d)
.array("string_values", "02", "01", "03", "!4")
.endObject()
Expand Down Expand Up @@ -1585,6 +1617,34 @@ public void testSortMVField() throws Exception {
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo(Integer.toString(3)));
assertThat(((Number) searchResponse.getHits().getAt(2).getSortValues()[0]).floatValue(), equalTo(3f));

searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(10).addSort("half_float_values", SortOrder.ASC).get();

assertThat(searchResponse.getHits().getTotalHits().value, equalTo(3L));
assertThat(searchResponse.getHits().getHits().length, equalTo(3));

assertThat(searchResponse.getHits().getAt(0).getId(), equalTo(Integer.toString(3)));
assertThat(((Number) searchResponse.getHits().getAt(0).getSortValues()[0]).floatValue(), equalTo(-4f));

assertThat(searchResponse.getHits().getAt(1).getId(), equalTo(Integer.toString(1)));
assertThat(((Number) searchResponse.getHits().getAt(1).getSortValues()[0]).floatValue(), equalTo(1f));

assertThat(searchResponse.getHits().getAt(2).getId(), equalTo(Integer.toString(2)));
assertThat(((Number) searchResponse.getHits().getAt(2).getSortValues()[0]).floatValue(), equalTo(7f));

searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(10).addSort("half_float_values", SortOrder.DESC).get();

assertThat(searchResponse.getHits().getTotalHits().value, equalTo(3L));
assertThat(searchResponse.getHits().getHits().length, equalTo(3));

assertThat(searchResponse.getHits().getAt(0).getId(), equalTo(Integer.toString(2)));
assertThat(((Number) searchResponse.getHits().getAt(0).getSortValues()[0]).floatValue(), equalTo(20f));

assertThat(searchResponse.getHits().getAt(1).getId(), equalTo(Integer.toString(1)));
assertThat(((Number) searchResponse.getHits().getAt(1).getSortValues()[0]).floatValue(), equalTo(10f));

assertThat(searchResponse.getHits().getAt(2).getId(), equalTo(Integer.toString(3)));
assertThat(((Number) searchResponse.getHits().getAt(2).getSortValues()[0]).floatValue(), equalTo(3f));

searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(10).addSort("double_values", SortOrder.ASC).get();

assertThat(searchResponse.getHits().getTotalHits().value, equalTo(3L));
Expand Down
Loading

0 comments on commit 5143198

Please sign in to comment.