Skip to content

[8.12] Fix bug in rule_query where text_expansion errored because it was not rewritten (#105365) #105565

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/105365.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 105365
summary: Fix bug in `rule_query` where `text_expansion` errored because it was not
rewritten
area: Application
type: bug
issues: []
2 changes: 1 addition & 1 deletion x-pack/plugin/ent-search/qa/rest/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies {

restResources {
restApi {
include '_common', 'bulk', 'cluster', 'connector', 'nodes', 'indices', 'index', 'query_ruleset', 'search_application', 'xpack', 'security', 'search'
include '_common', 'bulk', 'cluster', 'connector', 'nodes', 'indices', 'index', 'query_ruleset', 'search_application', 'xpack', 'security', 'search', 'ml'
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,4 +236,154 @@ setup:
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: 'doc1' }

---
"Perform a rule query with an organic query that must be rewritten to another query type":
- skip:
version: " - 8.12.2"
reason: Bugfix that was broken in previous versions

- do:
indices.create:
index: test-index-with-sparse-vector
body:
mappings:
properties:
source_text:
type: keyword
ml.tokens:
type: sparse_vector

- do:
ml.put_trained_model:
model_id: "text_expansion_model"
body: >
{
"description": "simple model for testing",
"model_type": "pytorch",
"inference_config": {
"text_expansion": {
"tokenization": {
"bert": {
"with_special_tokens": false
}
}
}
}
}
- do:
ml.put_trained_model_vocabulary:
model_id: "text_expansion_model"
body: >
{ "vocabulary": ["[PAD]", "[UNK]", "these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"] }
- do:
ml.put_trained_model_definition_part:
model_id: "text_expansion_model"
part: 0
body: >
{
"total_definition_length":2078,
"definition": "UEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VGV4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxlbW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVctJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdFZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26QuZy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/eUuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNLFgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vLqbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLktgNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjghg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0YgbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSRrhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRglXIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAABkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgIAAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAAAACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLAQIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAAAAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAAAAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAAAAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA=",
"total_parts": 1
}
- do:
bulk:
index: test-index-with-sparse-vector
refresh: true
body: |
{"index": {}}
{"source_text": "my words comforter", "ml.tokens":{"my":1.0, "words":1.0,"comforter":1.0}}
{"index": {}}
{"source_text": "the machine is leaking", "ml.tokens":{"the":1.0,"machine":1.0,"is":1.0,"leaking":1.0}}
{"index": {}}
{"source_text": "these are my words", "ml.tokens":{"these":1.0,"are":1.0,"my":1.0,"words":1.0}}
{"index": {}}
{"source_text": "the octopus comforter smells", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"smells":1.0}}
{"index": {}}
{"source_text": "the octopus comforter is leaking", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"is":1.0,"leaking":1.0}}
{"index": {}}
{"source_text": "washing machine smells", "ml.tokens":{"washing":1.0,"machine":1.0,"smells":1.0}}
{"index": { "_id": "pinned_doc1" }}
{"source_text": "unrelated pinned doc", "ml.tokens":{"unrelated":1.0,"pinned":1.0,"doc":1.0}}
{"index": { "_id": "pinned_doc2" }}
{"source_text": "another unrelated pinned doc", "ml.tokens":{"another":1.0, "unrelated":1.0,"pinned":1.0,"doc":1.0}}

- do:
ml.start_trained_model_deployment:
model_id: text_expansion_model
wait_for: started

- do:
query_ruleset.put:
ruleset_id: combined-ruleset
body:
rules:
- rule_id: rule1
type: pinned
criteria:
- type: exact
metadata: foo
values: [ bar ]
actions:
ids:
- 'pinned_doc1'
- rule_id: rule2
type: pinned
criteria:
- type: exact
metadata: foo
values: [ baz ]
actions:
docs:
- '_index': 'test-index-with-sparse-vector'
'_id': 'pinned_doc2'
- do:
search:
body:
query:
rule_query:
organic:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
match_criteria:
foo: bar
ruleset_id: combined-ruleset

- match: { hits.total.value: 5 }
- match: { hits.hits.0._id: 'pinned_doc1' }

- do:
search:
body:
query:
rule_query:
organic:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
match_criteria:
foo: baz
ruleset_id: combined-ruleset

- match: { hits.total.value: 5 }
- match: { hits.hits.0._id: 'pinned_doc2' }

- do:
search:
body:
query:
rule_query:
organic:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
match_criteria:
foo: puggle
ruleset_id: combined-ruleset

- match: { hits.total.value: 4 }


Original file line number Diff line number Diff line change
Expand Up @@ -111,18 +111,6 @@ private RuleQueryBuilder(
throw new IllegalArgumentException("rulesetId must not be null or empty");
}

// PinnedQueryBuilder will return an error if we attempt to return more than the maximum number of
// pinned hits. Here, we truncate matching rules rather than return an error.
if (pinnedIds != null && pinnedIds.size() > MAX_NUM_PINNED_HITS) {
HeaderWarning.addWarning("Truncating query rule pinned hits to " + MAX_NUM_PINNED_HITS + " documents");
pinnedIds = pinnedIds.subList(0, MAX_NUM_PINNED_HITS);
}

if (pinnedDocs != null && pinnedDocs.size() > MAX_NUM_PINNED_HITS) {
HeaderWarning.addWarning("Truncating query rule pinned hits to " + MAX_NUM_PINNED_HITS + " documents");
pinnedDocs = pinnedDocs.subList(0, MAX_NUM_PINNED_HITS);
}

this.organicQuery = organicQuery;
this.matchCriteria = matchCriteria;
this.rulesetId = rulesetId;
Expand Down Expand Up @@ -174,6 +162,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep

@Override
protected Query doToQuery(SearchExecutionContext context) throws IOException {
// NOTE: this is old query logic, as in 8.12.2+ and 8.13.0+ we will always rewrite this query
// into a pinned query or the organic query. This logic remains here for backwards compatibility
// with coordinator nodes running versions 8.10.0 - 8.12.1.
if ((pinnedIds != null && pinnedIds.isEmpty() == false) && (pinnedDocs != null && pinnedDocs.isEmpty() == false)) {
throw new IllegalArgumentException("applied rules contain both pinned ids and pinned docs, only one of ids or docs is allowed");
}
Expand All @@ -187,21 +178,25 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {
} else {
return organicQuery.toQuery(context);
}

}

@SuppressWarnings("unchecked")
@Override
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
if (pinnedIds != null || pinnedDocs != null) {
return this;
} else if (pinnedIdsSupplier != null || pinnedDocsSupplier != null) {
List<String> identifiedPinnedIds = pinnedIdsSupplier != null ? pinnedIdsSupplier.get() : null;
List<Item> identifiedPinnedDocs = pinnedDocsSupplier != null ? pinnedDocsSupplier.get() : null;
if (identifiedPinnedIds == null && identifiedPinnedDocs == null) {
return this; // not executed yet
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) {
if (pinnedIdsSupplier != null && pinnedDocsSupplier != null) {
List<String> identifiedPinnedIds = pinnedIdsSupplier.get();
List<Item> identifiedPinnedDocs = pinnedDocsSupplier.get();
if (identifiedPinnedIds == null || identifiedPinnedDocs == null) {
return this; // Not executed yet
} else if (identifiedPinnedIds.isEmpty() && identifiedPinnedDocs.isEmpty()) {
return organicQuery; // Nothing to pin here
} else if (identifiedPinnedIds.isEmpty() == false && identifiedPinnedDocs.isEmpty() == false) {
throw new IllegalArgumentException(
"applied rules contain both pinned ids and pinned docs, only one of ids or docs is allowed"
);
} else if (identifiedPinnedIds.isEmpty() == false) {
return new PinnedQueryBuilder(organicQuery, truncateList(identifiedPinnedIds).toArray(new String[0]));
} else {
return new RuleQueryBuilder(organicQuery, matchCriteria, rulesetId, identifiedPinnedIds, identifiedPinnedDocs, null, null);
return new PinnedQueryBuilder(organicQuery, truncateList(identifiedPinnedDocs).toArray(new Item[0]));
}
}

Expand Down Expand Up @@ -244,18 +239,19 @@ public void onFailure(Exception e) {
});
});

QueryBuilder newOrganicQuery = organicQuery.rewrite(queryRewriteContext);
RuleQueryBuilder rewritten = new RuleQueryBuilder(
newOrganicQuery,
matchCriteria,
this.rulesetId,
null,
null,
pinnedIdsSetOnce::get,
pinnedDocsSetOnce::get
);
rewritten.boost(this.boost);
return rewritten;
return new RuleQueryBuilder(organicQuery, matchCriteria, this.rulesetId, null, null, pinnedIdsSetOnce::get, pinnedDocsSetOnce::get)
.boost(this.boost)
.queryName(this.queryName);
}

private List<?> truncateList(List<?> input) {
// PinnedQueryBuilder will return an error if we attempt to return more than the maximum number of
// pinned hits. Here, we truncate matching rules rather than return an error.
if (input.size() > MAX_NUM_PINNED_HITS) {
HeaderWarning.addWarning("Truncating query rule pinned hits to " + MAX_NUM_PINNED_HITS + " documents");
return input.subList(0, MAX_NUM_PINNED_HITS);
}
return input;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xpack.application.LocalStateEnterpriseSearch;
import org.elasticsearch.xpack.searchbusinessrules.SearchBusinessRules;
import org.hamcrest.Matchers;

import java.io.IOException;
Expand Down Expand Up @@ -62,7 +63,7 @@ protected void doAssertLuceneQuery(RuleQueryBuilder queryBuilder, Query query, S

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return Collections.singletonList(LocalStateEnterpriseSearch.class);
return List.of(LocalStateEnterpriseSearch.class, SearchBusinessRules.class);
}

public void testIllegalArguments() {
Expand Down