Skip to content

Commit 2a00167

Browse files
Address comments, fix IT
Signed-off-by: rithin-pullela-aws <rithinp@amazon.com>
1 parent 1338e12 commit 2a00167

File tree

2 files changed: 93 additions and 75 deletions

ml-algorithms/src/main/java/org/opensearch/ml/engine/tools/QueryPlanningPromptTemplate.java

Lines changed: 91 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -5,91 +5,104 @@ public class QueryPlanningPromptTemplate {
55
public static final String DEFAULT_QUERY =
66
"{ \"query\": { \"multi_match\" : { \"query\": \"${parameters.query_text}\", \"fields\": ${parameters.query_fields:-[\"*\"]} } } }";
77

8-
public static final String QUERY_TYPE_RULES = "Choose query types based on user intent and fields: "
9-
+ "matchsingle-token full‑text searches on analyzed text fields, "
10-
+ "match_phrasemulti-token phrases on analyzed text fields (search string contains a space, hyphen, comma, etc.), "
11-
+ "term / termsexact match on keyword, numeric, boolean, "
12-
+ "rangenumeric/date comparisons (gt, lt, gte, lte), "
13-
+ "bool with must, should, must_not, filterAND/OR/NOT logic, "
14-
+ "wildcard / prefix on keyword\"starts with\", \"contains\", "
15-
+ "existsfield presence/absence, "
16-
+ "nested query / nested aggNever wrap a field in nested unless the mapping for that exact path (or one of its parents) explicitly says \"type\": \"nested\". "
17-
+ "Otherwise use a normal query on the flattened field. ";
18-
19-
public static final String AGGREGATION_RULES = "Aggregations (when asked for counts, averages, \"top N\", distributions): "
20-
+ "terms on field.keyword or numeric for grouping / top N, "
21-
+ "Metric aggs (avg, min, max, sum, stats, cardinality) on numeric fields, "
22-
+ "date_histogram, histogram, range for distributions, "
23-
+ "Always set \"size\": 0 when only aggregations are needed, "
24-
+ "Use sub‑aggregations + order for \"top N by metric\", "
25-
+ "If grouping by a text field, use its .keyword sub‑field.";
8+
public static final String QUERY_TYPE_RULES = "\nChoose query types based on user intent and fields: \n"
9+
+ "match: single-token full‑text searches on analyzed text fields, \n"
10+
+ "match_phrase: multi-token phrases on analyzed text fields (search string contains a space, hyphen, comma, etc.), \n"
11+
+ "term / terms:exact match on keyword, numeric, boolean, \n"
12+
+ "range:numeric/date comparisons (gt, lt, gte, lte), \n"
13+
+ "bool with must, should, must_not, filter: AND/OR/NOT logic, \n"
14+
+ "wildcard / prefix on keyword:\"starts with\", \"contains\", \n"
15+
+ "exists:field presence/absence, \n"
16+
+ "nested query / nested agg:Never wrap a field in nested unless the mapping for that exact path (or one of its parents) explicitly says \"type\": \"nested\". \n"
17+
+ "Otherwise use a normal query on the flattened field. \n";
18+
19+
public static final String AGGREGATION_RULES = "Aggregations (when asked for counts, averages, \"top N\", distributions): \n"
20+
+ "terms on field.keyword or numeric for grouping / top N, \n"
21+
+ "Metric aggs (avg, min, max, sum, stats, cardinality) on numeric fields, \n"
22+
+ "date_histogram, histogram, range for distributions, \n"
23+
+ "Always set \"size\": 0 when only aggregations are needed, \n"
24+
+ "Use sub‑aggregations + order for \"top N by metric\", \n"
25+
+ "If grouping by a text field, use its .keyword sub‑field.\n";
2626

2727
public static final String PROMPT_PREFIX =
28-
"You are an OpenSearch DSL expert. Your job is to convert natural‑language questions into strict JSON OpenSearch search query bodies. "
29-
+ "Follow every rule: Use only the provided index mapping to decide which fields exist and their types, pay close attention to index mapping. "
30-
+ "Do not use fields that not present in mapping. "
28+
"You are an OpenSearch DSL expert. Your job is to convert natural‑language questions into strict JSON OpenSearch search query bodies. \n"
29+
+ "Follow every rule: Use only the provided index mapping to decide which fields exist and their types, pay close attention to index mapping. \n"
30+
+ "Do not use fields that not present in mapping. \n"
3131
+ QUERY_TYPE_RULES
3232
+ AGGREGATION_RULES;
3333

34-
public static final String OUTPUT_FORMAT_INSTRUCTIONS = "Output format: Output only a valid escaped JSON string or the literal "
34+
public static final String OUTPUT_FORMAT_INSTRUCTIONS = "Output format: Output only a valid escaped JSON string or the literal \n"
3535
+ DEFAULT_QUERY
36-
+ ". Do not print anything other than the JSON like code blocks etc. "
37-
+ "Follow the examples below. "
38-
+ "Fallback: If the request cannot be fulfilled with the mapping (missing field, unsupported feature, etc.), "
36+
+ " \nReturn exactly one JSON object. "
37+
+ "Output nothing before or after it — no code fences/backticks (`), angle brackets (< >), hash marks (#), asterisks (*), pipes (|), tildes (~), ellipses (… or ...), emojis, typographic quotes (\" \"), non-breaking spaces (U+00A0), zero-width characters (U+200B, U+FEFF), or any other markup/control characters. "
38+
+ "Use valid JSON only (standard double quotes \"; no comments; no trailing commas). "
39+
+ "This applies to formatting only, string values inside the JSON may contain any needed Unicode characters. \n"
40+
+ "Follow the examples below. \n"
41+
+ "Fallback: If the request cannot be fulfilled with the mapping (missing field, unsupported feature, etc.), \n"
3942
+ "output the literal string: "
4043
+ DEFAULT_QUERY;
4144

4245
// Individual example constants for better maintainability
43-
public static final String EXAMPLE_1 = "Example 1 — numeric range Input: Show all products that cost more than 50 dollars. "
44-
+ "Mapping: \"{ \"properties\": { \"price\": { \"type\": \"float\" } } }\" "
45-
+ "Output: \"{ \"query\": { \"range\": { \"price\": { \"gt\": 50 } } } }\" ";
46+
public static final String EXAMPLE_1 = "Example 1 — numeric range \n"
47+
+ "Input: Show all products that cost more than 50 dollars. \n"
48+
+ "Mapping: \"{ \"properties\": { \"price\": { \"type\": \"float\" } } }\" \n"
49+
+ "Output: \"{ \"query\": { \"range\": { \"price\": { \"gt\": 50 } } } }\" \n";
4650

47-
public static final String EXAMPLE_2 = "Example 2 — text match + exact filter Input: Find employees in London who are active. "
48-
+ "Mapping: \"{ \"properties\": { \"city\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"status\": { \"type\": \"keyword\" } } }\" "
49-
+ "Output: \"{ \"query\": { \"bool\": { \"must\": [ { \"match\": { \"city\": \"London\" } } ], \"filter\": [ { \"term\": { \"status\": \"active\" } } ] } } }\" ";
51+
public static final String EXAMPLE_2 = "Example 2 — text match + exact filter \n"
52+
+ "Input: Find employees in London who are active. \n"
53+
+ "Mapping: \"{ \"properties\": { \"city\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"status\": { \"type\": \"keyword\" } } }\" \n"
54+
+ "Output: \"{ \"query\": { \"bool\": { \"must\": [ { \"match\": { \"city\": \"London\" } } ], \"filter\": [ { \"term\": { \"status\": \"active\" } } ] } } }\" \n";
5055

5156
public static final String EXAMPLE_3 =
52-
"Example 3 — match_phrase (use when search string contains a space, hyphen, comma, etc. here \"new york city\" has space) Input: Find employees who are active and located in New York City "
53-
+ "Mapping: \"{ \"properties\": { \"city\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"status\": { \"type\": \"keyword\" } } }\" "
54-
+ "Output: \"{ \"query\": { \"bool\": { \"must\": [ { \"match_phrase\": { \"city\": \"New York City\" } } ], \"filter\": [ { \"term\": { \"status\": \"active\" } } ] } } }\" ";
55-
56-
public static final String EXAMPLE_4 =
57-
"Example 4 — bool with SHOULD Input: Search articles about \"machine learning\" that are research papers or blogs. "
58-
+ "Mapping: \"{ \"properties\": { \"content\": { \"type\": \"text\" }, \"type\": { \"type\": \"keyword\" } } }\" "
59-
+ "Output: \"{ \"query\": { \"bool\": { \"must\": [ { \"match\": { \"content\": \"machine learning\" } } ], \"should\": [ { \"term\": { \"type\": \"research paper\" } }, { \"term\": { \"type\": \"blog\" } } ], \"minimum_should_match\": 1 } } }\" ";
60-
61-
public static final String EXAMPLE_5 = "Example 5 — MUST NOT Input: List customers who have not made a purchase in 2023. "
62-
+ "Mapping: \"{ \"properties\": { \"last_purchase_date\": { \"type\": \"date\" } } }\" "
63-
+ "Output: \"{ \"query\": { \"bool\": { \"must_not\": [ { \"range\": { \"last_purchase_date\": { \"gte\": \"2023-01-01\", \"lte\": \"2023-12-31\" } } } ] } } }\" ";
64-
65-
public static final String EXAMPLE_6 = "Example 6 — wildcard Input: Find files with names starting with \"report_\". "
66-
+ "Mapping: \"{ \"properties\": { \"filename\": { \"type\": \"keyword\" } } }\" "
67-
+ "Output: \"{ \"query\": { \"wildcard\": { \"filename\": \"report_*\" } } }\" ";
57+
"Example 3 — match_phrase (use when search string contains a space, hyphen, comma, etc. here \"new york city\" has space) \n"
58+
+ "Input: Find employees who are active and located in New York City \n"
59+
+ "Mapping: \"{ \"properties\": { \"city\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"status\": { \"type\": \"keyword\" } } }\" \n"
60+
+ "Output: \"{ \"query\": { \"bool\": { \"must\": [ { \"match_phrase\": { \"city\": \"New York City\" } } ], \"filter\": [ { \"term\": { \"status\": \"active\" } } ] } } }\" \n";
61+
62+
public static final String EXAMPLE_4 = "Example 4 — bool with SHOULD \n"
63+
+ "Input: Search articles about \"machine learning\" that are research papers or blogs. \n"
64+
+ "Mapping: \"{ \"properties\": { \"content\": { \"type\": \"text\" }, \"type\": { \"type\": \"keyword\" } } }\" \n"
65+
+ "Output: \"{ \"query\": { \"bool\": { \"must\": [ { \"match\": { \"content\": \"machine learning\" } } ], \"should\": [ { \"term\": { \"type\": \"research paper\" } }, { \"term\": { \"type\": \"blog\" } } ], \"minimum_should_match\": 1 } } }\" \n";
66+
67+
public static final String EXAMPLE_5 = "Example 5 — MUST NOT \n"
68+
+ "Input: List customers who have not made a purchase in 2023. \n"
69+
+ "Mapping: \"{ \"properties\": { \"last_purchase_date\": { \"type\": \"date\" } } }\" \n"
70+
+ "Output: \"{ \"query\": { \"bool\": { \"must_not\": [ { \"range\": { \"last_purchase_date\": { \"gte\": \"2023-01-01\", \"lte\": \"2023-12-31\" } } } ] } } }\" \n";
71+
72+
public static final String EXAMPLE_6 = "Example 6 — wildcard \n"
73+
+ "Input: Find files with names starting with \"report_\". \n"
74+
+ "Mapping: \"{ \"properties\": { \"filename\": { \"type\": \"keyword\" } } }\" \n"
75+
+ "Output: \"{ \"query\": { \"wildcard\": { \"filename\": \"report_*\" } } }\" \n";
6876

6977
public static final String EXAMPLE_7 =
70-
"Example 7 — nested query (note the index mapping says \"type\": \"nested\", do not use it for other types) Input: Find books where an authors first_name is John AND last_name is Doe. "
71-
+ "Mapping: \"{ \"properties\": { \"author\": { \"type\": \"nested\", \"properties\": { \"first_name\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"last_name\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } } } } } }\" "
72-
+ "Output: \"{ \"query\": { \"nested\": { \"path\": \"author\", \"query\": { \"bool\": { \"must\": [ { \"term\": { \"author.first_name.keyword\": \"John\" } }, { \"term\": { \"author.last_name.keyword\": \"Doe\" } } ] } } } } }\" ";
73-
74-
public static final String EXAMPLE_8 = "Example 8 — terms aggregation Input: Show the number of orders per status. "
75-
+ "Mapping: \"{ \"properties\": { \"status\": { \"type\": \"keyword\" } } }\" "
76-
+ "Output: \"{ \"size\": 0, \"aggs\": { \"orders_by_status\": { \"terms\": { \"field\": \"status\" } } } }\" ";
77-
78-
public static final String EXAMPLE_9 =
79-
"Example 9 — metric aggregation with filter Input: What is the average price of electronics products? "
80-
+ "Mapping: \"{ \"properties\": { \"category\": { \"type\": \"keyword\" }, \"price\": { \"type\": \"float\" } } }\" "
81-
+ "Output: \"{ \"size\": 0, \"query\": { \"term\": { \"category\": \"electronics\" } }, \"aggs\": { \"avg_price\": { \"avg\": { \"field\": \"price\" } } } }\" ";
82-
83-
public static final String EXAMPLE_10 = "Example 10 — top N by metric Input: List the top 3 categories by total sales volume. "
84-
+ "Mapping: \"{ \"properties\": { \"category\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"sales\": { \"type\": \"float\" } } }\" "
85-
+ "Output: \"{ \"size\": 0, \"aggs\": { \"top_categories\": { \"terms\": { \"field\": \"category.keyword\", \"size\": 3, \"order\": { \"total_sales\": \"desc\" } }, \"aggs\": { \"total_sales\": { \"sum\": { \"field\": \"sales\" } } } } } }\" ";
86-
87-
public static final String EXAMPLE_11 = "Example 11 — fallback Input: Find employees who speak Klingon fluently. "
88-
+ "Mapping: \"{ \"properties\": { \"name\": { \"type\": \"text\" }, \"role\": { \"type\": \"keyword\" } } }\" "
78+
"Example 7 — nested query (note the index mapping says \"type\": \"nested\", do not use it for other types) \n"
79+
+ "Input: Find books where an authors first_name is John AND last_name is Doe. \n"
80+
+ "Mapping: \"{ \"properties\": { \"author\": { \"type\": \"nested\", \"properties\": { \"first_name\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"last_name\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } } } } } }\" \n"
81+
+ "Output: \"{ \"query\": { \"nested\": { \"path\": \"author\", \"query\": { \"bool\": { \"must\": [ { \"term\": { \"author.first_name.keyword\": \"John\" } }, { \"term\": { \"author.last_name.keyword\": \"Doe\" } } ] } } } } }\" \n";
82+
83+
public static final String EXAMPLE_8 = "Example 8 — terms aggregation \n"
84+
+ "Input: Show the number of orders per status. \n"
85+
+ "Mapping: \"{ \"properties\": { \"status\": { \"type\": \"keyword\" } } }\" \n"
86+
+ "Output: \"{ \"size\": 0, \"aggs\": { \"orders_by_status\": { \"terms\": { \"field\": \"status\" } } } }\" \n";
87+
88+
public static final String EXAMPLE_9 = "Example 9 — metric aggregation with filter \n"
89+
+ "Input: What is the average price of electronics products? \n"
90+
+ "Mapping: \"{ \"properties\": { \"category\": { \"type\": \"keyword\" }, \"price\": { \"type\": \"float\" } } }\" \n"
91+
+ "Output: \"{ \"size\": 0, \"query\": { \"term\": { \"category\": \"electronics\" } }, \"aggs\": { \"avg_price\": { \"avg\": { \"field\": \"price\" } } } }\" \n";
92+
93+
public static final String EXAMPLE_10 = "Example 10 — top N by metric \n"
94+
+ "Input: List the top 3 categories by total sales volume. \n"
95+
+ "Mapping: \"{ \"properties\": { \"category\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"sales\": { \"type\": \"float\" } } }\" \n"
96+
+ "Output: \"{ \"size\": 0, \"aggs\": { \"top_categories\": { \"terms\": { \"field\": \"category.keyword\", \"size\": 3, \"order\": { \"total_sales\": \"desc\" } }, \"aggs\": { \"total_sales\": { \"sum\": { \"field\": \"sales\" } } } } } }\" \n";
97+
98+
public static final String EXAMPLE_11 = "Example 11 — fallback \n"
99+
+ "Input: Find employees who speak Klingon fluently. \n"
100+
+ "Mapping: \"{ \"properties\": { \"name\": { \"type\": \"text\" }, \"role\": { \"type\": \"keyword\" } } }\" \n"
89101
+ "Output: "
90-
+ DEFAULT_QUERY;
102+
+ DEFAULT_QUERY
103+
+ "\n";
91104

92-
public static final String EXAMPLES = "EXAMPLES: "
105+
public static final String EXAMPLES = "\nEXAMPLES: "
93106
+ EXAMPLE_1
94107
+ EXAMPLE_2
95108
+ EXAMPLE_3
@@ -102,10 +115,16 @@ public class QueryPlanningPromptTemplate {
102115
+ EXAMPLE_10
103116
+ EXAMPLE_11;
104117

105-
public static final String PROMPT_SUFFIX = "GIVE THE OUTPUT PART ONLY IN YOUR RESPONSE "
106-
+ "Question: asked by user "
107-
+ "Mapping:${parameters.index_mapping:-} "
118+
public static final String PROMPT_SUFFIX = "GIVE THE OUTPUT PART ONLY IN YOUR RESPONSE \n"
119+
+ "Question: asked by user \n"
120+
+ "Mapping:${parameters.index_mapping:-} \n"
108121
+ "Output:";
109122

110-
public static final String DEFAULT_SYSTEM_PROMPT = PROMPT_PREFIX + " " + OUTPUT_FORMAT_INSTRUCTIONS + EXAMPLES + " " + PROMPT_SUFFIX;
123+
public static final String DEFAULT_SYSTEM_PROMPT = PROMPT_PREFIX
124+
+ " \n "
125+
+ OUTPUT_FORMAT_INSTRUCTIONS
126+
+ " \n "
127+
+ EXAMPLES
128+
+ " \n "
129+
+ PROMPT_SUFFIX;
111130
}

plugin/src/test/java/org/opensearch/ml/rest/RestQueryPlanningToolIT.java

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,7 @@ public class RestQueryPlanningToolIT extends MLCommonsRestTestCase {
4040
+ GITHUB_CI_AWS_REGION
4141
+ "\",\n"
4242
+ " \"service_name\": \"bedrock\",\n"
43-
+ " \"model\": \"us.anthropic.claude-3-7-sonnet-20250219-v1:0\",\n"
44-
+ " \"system_prompt\":\"please help answer the user question. \"\n"
43+
+ " \"model\": \"us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n"
4544
+ " },\n"
4645
+ " \"credential\": {\n"
4746
+ " \"access_key\":\" "
@@ -62,7 +61,7 @@ public class RestQueryPlanningToolIT extends MLCommonsRestTestCase {
6261
+ " \"headers\": {\n"
6362
+ " \"content-type\": \"application/json\"\n"
6463
+ " },\n"
65-
+ " \"request_body\": \"{ \\\"system\\\": [{\\\"text\\\": \\\"${parameters.system_prompt}\\\"}], \\\"messages\\\": [{\\\"role\\\":\\\"user\\\",\\\"content\\\":[{\\\"text\\\":\\\"${parameters.prompt}\\\"}]}]}\"\n"
64+
+ " \"request_body\": \"{ \\\"system\\\": [{\\\"text\\\": \\\"${parameters.system_prompt}\\\"}], \\\"messages\\\": [{\\\"role\\\":\\\"user\\\",\\\"content\\\":[{\\\"text\\\":\\\"${parameters.query_text}\\\"}]}]}\"\n"
6665
+ " }\n"
6766
+ " ]\n"
6867
+ "}";

0 commit comments

Comments
 (0)