Skip to content

Commit b2de23a

Browse files
Add Default system prompt
Signed-off-by: rithin-pullela-aws <rithinp@amazon.com>
1 parent e9266f6 commit b2de23a

File tree

2 files changed

+24
-8
lines changed

2 files changed

+24
-8
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package org.opensearch.ml.engine.tools;
2+
3+
/**
 * Prompt fragments and the fallback query used by {@code QueryPlanningTool}.
 *
 * <p>Several constants contain {@code ${parameters.<name>}} placeholders (optionally with a
 * {@code :-default} suffix). They are not resolved here; the consumer is expected to run them
 * through a substitutor configured with the {@code "${parameters."} prefix.
 *
 * <p>All members are compile-time string constants; the class is a non-instantiable holder.
 */
public final class QueryPlanningPromptTemplate {

    /**
     * Fallback {@code multi_match} query template. Expects {@code query_text}; {@code query_fields}
     * defaults to {@code ["*"]} when absent.
     */
    public static final String DEFAULT_QUERY =
        "{ \"query\": { \"multi_match\" : { \"query\": \"${parameters.query_text}\", \"fields\": ${parameters.query_fields:-[\"*\"]} } } }";

    /** Role description and the rules for choosing query / aggregation types. */
    public static final String PROMPT_PREFIX =
        "You are an OpenSearch DSL expert. "
            + "Your job is to convert natural‑language questions into strict JSON OpenSearch search query bodies. "
            + "Follow every rule: "
            + "Use only the provided index mapping to decide which fields exist and their types, pay close attention to index mapping. "
            + "Never invent fields not in the mapping. "
            + "Choose query types based on user intent and fields: "
            + "match → single-token full‑text searches on analyzed text fields, "
            + "match_phrase → multi-token phrases on analyzed text fields (search string contains a space, hyphen, comma, etc.), "
            + "term / terms → exact match on keyword, numeric, boolean, "
            + "range → numeric/date comparisons (gt, lt, gte, lte), "
            + "bool with must, should, must_not, filter → AND/OR/NOT logic, "
            + "wildcard / prefix on keyword → \"starts with\", \"contains\", "
            + "exists → field presence/absence, "
            + "nested query / nested agg → Never wrap a field in nested unless the mapping for that exact path (or one of its parents) explicitly says \"type\": \"nested\". "
            + "Otherwise use a normal query on the flattened field. "
            + "Aggregations (when asked for counts, averages, \"top N\", distributions): "
            + "terms on field.keyword or numeric for grouping / top N, "
            + "Metric aggs (avg, min, max, sum, stats, cardinality) on numeric fields, "
            + "date_histogram, histogram, range for distributions, "
            + "Always set \"size\": 0 when only aggregations are needed, "
            + "Use sub‑aggregations + order for \"top N by metric\", "
            + "If grouping by a text field, use its .keyword sub‑field.";

    /** Output contract for the model, including the fallback to {@link #DEFAULT_QUERY}. */
    public static final String OUTPUT_FORMAT_INSTRUCTIONS =
        "Output format: Output only a valid escaped JSON string or the literal "
            + DEFAULT_QUERY
            + ". Do not print anything other than the JSON like code blocks etc. Follow the examples below. "
            + "Fallback: If the request cannot be fulfilled with the mapping (missing field, unsupported feature, etc.), output the literal string: "
            + DEFAULT_QUERY;

    /**
     * Few-shot examples (input, mapping, expected output). The final example demonstrates the
     * fallback and therefore ends with {@link #DEFAULT_QUERY}.
     */
    // NOTE(review): Example 4 uses "match" for the multi-token string "machine learning", while
    // PROMPT_PREFIX tells the model to use match_phrase for multi-token input — confirm intent.
    public static final String EXAMPLES =
        "EXAMPLES: "
            // Example 1
            + "Example 1 — numeric range "
            + "Input: Show all products that cost more than 50 dollars. "
            + "Mapping: \"{ \"properties\": { \"price\": { \"type\": \"float\" } } }\" "
            + "Output: \"{ \"query\": { \"range\": { \"price\": { \"gt\": 50 } } } }\" "
            // Example 2
            + "Example 2 — text match + exact filter "
            + "Input: Find employees in London who are active. "
            + "Mapping: \"{ \"properties\": { \"city\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"status\": { \"type\": \"keyword\" } } }\" "
            + "Output: \"{ \"query\": { \"bool\": { \"must\": [ { \"match\": { \"city\": \"London\" } } ], \"filter\": [ { \"term\": { \"status\": \"active\" } } ] } } }\" "
            // Example 3
            + "Example 3 — match_phrase (use when search string contains a space, hyphen, comma, etc. here \"new york city\" has space) "
            + "Input: Find employees who are active and located in New York City "
            + "Mapping: \"{ \"properties\": { \"city\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"status\": { \"type\": \"keyword\" } } }\" "
            + "Output: \"{ \"query\": { \"bool\": { \"must\": [ { \"match_phrase\": { \"city\": \"New York City\" } } ], \"filter\": [ { \"term\": { \"status\": \"active\" } } ] } } }\" "
            // Example 4
            + "Example 4 — bool with SHOULD "
            + "Input: Search articles about \"machine learning\" that are research papers or blogs. "
            + "Mapping: \"{ \"properties\": { \"content\": { \"type\": \"text\" }, \"type\": { \"type\": \"keyword\" } } }\" "
            + "Output: \"{ \"query\": { \"bool\": { \"must\": [ { \"match\": { \"content\": \"machine learning\" } } ], \"should\": [ { \"term\": { \"type\": \"research paper\" } }, { \"term\": { \"type\": \"blog\" } } ], \"minimum_should_match\": 1 } } }\" "
            // Example 5
            + "Example 5 — MUST NOT "
            + "Input: List customers who have not made a purchase in 2023. "
            + "Mapping: \"{ \"properties\": { \"last_purchase_date\": { \"type\": \"date\" } } }\" "
            + "Output: \"{ \"query\": { \"bool\": { \"must_not\": [ { \"range\": { \"last_purchase_date\": { \"gte\": \"2023-01-01\", \"lte\": \"2023-12-31\" } } } ] } } }\" "
            // Example 6
            + "Example 6 — wildcard "
            + "Input: Find files with names starting with \"report_\". "
            + "Mapping: \"{ \"properties\": { \"filename\": { \"type\": \"keyword\" } } }\" "
            + "Output: \"{ \"query\": { \"wildcard\": { \"filename\": \"report_*\" } } }\" "
            // Example 7
            + "Example 7 — nested query (note the index mapping says \"type\": \"nested\", do not use it for other types) "
            + "Input: Find books where an authors first_name is John AND last_name is Doe. "
            + "Mapping: \"{ \"properties\": { \"author\": { \"type\": \"nested\", \"properties\": { \"first_name\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"last_name\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } } } } } }\" "
            + "Output: \"{ \"query\": { \"nested\": { \"path\": \"author\", \"query\": { \"bool\": { \"must\": [ { \"term\": { \"author.first_name.keyword\": \"John\" } }, { \"term\": { \"author.last_name.keyword\": \"Doe\" } } ] } } } } }\" "
            // Example 8
            + "Example 8 — terms aggregation "
            + "Input: Show the number of orders per status. "
            + "Mapping: \"{ \"properties\": { \"status\": { \"type\": \"keyword\" } } }\" "
            + "Output: \"{ \"size\": 0, \"aggs\": { \"orders_by_status\": { \"terms\": { \"field\": \"status\" } } } }\" "
            // Example 9
            + "Example 9 — metric aggregation with filter "
            + "Input: What is the average price of electronics products? "
            + "Mapping: \"{ \"properties\": { \"category\": { \"type\": \"keyword\" }, \"price\": { \"type\": \"float\" } } }\" "
            + "Output: \"{ \"size\": 0, \"query\": { \"term\": { \"category\": \"electronics\" } }, \"aggs\": { \"avg_price\": { \"avg\": { \"field\": \"price\" } } } }\" "
            // Example 10
            + "Example 10 — top N by metric "
            + "Input: List the top 3 categories by total sales volume. "
            + "Mapping: \"{ \"properties\": { \"category\": { \"type\": \"text\", \"fields\": { \"keyword\": { \"type\": \"keyword\" } } }, \"sales\": { \"type\": \"float\" } } }\" "
            + "Output: \"{ \"size\": 0, \"aggs\": { \"top_categories\": { \"terms\": { \"field\": \"category.keyword\", \"size\": 3, \"order\": { \"total_sales\": \"desc\" } }, \"aggs\": { \"total_sales\": { \"sum\": { \"field\": \"sales\" } } } } } }\" "
            // Example 11
            + "Example 11 — fallback "
            + "Input: Find employees who speak Klingon fluently. "
            + "Mapping: \"{ \"properties\": { \"name\": { \"type\": \"text\" }, \"role\": { \"type\": \"keyword\" } } }\" "
            + "Output: "
            + DEFAULT_QUERY;

    /** Closing instruction; {@code index_mapping} defaults to an empty string when absent. */
    public static final String PROMPT_SUFFIX =
        "GIVE THE OUTPUT PART ONLY IN YOUR RESPONSE Question: asked by user Mapping:${parameters.index_mapping:-} Output:";

    /**
     * Full default system prompt: prefix, output-format instructions, examples and suffix, each
     * separated by a single space so that no section runs into the next (in particular the
     * fallback literal ending OUTPUT_FORMAT_INSTRUCTIONS must not fuse with "EXAMPLES:").
     */
    public static final String DEFAULT_SYSTEM_PROMPT =
        PROMPT_PREFIX + " " + OUTPUT_FORMAT_INSTRUCTIONS + " " + EXAMPLES + " " + PROMPT_SUFFIX;

    /** Constants holder; not meant to be instantiated. */
    private QueryPlanningPromptTemplate() {}
}

ml-algorithms/src/main/java/org/opensearch/ml/engine/tools/QueryPlanningTool.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
package org.opensearch.ml.engine.tools;
77

8+
import static org.opensearch.ml.engine.tools.QueryPlanningPromptTemplate.DEFAULT_QUERY;
9+
import static org.opensearch.ml.engine.tools.QueryPlanningPromptTemplate.DEFAULT_SYSTEM_PROMPT;
10+
811
import java.util.List;
912
import java.util.Map;
1013

@@ -30,7 +33,7 @@ public class QueryPlanningTool implements WithModelTool {
3033
public static final String TYPE = "QueryPlanningTool";
3134
public static final String MODEL_ID_FIELD = "model_id";
3235
private final MLModelTool queryGenerationTool;
33-
public static final String PROMPT_FIELD = "prompt";
36+
public static final String SYSTEM_PROMPT_FIELD = "system_prompt";
3437
private static final String GENERATION_TYPE_FIELD = "generation_type";
3538
private static final String LLM_GENERATED_TYPE_FIELD = "llmGenerated";
3639
@Getter
@@ -46,10 +49,6 @@ public class QueryPlanningTool implements WithModelTool {
4649
@Getter
4750
@Setter
4851
private String description = DEFAULT_DESCRIPTION;
49-
private String defaultQuery =
50-
"{ \"query\": { \"multi_match\" : { \"query\": \"${parameters.query_text}\", \"fields\": ${parameters.query_fields:-[\"*\"]} } } }";
51-
private String defaultPrompt =
52-
"You are an OpenSearch Query DSL generation assistant; try using the optional provided index mapping ${parameters.index_mapping:-}, specified fields ${parameters.query_fields:-}, and the given sample queries as examples, generate an OpenSearch Query DSL to retrieve the most relevant documents for the user provided natural language question: ${parameters.query_text}, please return the query dsl only in a string format, no other texts.\n";
5352

5453
public QueryPlanningTool(String generationType, MLModelTool queryGenerationTool) {
5554
this.generationType = generationType;
@@ -63,15 +62,15 @@ public <T> void run(Map<String, String> parameters, ActionListener<T> listener)
6362
listener.onFailure(new IllegalArgumentException("Empty parameters for QueryPlanningTool: " + parameters));
6463
return;
6564
}
66-
if (!parameters.containsKey(PROMPT_FIELD)) {
67-
parameters.put(PROMPT_FIELD, defaultPrompt);
65+
if (!parameters.containsKey(SYSTEM_PROMPT_FIELD)) {
66+
parameters.put(SYSTEM_PROMPT_FIELD, DEFAULT_SYSTEM_PROMPT);
6867
}
6968
ActionListener<T> modelListener = ActionListener.wrap(r -> {
7069
try {
7170
String queryString = (String) r;
7271
if (queryString == null || queryString.isBlank() || queryString.equals("null")) {
7372
StringSubstitutor substitutor = new StringSubstitutor(parameters, "${parameters.", "}");
74-
String defaultQueryString = substitutor.replace(this.defaultQuery);
73+
String defaultQueryString = substitutor.replace(DEFAULT_QUERY);
7574
listener.onResponse((T) defaultQueryString);
7675
} else {
7776
listener.onResponse((T) queryString);

0 commit comments

Comments
 (0)