@@ -5,91 +5,104 @@ public class QueryPlanningPromptTemplate {
55 public static final String DEFAULT_QUERY =
66 "{ \" query\" : { \" multi_match\" : { \" query\" : \" ${parameters.query_text}\" , \" fields\" : ${parameters.query_fields:-[\" *\" ]} } } }" ;
77
8- public static final String QUERY_TYPE_RULES = "Choose query types based on user intent and fields: "
9- + "match → single-token full‑text searches on analyzed text fields, "
10- + "match_phrase → multi-token phrases on analyzed text fields (search string contains a space, hyphen, comma, etc.), "
11- + "term / terms → exact match on keyword, numeric, boolean, "
12- + "range → numeric/date comparisons (gt, lt, gte, lte), "
13- + "bool with must, should, must_not, filter → AND/OR/NOT logic, "
14- + "wildcard / prefix on keyword → \" starts with\" , \" contains\" , "
15- + "exists → field presence/absence, "
16- + "nested query / nested agg → Never wrap a field in nested unless the mapping for that exact path (or one of its parents) explicitly says \" type\" : \" nested\" . "
17- + "Otherwise use a normal query on the flattened field. " ;
18-
19- public static final String AGGREGATION_RULES = "Aggregations (when asked for counts, averages, \" top N\" , distributions): "
20- + "terms on field.keyword or numeric for grouping / top N, "
21- + "Metric aggs (avg, min, max, sum, stats, cardinality) on numeric fields, "
22- + "date_histogram, histogram, range for distributions, "
23- + "Always set \" size\" : 0 when only aggregations are needed, "
24- + "Use sub‑aggregations + order for \" top N by metric\" , "
25- + "If grouping by a text field, use its .keyword sub‑field." ;
8+ public static final String QUERY_TYPE_RULES = "\n Choose query types based on user intent and fields: \n "
9+ + "match: single-token full‑text searches on analyzed text fields, \n "
10+ + "match_phrase: multi-token phrases on analyzed text fields (search string contains a space, hyphen, comma, etc.), \n "
11+ + "term / terms: exact match on keyword, numeric, boolean, \n "
12+ + "range: numeric/date comparisons (gt, lt, gte, lte), \n "
13+ + "bool with must, should, must_not, filter: AND/OR/NOT logic, \n "
14+ + "wildcard / prefix on keyword: \" starts with\" , \" contains\" , \n "
15+ + "exists: field presence/absence, \n "
16+ + "nested query / nested agg: Never wrap a field in nested unless the mapping for that exact path (or one of its parents) explicitly says \" type\" : \" nested\" . \n "
17+ + "Otherwise use a normal query on the flattened field. \n " ;
18+
19+ public static final String AGGREGATION_RULES = "Aggregations (when asked for counts, averages, \" top N\" , distributions): \n "
20+ + "terms on field.keyword or numeric for grouping / top N, \n "
21+ + "Metric aggs (avg, min, max, sum, stats, cardinality) on numeric fields, \n "
22+ + "date_histogram, histogram, range for distributions, \n "
23+ + "Always set \" size\" : 0 when only aggregations are needed, \n "
24+ + "Use sub‑aggregations + order for \" top N by metric\" , \n "
25+ + "If grouping by a text field, use its .keyword sub‑field.\n " ;
2626
2727 public static final String PROMPT_PREFIX =
28- "You are an OpenSearch DSL expert. Your job is to convert natural‑language questions into strict JSON OpenSearch search query bodies. "
29- + "Follow every rule: Use only the provided index mapping to decide which fields exist and their types, pay close attention to index mapping. "
30- + "Do not use fields that not present in mapping. "
28+ "You are an OpenSearch DSL expert. Your job is to convert natural‑language questions into strict JSON OpenSearch search query bodies. \n "
29+ + "Follow every rule: Use only the provided index mapping to decide which fields exist and their types, pay close attention to index mapping. \n "
30+ + "Do not use fields that are not present in mapping. \n "
3131 + QUERY_TYPE_RULES
3232 + AGGREGATION_RULES ;
3333
34- public static final String OUTPUT_FORMAT_INSTRUCTIONS = "Output format: Output only a valid escaped JSON string or the literal "
34+ public static final String OUTPUT_FORMAT_INSTRUCTIONS = "Output format: Output only a valid escaped JSON string or the literal \n "
3535 + DEFAULT_QUERY
36- + ". Do not print anything other than the JSON like code blocks etc. "
37- + "Follow the examples below. "
38- + "Fallback: If the request cannot be fulfilled with the mapping (missing field, unsupported feature, etc.), "
36+ + " \n Return exactly one JSON object. "
37+ + "Output nothing before or after it — no code fences/backticks (`), angle brackets (< >), hash marks (#), asterisks (*), pipes (|), tildes (~), ellipses (… or ...), emojis, typographic quotes (“ ”), non-breaking spaces (U+00A0), zero-width characters (U+200B, U+FEFF), or any other markup/control characters. "
38+ + "Use valid JSON only (standard double quotes \" ; no comments; no trailing commas). "
39+ + "This applies to formatting only, string values inside the JSON may contain any needed Unicode characters. \n "
40+ + "Follow the examples below. \n "
41+ + "Fallback: If the request cannot be fulfilled with the mapping (missing field, unsupported feature, etc.), \n "
3942 + "output the literal string: "
4043 + DEFAULT_QUERY ;
4144
4245 // Individual example constants for better maintainability
43- public static final String EXAMPLE_1 = "Example 1 — numeric range Input: Show all products that cost more than 50 dollars. "
44- + "Mapping: \" { \" properties\" : { \" price\" : { \" type\" : \" float\" } } }\" "
45- + "Output: \" { \" query\" : { \" range\" : { \" price\" : { \" gt\" : 50 } } } }\" " ;
46+ public static final String EXAMPLE_1 = "Example 1 — numeric range \n "
47+ + "Input: Show all products that cost more than 50 dollars. \n "
48+ + "Mapping: \" { \" properties\" : { \" price\" : { \" type\" : \" float\" } } }\" \n "
49+ + "Output: \" { \" query\" : { \" range\" : { \" price\" : { \" gt\" : 50 } } } }\" \n " ;
4650
47- public static final String EXAMPLE_2 = "Example 2 — text match + exact filter Input: Find employees in London who are active. "
48- + "Mapping: \" { \" properties\" : { \" city\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } }, \" status\" : { \" type\" : \" keyword\" } } }\" "
49- + "Output: \" { \" query\" : { \" bool\" : { \" must\" : [ { \" match\" : { \" city\" : \" London\" } } ], \" filter\" : [ { \" term\" : { \" status\" : \" active\" } } ] } } }\" " ;
51+ public static final String EXAMPLE_2 = "Example 2 — text match + exact filter \n "
52+ + "Input: Find employees in London who are active. \n "
53+ + "Mapping: \" { \" properties\" : { \" city\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } }, \" status\" : { \" type\" : \" keyword\" } } }\" \n "
54+ + "Output: \" { \" query\" : { \" bool\" : { \" must\" : [ { \" match\" : { \" city\" : \" London\" } } ], \" filter\" : [ { \" term\" : { \" status\" : \" active\" } } ] } } }\" \n " ;
5055
5156 public static final String EXAMPLE_3 =
52- "Example 3 — match_phrase (use when search string contains a space, hyphen, comma, etc. here \" new york city\" has space) Input: Find employees who are active and located in New York City "
53- + "Mapping: \" { \" properties\" : { \" city\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } }, \" status\" : { \" type\" : \" keyword\" } } }\" "
54- + "Output: \" { \" query\" : { \" bool\" : { \" must\" : [ { \" match_phrase\" : { \" city\" : \" New York City\" } } ], \" filter\" : [ { \" term\" : { \" status\" : \" active\" } } ] } } }\" " ;
55-
56- public static final String EXAMPLE_4 =
57- "Example 4 — bool with SHOULD Input: Search articles about \" machine learning\" that are research papers or blogs. "
58- + "Mapping: \" { \" properties\" : { \" content\" : { \" type\" : \" text\" }, \" type\" : { \" type\" : \" keyword\" } } }\" "
59- + "Output: \" { \" query\" : { \" bool\" : { \" must\" : [ { \" match\" : { \" content\" : \" machine learning\" } } ], \" should\" : [ { \" term\" : { \" type\" : \" research paper\" } }, { \" term\" : { \" type\" : \" blog\" } } ], \" minimum_should_match\" : 1 } } }\" " ;
60-
61- public static final String EXAMPLE_5 = "Example 5 — MUST NOT Input: List customers who have not made a purchase in 2023. "
62- + "Mapping: \" { \" properties\" : { \" last_purchase_date\" : { \" type\" : \" date\" } } }\" "
63- + "Output: \" { \" query\" : { \" bool\" : { \" must_not\" : [ { \" range\" : { \" last_purchase_date\" : { \" gte\" : \" 2023-01-01\" , \" lte\" : \" 2023-12-31\" } } } ] } } }\" " ;
64-
65- public static final String EXAMPLE_6 = "Example 6 — wildcard Input: Find files with names starting with \" report_\" . "
66- + "Mapping: \" { \" properties\" : { \" filename\" : { \" type\" : \" keyword\" } } }\" "
67- + "Output: \" { \" query\" : { \" wildcard\" : { \" filename\" : \" report_*\" } } }\" " ;
57+ "Example 3 — match_phrase (use when search string contains a space, hyphen, comma, etc. here \" new york city\" has space) \n "
58+ + "Input: Find employees who are active and located in New York City \n "
59+ + "Mapping: \" { \" properties\" : { \" city\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } }, \" status\" : { \" type\" : \" keyword\" } } }\" \n "
60+ + "Output: \" { \" query\" : { \" bool\" : { \" must\" : [ { \" match_phrase\" : { \" city\" : \" New York City\" } } ], \" filter\" : [ { \" term\" : { \" status\" : \" active\" } } ] } } }\" \n " ;
61+
62+ public static final String EXAMPLE_4 = "Example 4 — bool with SHOULD \n "
63+ + "Input: Search articles about \" machine learning\" that are research papers or blogs. \n "
64+ + "Mapping: \" { \" properties\" : { \" content\" : { \" type\" : \" text\" }, \" type\" : { \" type\" : \" keyword\" } } }\" \n "
65+ + "Output: \" { \" query\" : { \" bool\" : { \" must\" : [ { \" match\" : { \" content\" : \" machine learning\" } } ], \" should\" : [ { \" term\" : { \" type\" : \" research paper\" } }, { \" term\" : { \" type\" : \" blog\" } } ], \" minimum_should_match\" : 1 } } }\" \n " ;
66+
67+ public static final String EXAMPLE_5 = "Example 5 — MUST NOT \n "
68+ + "Input: List customers who have not made a purchase in 2023. \n "
69+ + "Mapping: \" { \" properties\" : { \" last_purchase_date\" : { \" type\" : \" date\" } } }\" \n "
70+ + "Output: \" { \" query\" : { \" bool\" : { \" must_not\" : [ { \" range\" : { \" last_purchase_date\" : { \" gte\" : \" 2023-01-01\" , \" lte\" : \" 2023-12-31\" } } } ] } } }\" \n " ;
71+
72+ public static final String EXAMPLE_6 = "Example 6 — wildcard \n "
73+ + "Input: Find files with names starting with \" report_\" . \n "
74+ + "Mapping: \" { \" properties\" : { \" filename\" : { \" type\" : \" keyword\" } } }\" \n "
75+ + "Output: \" { \" query\" : { \" wildcard\" : { \" filename\" : \" report_*\" } } }\" \n " ;
6876
6977 public static final String EXAMPLE_7 =
70- "Example 7 — nested query (note the index mapping says \" type\" : \" nested\" , do not use it for other types) Input: Find books where an authors first_name is John AND last_name is Doe. "
71- + "Mapping: \" { \" properties\" : { \" author\" : { \" type\" : \" nested\" , \" properties\" : { \" first_name\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } }, \" last_name\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } } } } } }\" "
72- + "Output: \" { \" query\" : { \" nested\" : { \" path\" : \" author\" , \" query\" : { \" bool\" : { \" must\" : [ { \" term\" : { \" author.first_name.keyword\" : \" John\" } }, { \" term\" : { \" author.last_name.keyword\" : \" Doe\" } } ] } } } } }\" " ;
73-
74- public static final String EXAMPLE_8 = "Example 8 — terms aggregation Input: Show the number of orders per status. "
75- + "Mapping: \" { \" properties\" : { \" status\" : { \" type\" : \" keyword\" } } }\" "
76- + "Output: \" { \" size\" : 0, \" aggs\" : { \" orders_by_status\" : { \" terms\" : { \" field\" : \" status\" } } } }\" " ;
77-
78- public static final String EXAMPLE_9 =
79- "Example 9 — metric aggregation with filter Input: What is the average price of electronics products? "
80- + "Mapping: \" { \" properties\" : { \" category\" : { \" type\" : \" keyword\" }, \" price\" : { \" type\" : \" float\" } } }\" "
81- + "Output: \" { \" size\" : 0, \" query\" : { \" term\" : { \" category\" : \" electronics\" } }, \" aggs\" : { \" avg_price\" : { \" avg\" : { \" field\" : \" price\" } } } }\" " ;
82-
83- public static final String EXAMPLE_10 = "Example 10 — top N by metric Input: List the top 3 categories by total sales volume. "
84- + "Mapping: \" { \" properties\" : { \" category\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } }, \" sales\" : { \" type\" : \" float\" } } }\" "
85- + "Output: \" { \" size\" : 0, \" aggs\" : { \" top_categories\" : { \" terms\" : { \" field\" : \" category.keyword\" , \" size\" : 3, \" order\" : { \" total_sales\" : \" desc\" } }, \" aggs\" : { \" total_sales\" : { \" sum\" : { \" field\" : \" sales\" } } } } } }\" " ;
86-
87- public static final String EXAMPLE_11 = "Example 11 — fallback Input: Find employees who speak Klingon fluently. "
88- + "Mapping: \" { \" properties\" : { \" name\" : { \" type\" : \" text\" }, \" role\" : { \" type\" : \" keyword\" } } }\" "
78+ "Example 7 — nested query (note the index mapping says \" type\" : \" nested\" , do not use it for other types) \n "
79+ + "Input: Find books where an author's first_name is John AND last_name is Doe. \n "
80+ + "Mapping: \" { \" properties\" : { \" author\" : { \" type\" : \" nested\" , \" properties\" : { \" first_name\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } }, \" last_name\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } } } } } }\" \n "
81+ + "Output: \" { \" query\" : { \" nested\" : { \" path\" : \" author\" , \" query\" : { \" bool\" : { \" must\" : [ { \" term\" : { \" author.first_name.keyword\" : \" John\" } }, { \" term\" : { \" author.last_name.keyword\" : \" Doe\" } } ] } } } } }\" \n " ;
82+
83+ public static final String EXAMPLE_8 = "Example 8 — terms aggregation \n "
84+ + "Input: Show the number of orders per status. \n "
85+ + "Mapping: \" { \" properties\" : { \" status\" : { \" type\" : \" keyword\" } } }\" \n "
86+ + "Output: \" { \" size\" : 0, \" aggs\" : { \" orders_by_status\" : { \" terms\" : { \" field\" : \" status\" } } } }\" \n " ;
87+
88+ public static final String EXAMPLE_9 = "Example 9 — metric aggregation with filter \n "
89+ + "Input: What is the average price of electronics products? \n "
90+ + "Mapping: \" { \" properties\" : { \" category\" : { \" type\" : \" keyword\" }, \" price\" : { \" type\" : \" float\" } } }\" \n "
91+ + "Output: \" { \" size\" : 0, \" query\" : { \" term\" : { \" category\" : \" electronics\" } }, \" aggs\" : { \" avg_price\" : { \" avg\" : { \" field\" : \" price\" } } } }\" \n " ;
92+
93+ public static final String EXAMPLE_10 = "Example 10 — top N by metric \n "
94+ + "Input: List the top 3 categories by total sales volume. \n "
95+ + "Mapping: \" { \" properties\" : { \" category\" : { \" type\" : \" text\" , \" fields\" : { \" keyword\" : { \" type\" : \" keyword\" } } }, \" sales\" : { \" type\" : \" float\" } } }\" \n "
96+ + "Output: \" { \" size\" : 0, \" aggs\" : { \" top_categories\" : { \" terms\" : { \" field\" : \" category.keyword\" , \" size\" : 3, \" order\" : { \" total_sales\" : \" desc\" } }, \" aggs\" : { \" total_sales\" : { \" sum\" : { \" field\" : \" sales\" } } } } } }\" \n " ;
97+
98+ public static final String EXAMPLE_11 = "Example 11 — fallback \n "
99+ + "Input: Find employees who speak Klingon fluently. \n "
100+ + "Mapping: \" { \" properties\" : { \" name\" : { \" type\" : \" text\" }, \" role\" : { \" type\" : \" keyword\" } } }\" \n "
89101 + "Output: "
90- + DEFAULT_QUERY ;
102+ + DEFAULT_QUERY
103+ + "\n " ;
91104
92- public static final String EXAMPLES = "EXAMPLES : "
105+ public static final String EXAMPLES = "\n EXAMPLES : \n "
93106 + EXAMPLE_1
94107 + EXAMPLE_2
95108 + EXAMPLE_3
@@ -102,10 +115,16 @@ public class QueryPlanningPromptTemplate {
102115 + EXAMPLE_10
103116 + EXAMPLE_11 ;
104117
105- public static final String PROMPT_SUFFIX = "GIVE THE OUTPUT PART ONLY IN YOUR RESPONSE "
106- + "Question: asked by user "
107- + "Mapping:${parameters.index_mapping:-} "
118+ public static final String PROMPT_SUFFIX = "GIVE THE OUTPUT PART ONLY IN YOUR RESPONSE \n "
119+ + "Question: asked by user \n "
120+ + "Mapping:${parameters.index_mapping:-} \n "
108121 + "Output:" ;
109122
110- public static final String DEFAULT_SYSTEM_PROMPT = PROMPT_PREFIX + " " + OUTPUT_FORMAT_INSTRUCTIONS + EXAMPLES + " " + PROMPT_SUFFIX ;
123+ public static final String DEFAULT_SYSTEM_PROMPT = PROMPT_PREFIX
124+ + " \n "
125+ + OUTPUT_FORMAT_INSTRUCTIONS
126+ + " \n "
127+ + EXAMPLES
128+ + " \n "
129+ + PROMPT_SUFFIX ;
111130}
0 commit comments