Skip to content

Commit c2f3777

Browse files
skeptrunedev authored and cdxker committed
feature: allow custom pre and post tags for highlights
1 parent b854b23 commit c2f3777

File tree

10 files changed

+117
-20
lines changed

10 files changed

+117
-20
lines changed

clients/ts-sdk/openapi.json

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,7 +1297,7 @@
12971297
"Chunk"
12981298
],
12991299
"summary": "Search",
1300-
"description": "This route provides the primary search functionality for the API. It can be used to search for chunks by semantic similarity, full-text similarity, or a combination of both. Results' `chunk_html` values will be modified with `<b><mark>` tags for sub-sentence highlighting.",
1300+
"description": "This route provides the primary search functionality for the API. It can be used to search for chunks by semantic similarity, full-text similarity, or a combination of both. Results' `chunk_html` values will be modified with `<mark><b>` or custom specified tags for sub-sentence highlighting.",
13011301
"operationId": "search_chunks",
13021302
"parameters": [
13031303
{
@@ -9634,7 +9634,7 @@
96349634
},
96359635
"highlight_results": {
96369636
"type": "boolean",
9637-
"description": "Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<b><mark>` tags to the chunk_html of the chunks to highlight matching splits.",
9637+
"description": "Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<mark><b>` tags to the chunk_html of the chunks to highlight matching splits.",
96389638
"nullable": true
96399639
},
96409640
"image_config": {
@@ -10221,7 +10221,7 @@
1022110221
},
1022210222
"highlight_results": {
1022310223
"type": "boolean",
10224-
"description": "Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<b><mark>` tags to the chunk_html of the chunks to highlight matching splits and return the highlights on each scored chunk in the response.",
10224+
"description": "Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<mark><b>` tags to the chunk_html of the chunks to highlight matching splits and return the highlights on each scored chunk in the response.",
1022510225
"nullable": true
1022610226
},
1022710227
"highlight_strategy": {
@@ -10244,6 +10244,16 @@
1024410244
"description": "Set highlight_window to a number to control the number of words that are returned around the matched phrases. If not specified, this defaults to 0. This is useful for when you want to show more context around the matched words. When specified, window/2 whitespace separated words are added before and after each highlight in the response's highlights array. If an extended highlight overlaps with another highlight, the overlapping words are only included once. This parameter can be overridden to respect the highlight_max_length param.",
1024510245
"nullable": true,
1024610246
"minimum": 0
10247+
},
10248+
"post_tag": {
10249+
"type": "string",
10250+
"description": "Custom html tag which should appear after highlights. If not specified, this defaults to '</b></mark>'.",
10251+
"nullable": true
10252+
},
10253+
"pre_tag": {
10254+
"type": "string",
10255+
"description": "Custom html tag which should appear before highlights. If not specified, this defaults to '<mark><b>'.",
10256+
"nullable": true
1024710257
}
1024810258
}
1024910259
},

clients/ts-sdk/src/types.gen.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,7 +1152,7 @@ export type GenerateOffChunksReqPayload = {
11521152
*/
11531153
frequency_penalty?: (number) | null;
11541154
/**
1155-
* Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<b><mark>` tags to the chunk_html of the chunks to highlight matching splits.
1155+
* Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<mark><b>` tags to the chunk_html of the chunks to highlight matching splits.
11561156
*/
11571157
highlight_results?: (boolean) | null;
11581158
image_config?: ((ImageConfig) | null);
@@ -1351,7 +1351,7 @@ export type HighlightOptions = {
13511351
*/
13521352
highlight_max_num?: (number) | null;
13531353
/**
1354-
* Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<b><mark>` tags to the chunk_html of the chunks to highlight matching splits and return the highlights on each scored chunk in the response.
1354+
* Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<mark><b>` tags to the chunk_html of the chunks to highlight matching splits and return the highlights on each scored chunk in the response.
13551355
*/
13561356
highlight_results?: (boolean) | null;
13571357
highlight_strategy?: ((HighlightStrategy) | null);
@@ -1363,6 +1363,14 @@ export type HighlightOptions = {
13631363
* Set highlight_window to a number to control the number of words that are returned around the matched phrases. If not specified, this defaults to 0. This is useful for when you want to show more context around the matched words. When specified, window/2 whitespace separated words are added before and after each highlight in the response's highlights array. If an extended highlight overlaps with another highlight, the overlapping words are only included once. This parameter can be overridden to respect the highlight_max_length param.
13641364
*/
13651365
highlight_window?: (number) | null;
1366+
/**
1367+
* Custom html tag which should appear after highlights. If not specified, this defaults to '</b></mark>'.
1368+
*/
1369+
post_tag?: (string) | null;
1370+
/**
1371+
* Custom html tag which should appear before highlights. If not specified, this defaults to '<mark><b>'.
1372+
*/
1373+
pre_tag?: (string) | null;
13661374
};
13671375

13681376
export type HighlightStrategy = 'exactmatch' | 'v1';

frontends/search/src/components/GroupPage.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,8 @@ export const GroupPage = (props: GroupPageProps) => {
283283
highlight_delimiters: search.debounced.highlightDelimiters,
284284
highlight_max_length: search.debounced.highlightMaxLength,
285285
highlight_window: search.debounced.highlightWindow,
286+
pre_tag: search.debounced.highlightPreTag,
287+
post_tag: search.debounced.highlightPostTag,
286288
},
287289
sort_options: {
288290
sort_by: sort_by,

frontends/search/src/components/ResultsPage.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,8 @@ const ResultsPage = (props: ResultsPageProps) => {
311311
highlight_max_length: props.search.debounced.highlightMaxLength ?? 8,
312312
highlight_max_num: props.search.debounced.highlightMaxNum ?? 3,
313313
highlight_window: props.search.debounced.highlightWindow ?? 0,
314+
pre_tag: props.search.debounced.highlightPreTag ?? "<mark><b>",
315+
post_tag: props.search.debounced.highlightPostTag ?? "</b></mark>",
314316
},
315317

316318
group_size: props.search.debounced.group_size ?? 3,

frontends/search/src/components/SearchForm.tsx

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,8 @@ const SearchForm = (props: {
10641064
highlightMaxLength: 8,
10651065
highlightMaxNum: 3,
10661066
highlightWindow: 0,
1067+
highlightPreTag: "<mark><b>",
1068+
highlightPostTag: "</b></mark>",
10671069
group_size: 3,
10681070
removeStopWords: false,
10691071
} as SearchOptions;
@@ -1468,6 +1470,38 @@ const SearchForm = (props: {
14681470
}}
14691471
/>
14701472
</div>
1473+
<div class="items flex justify-between space-x-2 p-1">
1474+
<label>Highlight Pre Tag:</label>
1475+
<input
1476+
class="w-16 rounded border border-neutral-400 p-0.5 text-black"
1477+
type="text"
1478+
value={tempSearchValues().highlightPreTag}
1479+
onInput={(e) => {
1480+
setTempSearchValues((prev) => {
1481+
return {
1482+
...prev,
1483+
highlightPreTag: e.currentTarget.value,
1484+
};
1485+
});
1486+
}}
1487+
/>
1488+
</div>
1489+
<div class="items flex justify-between space-x-2 p-1">
1490+
<label>Highlight Post Tag:</label>
1491+
<input
1492+
class="w-16 rounded border border-neutral-400 p-0.5 text-black"
1493+
type="text"
1494+
value={tempSearchValues().highlightPostTag}
1495+
onInput={(e) => {
1496+
setTempSearchValues((prev) => {
1497+
return {
1498+
...prev,
1499+
highlightPostTag: e.currentTarget.value,
1500+
};
1501+
});
1502+
}}
1503+
/>
1504+
</div>
14711505
<div class="flex items-center justify-between space-x-2 p-1">
14721506
<label>Highlight exact match</label>
14731507
<select

frontends/search/src/hooks/useSearch.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ export interface SearchOptions {
7777
highlightMaxLength: number;
7878
highlightMaxNum: number;
7979
highlightWindow: number;
80+
highlightPreTag: string;
81+
highlightPostTag: string;
8082
group_size: number;
8183
useQuoteNegatedTerms: boolean;
8284
removeStopWords: boolean;
@@ -112,6 +114,8 @@ const initalState: SearchOptions = {
112114
highlightMaxLength: 8,
113115
highlightMaxNum: 3,
114116
highlightWindow: 0,
117+
highlightPreTag: "<mark><b>",
118+
highlightPostTag: "</b></mark>",
115119
group_size: 3,
116120
useQuoteNegatedTerms: false,
117121
removeStopWords: false,
@@ -152,6 +156,8 @@ const fromStateToParams = (state: SearchOptions): Params => {
152156
highlightMaxLength: state.highlightMaxLength.toString(),
153157
highlightMaxNum: state.highlightMaxNum.toString(),
154158
highlightWindow: state.highlightWindow.toString(),
159+
highlightPreTag: state.highlightPreTag,
160+
highlightPostTag: state.highlightPostTag,
155161
group_size: state.group_size?.toString(),
156162
useQuoteNegatedTerms: state.useQuoteNegatedTerms.toString(),
157163
removeStopWords: state.removeStopWords.toString(),
@@ -201,6 +207,8 @@ const fromParamsToState = (
201207
highlightMaxLength: parseInt(params.highlightMaxLength ?? "8"),
202208
highlightMaxNum: parseInt(params.highlightMaxNum ?? "3"),
203209
highlightWindow: parseInt(params.highlightWindow ?? "0"),
210+
highlightPreTag: params.highlightPreTag ?? initalState.highlightPreTag,
211+
highlightPostTag: params.highlightPostTag ?? initalState.highlightPostTag,
204212
group_size: parseInt(params.group_size ?? "3"),
205213
useQuoteNegatedTerms: (params.useQuoteNegatedTerms ?? "false") === "true",
206214
removeStopWords: (params.removeStopWords ?? "false") === "true",

server/src/data/models.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5848,7 +5848,7 @@ pub struct SortOptions {
58485848
#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)]
58495849
/// Highlight Options lets you specify different methods to highlight the chunks in the result set. If not specified, this defaults to the score of the chunks.
58505850
pub struct HighlightOptions {
5851-
/// Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<b><mark>` tags to the chunk_html of the chunks to highlight matching splits and return the highlights on each scored chunk in the response.
5851+
/// Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<mark><b>` tags to the chunk_html of the chunks to highlight matching splits and return the highlights on each scored chunk in the response.
58525852
pub highlight_results: Option<bool>,
58535853
/// Set highlight_exact_match to true to highlight exact matches from your query.
58545854
pub highlight_strategy: Option<HighlightStrategy>,
@@ -5862,6 +5862,10 @@ pub struct HighlightOptions {
58625862
pub highlight_max_num: Option<u32>,
58635863
/// Set highlight_window to a number to control the number of words that are returned around the matched phrases. If not specified, this defaults to 0. This is useful for when you want to show more context around the matched words. When specified, window/2 whitespace separated words are added before and after each highlight in the response's highlights array. If an extended highlight overlaps with another highlight, the overlapping words are only included once. This parameter can be overridden to respect the highlight_max_length param.
58645864
pub highlight_window: Option<u32>,
5865+
/// Custom html tag which should appear before highlights. If not specified, this defaults to '<mark><b>'.
5866+
pub pre_tag: Option<String>,
5867+
/// Custom html tag which should appear after highlights. If not specified, this defaults to '</b></mark>'.
5868+
pub post_tag: Option<String>,
58655869
}
58665870

58675871
#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)]

server/src/handlers/chunk_handler.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,7 +1138,7 @@ pub fn parse_query(
11381138

11391139
/// Search
11401140
///
1141-
/// This route provides the primary search functionality for the API. It can be used to search for chunks by semantic similarity, full-text similarity, or a combination of both. Results' `chunk_html` values will be modified with `<b><mark>` tags for sub-sentence highlighting.
1141+
/// This route provides the primary search functionality for the API. It can be used to search for chunks by semantic similarity, full-text similarity, or a combination of both. Results' `chunk_html` values will be modified with `<mark><b>` or custom specified tags for sub-sentence highlighting.
11421142
#[utoipa::path(
11431143
post,
11441144
path = "/chunk/search",
@@ -2331,7 +2331,7 @@ pub struct GenerateOffChunksReqPayload {
23312331
pub prompt: Option<String>,
23322332
/// Whether or not to stream the response. If this is set to true or not included, the response will be a stream. If this is set to false, the response will be a normal JSON response. Default is true.
23332333
pub stream_response: Option<bool>,
2334-
/// Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<b><mark>` tags to the chunk_html of the chunks to highlight matching splits.
2334+
/// Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<mark><b>` tags to the chunk_html of the chunks to highlight matching splits.
23352335
pub highlight_results: Option<bool>,
23362336
/// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. Default is 0.5.
23372337
pub temperature: Option<f32>,

0 commit comments

Comments
 (0)