Skip to content

Commit f0746ec

Browse files
meili-bors[bot], kumarUjjawal, curquiza
authored
Merge #712
712: Add support for multi-modal search r=curquiza a=kumarUjjawal

# Pull Request

## Related issue
Fixes #698

## What does this PR do?

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- New Features
  - Search queries can include media payloads for multimodal retrieval.
  - Added an experimental toggle to enable multimodal capabilities.
  - Embedders support fragment configurations for indexing and search to enable multimodal embeddings.
- Documentation
  - New code sample demonstrating a search request with media alongside hybrid retrieval.
  - Updated example text in the embedders template for clarity.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

Co-authored-by: Kumar Ujjawal <ujjawalpathak6@gmail.com>
Co-authored-by: Clémentine <clementine@meilisearch.com>
2 parents bcc931a + 2d8acf1 commit f0746ec

File tree

4 files changed

+166
-0
lines changed

4 files changed

+166
-0
lines changed

.code-samples.meilisearch.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1971,6 +1971,20 @@ search_parameter_reference_retrieve_vectors_1: |-
19711971
.execute()
19721972
.await
19731973
.unwrap();
1974+
search_parameter_reference_media_1: |-
1975+
let results = index
1976+
.search()
1977+
.with_hybrid("EMBEDDER_NAME", 0.5)
1978+
.with_media(json!({
1979+
"FIELD_A": "VALUE_A",
1980+
"FIELD_B": {
1981+
"FIELD_C": "VALUE_B",
1982+
"FIELD_D": "VALUE_C"
1983+
}
1984+
}))
1985+
.execute()
1986+
.await
1987+
.unwrap();
19741988
update_embedders_1: |-
19751989
let embedders = HashMap::from([(
19761990
String::from("default"),

src/features.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ pub struct ExperimentalFeaturesResult {
1414
pub contains_filter: bool,
1515
pub network: bool,
1616
pub edit_documents_by_function: bool,
17+
#[serde(default)]
18+
pub multimodal: bool,
1719
}
1820

1921
/// Struct representing the experimental features request.
@@ -45,6 +47,8 @@ pub struct ExperimentalFeatures<'a, Http: HttpClient> {
4547
pub network: Option<bool>,
4648
#[serde(skip_serializing_if = "Option::is_none")]
4749
pub edit_documents_by_function: Option<bool>,
50+
#[serde(skip_serializing_if = "Option::is_none")]
51+
pub multimodal: Option<bool>,
4852
}
4953

5054
impl<'a, Http: HttpClient> ExperimentalFeatures<'a, Http> {
@@ -57,6 +61,7 @@ impl<'a, Http: HttpClient> ExperimentalFeatures<'a, Http> {
5761
network: None,
5862
contains_filter: None,
5963
edit_documents_by_function: None,
64+
multimodal: None,
6065
}
6166
}
6267

@@ -140,6 +145,11 @@ impl<'a, Http: HttpClient> ExperimentalFeatures<'a, Http> {
140145
self.network = Some(network);
141146
self
142147
}
148+
149+
/// Sets the `multimodal` experimental feature flag on this request.
///
/// Stores `Some(multimodal)` and returns `&mut Self` so further setters can be chained.
pub fn set_multimodal(&mut self, multimodal: bool) -> &mut Self {
150+
self.multimodal = Some(multimodal);
151+
self
152+
}
143153
}
144154

145155
#[cfg(test)]
@@ -155,6 +165,7 @@ mod tests {
155165
features.set_contains_filter(true);
156166
features.set_network(true);
157167
features.set_edit_documents_by_function(true);
168+
features.set_multimodal(true);
158169
let _ = features.update().await.unwrap();
159170

160171
let res = features.get().await.unwrap();
@@ -163,5 +174,6 @@ mod tests {
163174
assert!(res.contains_filter);
164175
assert!(res.network);
165176
assert!(res.edit_documents_by_function);
177+
assert!(res.multimodal);
166178
}
167179
}

src/search.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,10 @@ pub struct SearchQuery<'a, Http: HttpClient> {
411411
#[serde(skip_serializing_if = "Option::is_none")]
412412
pub retrieve_vectors: Option<bool>,
413413

414+
/// Provides multimodal data for search queries.
415+
#[serde(skip_serializing_if = "Option::is_none")]
416+
pub media: Option<Value>,
417+
414418
/// Request exhaustive facet counts up to the limit defined by `maxTotalHits`.
415419
///
416420
/// When set to `true`, Meilisearch computes exact facet counts instead of approximate ones.
@@ -463,6 +467,7 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
463467
hybrid: None,
464468
vector: None,
465469
retrieve_vectors: None,
470+
media: None,
466471
exhaustive_facet_count: None,
467472
distinct: None,
468473
ranking_score_threshold: None,
@@ -710,6 +715,12 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
710715
self
711716
}
712717

718+
/// Attach media fragments to the search query.
///
/// Stores the given JSON `media` value on the query (replacing any value set earlier)
/// and returns the query so that further builder calls can be chained.
719+
pub fn with_media<'b>(&'b mut self, media: Value) -> &'b mut SearchQuery<'a, Http> {
720+
self.media = Some(media);
721+
self
722+
}
723+
713724
pub fn with_distinct<'b>(&'b mut self, distinct: &'a str) -> &'b mut SearchQuery<'a, Http> {
714725
self.distinct = Some(distinct);
715726
self
@@ -1122,6 +1133,34 @@ pub(crate) mod tests {
11221133
use serde::{Deserialize, Serialize};
11231134
use serde_json::{json, Map, Value};
11241135

1136+
// Verifies that a JSON payload passed to `with_media` shows up unchanged under the
// `media` key once the built query is serialized with `serde_json`.
#[test]
1137+
fn search_query_serializes_media_parameter() {
1138+
// No search is executed in this test: the query is only built and serialized.
let client = Client::new("http://localhost:7700", Some("masterKey")).unwrap();
1139+
let index = client.index("media_query");
1140+
let mut query = SearchQuery::new(&index);
1141+
1142+
query.with_query("example").with_media(json!({
1143+
"FIELD_A": "VALUE_A",
1144+
"FIELD_B": {
1145+
"FIELD_C": "VALUE_B",
1146+
"FIELD_D": "VALUE_C"
1147+
}
1148+
}));
1149+
1150+
let serialized = serde_json::to_value(&query.build()).unwrap();
1151+
1152+
// The nested object must round-trip exactly, including the inner `FIELD_B` map.
assert_eq!(
1153+
serialized.get("media"),
1154+
Some(&json!({
1155+
"FIELD_A": "VALUE_A",
1156+
"FIELD_B": {
1157+
"FIELD_C": "VALUE_B",
1158+
"FIELD_D": "VALUE_C"
1159+
}
1160+
}))
1161+
);
1162+
}
1163+
11251164
#[derive(Debug, Serialize, Deserialize, PartialEq)]
11261165
pub struct Nested {
11271166
child: String,

src/settings.rs

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,20 @@ pub struct Embedder {
146146
/// Configures embedder to vectorize search queries (composite embedders only)
147147
#[serde(skip_serializing_if = "Option::is_none")]
148148
pub search_embedder: Option<Box<Embedder>>,
149+
150+
/// Configures multimodal embedding generation at indexing time.
151+
#[serde(skip_serializing_if = "Option::is_none")]
152+
pub indexing_fragments: Option<HashMap<String, EmbedderFragment>>,
153+
154+
/// Configures incoming media fragments for multimodal search queries.
155+
#[serde(skip_serializing_if = "Option::is_none")]
156+
pub search_fragments: Option<HashMap<String, EmbedderFragment>>,
157+
}
158+
159+
/// One named fragment stored in an embedder's `indexing_fragments` or
/// `search_fragments` map.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default)]
160+
#[serde(rename_all = "camelCase")]
161+
pub struct EmbedderFragment {
162+
/// Arbitrary JSON content of the fragment, (de)serialized under the `value` key.
pub value: serde_json::Value,
149163
}
150164

151165
#[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq)]
@@ -2798,6 +2812,7 @@ mod tests {
27982812

27992813
use crate::client::*;
28002814
use meilisearch_test_macro::meilisearch_test;
2815+
use serde_json::json;
28012816

28022817
#[meilisearch_test]
28032818
async fn test_set_faceting_settings(client: Client, index: Index) {
@@ -3139,6 +3154,92 @@ mod tests {
31393154
assert_eq!(embedders, res);
31403155
}
31413156

3157+
// Verifies the JSON shape produced when an `Embedder` carries fragments:
// the maps are emitted under the camelCase keys `indexingFragments` and
// `searchFragments`, each fragment's content is nested under `value`, and the
// `request` / `response` templates are serialized unchanged.
#[test]
3158+
fn embedder_with_fragments_serializes() {
3159+
// Every field not listed here takes its value from `Default::default()`.
let embedder = Embedder {
3160+
source: EmbedderSource::Rest,
3161+
url: Some(String::from("https://example.com/embeddings")),
3162+
indexing_fragments: Some(HashMap::from([(
3163+
String::from("default"),
3164+
EmbedderFragment {
3165+
value: json!({
3166+
"content": [
3167+
{ "type": "text", "text": "{{ doc.description }}" }
3168+
]
3169+
}),
3170+
},
3171+
)])),
3172+
search_fragments: Some(HashMap::from([(
3173+
String::from("default"),
3174+
EmbedderFragment {
3175+
value: json!({
3176+
"content": [
3177+
{ "type": "text", "text": "{{ query.q }}" }
3178+
]
3179+
}),
3180+
},
3181+
)])),
3182+
request: Some(json!({
3183+
"input": [
3184+
"{{fragment}}",
3185+
"{{..}}"
3186+
],
3187+
"model": "example-model"
3188+
})),
3189+
response: Some(json!({
3190+
"data": [
3191+
{
3192+
"embedding": "{{embedding}}"
3193+
},
3194+
"{{..}}"
3195+
]
3196+
})),
3197+
..Default::default()
3198+
};
3199+
3200+
let serialized = serde_json::to_value(&embedder).unwrap();
3201+
3202+
// Indexing fragment: expected at indexingFragments.default.value.content[0].text.
assert_eq!(
3203+
serialized
3204+
.get("indexingFragments")
3205+
.and_then(|value| value.get("default"))
3206+
.and_then(|value| value.get("value"))
3207+
.and_then(|value| value.get("content"))
3208+
.and_then(|value| value.get(0))
3209+
.and_then(|value| value.get("text")),
3210+
Some(&json!("{{ doc.description }}"))
3211+
);
3212+
3213+
// Search fragment: same path, but under the `searchFragments` key.
assert_eq!(
3214+
serialized
3215+
.get("searchFragments")
3216+
.and_then(|value| value.get("default"))
3217+
.and_then(|value| value.get("value"))
3218+
.and_then(|value| value.get("content"))
3219+
.and_then(|value| value.get(0))
3220+
.and_then(|value| value.get("text")),
3221+
Some(&json!("{{ query.q }}"))
3222+
);
3223+
3224+
// The request template must be emitted exactly as it was supplied.
assert_eq!(
3225+
serialized.get("request"),
3226+
Some(&json!({
3227+
"input": ["{{fragment}}", "{{..}}"],
3228+
"model": "example-model"
3229+
}))
3230+
);
3231+
3232+
// The response template must be emitted exactly as it was supplied.
assert_eq!(
3233+
serialized.get("response"),
3234+
Some(&json!({
3235+
"data": [
3236+
{ "embedding": "{{embedding}}" },
3237+
"{{..}}"
3238+
]
3239+
}))
3240+
);
3241+
}
3242+
31423243
#[meilisearch_test]
31433244
async fn test_reset_proximity_precision(index: Index) {
31443245
let expected = "byWord".to_string();

0 commit comments

Comments
 (0)