Skip to content

Commit bf0aea5

Browse files
Merge #924
924: Implement vector search experimental feature v2 (v1.6) r=curquiza a=CaroFG # Pull Request ## Related issue Fixes #901 ## What does this PR do? - Creates embedders classes - Adds embedders to paths - Introduces new routes: - Create a new method to get the settings by calling GET /indexes/:index_uid/settings/embedders - Create a new method to update the settings by calling PATCH /indexes/:index_uid/settings/embedders - Create a new method to reset the settings by calling DELETE /indexes/:index_uid/settings/embedders - Adds embedders settings tests - Updates vector search tests ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: CaroFG <carolina.ferreira131@gmail.com> Co-authored-by: CaroFG <48251481+CaroFG@users.noreply.github.com>
2 parents 1e9a3a8 + cfb57f1 commit bf0aea5

File tree

7 files changed

+158
-11
lines changed

7 files changed

+158
-11
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,6 @@ index.search(
205205

206206
This package guarantees compatibility with [version v1.x of Meilisearch](https://github.com/meilisearch/meilisearch/releases/latest), but some features may not be present. Please check the [issues](https://github.com/meilisearch/meilisearch-python/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+label%3Aenhancement) for more info.
207207

208-
⚠️ This package is not compatible with the [`vectoreStore` experimental feature](https://www.meilisearch.com/docs/learn/experimental/vector_search) of Meilisearch v1.6.0 and later. More information on this [issue](https://github.com/meilisearch/meilisearch-python/issues/901).
209208

210209
## 💡 Learn more
211210

meilisearch/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class Paths:
3838
separator_tokens = "separator-tokens"
3939
non_separator_tokens = "non-separator-tokens"
4040
swap = "swap-indexes"
41+
embedders = "embedders"
4142

4243
def __init__(
4344
self,

meilisearch/index.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from meilisearch.config import Config
1111
from meilisearch.errors import version_error_hint_message
1212
from meilisearch.models.document import Document, DocumentsResults
13-
from meilisearch.models.index import Faceting, IndexStats, Pagination, TypoTolerance
13+
from meilisearch.models.index import Embedders, Faceting, IndexStats, Pagination, TypoTolerance
1414
from meilisearch.models.task import Task, TaskInfo, TaskResults
1515
from meilisearch.task import TaskHandler
1616

@@ -1757,6 +1757,71 @@ def reset_non_separator_tokens(self) -> TaskInfo:
17571757

17581758
return TaskInfo(**task)
17591759

1760+
# EMBEDDERS SUB-ROUTES
1761+
1762+
def get_embedders(self) -> Embedders | None:
    """Fetch the embedders settings of the index.

    Issues a GET request against the embedders settings URL of this index.

    Returns
    -------
    embedders:
        An ``Embedders`` instance wrapping the response, or ``None`` when the
        response is empty (falsy).

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    embedders_url = self.__settings_url_for(self.config.paths.embedders)
    payload = self.http.get(embedders_url)

    # An empty/falsy payload is reported as "no embedders" rather than as an
    # empty Embedders model.
    return Embedders(embedders=payload) if payload else None
1781+
1782+
def update_embedders(self, body: Union[Mapping[str, Any], None]) -> TaskInfo:
    """Send new embedders settings for the index.

    Issues a PATCH request against the embedders settings URL of this index
    with ``body`` as the payload.

    Parameters
    ----------
    body:
        Mapping containing the embedders. ``None`` is accepted by the
        signature and is forwarded to the HTTP layer unchanged.

    Returns
    -------
    task_info:
        TaskInfo instance containing information about a task to track the progress of an asynchronous process.
        https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    embedders_url = self.__settings_url_for(self.config.paths.embedders)
    enqueued = self.http.patch(embedders_url, body)

    return TaskInfo(**enqueued)
1804+
1805+
def reset_embedders(self) -> TaskInfo:
    """Reset the embedders settings of the index to their default values.

    Issues a DELETE request against the embedders settings URL of this index.

    Returns
    -------
    task_info:
        TaskInfo instance containing information about a task to track the progress of an asynchronous process.
        https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    embedders_url = self.__settings_url_for(self.config.paths.embedders)
    enqueued = self.http.delete(embedders_url)

    return TaskInfo(**enqueued)
1824+
17601825
@staticmethod
17611826
def _batch(
17621827
documents: Sequence[Mapping[str, Any]], batch_size: int

meilisearch/models/index.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import Any, Dict, Iterator, List, Optional
3+
from typing import Any, Dict, Iterator, List, Optional, Union
44

55
from camel_converter import to_snake
66
from camel_converter.pydantic_base import CamelBase
@@ -46,3 +46,26 @@ class TypoTolerance(CamelBase):
4646
disable_on_attributes: Optional[List[str]] = None
4747
disable_on_words: Optional[List[str]] = None
4848
min_word_size_for_typos: Optional[MinWordSizeForTypos] = None
49+
50+
51+
class OpenAiEmbedder(CamelBase):
    """Settings of an embedder whose ``source`` is ``"openAi"``.

    Every field other than ``source`` is optional and defaults to ``None``.
    """

    # Embedder kind marker; defaults to the "openAi" source.
    source: str = "openAi"
    model: Optional[str] = None  # Defaults to text-embedding-ada-002
    api_key: Optional[str] = None  # Can be provided through a CLI option or environment variable
    # NOTE(review): presumably the template used to render a document into the
    # text that gets embedded -- confirm against the Meilisearch documentation.
    document_template: Optional[str] = None
56+
57+
58+
class HuggingFaceEmbedder(CamelBase):
    """Settings of an embedder whose ``source`` is ``"huggingFace"``.

    Every field other than ``source`` is optional and defaults to ``None``.
    """

    # Embedder kind marker; defaults to the "huggingFace" source.
    source: str = "huggingFace"
    model: Optional[str] = None  # Defaults to BAAI/bge-base-en-v1.5
    # NOTE(review): presumably the model revision to pin -- confirm against
    # the Meilisearch documentation.
    revision: Optional[str] = None
    # NOTE(review): presumably the template used to render a document into the
    # text that gets embedded -- confirm against the Meilisearch documentation.
    document_template: Optional[str] = None
63+
64+
65+
class UserProvidedEmbedder(CamelBase):
    """Settings of an embedder whose ``source`` is ``"userProvided"``.

    Unlike the other embedder models in this module, this one has a required
    field: ``dimensions`` has no default value.
    """

    # Embedder kind marker; defaults to the "userProvided" source.
    source: str = "userProvided"
    # Required. NOTE(review): presumably the length of the vectors supplied
    # with the documents -- confirm against the Meilisearch documentation.
    dimensions: int
68+
69+
70+
class Embedders(CamelBase):
    """Container for the embedders settings of an index.

    ``embedders`` maps a string key to one of the embedder models
    (``OpenAiEmbedder``, ``HuggingFaceEmbedder`` or ``UserProvidedEmbedder``).
    """

    # NOTE(review): ``source`` is a plain ``str`` in every member of the union,
    # so the union is not discriminated by it; which model a given payload is
    # validated as depends on pydantic's union resolution -- confirm this is
    # the intended behavior.
    embedders: Dict[str, Union[OpenAiEmbedder, HuggingFaceEmbedder, UserProvidedEmbedder]]

tests/conftest.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,12 +129,21 @@ def index_maker(index_uid=common.INDEX_UID, documents=small_movies):
129129

130130
@fixture(scope="function")
131131
def index_with_documents_and_vectors(empty_index, small_movies):
132-
small_movies[0]["_vectors"] = [0.1, 0.2]
132+
small_movies[0]["_vectors"] = {"default": [0.1, 0.2]}
133133

134134
def index_maker(index_uid=common.INDEX_UID, documents=small_movies):
135135
index = empty_index(index_uid)
136-
task = index.add_documents(documents)
137-
index.wait_for_task(task.task_uid)
136+
settings_update_task = index.update_embedders(
137+
{
138+
"default": {
139+
"source": "userProvided",
140+
"dimensions": 2,
141+
}
142+
}
143+
)
144+
index.wait_for_task(settings_update_task.task_uid)
145+
document_addition_task = index.add_documents(documents)
146+
index.wait_for_task(document_addition_task.task_uid)
138147
return index
139148

140149
return index_maker
@@ -216,3 +225,13 @@ def enable_vector_search():
216225
json={"vectorStore": False},
217226
timeout=10,
218227
)
228+
229+
230+
@fixture
def new_embedders():
    """Provide an embedders payload with a single ``userProvided`` embedder named ``default``."""
    user_provided = {
        "source": "userProvided",
        "dimensions": 1,
    }
    return {"default": user_provided}

tests/index/test_index_search_meilisearch.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -459,13 +459,9 @@ def test_attributes_to_search_on_search_no_match(index_with_documents):
459459
assert response["hits"] == []
460460

461461

462-
@pytest.mark.xfail(
463-
strict=True, reason="https://github.com/meilisearch/meilisearch-python/issues/901"
464-
)
465462
@pytest.mark.usefixtures("enable_vector_search")
466463
def test_vector_search(index_with_documents_and_vectors):
467464
response = index_with_documents_and_vectors().search(
468-
"How to Train Your Dragon", opt_params={"vector": [0.1, 0.2]}
465+
"", opt_params={"vector": [0.1, 0.2], "hybrid": {"semanticRatio": 1.0}}
469466
)
470-
assert response["hits"][0]["id"] == "287947"
471467
assert response["vector"] == [0.1, 0.2]
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import pytest
2+
3+
from meilisearch.models.index import Embedders
4+
5+
6+
@pytest.mark.usefixtures("enable_vector_search")
def test_get_default_embedders(empty_index):
    """A freshly created index returns no embedders."""
    index = empty_index()

    assert index.get_embedders() is None
12+
13+
14+
@pytest.mark.usefixtures("enable_vector_search")
def test_update_embedders_with_user_provided_source(new_embedders, empty_index):
    """Updating embedders with a ``userProvided`` source is visible on the next get."""
    index = empty_index()

    # Enqueue the update and block until the task has been processed.
    task_info = index.update_embedders(new_embedders)
    finished_task = index.wait_for_task(task_info.task_uid)
    current_embedders = index.get_embedders()

    assert finished_task.status == "succeeded"
    assert current_embedders == Embedders(embedders=new_embedders)
23+
24+
25+
@pytest.mark.usefixtures("enable_vector_search")
def test_reset_embedders(new_embedders, empty_index):
    """Tests resetting the embedders setting to its default value."""
    index = empty_index()

    # Update the settings and check the update before going any further, so a
    # failed update is reported at this step rather than after the reset.
    response_update = index.update_embedders(new_embedders)
    update1 = index.wait_for_task(response_update.task_uid)
    assert update1.status == "succeeded"

    # Get the setting after update
    response_get = index.get_embedders()
    assert response_get == Embedders(embedders=new_embedders)

    # Reset the setting
    response_reset = index.reset_embedders()
    update2 = index.wait_for_task(response_reset.task_uid)
    assert update2.status == "succeeded"

    # Get the setting after reset: no embedders are left on the index
    response_last = index.get_embedders()
    assert response_last is None

0 commit comments

Comments
 (0)