Merge #924

meili-bors[bot] · CaroFG · web-flow · commit bf0aea5dfe2a · 2024-02-08T12:05:20.000Z
924: Implement vector search experimental feature v2 (v1.6) r=curquiza a=CaroFG # Pull Request ## Related issue Fixes #901 ## What does this PR do? - Creates embedders classes - Adds embedders to paths - Introduces new routes: - Create a new method to get the settings by calling GET /indexes/:index_uid/settings/embedders - Create a new method to update the settings by calling PATCH /indexes/:index_uid/settings/embedders - Create a new method to reset the settings by calling DELETE /indexes/:index_uid/settings/embedders - Adds embedders settings tests - Updates vector search tests ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: CaroFG <carolina.ferreira131@gmail.com> Co-authored-by: CaroFG <48251481+CaroFG@users.noreply.github.com>
diff --git a/README.md b/README.md
@@ -205,7 +205,6 @@ index.search(
 
 This package guarantees compatibility with [version v1.x of Meilisearch](https://github.com/meilisearch/meilisearch/releases/latest), but some features may not be present. Please check the [issues](https://github.com/meilisearch/meilisearch-python/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+label%3Aenhancement) for more info.
 
-⚠️ This package is not compatible with the [`vectoreStore` experimental feature](https://www.meilisearch.com/docs/learn/experimental/vector_search) of Meilisearch v1.6.0 and later. More information on this [issue](https://github.com/meilisearch/meilisearch-python/issues/901).
 
 ## 💡 Learn more
 
diff --git a/meilisearch/config.py b/meilisearch/config.py
@@ -38,6 +38,7 @@ class Paths:
         separator_tokens = "separator-tokens"
         non_separator_tokens = "non-separator-tokens"
         swap = "swap-indexes"
+        embedders = "embedders"
 
     def __init__(
         self,
diff --git a/meilisearch/index.py b/meilisearch/index.py
@@ -10,7 +10,7 @@
 from meilisearch.config import Config
 from meilisearch.errors import version_error_hint_message
 from meilisearch.models.document import Document, DocumentsResults
-from meilisearch.models.index import Faceting, IndexStats, Pagination, TypoTolerance
+from meilisearch.models.index import Embedders, Faceting, IndexStats, Pagination, TypoTolerance
 from meilisearch.models.task import Task, TaskInfo, TaskResults
 from meilisearch.task import TaskHandler
 
@@ -1757,6 +1757,71 @@ def reset_non_separator_tokens(self) -> TaskInfo:
 
         return TaskInfo(**task)
 
+    # EMBEDDERS SUB-ROUTES
+
+    def get_embedders(self) -> Embedders | None:
+        """Get embedders of the index.
+
+        Returns
+        -------
+        settings:
+            The embedders settings of the index, or None when the response is empty (no embedders returned).
+
+        Raises
+        ------
+        MeilisearchApiError
+            An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
+        """
+        response = self.http.get(self.__settings_url_for(self.config.paths.embedders))
+
+        if not response:
+            return None
+
+        return Embedders(embedders=response)
+
+    def update_embedders(self, body: Union[Mapping[str, Any], None]) -> TaskInfo:
+        """Update embedders of the index.
+
+        Parameters
+        ----------
+        body: dict
+            Dictionary mapping each embedder name to its configuration, e.g. {"default": {"source": "userProvided", "dimensions": 2}}.
+
+        Returns
+        -------
+        task_info:
+            TaskInfo instance containing information about a task to track the progress of an asynchronous process.
+            https://www.meilisearch.com/docs/reference/api/tasks#get-one-task
+
+        Raises
+        ------
+        MeilisearchApiError
+            An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
+        """
+        task = self.http.patch(self.__settings_url_for(self.config.paths.embedders), body)
+
+        return TaskInfo(**task)
+
+    def reset_embedders(self) -> TaskInfo:
+        """Reset embedders of the index to default values.
+
+        Returns
+        -------
+        task_info:
+            TaskInfo instance containing information about a task to track the progress of an asynchronous process.
+            https://www.meilisearch.com/docs/reference/api/tasks#get-one-task
+
+        Raises
+        ------
+        MeilisearchApiError
+            An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
+        """
+        task = self.http.delete(
+            self.__settings_url_for(self.config.paths.embedders),
+        )
+
+        return TaskInfo(**task)
+
     @staticmethod
     def _batch(
         documents: Sequence[Mapping[str, Any]], batch_size: int
diff --git a/meilisearch/models/index.py b/meilisearch/models/index.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any, Dict, Iterator, List, Optional
+from typing import Any, Dict, Iterator, List, Optional, Union
 
 from camel_converter import to_snake
 from camel_converter.pydantic_base import CamelBase
@@ -46,3 +46,26 @@ class TypoTolerance(CamelBase):
     disable_on_attributes: Optional[List[str]] = None
     disable_on_words: Optional[List[str]] = None
     min_word_size_for_typos: Optional[MinWordSizeForTypos] = None
+
+
+class OpenAiEmbedder(CamelBase):
+    source: str = "openAi"
+    model: Optional[str] = None  # Defaults to text-embedding-ada-002
+    api_key: Optional[str] = None  # Can be provided through a CLI option or environment variable
+    document_template: Optional[str] = None
+
+
+class HuggingFaceEmbedder(CamelBase):
+    source: str = "huggingFace"
+    model: Optional[str] = None  # Defaults to BAAI/bge-base-en-v1.5
+    revision: Optional[str] = None
+    document_template: Optional[str] = None
+
+
+class UserProvidedEmbedder(CamelBase):
+    source: str = "userProvided"
+    dimensions: int
+
+
+class Embedders(CamelBase):
+    embedders: Dict[str, Union[OpenAiEmbedder, HuggingFaceEmbedder, UserProvidedEmbedder]]
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -129,12 +129,21 @@ def index_maker(index_uid=common.INDEX_UID, documents=small_movies):
 
 @fixture(scope="function")
 def index_with_documents_and_vectors(empty_index, small_movies):
-    small_movies[0]["_vectors"] = [0.1, 0.2]
+    small_movies[0]["_vectors"] = {"default": [0.1, 0.2]}
 
     def index_maker(index_uid=common.INDEX_UID, documents=small_movies):
         index = empty_index(index_uid)
-        task = index.add_documents(documents)
-        index.wait_for_task(task.task_uid)
+        settings_update_task = index.update_embedders(
+            {
+                "default": {
+                    "source": "userProvided",
+                    "dimensions": 2,
+                }
+            }
+        )
+        index.wait_for_task(settings_update_task.task_uid)
+        document_addition_task = index.add_documents(documents)
+        index.wait_for_task(document_addition_task.task_uid)
         return index
 
     return index_maker
@@ -216,3 +225,13 @@ def enable_vector_search():
         json={"vectorStore": False},
         timeout=10,
     )
+
+
+@fixture
+def new_embedders():
+    return {
+        "default": {
+            "source": "userProvided",
+            "dimensions": 1,
+        }
+    }
diff --git a/tests/index/test_index_search_meilisearch.py b/tests/index/test_index_search_meilisearch.py
@@ -459,13 +459,9 @@ def test_attributes_to_search_on_search_no_match(index_with_documents):
     assert response["hits"] == []
 
 
-@pytest.mark.xfail(
-    strict=True, reason="https://github.com/meilisearch/meilisearch-python/issues/901"
-)
 @pytest.mark.usefixtures("enable_vector_search")
 def test_vector_search(index_with_documents_and_vectors):
     response = index_with_documents_and_vectors().search(
-        "How to Train Your Dragon", opt_params={"vector": [0.1, 0.2]}
+        "", opt_params={"vector": [0.1, 0.2], "hybrid": {"semanticRatio": 1.0}}
     )
-    assert response["hits"][0]["id"] == "287947"
     assert response["vector"] == [0.1, 0.2]
diff --git a/tests/settings/test_settings_embedders.py b/tests/settings/test_settings_embedders.py
@@ -0,0 +1,44 @@
+import pytest
+
+from meilisearch.models.index import Embedders
+
+
+@pytest.mark.usefixtures("enable_vector_search")
+def test_get_default_embedders(empty_index):
+    """Tests getting default embedders."""
+    response = empty_index().get_embedders()
+
+    assert response is None
+
+
+@pytest.mark.usefixtures("enable_vector_search")
+def test_update_embedders_with_user_provided_source(new_embedders, empty_index):
+    """Tests updating embedders."""
+    index = empty_index()
+    response_update = index.update_embedders(new_embedders)
+    update = index.wait_for_task(response_update.task_uid)
+    response_get = index.get_embedders()
+    assert update.status == "succeeded"
+    assert response_get == Embedders(embedders=new_embedders)
+
+
+@pytest.mark.usefixtures("enable_vector_search")
+def test_reset_embedders(new_embedders, empty_index):
+    """Tests resetting the embedders setting to its default value."""
+    index = empty_index()
+
+    # Update the settings
+    response_update = index.update_embedders(new_embedders)
+    update1 = index.wait_for_task(response_update.task_uid)
+    # Get the setting after update
+    response_get = index.get_embedders()
+    # Reset the setting
+    response_reset = index.reset_embedders()
+    update2 = index.wait_for_task(response_reset.task_uid)
+    # Get the setting after reset
+    response_last = index.get_embedders()
+
+    assert update1.status == "succeeded"
+    assert response_get == Embedders(embedders=new_embedders)
+    assert update2.status == "succeeded"
+    assert response_last is None

Original file line number	Diff line number	Diff line change
`@@ -205,7 +205,6 @@ index.search(`
`205`	`205`
`206`	`206`	`This package guarantees compatibility with [version v1.x of Meilisearch](https://github.com/meilisearch/meilisearch/releases/latest), but some features may not be present. Please check the [issues](https://github.com/meilisearch/meilisearch-python/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+label%3Aenhancement) for more info.`
`207`	`207`
`208`		-⚠️ This package is not compatible with the [`vectoreStore` experimental feature](https://www.meilisearch.com/docs/learn/experimental/vector_search) of Meilisearch v1.6.0 and later. More information on this [issue](https://github.com/meilisearch/meilisearch-python/issues/901).
`209`	`208`
`210`	`209`	`## 💡 Learn more`
`211`	`210`