optimize: test tencent vdb

langgenius · takatost · Jun 20, 2024 · Apr 17, 2024 · Apr 17, 2024 · Apr 18, 2024
commit fe905ea69673a4038c399164b591921c0813968b
diff --git a/api/.env.example b/api/.env.example
@@ -86,7 +86,7 @@ RELYT_PASSWORD=postgres
 RELYT_DATABASE=postgres
 
 # Tencent configuration
-TENCENT_URL=http://127.0.0.1
+TENCENT_VECTOR_DB_URL=http://127.0.0.1
 TENCENT_API_KEY=dify
 TENCENT_TIMEOUT=30
 TENCENT_USERNAME=dify

diff --git a/api/config.py b/api/config.py
@@ -229,7 +229,7 @@ def __init__(self):
         self.RELYT_DATABASE = get_env('RELYT_DATABASE')
 
         # tencent settings
-        self.TENCENT_URL = get_env('TENCENT_URL')
+        self.TENCENT_VECTOR_DB_URL = get_env('TENCENT_VECTOR_DB_URL')
         self.TENCENT_API_KEY = get_env('TENCENT_API_KEY')
         self.TENCENT_TIMEOUT = get_env('TENCENT_TIMEOUT')
         self.TENCENT_USERNAME = get_env('TENCENT_USERNAME')

diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
@@ -469,13 +469,13 @@ class DatasetRetrievalSettingApi(Resource):
     @account_initialization_required
     def get(self):
         vector_type = current_app.config['VECTOR_STORE']
-        if vector_type == 'milvus':
+        if vector_type == 'milvus' or vector_type == 'tencent':
             return {
                 'retrieval_method': [
                     'semantic_search'
                 ]
             }
-        elif vector_type == 'qdrant' or vector_type == 'weaviate' or vector_type == 'tencent':
+        elif vector_type == 'qdrant' or vector_type == 'weaviate':
             return {
                 'retrieval_method': [
                     'semantic_search', 'full_text_search', 'hybrid_search'
@@ -491,13 +491,13 @@ class DatasetRetrievalSettingMockApi(Resource):
     @account_initialization_required
     def get(self, vector_type):
 
-        if vector_type == 'milvus':
+        if vector_type == 'milvus' or vector_type == 'tencent':
             return {
                 'retrieval_method': [
                     'semantic_search'
                 ]
             }
-        elif vector_type == 'qdrant' or vector_type == 'weaviate' or vector_type == 'tencent':
+        elif vector_type == 'qdrant' or vector_type == 'weaviate':
             return {
                 'retrieval_method': [
                     'semantic_search', 'full_text_search', 'hybrid_search'

diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py
@@ -156,14 +156,15 @@ def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Doc
         return self._get_search_res(res)
 
     def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
-        res = (self._db.collection(self._collection_name)
-               .searchByText(embeddingItems=[query],
-                             params=document.HNSWSearchParams(ef=kwargs.get("ef", 10)),
-                             retrieve_vector=False,
-                             limit=kwargs.get('top_k', 4),
-                             timeout=self._client_config.timeout,
-                             ))
-        return self._get_search_res(res)
+        # res = (self._db.collection(self._collection_name)
+        #        .searchByText(embeddingItems=[query],
+        #                      params=document.HNSWSearchParams(ef=kwargs.get("ef", 10)),
+        #                      retrieve_vector=False,
+        #                      limit=kwargs.get('top_k', 4),
+        #                      timeout=self._client_config.timeout,
+        #                      ))
+        # Full-text search requires an embedding model deployed in Tencent Vector DB; not supported for now.
+        return []
 
     def _get_search_res(self, res):
         docs = []

diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py
@@ -153,7 +153,7 @@ def _init_vector(self) -> BaseVector:
             return TencentVector(
                 collection_name=collection_name,
                 config=TencentConfig(
-                    url=config.get('TENCENT_URL'),
+                    url=config.get('TENCENT_VECTOR_DB_URL'),
                     api_key=config.get('TENCENT_API_KEY'),
                     timeout=config.get('TENCENT_TIMEOUT'),
                     username=config.get('TENCENT_USERNAME'),

diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/tencent/test_tencent.py b/api/tests/unit_tests/core/rag/datasource/vdb/tencent/test_tencent.py
@@ -0,0 +1,57 @@
+import pytest
+from extensions.ext_redis import redis_client
+from core.rag.datasource.vdb.tencent.tencent_vector import TencentConfig, TencentVector
+from core.rag.models.document import Document
+
+
+def _create_tencent_vector() -> TencentVector:
+    tencent_vector = TencentVector(
+        collection_name='test-001',
+        config=TencentConfig(
+            url="http://10.6.x.x",
+            api_key="nTZ**********************",
+            timeout=30,
+            username="dify",
+            database="dify",
+            shard=1,
+            replicas=2,
+        )
+    )
+    documents = [
+        Document(page_content="This is document 1", metadata={"doc_id": "doc1", "document_id": "foo1"}),
+        Document(page_content="This is document 2", metadata={"doc_id": "doc2", "document_id": "foo2"}),
+    ]
+    embeddings = [[0.2123, 0.23, 0.213], [0.2123, 0.22, 0.213]]
+    tencent_vector.create(texts=documents, embeddings=embeddings)
+
+    return tencent_vector
+
+
+@pytest.fixture(autouse=True)
+def mock_redis_lock(mocker):
+    mocker.patch.object(redis_client, "lock")
+
+
+def test_text_exists():
+    tencent_vector = _create_tencent_vector()
+    assert tencent_vector.text_exists(id="doc1") is True
+
+
+def test_delete_by_ids():
+    tencent_vector = _create_tencent_vector()
+    tencent_vector.delete_by_ids(ids=['doc2'])
+
+
+def test_delete_by_metadata_field():
+    tencent_vector = _create_tencent_vector()
+    tencent_vector.delete_by_metadata_field(key="document_id", value="foo1")
+
+
+def test_search_by_vector():
+    tencent_vector = _create_tencent_vector()
+    res = tencent_vector.search_by_vector(query_vector=[0.3123, 0.43, 0.213])
+    assert len(res) > 0
+
+def test_delete():
+    tencent_vector = _create_tencent_vector()
+    tencent_vector.delete()
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
@@ -230,7 +230,7 @@ services:
       RELYT_PASSWORD: difyai123456
       RELYT_DATABASE: postgres
       # tencent configurations
-      TENCENT_URL: http://127.0.0.1
+      TENCENT_VECTOR_DB_URL: http://127.0.0.1
       TENCENT_API_KEY: dify
       TENCENT_TIMEOUT: 30
       TENCENT_USERNAME: dify