Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix bug: tencent vdb #5378 #5408

Merged
merged 44 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
324a0ba
feat: support tencent vdb
quicksandznzn Apr 17, 2024
75bbfb5
optimize:add requirements
quicksandznzn Apr 17, 2024
96aeb34
remove .env
quicksandznzn Apr 18, 2024
fe905ea
optimize: test tencent vdb
quicksandznzn Apr 18, 2024
90dca38
optimize: config prefix
quicksandznzn Apr 18, 2024
e3d5d2f
remove comments
quicksandznzn Apr 19, 2024
617fec0
Merge branch 'langgenius:main' into main
quicksandznzn Apr 23, 2024
01e27de
optimize: reformat and move test to api/tests/integration_tests/vdb/t…
quicksandznzn Apr 25, 2024
a401a73
Merge branch 'main' into main
quicksandznzn Apr 25, 2024
1ed4926
optimize: score_threshold
quicksandznzn Apr 26, 2024
4cdf8e4
Merge branch 'main' into main
quicksandznzn Apr 26, 2024
6ae95de
optimize
quicksandznzn Apr 26, 2024
3afc52e
Merge branch 'langgenius:main' into main
quicksandznzn Apr 28, 2024
bbfbb74
Merge branch 'langgenius:main' into main
quicksandznzn Apr 28, 2024
2409bbd
optimize: test tencent vdb
quicksandznzn Apr 28, 2024
5b706f8
Merge branch 'langgenius:main' into main
quicksandznzn Apr 28, 2024
08c922a
Merge branch 'main' into main
quicksandznzn Apr 29, 2024
a289312
optimize
quicksandznzn Apr 29, 2024
f3fcc16
optimize
quicksandznzn Apr 29, 2024
dbf22ef
Merge branch 'main' into main
quicksandznzn Apr 30, 2024
ddee4ed
Merge branch 'langgenius:main' into main
quicksandznzn May 8, 2024
94c01ce
optimize: remove cache
quicksandznzn May 8, 2024
802d8ec
Merge branch 'main' into main
quicksandznzn May 10, 2024
742ea0a
Merge branch 'langgenius:main' into main
quicksandznzn May 11, 2024
1a60f37
optimize: collection
quicksandznzn May 11, 2024
a591366
Merge branch 'main' into main
quicksandznzn May 20, 2024
4b368a5
Merge branch 'langgenius:main' into main
quicksandznzn May 27, 2024
78a6cc4
optimize: use delete by filter , self.collection change to self._db.c…
quicksandznzn May 27, 2024
b495e44
Merge branch 'main' into main
quicksandznzn Jun 6, 2024
b0c4949
fix
quicksandznzn Jun 13, 2024
7137ebe
fix
quicksandznzn Jun 13, 2024
d79b025
Merge branch 'main' of https://github.com/langgenius/dify
quicksandznzn Jun 14, 2024
a076d2e
poetry lock --no-update
quicksandznzn Jun 14, 2024
4f6dd68
fix Conflicts
quicksandznzn Jun 14, 2024
483e64d
Merge branch 'main' of https://github.com/langgenius/dify
quicksandznzn Jun 17, 2024
9c6be6d
feat: support tencent cos storage
quicksandznzn Jun 17, 2024
433fa26
Merge branch 'langgenius:main' into main
quicksandznzn Jun 17, 2024
e4a70bc
Merge branch 'main' of https://github.com/langgenius/dify
quicksandznzn Jun 17, 2024
7f1f1bd
feat: add tecent cos env setting to docker-compose.yaml
quicksandznzn Jun 17, 2024
e67daa0
Merge branch 'main' of https://github.com/langgenius/dify
quicksandznzn Jun 17, 2024
7baa788
Merge branch 'main' of https://github.com/langgenius/dify
quicksandznzn Jun 17, 2024
3b933b1
Merge branch 'main' of https://github.com/langgenius/dify
quicksandznzn Jun 18, 2024
ab5e34d
Merge branch 'langgenius:main' into main
quicksandznzn Jun 20, 2024
23235d5
fix bug: tencent vdb #5378
quicksandznzn Jun 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
optimize: test tencent vdb
  • Loading branch information
quicksandznzn committed Apr 18, 2024
commit fe905ea69673a4038c399164b591921c0813968b
2 changes: 1 addition & 1 deletion api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ RELYT_PASSWORD=postgres
RELYT_DATABASE=postgres

# Tencent configuration
TENCENT_URL=http://127.0.0.1
TENCENT_VECTOR_DB_URL=http://127.0.0.1
TENCENT_API_KEY=dify
TENCENT_TIMEOUT=30
TENCENT_USERNAME=dify
Expand Down
2 changes: 1 addition & 1 deletion api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def __init__(self):
self.RELYT_DATABASE = get_env('RELYT_DATABASE')

# tencent settings
self.TENCENT_URL = get_env('TENCENT_URL')
self.TENCENT_VECTOR_DB_URL = get_env('TENCENT_VECTOR_DB_URL')
self.TENCENT_API_KEY = get_env('TENCENT_API_KEY')
self.TENCENT_TIMEOUT = get_env('TENCENT_TIMEOUT')
self.TENCENT_USERNAME = get_env('TENCENT_USERNAME')
Expand Down
8 changes: 4 additions & 4 deletions api/controllers/console/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,13 +469,13 @@ class DatasetRetrievalSettingApi(Resource):
@account_initialization_required
def get(self):
vector_type = current_app.config['VECTOR_STORE']
if vector_type == 'milvus':
if vector_type == 'milvus' or vector_type == 'tencent':
return {
'retrieval_method': [
'semantic_search'
]
}
elif vector_type == 'qdrant' or vector_type == 'weaviate' or vector_type == 'tencent':
elif vector_type == 'qdrant' or vector_type == 'weaviate':
return {
'retrieval_method': [
'semantic_search', 'full_text_search', 'hybrid_search'
Expand All @@ -491,13 +491,13 @@ class DatasetRetrievalSettingMockApi(Resource):
@account_initialization_required
def get(self, vector_type):

if vector_type == 'milvus':
if vector_type == 'milvus' or vector_type == 'tencent':
return {
'retrieval_method': [
'semantic_search'
]
}
elif vector_type == 'qdrant' or vector_type == 'weaviate' or vector_type == 'tencent':
elif vector_type == 'qdrant' or vector_type == 'weaviate':
return {
'retrieval_method': [
'semantic_search', 'full_text_search', 'hybrid_search'
Expand Down
17 changes: 9 additions & 8 deletions api/core/rag/datasource/vdb/tencent/tencent_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,15 @@ def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Doc
return self._get_search_res(res)

def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
res = (self._db.collection(self._collection_name)
.searchByText(embeddingItems=[query],
params=document.HNSWSearchParams(ef=kwargs.get("ef", 10)),
retrieve_vector=False,
limit=kwargs.get('top_k', 4),
timeout=self._client_config.timeout,
))
return self._get_search_res(res)
# res = (self._db.collection(self._collection_name)
# .searchByText(embeddingItems=[query],
# params=document.HNSWSearchParams(ef=kwargs.get("ef", 10)),
# retrieve_vector=False,
# limit=kwargs.get('top_k', 4),
# timeout=self._client_config.timeout,
# ))
# Full-text search requires an embedding model deployed in Tencent VectorDB; not supported for now.
return []

def _get_search_res(self, res):
docs = []
Expand Down
2 changes: 1 addition & 1 deletion api/core/rag/datasource/vdb/vector_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def _init_vector(self) -> BaseVector:
return TencentVector(
collection_name=collection_name,
config=TencentConfig(
url=config.get('TENCENT_URL'),
url=config.get('TENCENT_VECTOR_DB_URL'),
api_key=config.get('TENCENT_API_KEY'),
timeout=config.get('TENCENT_TIMEOUT'),
username=config.get('TENCENT_USERNAME'),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import pytest
from extensions.ext_redis import redis_client
from core.rag.datasource.vdb.tencent.tencent_vector import TencentConfig, TencentVector
from core.rag.models.document import Document


def _create_tencent_vector() -> TencentVector:
    """Build a ``TencentVector`` on collection ``test-001`` and seed it.

    Two sample documents (``doc1``/``foo1`` and ``doc2``/``foo2``) are
    written through ``create`` together with their 3-dimensional
    embeddings, so each test starts from the same known content.

    Returns:
        The seeded ``TencentVector`` instance.
    """
    # Connection settings are placeholders; point them at a reachable
    # Tencent VectorDB instance before running these integration tests.
    connection = TencentConfig(
        url="http://10.6.x.x",
        api_key="nTZ**********************",
        timeout=30,
        username="dify",
        database="dify",
        shard=1,
        replicas=2,
    )
    store = TencentVector(collection_name='test-001', config=connection)

    seed_documents = [
        Document(
            page_content="This is document 1",
            metadata={"doc_id": "doc1", "document_id": "foo1"},
        ),
        Document(
            page_content="This is document 2",
            metadata={"doc_id": "doc2", "document_id": "foo2"},
        ),
    ]
    # One embedding per seed document, in the same order.
    seed_embeddings = [
        [0.2123, 0.23, 0.213],
        [0.2123, 0.22, 0.213],
    ]
    store.create(texts=seed_documents, embeddings=seed_embeddings)

    return store


@pytest.fixture(autouse=True)
def mock_redis_lock(mocker):
    """Patch ``redis_client.lock`` with a mock for every test in this module.

    ``autouse=True`` applies the patch automatically, so individual tests
    do not have to request this fixture.
    """
    # NOTE(review): presumably the vector store takes a Redis lock while
    # creating a collection -- confirm against TencentVector.
    mocker.patch.object(redis_client, "lock")


def test_text_exists():
    """A seeded document id must be reported as existing."""
    store = _create_tencent_vector()
    found = store.text_exists(id="doc1")
    assert found is True


def test_delete_by_ids():
    """Deleting a seeded document by id must complete without raising."""
    store = _create_tencent_vector()
    ids_to_remove = ['doc2']
    store.delete_by_ids(ids=ids_to_remove)


def test_delete_by_metadata_field():
    """Deleting by the ``document_id`` metadata field must complete without raising."""
    store = _create_tencent_vector()
    field_name, field_value = "document_id", "foo1"
    store.delete_by_metadata_field(key=field_name, value=field_value)


def test_search_by_vector():
    """A vector query against the seeded collection must return at least one hit."""
    store = _create_tencent_vector()
    probe = [0.3123, 0.43, 0.213]
    hits = store.search_by_vector(query_vector=probe)
    assert len(hits) > 0

def test_delete():
    """Calling ``delete`` on a freshly seeded store must complete without raising."""
    store = _create_tencent_vector()
    store.delete()
2 changes: 1 addition & 1 deletion docker/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ services:
RELYT_PASSWORD: difyai123456
RELYT_DATABASE: postgres
# tencent configurations
TENCENT_URL: http://127.0.0.1
TENCENT_VECTOR_DB_URL: http://127.0.0.1
TENCENT_API_KEY: dify
TENCENT_TIMEOUT: 30
TENCENT_USERNAME: dify
Expand Down