# !/usr/bin/env python3
# -*- coding:utf-8 -*-

# @Time    : 2024/6/12 11:43
# @Author  : wangchongshi
# @Email   : wangchongshi.wcs@antgroup.com
# @FileName: dashscope_embedding.py
import json
from typing import Any, Dict, Generator, List, Optional

import aiohttp
import requests

from agentuniverse.base.util.env_util import get_from_env
from agentuniverse.agent.action.knowledge.embedding.embedding import Embedding

# DashScope accepts at most 25 strings per request, each up to 2048 tokens.
DASHSCOPE_MAX_BATCH_SIZE = 25
DASHSCOPE_EMBEDDING_URL = "https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding"
# Per-request timeout in seconds, shared by the sync and async code paths.
DASHSCOPE_REQUEST_TIMEOUT = 120


def batched(inputs: List,
            batch_size: int = DASHSCOPE_MAX_BATCH_SIZE) -> Generator[List, None, None]:
    """Yield consecutive slices of ``inputs`` holding at most ``batch_size`` items.

    Args:
        inputs (List): The items to split; an empty list yields nothing.
        batch_size (int): Maximum number of items per slice. Defaults to
            the DashScope per-request limit.

    Yields:
        List: The next slice of ``inputs``, in the original order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every input.
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size}")
    for start in range(0, len(inputs), batch_size):
        yield inputs[start:start + batch_size]


class DashscopeEmbedding(Embedding):
    """The Dashscope embedding class."""
    # API key sent as the Bearer token on every request.
    dashscope_api_key: Optional[str] = None

    def __init__(self, **kwargs):
        """Initialize the dashscope embedding class, need dashscope api key.

        A key already present on the instance after the base initializer
        has run takes precedence; otherwise the key is read from the
        ``DASHSCOPE_API_KEY`` environment variable.

        Raises:
            Exception: If no api key is available from either source.
        """
        super().__init__(**kwargs)
        # Only fall back to the environment when no key has been set, so an
        # explicitly supplied key is not overwritten.
        if not self.dashscope_api_key:
            self.dashscope_api_key = get_from_env("DASHSCOPE_API_KEY")
        if not self.dashscope_api_key:
            raise Exception("No DASHSCOPE_API_KEY in your environment.")

    def _request_headers(self) -> Dict[str, str]:
        """Build the HTTP headers sent with every embedding request."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.dashscope_api_key}"
        }

    def _request_body(self, batch: List[str]) -> bytes:
        """Serialize one batch of texts into the UTF-8 encoded JSON request body."""
        # A fresh payload per batch avoids sharing mutable state between calls.
        # NOTE(review): embedding_model_name is presumably declared on the
        # Embedding base class - confirm.
        # NOTE(review): text_type is fixed to "query" for every call; confirm
        # this is intended when embedding documents for storage.
        payload = {
            "model": self.embedding_model_name,
            "input": {"texts": batch},
            "parameters": {
                "text_type": "query"
            }
        }
        return json.dumps(payload, ensure_ascii=False).encode("utf-8")

    @staticmethod
    def _extract_embeddings(resp_json: Dict[str, Any],
                            expected: int) -> List[List[float]]:
        """Pull the embedding vectors out of one decoded API response.

        Args:
            resp_json (Dict[str, Any]): The decoded JSON response.
            expected (int): Number of texts sent in the request.

        Returns:
            List[List[float]]: One embedding per input text, in input order.

        Raises:
            Exception: If the response carries no embeddings, or carries a
                different number of embeddings than texts were sent.
        """
        output = resp_json.get("output")
        items = output.get("embeddings") if isinstance(output, dict) else None
        if not items:
            error_code = resp_json.get("code", "")
            error_message = resp_json.get("message", "")
            raise Exception(f"Failed to call dashscope embedding api, "
                            f"error code:{error_code}, "
                            f"error message:{error_message}")
        # Restore input order when the service reports each item's position;
        # the sort is stable, so items without an index keep response order.
        ordered = sorted(items, key=lambda item: item.get("text_index", 0))
        vectors = [item["embedding"] for item in ordered if "embedding" in item]
        # Dropping items silently would misalign vectors with their texts.
        if len(vectors) != expected:
            raise Exception(f"Failed to call dashscope embedding api, "
                            f"got {len(vectors)} embeddings "
                            f"for {expected} input texts")
        return vectors

    def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """
        Retrieve text embeddings for a list of input texts.

        The texts are sent to the DashScope embedding API in batches of at
        most DASHSCOPE_MAX_BATCH_SIZE, one blocking HTTP request per batch,
        over a single reused HTTP session.

        Args:
            texts (List[str]): A list of input texts to be embedded.

        Returns:
            List[List[float]]: A list of embeddings corresponding to the input
                texts. An empty input returns an empty list without any request.

        Raises:
            Exception: If the API call to DashScope fails, an exception is raised with
            the respective error code and message.
        """
        if not texts:
            return []

        result: List[List[float]] = []
        # One session for all batches so the connection can be reused.
        with requests.Session() as session:
            for batch in batched(texts):
                response = session.post(
                    url=DASHSCOPE_EMBEDDING_URL,
                    headers=self._request_headers(),
                    data=self._request_body(batch),
                    timeout=DASHSCOPE_REQUEST_TIMEOUT
                )
                result.extend(
                    self._extract_embeddings(response.json(), len(batch)))
        return result

    async def async_get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """
        Async version of get_embeddings.

        The texts are sent to the DashScope embedding API in batches of at
        most DASHSCOPE_MAX_BATCH_SIZE. Batches are awaited one after another
        over a single reused client session.

        Args:
            texts (List[str]): A list of input texts to be embedded.

        Returns:
            List[List[float]]: A list of embeddings corresponding to the input
                texts. An empty input returns an empty list without any request.

        Raises:
            Exception: If the API call to DashScope fails, an exception is raised with
            the respective error code and message.
        """
        if not texts:
            return []

        result: List[List[float]] = []
        timeout = aiohttp.ClientTimeout(total=DASHSCOPE_REQUEST_TIMEOUT)
        # The session-level timeout is the default for each request it makes.
        async with aiohttp.ClientSession(timeout=timeout) as session:
            for batch in batched(texts):
                async with session.post(
                        url=DASHSCOPE_EMBEDDING_URL,
                        headers=self._request_headers(),
                        data=self._request_body(batch),
                ) as resp:
                    resp_json = await resp.json()
                result.extend(
                    self._extract_embeddings(resp_json, len(batch)))
        return result
For detailed usage of Knowledge, you can refer to the [Knowledge documentation]() or the code `tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge_with_milvus.py`. \ No newline at end of file +The above code will create a Milvus-based Knowledge instance. For detailed usage of Knowledge, you can refer to the [Knowledge component]() or the code `tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge_with_milvus.py`. \ No newline at end of file diff --git a/docs/guidebook/en/3_3_2_ChromaDB.md b/docs/guidebook/en/3_3_2_ChromaDB.md new file mode 100644 index 00000000..a92ed9bf --- /dev/null +++ b/docs/guidebook/en/3_3_2_ChromaDB.md @@ -0,0 +1,22 @@ +## ChromaDB + +The agentUniverse already integrates ChromaDB-related dependencies, so you do not need to install additional packages to use ChromaDB's features. +If you want to learn about the underlying principles of ChromaDB, you can visit the [official ChromaDB website](https://www.trychroma.com/). + +### What can I do with ChromaDB? + +You can use ChromaDB in the [Knowledge component]() to store and query knowledge. You can create a storage component using ChromaDB with the following method: +```python +from agentuniverse.agent.action.knowledge.embedding.openai_embedding import OpenAIEmbedding +from agentuniverse.agent.action.knowledge.knowledge import Knowledge +from agentuniverse.agent.action.knowledge.store.chroma_store import ChromaStore + + +init_params = dict() +init_params['name'] = 'test_knowledge' +init_params['description'] = 'test_knowledge_description' +init_params['store'] = ChromaStore(collection_name="test_knowledge", embedding_model=OpenAIEmbedding( + embedding_model_name='text-embedding-ada-002')) +knowledge = Knowledge(**init_params) +``` +The above code will create a Knowledge component based on ChromaDB. 
For more details on how to use the Knowledge component, you can refer to the [Knowledge component](), or check the code in `tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge.py`. \ No newline at end of file diff --git a/docs/guidebook/zh/3_3_1_Milvus.md b/docs/guidebook/zh/3_3_1_Milvus.md index 0f97daaa..108ee3cc 100644 --- a/docs/guidebook/zh/3_3_1_Milvus.md +++ b/docs/guidebook/zh/3_3_1_Milvus.md @@ -38,4 +38,4 @@ init_params['store'] = MilvusStore( ) knowledge = Knowledge(**init_params) ``` -上面的代码会创建一个基于Milvus的Knowledge,关于Knowledge的具体用法您可以参考[Knowledge文档](),或是参考代码`tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge_with_milvus.py`。 \ No newline at end of file +上面的代码会创建一个基于Milvus的Knowledge,关于Knowledge的具体用法您可以参考[Knowledge组件](),或是参考代码`tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge_with_milvus.py`。 \ No newline at end of file diff --git a/docs/guidebook/zh/3_3_2_ChromaDB.md b/docs/guidebook/zh/3_3_2_ChromaDB.md new file mode 100644 index 00000000..47e89a40 --- /dev/null +++ b/docs/guidebook/zh/3_3_2_ChromaDB.md @@ -0,0 +1,22 @@ +## ChromaDB + +agentUniverse中已集成ChromaDB相关依赖,您无需额外安装包即可使用ChromaDB的相关功能。 +如果您想学习ChromaDB相关的底层原理,您可以查阅ChromaDB的[官方网站](https://www.trychroma.com/)。 + +### 我可以用ChromaDB做些什么 + +您可以在[Knowledge组件]()中使用ChromaDB来存储和查询知识,您可以使用以下方式来创建一个使用ChromaDB的存储组件: +```python +from agentuniverse.agent.action.knowledge.embedding.openai_embedding import OpenAIEmbedding +from agentuniverse.agent.action.knowledge.knowledge import Knowledge +from agentuniverse.agent.action.knowledge.store.chroma_store import ChromaStore + + +init_params = dict() +init_params['name'] = 'test_knowledge' +init_params['description'] = 'test_knowledge_description' +init_params['store'] = ChromaStore(collection_name="test_knowledge", embedding_model=OpenAIEmbedding( + embedding_model_name='text-embedding-ada-002')) +knowledge = Knowledge(**init_params) +```
+上面的代码会创建一个基于ChromaDB的Knowledge,关于Knowledge的具体用法您可以参考[Knowledge组件](),或是参考代码`tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge.py`。 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index d725b8f9..e6ab5832 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ pydantic = "^2.6.4" gunicorn = "^22.0.0" grpcio = "1.63.0" chromadb = "0.4.24" +opentelemetry-exporter-otlp-proto-grpc = "^1.25.0" sphinx = "^7.2.6" Jinja2 = "^3.1.4" tqdm = "^4.66.3"