Skip to content

Commit

Permalink
Merge pull request #72 from alipay/dev_fanen
Browse files Browse the repository at this point in the history
Add: Chromadb docs, dashscope embedding. Change: Fix opentelemetry-exporter-otlp-proto-grpc version to 1.25.0
  • Loading branch information
LandJerry authored Jun 12, 2024
2 parents 68bb1d5 + 27edc17 commit 7cc6a61
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 3 deletions.
156 changes: 156 additions & 0 deletions agentuniverse/agent/action/knowledge/embedding/dashscope_embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

# @Time : 2024/6/12 11:43
# @Author : wangchongshi
# @Email : wangchongshi.wcs@antgroup.com
# @FileName: dashscope_embedding.py
import aiohttp
import requests
from typing import List, Generator, Optional
import json

from agentuniverse.base.util.env_util import get_from_env
from agentuniverse.agent.action.knowledge.embedding.embedding import Embedding

# DashScope accepts at most 25 strings per request; each string may contain
# at most 2048 tokens.
DASHSCOPE_MAX_BATCH_SIZE = 25
DASHSCOPE_EMBEDDING_URL = "https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding"


def batched(inputs: List,
            batch_size: int = DASHSCOPE_MAX_BATCH_SIZE) -> Generator[List, None, None]:
    """Yield consecutive slices of ``inputs`` holding at most ``batch_size`` items.

    Used to split a list of texts so that each request stays within the
    per-call limit of the DashScope embedding API.

    Args:
        inputs (List): The list to split. An empty list yields nothing.
        batch_size (int): Maximum number of items per yielded slice. Must be
            a positive integer; defaults to ``DASHSCOPE_MAX_BATCH_SIZE``.

    Yields:
        List: The next slice of ``inputs``, in the original order. Only the
        last slice may be shorter than ``batch_size``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. The error is raised
            when iteration starts, because this is a generator function.
    """
    # A negative step would make range() empty and silently drop every input,
    # so reject non-positive sizes explicitly instead of returning no batches.
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size}")
    for start in range(0, len(inputs), batch_size):
        yield inputs[start:start + batch_size]


class DashscopeEmbedding(Embedding):
    """Embedding component that calls the DashScope text-embedding HTTP API.

    Texts are sent in batches (see ``batched``) to ``DASHSCOPE_EMBEDDING_URL``
    and the returned vectors are concatenated in request order.
    """
    # API key sent as the Bearer token. May be passed as a keyword argument;
    # otherwise it is read from the DASHSCOPE_API_KEY environment variable.
    dashscope_api_key: Optional[str] = None

    def __init__(self, **kwargs):
        """Initialize the dashscope embedding class, need dashscope api key.

        Args:
            **kwargs: Forwarded unchanged to the ``Embedding`` base class.

        Raises:
            Exception: If no API key was supplied and DASHSCOPE_API_KEY is
                not set in the environment.
        """
        super().__init__(**kwargs)
        # Keep an explicitly configured key; only fall back to the environment
        # when none was provided, instead of always overwriting it.
        if not self.dashscope_api_key:
            self.dashscope_api_key = get_from_env("DASHSCOPE_API_KEY")
        if not self.dashscope_api_key:
            raise Exception("No DASHSCOPE_API_KEY in your environment.")

    def _request_headers(self) -> dict:
        """Build the HTTP headers shared by the sync and async requests."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.dashscope_api_key}"
        }

    def _request_body(self, batch: List[str]) -> bytes:
        """Serialize one batch of texts into the UTF-8 JSON request body."""
        # A fresh payload per batch avoids mutating state shared across calls.
        # NOTE(review): embedding_model_name is not defined in this class; it
        # is presumably provided by the Embedding base class - confirm.
        payload = {
            "model": self.embedding_model_name,
            "input": {"texts": batch},
            "parameters": {
                "text_type": "query"
            }
        }
        return json.dumps(payload, ensure_ascii=False).encode("utf-8")

    @staticmethod
    def _parse_embeddings(resp_json: dict) -> List[List[float]]:
        """Extract the embedding vectors from one decoded API response.

        Raises:
            Exception: If the response has no (or an empty) ``output`` field;
                the message carries the ``code`` and ``message`` fields of
                the response.
        """
        output = resp_json.get("output")
        if not output:
            error_code = resp_json.get("code", "")
            error_message = resp_json.get("message", "")
            raise Exception(f"Failed to call dashscope embedding api, "
                            f"error code:{error_code}, "
                            f"error message:{error_message}")
        # Entries without an 'embedding' key are skipped, as before.
        return [item['embedding'] for item in output["embeddings"]
                if 'embedding' in item]

    def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """
        Retrieve text embeddings for a list of input texts.

        The texts are split into batches and each batch is posted to the
        DashScope embedding API with a 120 second timeout. The embeddings of
        all batches are concatenated in request order.

        Args:
            texts (List[str]): A list of input texts to be embedded.
        Returns:
            List[List[float]]: A list of embeddings corresponding to the input texts.
        Raises:
            Exception: If the API call to DashScope fails, an exception is raised with
            the respective error code and message.
        """
        result = []
        for batch in batched(texts):
            response = requests.post(
                url=DASHSCOPE_EMBEDDING_URL,
                headers=self._request_headers(),
                data=self._request_body(batch),
                timeout=120
            )
            result.extend(self._parse_embeddings(response.json()))
        return result

    async def async_get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """
        Async version of get_embeddings.

        The texts are split into batches and each batch is posted to the
        DashScope embedding API, one after another, over a single aiohttp
        session. Each request has a 120 second total timeout.

        Args:
            texts (List[str]): A list of input texts to be embedded.
        Returns:
            List[List[float]]: A list of embeddings corresponding to the input texts.
        Raises:
            Exception: If the API call to DashScope fails, an exception is raised with
            the respective error code and message.
        """
        result = []
        if not texts:
            # Nothing to embed: do not open a session at all.
            return result

        # aiohttp expects a ClientTimeout object; bare numbers are deprecated.
        timeout = aiohttp.ClientTimeout(total=120)
        # One session is reused for every batch rather than reopened per call.
        async with aiohttp.ClientSession(timeout=timeout) as session:
            for batch in batched(texts):
                async with session.post(
                        url=DASHSCOPE_EMBEDDING_URL,
                        headers=self._request_headers(),
                        data=self._request_body(batch),
                ) as resp:
                    resp_json = await resp.json()
                result.extend(self._parse_embeddings(resp_json))
        return result
4 changes: 2 additions & 2 deletions docs/guidebook/en/3_3_1_Milvus.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pip install pymilvus
```

### What can I do with Milvus
You can use Milvus in the Knowledge component to store and query knowledge. You can create a storage component using Milvus as follows:
You can use Milvus in the [Knowledge component]() to store and query knowledge. You can create a storage component using Milvus as follows:

```python
from agentuniverse.agent.action.knowledge.store.milvus_store import MilvusStore
Expand All @@ -37,4 +37,4 @@ init_params['store'] = MilvusStore(
)
knowledge = Knowledge(**init_params)
```
The above code will create a Milvus-based Knowledge instance. For detailed usage of Knowledge, you can refer to the [Knowledge documentation]() or the code `tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge_with_milvus.py`.
The above code will create a Milvus-based Knowledge instance. For detailed usage of Knowledge, you can refer to the [Knowledge component]() or the code `tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge_with_milvus.py`.
22 changes: 22 additions & 0 deletions docs/guidebook/en/3_3_2_ChromaDB.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
## ChromaDB

agentUniverse already bundles the ChromaDB dependencies, so you do not need to install any additional packages to use ChromaDB's features.
If you want to learn about the underlying principles of ChromaDB, you can visit the [official ChromaDB website](https://www.trychroma.com/).

### What can I do with ChromaDB?

You can use ChromaDB in the [Knowledge component]() to store and query knowledge. You can create a storage component using ChromaDB with the following method:
```python
from agentuniverse.agent.action.knowledge.embedding.openai_embedding import OpenAIEmbedding
from agentuniverse.agent.action.knowledge.knowledge import Knowledge
from agentuniverse.agent.action.knowledge.store.chroma_store import ChromaStore


init_params = dict()
init_params['name'] = 'test_knowledge'
init_params['description'] = 'test_knowledge_description'
init_params['store'] = ChromaStore(collection_name="test_knowledge", embedding_model=OpenAIEmbedding(
embedding_model_name='text-embedding-ada-002'))
knowledge = Knowledge(**init_params)
```
The above code will create a Knowledge component based on ChromaDB. For more details on how to use the Knowledge component, you can refer to the [Knowledge component](), or check the code in `tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge.py`.
2 changes: 1 addition & 1 deletion docs/guidebook/zh/3_3_1_Milvus.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ init_params['store'] = MilvusStore(
)
knowledge = Knowledge(**init_params)
```
上面的代码会创建一个基于Milvus的Knowledge,关于Knowledge的具体用法您可以参考[Knowledge文档](),或是参考代码`tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge_with_milvus.py`
上面的代码会创建一个基于Milvus的Knowledge,关于Knowledge的具体用法您可以参考[Knowledge组件](),或是参考代码`tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge_with_milvus.py`
22 changes: 22 additions & 0 deletions docs/guidebook/zh/3_3_2_ChromaDB.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
## ChromaDB

agentUniverse中已集成ChromaDB相关依赖,您无需额外安装包即可使用ChromaDB的相关功能。
如果您想学习ChromaDB相关的底层原理,您可以查阅ChromaDB的[官方网站](https://www.trychroma.com/)

### 我可以用ChromaDB做些什么

您可以在[Knowledge组件]()中使用ChromaDB来存储和查询知识,您可以使用以下方式来创建一个使用ChromaDB的存储组件:
```python
from agentuniverse.agent.action.knowledge.embedding.openai_embedding import OpenAIEmbedding
from agentuniverse.agent.action.knowledge.knowledge import Knowledge
from agentuniverse.agent.action.knowledge.store.chroma_store import ChromaStore


init_params = dict()
init_params['name'] = 'test_knowledge'
init_params['description'] = 'test_knowledge_description'
init_params['store'] = ChromaStore(collection_name="test_knowledge", embedding_model=OpenAIEmbedding(
embedding_model_name='text-embedding-ada-002'))
knowledge = Knowledge(**init_params)
```
上面的代码会创建一个基于ChromaDB的Knowledge,关于Knowledge的具体用法您可以参考[Knowledge组件](),或是参考代码`tests/test_agentuniverse/unit/agent/action/knowledge/test_knowledge.py`
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pydantic = "^2.6.4"
gunicorn = "^22.0.0"
grpcio = "1.63.0"
chromadb = "0.4.24"
opentelemetry-exporter-otlp-proto-grpc = "^1.25.0"
sphinx = "^7.2.6"
Jinja2 = "^3.1.4"
tqdm = "^4.66.3"
Expand Down

0 comments on commit 7cc6a61

Please sign in to comment.