diff --git a/README.md b/README.md
index 5833e44c..bd031b4b 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,12 @@ bash scripts/run_for_openai_api_with_gpu_in_Linux_or_WSL.sh
 bash scripts/run_for_openai_api_in_M1_mac.sh
 ```
 
+## Run With ollama API On M1 Mac
+
+```bash
+bash scripts/run_for_ollama_api_in_M1_mac.sh
+```
+
 ## Run With 3B LLM (MiniChat-2-3B-INT8-GGUF) On M1 Mac
 ```bash
 bash scripts/run_for_3B_in_M1_mac.sh
diff --git a/README_zh.md b/README_zh.md
index bc027a9d..00059432 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -58,6 +58,12 @@ bash scripts/run_for_openai_api_with_gpu_in_Linux_or_WSL.sh
 bash scripts/run_for_openai_api_in_M1_mac.sh
 ```
 
+## 在M1Mac环境下使用Ollama API
+
+```bash
+bash scripts/run_for_ollama_api_in_M1_mac.sh
+```
+
 ## 在M1Mac环境下使用3B LLM((MiniChat-2-3B-INT8-GGUF)
 
 ```bash
diff --git a/qanything_kernel/connector/embedding/embedding_model_configs_v0.0.1 b/qanything_kernel/connector/embedding/embedding_model_configs_v0.0.1
new file mode 120000
index 00000000..37896c26
--- /dev/null
+++ b/qanything_kernel/connector/embedding/embedding_model_configs_v0.0.1
@@ -0,0 +1 @@
+/Users/wuerping/.cache/modelscope/hub/maidalun/bce-embedding-base_v1
\ No newline at end of file
diff --git a/qanything_kernel/connector/llm/llm_for_openai_api.py b/qanything_kernel/connector/llm/llm_for_openai_api.py
index 061dcc4e..95b02ccf 100644
--- a/qanything_kernel/connector/llm/llm_for_openai_api.py
+++ b/qanything_kernel/connector/llm/llm_for_openai_api.py
@@ -83,6 +83,7 @@ def num_tokens_from_messages(self, messages, model=None):
             "gpt-4-32k-0613",
             "gpt-4-32k",
             # "gpt-4-1106-preview",
+            "qwen:32b",
         }:
             tokens_per_message = 3
             tokens_per_name = 1
@@ -97,7 +98,10 @@ def num_tokens_from_messages(self, messages, model=None):
            # 对于 gpt-4 模型可能会有更新,此处返回假设为 gpt-4-0613 的token数量,并给出警告
            debug_logger.info("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
            return self.num_tokens_from_messages(messages, model="gpt-4-0613")
-
+        elif "qwen:32b" in model:
+            # 对于 qwen 模型可能会有更新,此处返回假设为 qwen:32b 的token数量,并给出警告
+            debug_logger.info("Warning: qwen may update over time. Returning num tokens assuming qwen:32b.")
+            return self.num_tokens_from_messages(messages, model="qwen:32b")
         else:
             # 对于没有实现的模型,抛出未实现错误
             raise NotImplementedError(
diff --git a/qanything_kernel/connector/rerank/rerank_model_configs_v0.0.1 b/qanything_kernel/connector/rerank/rerank_model_configs_v0.0.1
new file mode 120000
index 00000000..e42fb35a
--- /dev/null
+++ b/qanything_kernel/connector/rerank/rerank_model_configs_v0.0.1
@@ -0,0 +1 @@
+/Users/wuerping/.cache/modelscope/hub/maidalun/bce-reranker-base_v1
\ No newline at end of file
diff --git a/scripts/run_for_ollama_api_in_M1_mac.sh b/scripts/run_for_ollama_api_in_M1_mac.sh
new file mode 100644
index 00000000..36f49976
--- /dev/null
+++ b/scripts/run_for_ollama_api_in_M1_mac.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+bash scripts/base_run.sh -s "M1mac" -w 4 -m 19530 -q 8777 -o -b 'http://localhost:11434/v1' -k 'ollama' -n 'qwen:32b' -M '32B' -l '4096'
\ No newline at end of file