feat: Add ollama API support, run_for_ollama_api_in_M1_mac.sh
吴尔平 authored and 吴尔平 committed May 10, 2024
1 parent a66989f commit fe2076b
Showing 6 changed files with 21 additions and 1 deletion.
6 changes: 6 additions & 0 deletions README.md
@@ -61,6 +61,12 @@ bash scripts/run_for_openai_api_with_gpu_in_Linux_or_WSL.sh
bash scripts/run_for_openai_api_in_M1_mac.sh
```

## Run With Ollama API On M1 Mac

```bash
bash scripts/run_for_ollama_api_in_M1_mac.sh
```

## Run With 3B LLM (MiniChat-2-3B-INT8-GGUF) On M1 Mac
```bash
bash scripts/run_for_3B_in_M1_mac.sh
6 changes: 6 additions & 0 deletions README_zh.md
@@ -58,6 +58,12 @@ bash scripts/run_for_openai_api_with_gpu_in_Linux_or_WSL.sh
bash scripts/run_for_openai_api_in_M1_mac.sh
```

## Run With Ollama API On M1 Mac

```bash
bash scripts/run_for_ollama_api_in_M1_mac.sh
```

## Run With 3B LLM (MiniChat-2-3B-INT8-GGUF) On M1 Mac

```bash
6 changes: 5 additions & 1 deletion qanything_kernel/connector/llm/llm_for_openai_api.py
@@ -83,6 +83,7 @@ def num_tokens_from_messages(self, messages, model=None):
"gpt-4-32k-0613",
"gpt-4-32k",
# "gpt-4-1106-preview",
"qwen:32b",
}:
tokens_per_message = 3
tokens_per_name = 1
@@ -97,7 +98,10 @@ def num_tokens_from_messages(self, messages, model=None):
# gpt-4 may be updated over time; return the token count assuming gpt-4-0613 and log a warning
debug_logger.info("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
return self.num_tokens_from_messages(messages, model="gpt-4-0613")

elif "qwen:32b" in model:
# qwen may be updated over time; return the token count assuming qwen:32b and log a warning
debug_logger.info("Warning: qwen may update over time. Returning num tokens assuming qwen:32b.")
return self.num_tokens_from_messages(messages, model="qwen:32b")
else:
# 对于没有实现的模型,抛出未实现错误
raise NotImplementedError(
Expand Down
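The fallback pattern in the diff above — pin an unrecognized model string to a known baseline, otherwise raise — can be sketched as a minimal standalone function. This is a hypothetical simplification: the real method lives on a class, uses a tiktoken-style encoder rather than whitespace splitting, and logs through `debug_logger`.

```python
def num_tokens_from_messages(messages, model=None):
    # Models with known per-message framing costs (3 framing tokens per
    # message, plus 1 for an explicit "name" field), mirroring the diff.
    known_models = {"gpt-4-0613", "gpt-4-32k-0613", "gpt-4-32k", "qwen:32b"}
    if model in known_models:
        tokens_per_message, tokens_per_name = 3, 1
    elif model is not None and "gpt-4" in model:
        # gpt-4 may change over time; assume gpt-4-0613 as the baseline.
        return num_tokens_from_messages(messages, model="gpt-4-0613")
    elif model is not None and "qwen:32b" in model:
        # Likewise pin unrecognized qwen:32b variants to plain qwen:32b.
        return num_tokens_from_messages(messages, model="qwen:32b")
    else:
        raise NotImplementedError(f"token counting not implemented for {model!r}")
    total = 0
    for message in messages:
        total += tokens_per_message
        for key, value in message.items():
            # Whitespace split stands in for a real tokenizer's encode().
            total += len(str(value).split())
            if key == "name":
                total += tokens_per_name
    # Every reply is primed with assistant framing (+3), as in the
    # OpenAI token-counting recipe this method follows.
    return total + 3
```

The design point carried over from the diff is that unknown variants degrade gracefully to a pinned baseline instead of failing, with the warning making the assumption visible in logs.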
2 changes: 2 additions & 0 deletions scripts/run_for_ollama_api_in_M1_mac.sh
Original file line number Diff line number Diff line change

@@ -0,0 +1,2 @@
#!/bin/bash
bash scripts/base_run.sh -s "M1mac" -w 4 -m 19530 -q 8777 -o -b 'http://localhost:11434/v1' -k 'ollama' -n 'qwen:32b' -M '32B' -l '4096'
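
The script's flags wire an OpenAI-style client against Ollama's compatibility endpoint. The mapping can be sketched as below; this is an illustrative sketch only (`build_chat_request` is a hypothetical helper, not part of the repo), using the base URL, key, and model name taken directly from the flags above. Ollama requires an `Authorization` header to be present but does not validate the key's value.

```python
import json

OLLAMA_BASE = "http://localhost:11434/v1"  # -b: OpenAI-compatible base URL
API_KEY = "ollama"                         # -k: placeholder key; value is ignored by Ollama
MODEL = "qwen:32b"                         # -n: model name served by Ollama

def build_chat_request(prompt):
    """Return (url, headers, body) for a chat completion call to Ollama."""
    url = f"{OLLAMA_BASE}/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    body = json.dumps({
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
    })
    return url, headers, body
```

Because the endpoint speaks the OpenAI wire format, the existing `llm_for_openai_api.py` connector can be pointed at it unchanged, which is why this commit only needed to teach the token counter about `qwen:32b`.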
