Commit 2ba1d4d

Author: xusenlin (committed)
support qwen2
1 parent f75a360

File tree

2 files changed: +8 / -5 lines

README.md

Lines changed: 4 additions & 1 deletion

@@ -20,6 +20,9 @@
 
 ## 📢 News
 
++ 【2024.06.05】 The `QWEN2` model is now supported; set the environment variables `MODEL_NAME=qwen2` `PROMPT_NAME=qwen2`
+
+
 + 【2024.06.05】 The `GLM4` model is supported; set the environment variables `MODEL_NAME=chatglm4` `PROMPT_NAME=chatglm4`
@@ -29,7 +32,7 @@
 + 【2024.04.16】 `Rerank` re-ranking models are supported; [usage](./docs/RAG.md)
 
 
-+ 【2024.02.26】 The `QWEN2` model requires the environment variables `MODEL_NAME=qwen2` `PROMPT_NAME=qwen2`
++ 【2024.02.26】 The `QWEN1.5` model requires the environment variables `MODEL_NAME=qwen2` `PROMPT_NAME=qwen2`
 
 
 + 【2024.01.19】 Added [InternLM2](https://github.com/InternLM/InternLM) model support; [startup guide](https://github.com/xusenlinzy/api-for-open-llm/blob/master/docs/SCRIPT.md#internlm2)
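The README entries above amount to setting two environment variables before launching the server. A minimal in-process sketch, purely for illustration (in an actual deployment these would typically go in a `.env` file or be exported in the shell):

```python
import os

# Illustration only: mirrors the README's instructions for Qwen2.
os.environ["MODEL_NAME"] = "qwen2"   # selects the Qwen2 model backend
os.environ["PROMPT_NAME"] = "qwen2"  # selects the Qwen2 prompt template

print(os.environ["MODEL_NAME"], os.environ["PROMPT_NAME"])
```

For GLM4, the same two variables are set to `chatglm4` instead.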

api/models.py

Lines changed: 4 additions & 4 deletions

@@ -96,7 +96,7 @@ def create_vllm_engine():
         from vllm.engine.async_llm_engine import AsyncLLMEngine
         from api.core.vllm_engine import VllmEngine, LoRA
     except ImportError:
-        return None
+        raise ValueError("VLLM engine not available")
 
     include = {
         "tokenizer_mode",
@@ -146,7 +146,7 @@ def create_llama_cpp_engine():
         from llama_cpp import Llama
         from api.core.llama_cpp_engine import LlamaCppEngine
     except ImportError:
-        return None
+        raise ValueError("Llama cpp engine not available")
 
     include = {
         "n_gpu_layers",
@@ -172,12 +172,12 @@ def create_llama_cpp_engine():
 
 
 def create_tgi_engine():
-    """ get llama.cpp generate engine for chat or completion. """
+    """ get tgi generate engine for chat or completion. """
     try:
         from text_generation import AsyncClient
         from api.core.tgi import TGIEngine
     except ImportError:
-        return None
+        raise ValueError("TGI engine not available")
 
     client = AsyncClient(SETTINGS.tgi_endpoint)
     logger.info("Using TGI engine")
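Across all three engine factories the commit replaces a silent `return None` with an explicit `ValueError`, so a deployment missing its optional dependency fails at engine-creation time instead of surfacing later as a confusing `NoneType` error. A sketch of that pattern (the `load_optional_engine` helper below is hypothetical, not part of the repo):

```python
def load_optional_engine(importer, name):
    """Hypothetical helper mirroring the commit's pattern: attempt an
    optional import and raise ValueError when it is unavailable,
    rather than returning None and deferring the failure."""
    try:
        return importer()
    except ImportError:
        # Fail loudly at engine-creation time, as the commit now does.
        raise ValueError(f"{name} engine not available")


def fake_vllm_import():
    # Stand-in for `from vllm... import AsyncLLMEngine` on a machine
    # where vllm is not installed.
    raise ImportError("No module named 'vllm'")


try:
    load_optional_engine(fake_vllm_import, "VLLM")
except ValueError as exc:
    print(exc)  # -> VLLM engine not available
```

The trade-off is that callers can no longer probe for an engine by checking the return value; any call site that previously branched on `None` must now catch the exception.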
