File tree Expand file tree Collapse file tree 1 file changed +41
-2
lines changed Expand file tree Collapse file tree 1 file changed +41
-2
lines changed Original file line number Diff line number Diff line change @@ -22,7 +22,7 @@ docker build -f docker/Dockerfile.vllm -t llm-api:vllm .
22
22
+ `tokenizer-mode` (可选项): `tokenizer` 的模式,默认为 `auto`
23
23
24
24
25
- + `tensor_parallel_size` (可选项): `GPU` 数量,默认为 `1`
25
+ + `tensor-parallel-size` (可选项): `GPU` 数量,默认为 `1`
26
26
27
27
28
28
+ `embedding_name` (可选项): 嵌入模型的文件所在路径,推荐使用 `moka-ai/m3e-base` 或者 `BAAI/bge-large-zh`
@@ -47,5 +47,44 @@ docker run -it -d --gpus all --ipc=host --net=host -p 80:80 --name=qwen \
47
47
--model_name qwen \
48
48
--model Qwen/Qwen-7B-Chat \
49
49
--trust-remote-code \
50
- --tokenizer-mode slow
50
+ --tokenizer-mode slow \
51
+ --dtype half
52
+ ```
53
+
54
+ ### InternLM
55
+
56
+ internlm-chat-7b:
57
+
58
+ ``` shell
59
+ docker run -it -d --gpus all --ipc=host --net=host -p 80:80 --name=internlm \
60
+ --ulimit memlock=-1 --ulimit stack=67108864 \
61
+ -v `pwd`:/workspace \
62
+ llm-api:vllm \
63
+ python api/vllm_server.py \
64
+ --port 80 \
65
+ --allow-credentials \
66
+ --model_name internlm \
67
+ --model internlm/internlm-chat-7b \
68
+ --trust-remote-code \
69
+ --tokenizer-mode slow \
70
+ --dtype half
71
+ ```
72
+
73
+ ### Baichuan-13b-chat
74
+
75
+ baichuan-inc/Baichuan-13B-Chat:
76
+
77
+ ``` shell
78
+ docker run -it -d --gpus all --ipc=host --net=host -p 80:80 --name=baichuan-13b-chat \
79
+ --ulimit memlock=-1 --ulimit stack=67108864 \
80
+ -v `pwd`:/workspace \
81
+ llm-api:vllm \
82
+ python api/vllm_server.py \
83
+ --port 80 \
84
+ --allow-credentials \
85
+ --model_name baichuan-13b-chat \
86
+ --model baichuan-inc/Baichuan-13B-Chat \
87
+ --trust-remote-code \
88
+ --tokenizer-mode slow \
89
+ --dtype half
51
90
```
You can’t perform that action at this time.
0 commit comments