File tree Expand file tree Collapse file tree 4 files changed +5
-5
lines changed Expand file tree Collapse file tree 4 files changed +5
-5
lines changed Original file line number Diff line number Diff line change @@ -203,8 +203,8 @@ class VLLMSetting(BaseModel):
203
203
default = get_bool_env ("ENFORCE_EAGER" ),
204
204
description = "Always use eager-mode PyTorch. If False, will use eager mode and CUDA graph in hybrid for maximal performance and flexibility."
205
205
)
206
- max_context_len_to_capture : Optional [int ] = Field (
207
- default = int (get_env ("MAX_CONTEXT_LEN_TO_CAPTURE " , 8192 )),
206
+ max_seq_len_to_capture : Optional [int ] = Field (
207
+ default = int (get_env ("MAX_SEQ_LEN_TO_CAPTURE " , 8192 )),
208
208
description = "Maximum context length covered by CUDA graphs. When a sequence has context length larger than this, we fall back to eager mode."
209
209
)
210
210
max_loras : Optional [int ] = Field (
Original file line number Diff line number Diff line change @@ -107,7 +107,7 @@ def create_vllm_engine():
107
107
"gpu_memory_utilization" ,
108
108
"max_num_seqs" ,
109
109
"enforce_eager" ,
110
- "max_context_len_to_capture " ,
110
+ "max_seq_len_to_capture " ,
111
111
"max_loras" ,
112
112
"max_lora_rank" ,
113
113
"lora_extra_vocab_size" ,
Original file line number Diff line number Diff line change @@ -6,6 +6,6 @@ COPY requirements.txt /workspace/
6
6
7
7
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
8
8
pip install bitsandbytes --upgrade && \
9
- pip install vllm==0.4.0 && \
9
+ pip install vllm==0.4.2 && \
10
10
pip install --no-cache-dir -r /workspace/requirements.txt && \
11
11
pip uninstall transformer-engine -y
Original file line number Diff line number Diff line change @@ -16,7 +16,7 @@ docker build -f docker/Dockerfile.vllm -t llm-api:vllm .
16
16
17
17
``` shell
18
18
pip install torch==2.1.0
19
- pip install vllm==0.4.0
19
+ pip install vllm==0.4.2
20
20
pip install -r requirements.txt
21
21
pip uninstall transformer-engine -y
22
22
```
You can’t perform that action at this time.
0 commit comments