Your current environment
File "/pkg/modal/_container_io_manager.py", line 488, in handle_input_exception
yield
File "/pkg/modal/_container_entrypoint.py", line 260, in run_input
value = await res
File "/root/modal_exp_auto_wow_predictions_client.py", line 75, in complete_adapter_prompts
results = llm.generate(prompts, sampling_params, lora_request=lora_request)
File "/opt/conda/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 214, in generate
return self._run_engine(use_tqdm)
File "/opt/conda/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 242, in _run_engine
step_outputs = self.llm_engine.step()
File "/opt/conda/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 557, in step
seq_group_metadata_list, scheduler_outputs = self.scheduler.schedule()
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 890, in schedule
scheduler_outputs = self._schedule()
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 863, in _schedule
return self._schedule_default()
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 722, in _schedule_default
remaining_running, running_scheduled = self._schedule_running(
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 407, in _schedule_running
curr_loras.remove(seq_group.lora_int_id)
KeyError: 1
Container image used on Modal:

import modal

image = (
    modal.Image.from_registry("pytorch/pytorch:2.2.1-cuda12.1-cudnn8-devel")  # .debian_slim()
    .apt_install("git")
    .pip_install(
        "vllm==0.4.1",
        "torch==2.2.1",
        "transformers==4.40.1",
        "huggingface_hub==0.19.4",
        "hf-transfer==0.1.4",
    )
    .pip_install("setuptools==65.5", "packaging==23.2", "ninja==1.11.1.1")
    .pip_install("flash-attn==2.5.8", "--no-build-isolation")
    .pip_install("google-cloud-bigquery")
    .pip_install("google-cloud-storage")
)
This uses vanilla LoRA with Gemma (it has also happened to me with other models).
vLLM version: 0.4.1
🐛 Describe the bug
This doesn't look like it's tied to batch size or anything similar; when re-running the same prompts, it sometimes passes.
LLM init code (this runs on an H100 node):
import vllm

llm = vllm.LLM(
    'google/gemma-1.1-7b-it',
    enforce_eager=True,
    tensor_parallel_size=1,
    enable_lora=True,
    max_loras=1,
    max_lora_rank=64,
    max_cpu_loras=8,
    max_model_len=5000,
    gpu_memory_utilization=0.85,
    enable_prefix_caching=True,
)
sampling_params = vllm.SamplingParams(  # TODO: move to parameter
    temperature=0.0,
    top_p=0.99,
    max_tokens=720,
    presence_penalty=0.07,
)
results = llm.generate(prompts, sampling_params, lora_request=lora_request)
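For reference, prompts and lora_request are not defined in the snippet above; a minimal sketch of how the LoRA request is presumably built (the adapter name, path, and example prompt are placeholders, not taken from the report):

from vllm.lora.request import LoRARequest

# Hypothetical adapter name/path; lora_int_id=1 matches the id in the KeyError below.
lora_request = LoRARequest("my_adapter", 1, "/root/adapters/my_adapter")
prompts = ["<start_of_turn>user\nHello<end_of_turn>\n<start_of_turn>model\n"]  # example Gemma-format prompt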
Processed prompts: 0%| | 0/214 [00:00<?, ?it/s]Traceback (most recent call last):
File "/pkg/modal/_container_io_manager.py", line 488, in handle_input_exception
yield
File "/pkg/modal/_container_entrypoint.py", line 260, in run_input
value = await res
File "/root/modal_exp_auto_wow_predictions_client.py", line 75, in complete_adapter_prompts
results = llm.generate(prompts, sampling_params, lora_request=lora_request)
File "/opt/conda/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 214, in generate
return self._run_engine(use_tqdm)
File "/opt/conda/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 242, in _run_engine
step_outputs = self.llm_engine.step()
File "/opt/conda/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 557, in step
seq_group_metadata_list, scheduler_outputs = self.scheduler.schedule()
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 890, in schedule
scheduler_outputs = self._schedule()
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 863, in _schedule
return self._schedule_default()
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 722, in _schedule_default
remaining_running, running_scheduled = self._schedule_running(
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 407, in _schedule_running
curr_loras.remove(seq_group.lora_int_id)
KeyError: 1
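To illustrate the failure mode: curr_loras appears to be the scheduler's collection of currently active LoRA ids, and set.remove raises KeyError when the element isn't present, which is the same shape as the error above (this is just a minimal illustration of the Python behavior, not the vLLM scheduler code):

curr_loras = set()      # lora_int_id 1 was never added, or was already removed
curr_loras.remove(1)    # raises KeyError: 1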
I didn't find any existing issue related to this, so I'm opening one.