Your current environment
Container image used on Modal:

import modal

image = (
    modal.Image.from_registry("pytorch/pytorch:2.2.1-cuda12.1-cudnn8-devel")  # .debian_slim()
    .apt_install("git")
    .pip_install(
        "vllm==0.4.1",
        "torch==2.2.1",
        "transformers==4.40.1",
        "huggingface_hub==0.19.4",
        "hf-transfer==0.1.4",
    )
    .pip_install("setuptools==65.5", "packaging==23.2", "ninja==1.11.1.1")
    .pip_install("flash-attn==2.5.8", "--no-build-isolation")
    .pip_install("google-cloud-bigquery")
    .pip_install("google-cloud-storage")
)
This is using vanilla LoRA with Gemma (the same error has also happened with other models).
vLLM version: 0.4.1
🐛 Describe the bug
This doesn't look like it's tied to batch size or anything similar; retrying the same code with the same prompts sometimes succeeds.
LLM init code (this runs on an H100 node):
import vllm

llm = vllm.LLM(
    'google/gemma-1.1-7b-it',
    enforce_eager=True,
    tensor_parallel_size=1,
    enable_lora=True,
    max_loras=1,
    max_lora_rank=64,
    max_cpu_loras=8,
    max_model_len=5000,
    gpu_memory_utilization=0.85,
    enable_prefix_caching=True,
)
sampling_params = vllm.SamplingParams(  # TODO: move to a parameter
    temperature=0.0,
    top_p=0.99,
    max_tokens=720,
    presence_penalty=0.07,
)
results = llm.generate(prompts, sampling_params, lora_request=lora_request)
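For completeness, the lora_request object is not defined in the snippet above. A minimal sketch of how it is presumably constructed with vLLM 0.4.x's LoRARequest follows; the adapter name and path are placeholders, not the actual values from this report:

from vllm.lora.request import LoRARequest

# Hypothetical adapter: name and path are placeholders. The integer id
# matches the id seen in the KeyError below (the first/only adapter).
lora_request = LoRARequest(
    lora_name="my-adapter",
    lora_int_id=1,
    lora_local_path="/path/to/adapter",
)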
As mentioned, retrying with the same prompts sometimes succeeds; when it fails, this is the output:
Processed prompts: 0%| | 0/214 [00:00<?, ?it/s]Traceback (most recent call last):
File "/pkg/modal/_container_io_manager.py", line 488, in handle_input_exception
yield
File "/pkg/modal/_container_entrypoint.py", line 260, in run_input
value = await res
File "/root/modal_exp_auto_wow_predictions_client.py", line 75, in complete_adapter_prompts
results = llm.generate(prompts, sampling_params, lora_request=lora_request)
File "/opt/conda/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 214, in generate
return self._run_engine(use_tqdm)
File "/opt/conda/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 242, in _run_engine
step_outputs = self.llm_engine.step()
File "/opt/conda/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 557, in step
seq_group_metadata_list, scheduler_outputs = self.scheduler.schedule()
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 890, in schedule
scheduler_outputs = self._schedule()
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 863, in _schedule
return self._schedule_default()
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 722, in _schedule_default
remaining_running, running_scheduled = self._schedule_running(
File "/opt/conda/lib/python3.10/site-packages/vllm/core/scheduler.py", line 407, in _schedule_running
curr_loras.remove(seq_group.lora_int_id)
KeyError: 1
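For context on the exception itself: the failing line removes the sequence group's LoRA id from curr_loras, a set of currently scheduled LoRA ids, and Python's set.remove raises KeyError when the element is absent. In other words, the scheduler apparently tries to remove LoRA id 1 after it was already removed or was never added. A generic illustration (not the vLLM scheduler code):

# set.remove raises KeyError when the element is missing,
# which is what scheduler.py line 407 hits with lora_int_id == 1.
curr_loras = set()        # id 1 not present
curr_loras.discard(1)     # tolerant variant: no error
curr_loras.remove(1)      # KeyError: 1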
I didn't find any existing issue related to this, so I'm opening one.