@@ -60,13 +60,15 @@ RUN pip install --no-cache-dir starlette==0.49.1
RUN pip install --no-cache-dir "cryptography>=43.0.0"
# Upgrade wandb to fix golang.org/x/crypto vulnerabilities (CVE-2025-47914, CVE-2025-58181)
RUN pip install --no-cache-dir --upgrade "wandb>=0.23.0"
RUN MAX_JOBS=$(nproc) pip install --no-cache-dir --upgrade flash-attn==2.8.3 --no-build-isolation
RUN pip install --no-cache-dir triton==3.4.0
RUN pip install xgrammar==0.1.27
RUN pip install torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0 --index-url https://download.pytorch.org/whl/cu126
COPY vllm_async_server /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/workers/rollout/vllm_rollout/vllm_async_server.py
COPY __init__ /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/utils/reward_score/__init__.py
COPY azure_grader /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/utils/reward_score/azure_grader.py
COPY azure_python_grader /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/utils/reward_score/azure_python_grader.py
COPY utils /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/utils/vllm/utils.py
RUN python3 -m pip install --upgrade pip setuptools wheel
RUN pip install vllm==0.13.0
RUN pip install openai==2.14.0
RUN pip install --force-reinstall --no-cache-dir --no-build-isolation git+https://github.com/deepseek-ai/DeepGEMM.git@c9f8b34dcdacc20aa746b786f983492c51072870
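
The image pins a tightly coupled inference stack (torch 2.9.0/cu126, vLLM 0.13.0, flash-attn 2.8.3, triton 3.4.0, xgrammar 0.1.27, openai 2.14.0) and overwrites several files inside the installed verl package. Below is a minimal post-build sanity check, sketched under the assumption that it runs inside the built image's ptca environment; the exact set of packages and modules checked is illustrative, not part of this diff:

# Illustrative post-build sanity check: confirm the pinned packages resolved and the
# patched verl modules landed where the COPY lines put them (run inside the ptca env).
import importlib
import importlib.metadata as md

EXPECTED_PINS = {"torch": "2.9.0", "vllm": "0.13.0", "triton": "3.4.0", "xgrammar": "0.1.27", "openai": "2.14.0"}
for pkg, want in EXPECTED_PINS.items():
    got = md.version(pkg)
    print(f"{pkg}: expected {want}, installed {got} [{'OK' if got.startswith(want) else 'MISMATCH'}]")

PATCHED_MODULES = [
    "verl.workers.rollout.vllm_rollout.vllm_async_server",
    "verl.utils.reward_score.azure_grader",
    "verl.utils.reward_score.azure_python_grader",
    "verl.utils.vllm.utils",
]
for mod in PATCHED_MODULES:
    importlib.import_module(mod)
    print(f"patched module importable: {mod}")
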
@@ -20,7 +20,7 @@ pyarrow==21.0.0
pybind11==3.0.0
pylatexenc==2.10
qwen-vl-utils==0.0.10
-ray[default]==2.52.0
+ray[default]==2.53.0
tensorboard==2.20.0
tensordict==0.9.1
torchdata==0.11.0
@@ -0,0 +1,104 @@
from msgspec import field
from packaging import version as vs
from vllm.lora.lora_model import LoRAModel
from vllm.lora.request import LoRARequest
from vllm.lora.utils import get_adapter_absolute_path
from vllm.lora.worker_manager import LRUCacheWorkerLoRAManager

from verl.third_party.vllm import get_version


class TensorLoRARequest(LoRARequest):
    peft_config: dict = field(default=None)
    lora_tensors: dict = field(default=None)


class VLLMHijack:
    @staticmethod
    def hijack():
        def hijack__load_adapter(self, lora_request: TensorLoRARequest) -> LoRAModel:
            """
            Based on vllm.lora.worker_manager.WorkerLoRAManager._load_adapter; adds support for loading an
            adapter from in-memory LoRA tensors.

            Reason:
                vLLM only supports adding a LoRA adapter from a file path, not directly from tensors.
                To synchronize the actor model's LoRA tensors into the rollout engine, we need a workaround
                that lets vLLM load memory-resident LoRA tensors.
            """
            try:
                supported_lora_modules = self._adapter_manager.supported_lora_modules
                packed_modules_mapping = self._adapter_manager.packed_modules_mapping
                expected_lora_modules: list[str] = []
                for module in supported_lora_modules:
                    if module in packed_modules_mapping:
                        expected_lora_modules.extend(packed_modules_mapping[module])
                    else:
                        expected_lora_modules.append(module)

                expected_lora_modules = list(set(expected_lora_modules))

                lora_tensors = None
                from vllm.lora.peft_helper import PEFTHelper

                if isinstance(lora_request, TensorLoRARequest):
                    peft_config = lora_request.peft_config
                    lora_tensors = lora_request.lora_tensors
                    peft_helper = PEFTHelper.from_dict(peft_config)
                else:
                    lora_path = get_adapter_absolute_path(lora_request.lora_path)

                    peft_helper = PEFTHelper.from_local_dir(lora_path, self.max_position_embeddings)

                # Validates the LoRA configuration against requirements before
                # loading weights, throwing an exception if validation fails.
                peft_helper.validate_legal(self.lora_config)

                # For some models like Qwen2VL, we need to use hf_to_vllm_mapper
                # to ensure correct loading of lora weights.
                model = self._adapter_manager.model
                hf_to_vllm_mapper = None
                if hasattr(model, "hf_to_vllm_mapper") and model.hf_to_vllm_mapper is not None:
                    hf_to_vllm_mapper = model.hf_to_vllm_mapper
                # vLLM 0.13.0 compatibility: lora_extra_vocab_size was removed
                lora_extra_vocab_size = getattr(self.lora_config, "lora_extra_vocab_size", 0)

                # vLLM 0.13.0 compatibility: embedding_padding_modules may not exist
                embedding_padding_modules = getattr(self, "embedding_padding_modules", {})
                if isinstance(lora_request, TensorLoRARequest):
                    print(f"Lora module class is {self._lora_model_cls}")
                    lora = self._lora_model_cls.from_lora_tensors(
                        lora_model_id=lora_request.lora_int_id,
                        tensors=lora_tensors,
                        peft_helper=peft_helper,
                        device="cpu",
                        dtype=self.lora_config.lora_dtype,
                        model_vocab_size=self.vocab_size + lora_extra_vocab_size,
                        weights_mapper=hf_to_vllm_mapper,
                    )
                else:
                    lora = self._lora_model_cls.from_local_checkpoint(
                        lora_path,
                        expected_lora_modules,
                        peft_helper=peft_helper,
                        lora_model_id=lora_request.lora_int_id,
                        device="cpu",
                        dtype=self.lora_config.lora_dtype,
                        target_embedding_padding=self.vocab_size + lora_extra_vocab_size,
                        embedding_modules=self.embedding_modules,
                        embedding_padding_modules=embedding_padding_modules,
                        weights_mapper=hf_to_vllm_mapper,
                    )
            except Exception as e:
                raise e

            return lora

        def do_hijack(target_cls, target_method_name, hooking_method):
            setattr(target_cls, target_method_name, hooking_method)

        do_hijack(LRUCacheWorkerLoRAManager, "_load_adapter", hijack__load_adapter)


def is_version_ge(pkg: str = "vllm", minver: str = "0.7.3"):
    """Check if the package version is greater than or equal to the minimum version."""
    return vs.parse(get_version(pkg)) >= vs.parse(minver)
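
For context, here is a rough usage sketch of how the patched manager and TensorLoRARequest could fit together; it is not part of this diff. The model name, adapter name/id, and the adapter directory used to build peft_config and lora_tensors are illustrative assumptions (in verl the tensors would come directly from the actor model's memory rather than from disk):

# Hypothetical usage sketch (not part of this diff): hand in-memory LoRA tensors to vLLM
# through the hijacked _load_adapter. TensorLoRARequest / VLLMHijack come from the module above.
import json

from safetensors.torch import load_file
from vllm import LLM, SamplingParams

VLLMHijack.hijack()  # patch LRUCacheWorkerLoRAManager._load_adapter before any adapter is loaded

# For illustration, build peft_config / lora_tensors from a saved PEFT adapter;
# in verl the tensors would come straight from the actor model's weights.
adapter_dir = "/path/to/actor_lora_adapter"  # illustrative path
with open(f"{adapter_dir}/adapter_config.json") as f:
    peft_config = json.load(f)
lora_tensors = load_file(f"{adapter_dir}/adapter_model.safetensors")

llm = LLM(model="Qwen/Qwen2.5-7B-Instruct", enable_lora=True)  # model choice is an assumption

request = TensorLoRARequest(
    lora_name="actor_lora",  # illustrative adapter name
    lora_int_id=1,
    lora_path="in_memory",  # not read when tensors are supplied, but required by LoRARequest
    peft_config=peft_config,
    lora_tensors=lora_tensors,
)

outputs = llm.generate(["Hello, world"], SamplingParams(max_tokens=32), lora_request=request)
print(outputs[0].outputs[0].text)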