
Commit 9f70e2d

jeejeelee authored and 0xrushi committed
[Core] Relax the LoRA max rank (vllm-project#26461)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Signed-off-by: 0xrushi <6279035+0xrushi@users.noreply.github.com>
1 parent 6015d4e commit 9f70e2d

2 files changed: +5 -5 lines changed

vllm/config/lora.py

Lines changed: 1 addition & 1 deletion

@@ -103,7 +103,7 @@ def __post_init__(self):
         # Setting the maximum rank to 512 should be able to satisfy the vast
         # majority of applications.
-        possible_max_ranks = (8, 16, 32, 64, 128, 256, 320, 512)
+        possible_max_ranks = (1, 8, 16, 32, 64, 128, 256, 320, 512)
         possible_lora_extra_vocab_size = (256, 512)
         if self.max_lora_rank not in possible_max_ranks:
             raise ValueError(
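
In effect, the relaxed check now accepts rank-1 adapters. A minimal standalone sketch of the validation after this change (the helper name check_max_lora_rank and the exact error wording are illustrative, not copied from vLLM):

# Standalone reproduction of the rank check in LoRAConfig.__post_init__;
# rank 1 is now in the allowed set alongside the previously supported ranks.
POSSIBLE_MAX_RANKS = (1, 8, 16, 32, 64, 128, 256, 320, 512)

def check_max_lora_rank(max_lora_rank: int) -> None:
    # Hypothetical helper mirroring the ValueError raised in vllm/config/lora.py.
    if max_lora_rank not in POSSIBLE_MAX_RANKS:
        raise ValueError(
            f"max_lora_rank ({max_lora_rank}) must be one of {POSSIBLE_MAX_RANKS}."
        )

check_max_lora_rank(1)    # accepted after this change
check_max_lora_rank(512)  # still accepted
# check_max_lora_rank(4) would still raise ValueError.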

vllm/v1/worker/lora_model_runner_mixin.py

Lines changed: 4 additions & 4 deletions

@@ -28,8 +28,6 @@
 
 # Defined as a mixin for GPUModelRunner
 class LoRAModelRunnerMixin:
-    LORA_WARMUP_RANK = 8
-
     def load_lora_model(
         self, model: nn.Module, vllm_config: VllmConfig, device: torch.device
     ) -> nn.Module:

@@ -96,7 +94,9 @@ def maybe_setup_dummy_loras(
         assert self.lora_manager is not None, "LoRA is not enabled"
 
         num_loras = lora_config.max_loras
-
+        lora_warmup_rank = (
+            lora_config.max_lora_rank if lora_config.max_lora_rank < 8 else 8
+        )
         # Make dummy lora requests
         lora_requests: set[LoRARequest] = {
             LoRARequest(

@@ -111,7 +111,7 @@ def maybe_setup_dummy_loras(
         # Add the dummy LoRAs here so _set_active_loras doesn't try to
         # load from disk.
         for lr in lora_requests:
-            self.lora_manager.add_dummy_lora(lr, rank=self.LORA_WARMUP_RANK)
+            self.lora_manager.add_dummy_lora(lr, rank=lora_warmup_rank)
 
         yield
 
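
The new lora_warmup_rank expression replaces the class-level LORA_WARMUP_RANK = 8 constant, so warmup dummy LoRAs never exceed the configured max_lora_rank. A standalone sketch of the clamp (the helper name pick_warmup_rank is illustrative):

# Same conditional expression as in maybe_setup_dummy_loras: warm up at rank 8
# unless the configured max_lora_rank is smaller (e.g. the newly allowed rank 1).
def pick_warmup_rank(max_lora_rank: int) -> int:
    return max_lora_rank if max_lora_rank < 8 else 8

assert pick_warmup_rank(1) == 1    # rank-1 configs warm up with rank-1 dummies
assert pick_warmup_rank(8) == 8
assert pick_warmup_rank(64) == 8   # larger configs still warm up at rank 8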

0 commit comments
