Skip to content

Commit

Permalink
[LoRA] Relax LoRA condition (vllm-project#7146)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeejeelee authored Aug 6, 2024
1 parent e3c664b commit 9118217
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 7 deletions.
2 changes: 1 addition & 1 deletion tests/lora/test_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ def create_random_embedding_layer():
@torch.inference_mode()
@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
@pytest.mark.parametrize("device", CUDA_DEVICES)
@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 128000])
@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 256512])
@pytest.mark.parametrize("stage", STAGES)
def test_lm_head_logits_processor(dist_init, num_loras, device, vocab_size,
stage) -> None:
Expand Down
2 changes: 1 addition & 1 deletion tests/lora/test_punica_variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
BATCHES = [1, 4, 16, 32]
NUM_LORA = [1, 4, 8, 16, 32, 64, 128]
DTYPES = [torch.float16, torch.bfloat16]
MAX_RANKS = [1, 4, 8, 16, 32, 64, 128]
MAX_RANKS = [1, 4, 8, 16, 32, 64, 128, 256]
SCALES = [0.5]
SEED = [0]
CUDA_DEVICES = [f"cuda:{0}"]
Expand Down
5 changes: 3 additions & 2 deletions vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1311,8 +1311,9 @@ class LoRAConfig:
long_lora_scaling_factors: Optional[Tuple[float]] = None

def __post_init__(self):
# TODO: Increase the range of rank
possible_max_ranks = (8, 16, 32, 64)
# Setting the maximum rank to 256 should be able to satisfy the vast
# majority of applications.
possible_max_ranks = (8, 16, 32, 64, 128, 256)
possible_lora_extra_vocab_size = (0, 256, 512)
if self.max_lora_rank not in possible_max_ranks:
raise ValueError(
Expand Down
6 changes: 3 additions & 3 deletions vllm/lora/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,10 +1073,10 @@ def create_lora_weights(
lora_config: LoRAConfig,
model_config: Optional[PretrainedConfig] = None,
) -> None:
# TODO: Verify if this condition can be relaxed
if 32000 < self.base_layer.vocab_size > 128512:
# TODO: Verify if this condition can be further relaxed
if 32000 < self.base_layer.vocab_size > 257024:
raise ValueError("When using LoRA, vocab size must be "
"32000 >= vocab_size <= 128512")
"32000 >= vocab_size <= 257024")
self.lora_a_stacked = torch.zeros(
(
max_loras,
Expand Down

0 comments on commit 9118217

Please sign in to comment.