[Bugfix] Further clean up LoRA test #14422


Merged · 1 commit · Mar 7, 2025
5 changes: 0 additions & 5 deletions tests/lora/conftest.py
@@ -185,11 +185,6 @@ def mixtral_lora_files():
    return snapshot_download(repo_id="SangBinCho/mixtral-lora")


@pytest.fixture(scope="session")
def mixtral_lora_files_all_target_modules():
    return snapshot_download(repo_id="dyang415/mixtral-lora-v0")


@pytest.fixture(scope="session")
def gemma_lora_files():
    return snapshot_download(repo_id="wskwon/gemma-7b-test-lora")
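For context, these session-scoped fixtures download LoRA adapter weights from the Hugging Face Hub once per test session and return the local snapshot path. A minimal sketch of how a test could consume one of the remaining fixtures (the test body, base model name, prompt, and sampling settings are illustrative assumptions, not repository code):

```python
import vllm
from vllm.lora.request import LoRARequest


# Hypothetical consumer: pytest injects the local path returned by the
# session-scoped gemma_lora_files fixture defined in conftest.py.
def test_gemma_lora_smoke(gemma_lora_files):
    llm = vllm.LLM("google/gemma-7b", enable_lora=True)
    outputs = llm.generate(
        ["Quote: Imagination is"],
        vllm.SamplingParams(temperature=0, max_tokens=32),
        # lora_request points the engine at the downloaded adapter weights.
        lora_request=LoRARequest("gemma-lora", 1, gemma_lora_files),
    )
    assert outputs[0].outputs[0].text
```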
42 changes: 0 additions & 42 deletions tests/lora/test_mixtral.py
@@ -69,45 +69,3 @@ def test_mixtral_lora(mixtral_lora_files, tp_size):
                     prompts=prompts) == expected_lora_output
    assert do_sample(llm, mixtral_lora_files, lora_id=2,
                     prompts=prompts) == expected_lora_output


@pytest.mark.parametrize("tp_size", [4])
@pytest.mark.parametrize("fully_shard", [True, False])
def test_mixtral_lora_all_target_modules(mixtral_lora_files_all_target_modules,
                                         tp_size, fully_shard):
    """This LoRA model has all supported Mixtral target modules"""

    if torch.cuda.device_count() < tp_size:
        pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")

    prompts = [
        "Question: What happens if you touch the eyes of a blind man?\nChoices:\nB: The man's sight will be restored.\nA: Nothing happens if you touch the eyes of a blind man.\nC: The man will get his eyesight back.\nD: The man will no longer be blind.\nAnswer:",  # noqa: E501
        "Question: Which will increase the energy of motion of water molecules?\nChoices:\nA: add heat\nB: add ice\nC: remove some of the water\nD: place the water in the freezer\nAnswer:",  # noqa: E501
        "Since Craig threw aluminum cans in the trash and Benjamin recycled, _ was environmentally irresponsible.\nChoices:\n1: Craig\n2: Benjamin\nAnswer:",  # noqa: E501
    ]

    llm = vllm.LLM(
        MODEL_PATH,
        enable_lora=True,
        max_num_seqs=16,
        max_loras=4,
        distributed_executor_backend="ray",
        tensor_parallel_size=tp_size,
        fully_sharded_loras=fully_shard,
        max_lora_rank=32,
    )

    expected_lora_output = [
        "A: Nothing happens if you touch the eyes of a blind man.",
        "A: add heat",
        "1: Craig",
    ]

    assert do_sample(llm,
                     mixtral_lora_files_all_target_modules,
                     lora_id=1,
                     prompts=prompts) == expected_lora_output
    assert do_sample(llm,
                     mixtral_lora_files_all_target_modules,
                     lora_id=2,
                     prompts=prompts) == expected_lora_output
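For reference, `do_sample` is the shared helper these assertions call. A minimal sketch of its likely shape (assumed, not the exact implementation in tests/lora/test_mixtral.py; the real helper's sampling parameters and post-processing may differ):

```python
import vllm
from vllm.lora.request import LoRARequest


# Minimal sketch of a do_sample-style helper: greedy decoding with an optional
# LoRA adapter, returning only the generated text for exact comparison.
def do_sample(llm, lora_path, lora_id, prompts):
    sampling_params = vllm.SamplingParams(temperature=0, max_tokens=64)
    outputs = llm.generate(
        prompts,
        sampling_params,
        # lora_id selects the adapter slot; lora_path is the downloaded snapshot.
        lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
        if lora_id else None,
    )
    return [output.outputs[0].text.strip() for output in outputs]
```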
3 changes: 2 additions & 1 deletion tests/lora/test_quant_model.py
@@ -178,7 +178,8 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
                                 model):
    if num_gpus_available < 2:
        pytest.skip(f"Not enough GPUs for tensor parallelism {2}")

    if model.quantization == "GPTQ":
        pytest.skip("GPTQ lora outputs are just incredibly unstable")
    llm_tp1 = vllm.LLM(
        model=model.model_path,
        enable_lora=True,
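The added guard skips the tensor-parallel equality comparison for GPTQ-quantized models, whose LoRA outputs are too unstable to compare exactly. A condensed sketch of the pattern this test follows (the loop structure, cleanup step, and constructor arguments are assumptions, not the exact repository code; `do_sample` is the helper sketched above):

```python
import gc

import pytest
import torch
import vllm


# Condensed sketch of the TP-equality check: generate with the same LoRA at
# tensor_parallel_size=1 and 2, then require identical outputs.
def check_tp_equality(model, tinyllama_lora_files, prompts):
    if model.quantization == "GPTQ":
        pytest.skip("GPTQ lora outputs are just incredibly unstable")

    outputs = []
    for tp_size in (1, 2):
        llm = vllm.LLM(
            model=model.model_path,
            enable_lora=True,
            max_num_seqs=16,
            max_loras=4,
            quantization=model.quantization,
            tensor_parallel_size=tp_size,
        )
        outputs.append(
            do_sample(llm, tinyllama_lora_files, lora_id=1, prompts=prompts))
        # Tear down the engine before building the next one so both fit on the GPUs.
        del llm
        gc.collect()
        torch.cuda.empty_cache()

    # LoRA outputs should not depend on the tensor-parallel degree.
    assert outputs[0] == outputs[1]
```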