[CI/Build] Remove V0 LoRA test #19066

Merged
1 commit merged on Jun 3, 2025
21 changes: 2 additions & 19 deletions tests/lora/test_add_lora.py
@@ -6,6 +6,8 @@

import vllm.envs as env
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args)
from vllm.inputs import TextPrompt
from vllm.lora.request import LoRARequest
from vllm.sampling_params import SamplingParams
@@ -16,14 +18,6 @@
DEFAULT_MAX_LORAS = 4 * 3


@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass


def get_lora_requests(lora_path) -> list[LoRARequest]:
lora_requests: list[LoRARequest] = [
LoRARequest(lora_name=f"{i}", lora_int_id=i, lora_path=lora_path)
@@ -88,17 +82,6 @@ async def test_add_lora(chatglm3_lora_files):
trust_remote_code=True,
enforce_eager=True)

# The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
# environment variable. Reload vllm.engine.async_llm_engine, as
# vllm.engine.async_llm_engine.AsyncLLMEngine changes depending on the
# env var.
import importlib

import vllm.engine.async_llm_engine
importlib.reload(vllm.engine.async_llm_engine)
from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args)

# split lora_requests into 3 parts
part_size = len(lora_requests) // 3
dummy_run_requests = lora_requests[:part_size]
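Note: the `v1` fixture and the importlib-reload block removed above existed only to run each test under both the V0 and V1 engines. As a rough illustration (not the actual conftest.py code — the fixture name comes from this diff, but the parametrization and monkeypatch details are assumptions), a conftest-level version of that pattern could look like:

import importlib

import pytest


@pytest.fixture(params=["0", "1"])
def run_with_both_engines_lora(request, monkeypatch):
    # Run the dependent test once per engine by toggling VLLM_USE_V1.
    monkeypatch.setenv("VLLM_USE_V1", request.param)
    # Modules that bind AsyncLLMEngine at import time must be reloaded so the
    # engine choice reflects the new environment variable.
    import vllm.engine.async_llm_engine
    importlib.reload(vllm.engine.async_llm_engine)
    yield

With V0 support dropped, the tests only ever see the V1 engine, so the env-var toggle and the module reload are no longer needed and the top-level import of build_async_engine_client_from_engine_args can be used directly.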
10 changes: 0 additions & 10 deletions tests/lora/test_chatglm3_tp.py
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0

import pytest

import vllm
from vllm.lora.request import LoRARequest

@@ -18,14 +16,6 @@
]


@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass


def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
prompts = [
PROMPT_TEMPLATE.format(query="How many singers do we have?"),
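For reference, a minimal sketch of how a do_sample-style helper drives a LoRA adapter through vllm.LLM.generate (the model and adapter paths below are placeholders, and the prompt and sampling settings are assumptions rather than the test's actual values):

import vllm
from vllm.lora.request import LoRARequest

MODEL_PATH = "path/or/hub-id/of-base-model"  # placeholder
LORA_PATH = "path/to/lora-adapter"           # placeholder


def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
    prompts = ["How many singers do we have?"]
    sampling_params = vllm.SamplingParams(temperature=0, max_tokens=32)
    outputs = llm.generate(
        prompts,
        sampling_params,
        # lora_id > 0 selects the adapter; passing None runs the base model.
        lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
        if lora_id else None,
    )
    return [o.outputs[0].text.strip() for o in outputs]


if __name__ == "__main__":
    llm = vllm.LLM(model=MODEL_PATH, enable_lora=True, enforce_eager=True)
    print(do_sample(llm, LORA_PATH, lora_id=1))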
8 changes: 0 additions & 8 deletions tests/lora/test_llama_tp.py
@@ -33,14 +33,6 @@
]


@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass


def do_sample(llm: vllm.LLM,
lora_path: str,
lora_id: int,
34 changes: 8 additions & 26 deletions tests/lora/test_lora_functions.py
@@ -2,26 +2,24 @@
"""
Script to test add_lora, remove_lora, pin_lora, list_loras functions.
"""

import os

import pytest

from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args)
from vllm.lora.request import LoRARequest

MODEL_PATH = "meta-llama/Llama-2-7b-hf"
LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test"
LORA_RANK = 8


@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
# @pytest.fixture(autouse=True)
# def v1(run_with_both_engines_lora):
# # Simple autouse wrapper to run both engines for each test
# # This can be promoted up to conftest.py to run for every
# # test in a package
# pass


def make_lora_request(lora_id: int):
@@ -79,22 +77,6 @@ def run_check(fn, args, expected: list):
@pytest.mark.asyncio
async def test_lora_functions_async():

if os.getenv("VLLM_USE_V1") == "0":
pytest.skip(
reason=
"V0 AsyncLLMEngine does not expose remove/list/pin LoRA functions")

# The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
# environment variable. Reload vllm.engine.async_llm_engine, as
# vllm.engine.async_llm_engine.AsyncLLMEngine changes depending on the
# env var.
import importlib

import vllm.engine.async_llm_engine
importlib.reload(vllm.engine.async_llm_engine)
from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args)

max_loras = 4
engine_args = AsyncEngineArgs(model=MODEL_PATH,
enable_lora=True,
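For context, a minimal sketch of the async flow this test exercises, assuming the engine client exposes add_lora / remove_lora / pin_lora / list_loras as coroutines (the method signatures below are inferred from the test's intent, not verified against the client API):

import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
    build_async_engine_client_from_engine_args)
from vllm.lora.request import LoRARequest

MODEL_PATH = "meta-llama/Llama-2-7b-hf"
LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test"


async def main() -> None:
    engine_args = AsyncEngineArgs(model=MODEL_PATH,
                                  enable_lora=True,
                                  max_loras=4,
                                  max_lora_rank=8,
                                  enforce_eager=True)
    async with build_async_engine_client_from_engine_args(engine_args) as client:
        # Register an adapter, pin it so it cannot be evicted, list the active
        # adapters, then remove it again.
        await client.add_lora(LoRARequest("sql_lora", 1, LORA_MODULE_PATH))
        await client.pin_lora(1)
        print(await client.list_loras())
        await client.remove_lora(1)


if __name__ == "__main__":
    asyncio.run(main())

Since the V0 AsyncLLMEngine never exposed remove/list/pin, dropping V0 also removes the skip and the module-reload workaround from this test.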
8 changes: 0 additions & 8 deletions tests/lora/test_mixtral.py
@@ -10,14 +10,6 @@
MODEL_PATH = "mistralai/Mixtral-8x7B-Instruct-v0.1"


@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass


def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int,
prompts: list[str]) -> list[str]:

8 changes: 0 additions & 8 deletions tests/lora/test_quant_model.py
@@ -37,14 +37,6 @@ class ModelWithQuantization:
]


@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass


def do_sample(llm: vllm.LLM,
lora_path: str,
lora_id: int,
8 changes: 0 additions & 8 deletions tests/lora/test_qwen2vl.py
@@ -13,14 +13,6 @@
from vllm.sampling_params import BeamSearchParams


@pytest.fixture(autouse=not current_platform.is_cpu())
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass


@dataclass
class TestConfig:
model_path: str
10 changes: 0 additions & 10 deletions tests/lora/test_worker.py
@@ -6,8 +6,6 @@
from typing import Union
from unittest.mock import patch

import pytest

import vllm.envs as envs
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
ModelConfig, ParallelConfig, SchedulerConfig,
@@ -18,14 +16,6 @@
from vllm.worker.worker import Worker


@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass


@patch.dict(os.environ, {"RANK": "0"})
def test_worker_apply_lora(sql_lora_files):

Expand Down