diff --git a/Dockerfile b/Dockerfile index 220dbe26712ec..682f046d4b6ec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -191,6 +191,10 @@ ADD . /vllm-workspace/ RUN --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install -r requirements-dev.txt +# install development dependencies (for testing) +RUN --mount=type=cache,target=/root/.cache/pip \ + python3 -m pip install -e tests/vllm_test_utils + # enable fast downloads from hf (for testing) RUN --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install hf_transfer diff --git a/Dockerfile.cpu b/Dockerfile.cpu index 287b4958da4e5..d2f72ea975a3d 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -62,4 +62,8 @@ WORKDIR /workspace/ RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks +# install development dependencies (for testing) +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -e tests/vllm_test_utils + ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] diff --git a/Dockerfile.hpu b/Dockerfile.hpu index d18fc016387bf..87e0c1a6a934e 100644 --- a/Dockerfile.hpu +++ b/Dockerfile.hpu @@ -11,6 +11,9 @@ ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install +# install development dependencies (for testing) +RUN python3 -m pip install -e tests/vllm_test_utils + WORKDIR /workspace/ RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks diff --git a/Dockerfile.neuron b/Dockerfile.neuron index 2143315d2a078..76dbd4c04d3f3 100644 --- a/Dockerfile.neuron +++ b/Dockerfile.neuron @@ -38,4 +38,7 @@ ENV VLLM_TARGET_DEVICE neuron RUN --mount=type=bind,source=.git,target=.git \ pip install --no-build-isolation -v -e . 
+# install development dependencies (for testing) +RUN python3 -m pip install -e tests/vllm_test_utils + CMD ["/bin/bash"] diff --git a/Dockerfile.openvino b/Dockerfile.openvino index a05ff452cd36e..8bd188ffde408 100644 --- a/Dockerfile.openvino +++ b/Dockerfile.openvino @@ -22,4 +22,7 @@ RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVIC COPY examples/ /workspace/examples COPY benchmarks/ /workspace/benchmarks +# install development dependencies (for testing) +RUN python3 -m pip install -e tests/vllm_test_utils + CMD ["/bin/bash"] diff --git a/Dockerfile.ppc64le b/Dockerfile.ppc64le index b19c6ddec7948..971248577983f 100644 --- a/Dockerfile.ppc64le +++ b/Dockerfile.ppc64le @@ -29,6 +29,9 @@ RUN --mount=type=cache,target=/root/.cache/pip \ RUN --mount=type=bind,source=.git,target=.git \ VLLM_TARGET_DEVICE=cpu python3 setup.py install +# install development dependencies (for testing) +RUN python3 -m pip install -e tests/vllm_test_utils + WORKDIR /workspace/ RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 62d4a9b4909c3..e733994f8c33e 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -168,4 +168,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \ if ls libs/*.whl; then \ python3 -m pip install libs/*.whl; fi +# install development dependencies (for testing) +RUN python3 -m pip install -e tests/vllm_test_utils + CMD ["/bin/bash"] diff --git a/Dockerfile.tpu b/Dockerfile.tpu index 0a507b6ecdf60..b617932a85b47 100644 --- a/Dockerfile.tpu +++ b/Dockerfile.tpu @@ -22,4 +22,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \ -r requirements-tpu.txt RUN python3 setup.py develop +# install development dependencies (for testing) +RUN python3 -m pip install -e tests/vllm_test_utils + CMD ["/bin/bash"] diff --git a/Dockerfile.xpu b/Dockerfile.xpu index 63bc682770422..a374f20d7d949 100644 --- a/Dockerfile.xpu +++ b/Dockerfile.xpu @@ 
-64,5 +64,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \ ENV VLLM_USAGE_SOURCE production-docker-image \ TRITON_XPU_PROFILE 1 - +# install development dependencies (for testing) +RUN python3 -m pip install -e tests/vllm_test_utils ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] diff --git a/tests/entrypoints/llm/test_lazy_outlines.py b/tests/entrypoints/llm/test_lazy_outlines.py index cbfb0cc32c1ce..81fb000d8ac56 100644 --- a/tests/entrypoints/llm/test_lazy_outlines.py +++ b/tests/entrypoints/llm/test_lazy_outlines.py @@ -1,12 +1,12 @@ import sys +from vllm_test_utils import blame + from vllm import LLM, SamplingParams from vllm.distributed import cleanup_dist_env_and_memory -def test_lazy_outlines(sample_regex): - """If users don't use guided decoding, outlines should not be imported. - """ +def run_normal(): prompts = [ "Hello, my name is", "The president of the United States is", @@ -25,13 +25,12 @@ def test_lazy_outlines(sample_regex): generated_text = output.outputs[0].text print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") - # make sure outlines is not imported - assert 'outlines' not in sys.modules - # Destroy the LLM object and free up the GPU memory. del llm cleanup_dist_env_and_memory() + +def run_lmfe(sample_regex): # Create an LLM with guided decoding enabled. llm = LLM(model="facebook/opt-125m", enforce_eager=True, @@ -51,5 +50,15 @@ def test_lazy_outlines(sample_regex): generated_text = output.outputs[0].text print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + +def test_lazy_outlines(sample_regex): + """If users don't use guided decoding, outlines should not be imported. 
+ """ # make sure outlines is not imported - assert 'outlines' not in sys.modules + module_name = "outlines" + with blame(lambda: module_name in sys.modules) as result: + run_normal() + run_lmfe(sample_regex) + assert not result.found, ( + f"Module {module_name} is already imported, the" + f" first import location is:\n{result.trace_stack}") diff --git a/tests/test_lazy_torch_compile.py b/tests/test_lazy_torch_compile.py index b8ac4dd93732b..4756fac8e2a8d 100644 --- a/tests/test_lazy_torch_compile.py +++ b/tests/test_lazy_torch_compile.py @@ -1,61 +1,9 @@ # Description: Test the lazy import module # The utility function cannot be placed in `vllm.utils` # this needs to be a standalone script - -import contextlib -import dataclasses import sys -import traceback -from typing import Callable, Generator - - -@dataclasses.dataclass -class BlameResult: - found: bool = False - trace_stack: str = "" - - -@contextlib.contextmanager -def blame(func: Callable) -> Generator[BlameResult, None, None]: - """ - Trace the function calls to find the first function that satisfies the - condition. The trace stack will be stored in the result. 
- - Usage: - - ```python - with blame(lambda: some_condition()) as result: - # do something - - if result.found: - print(result.trace_stack) - """ - result = BlameResult() - - def _trace_calls(frame, event, arg=None): - nonlocal result - if event in ['call', 'return']: - # for every function call or return - try: - # Temporarily disable the trace function - sys.settrace(None) - # check condition here - if not result.found and func(): - result.found = True - result.trace_stack = "".join(traceback.format_stack()) - # Re-enable the trace function - sys.settrace(_trace_calls) - except NameError: - # modules are deleted during shutdown - pass - return _trace_calls - - sys.settrace(_trace_calls) - - yield result - - sys.settrace(None) +from vllm_test_utils import blame module_name = "torch._inductor.async_compile" diff --git a/tests/vllm_test_utils/setup.py b/tests/vllm_test_utils/setup.py new file mode 100644 index 0000000000000..790e891ec837d --- /dev/null +++ b/tests/vllm_test_utils/setup.py @@ -0,0 +1,7 @@ +from setuptools import setup + +setup( + name='vllm_test_utils', + version='0.1', + packages=['vllm_test_utils'], +) diff --git a/tests/vllm_test_utils/vllm_test_utils/__init__.py b/tests/vllm_test_utils/vllm_test_utils/__init__.py new file mode 100644 index 0000000000000..bf0b62a5b75e3 --- /dev/null +++ b/tests/vllm_test_utils/vllm_test_utils/__init__.py @@ -0,0 +1,8 @@ +""" +vllm_test_utils is a package for vLLM testing utilities. +It does not import any vLLM modules.
# tests/vllm_test_utils/vllm_test_utils/blame.py
import contextlib
import dataclasses
import sys
import traceback
from typing import Callable, Generator


@dataclasses.dataclass
class BlameResult:
    """Outcome of a `blame` tracing session."""
    # True once the traced condition has been observed to hold.
    found: bool = False
    # Formatted call stack captured the first time the condition held;
    # stays empty while `found` is False.
    trace_stack: str = ""


@contextlib.contextmanager
def blame(func: Callable[[], bool]) -> Generator[BlameResult, None, None]:
    """
    Trace the function calls to find the first function that satisfies the
    condition. The trace stack will be stored in the result.

    `func` is evaluated on every function call and return event that happens
    inside the `with` block. The first time it returns a truthy value, the
    current call stack is recorded and `func` is not evaluated again.

    The trace function that was installed before entering the block is
    restored on exit, including when the block raises. Only the current
    thread is traced, because `sys.settrace` is thread-specific.

    Usage:

    ```python
    with blame(lambda: some_condition()) as result:
        # do something

    if result.found:
        print(result.trace_stack)
    ```

    Args:
        func: Zero-argument predicate describing the condition to look for.

    Yields:
        A `BlameResult` that is filled in while the block runs.
    """
    result = BlameResult()
    # Remember any tracer (debugger, coverage, ...) so it can be restored.
    previous_trace = sys.gettrace()
    active = True

    def _trace_calls(frame, event, arg=None):
        if not active:
            # Frames entered during the session keep this function as their
            # local tracer; once the session is over it must do nothing and
            # must never re-install itself.
            return None
        if event in ('call', 'return') and not result.found:
            # for every function call or return, until the condition is found
            try:
                # Temporarily disable the trace function so that evaluating
                # the condition is not traced itself
                sys.settrace(None)
                # check condition here
                if func():
                    result.found = True
                    result.trace_stack = "".join(traceback.format_stack())
                # Re-enable the trace function
                sys.settrace(_trace_calls)
            except NameError:
                # modules are deleted during shutdown
                pass
        return _trace_calls

    sys.settrace(_trace_calls)
    try:
        yield result
    finally:
        # Always undo the global side effect, even if the block raised.
        active = False
        sys.settrace(previous_trace)