From e4c34c23de2a90ab837772ac182638ac3bc1636d Mon Sep 17 00:00:00 2001
From: Daniele <36171005+dtrifiro@users.noreply.github.com>
Date: Wed, 4 Dec 2024 22:48:13 +0100
Subject: [PATCH] [CI/Build] improve python-only dev setup (#9621)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com>
Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
---
 docs/source/getting_started/installation.rst | 41 +++------
 python_only_dev.py                           | 96 ++------------------
 setup.py                                     | 83 ++++++++++++++++-
 vllm/envs.py                                 |  3 +-
 4 files changed, 102 insertions(+), 121 deletions(-)

diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst
index 52412fa8437b9..9b6cb0e80d60e 100644
--- a/docs/source/getting_started/installation.rst
+++ b/docs/source/getting_started/installation.rst
@@ -21,7 +21,7 @@ You can install vLLM using pip:
 .. code-block:: console
 
     $ # (Recommended) Create a new conda environment.
-    $ conda create -n myenv python=3.10 -y
+    $ conda create -n myenv python=3.12 -y
     $ conda activate myenv
 
     $ # Install vLLM with CUDA 12.1.
@@ -89,45 +89,24 @@ Build from source
 Python-only build (without compilation)
 ---------------------------------------
 
-If you only need to change Python code, you can simply build vLLM without compilation.
-
-The first step is to install the latest vLLM wheel:
-
-.. code-block:: console
-
-    pip install https://vllm-wheels.s3.us-west-2.amazonaws.com/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
-
-You can find more information about vLLM's wheels `above <#install-the-latest-code>`_.
-
-After verifying that the installation is successful, you can use `the following script <https://github.com/vllm-project/vllm/blob/main/python_only_dev.py>`_:
+If you only need to change Python code, you can build and install vLLM without compilation. Using `pip's ``--editable`` flag <https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs>`_, changes you make to the code will be reflected when you run vLLM:
 
 .. code-block:: console
 
     $ git clone https://github.com/vllm-project/vllm.git
     $ cd vllm
-    $ python python_only_dev.py
+    $ VLLM_USE_PRECOMPILED=1 pip install --editable .
 
-The script will:
+This will download the latest nightly wheel and use the compiled libraries from there in the install.
 
-* Find the installed vLLM package in the current environment.
-* Copy built files to the current directory.
-* Rename the installed vLLM package.
-* Symbolically link the current directory to the installed vLLM package.
-
-Now, you can edit the Python code in the current directory, and the changes will be reflected when you run vLLM.
-
-Once you have finished editing or want to install another vLLM wheel, you should exit the development environment using `the same script <https://github.com/vllm-project/vllm/blob/main/python_only_dev.py>`_ with the ``--quit-dev`` (or ``-q`` for short) flag:
+The ``VLLM_PRECOMPILED_WHEEL_LOCATION`` environment variable can be used instead of ``VLLM_USE_PRECOMPILED`` to specify a custom path or URL to the wheel file. For example, to use the `0.6.1.post1 PyPi wheel <https://pypi.org/project/vllm/#files>`_:
 
 .. code-block:: console
 
-    $ python python_only_dev.py --quit-dev
-
-The ``--quit-dev`` flag will:
-
-* Remove the symbolic link from the current directory to the vLLM package.
-* Restore the original vLLM package from the backup.
+   $ export VLLM_PRECOMPILED_WHEEL_LOCATION=https://files.pythonhosted.org/packages/4a/4c/ee65ba33467a4c0de350ce29fbae39b9d0e7fcd887cc756fa993654d1228/vllm-0.6.3.post1-cp38-abi3-manylinux1_x86_64.whl
+   $ pip install --editable .
 
-If you update the vLLM wheel and rebuild from the source to make further edits, you will need to repeat the `Python-only build <#python-only-build>`_ steps again.
+You can find more information about vLLM's wheels `above <#install-the-latest-code>`_.
 
 .. note::
 
@@ -148,9 +127,13 @@ If you want to modify C++ or CUDA code, you'll need to build vLLM from source. T
 .. tip::
 
     Building from source requires a lot of compilation. If you are building from source repeatedly, it's more efficient to cache the compilation results.
+
     For example, you can install `ccache <https://github.com/ccache/ccache>`_ using ``conda install ccache`` or ``apt install ccache`` .
     As long as ``which ccache`` command can find the ``ccache`` binary, it will be used automatically by the build system. After the first build, subsequent builds will be much faster.
 
+    `sccache <https://github.com/mozilla/sccache>`_ works similarly to ``ccache``, but has the capability to utilize caching in remote storage environments.
+    The following environment variables can be set to configure the vLLM ``sccache`` remote: ``SCCACHE_BUCKET=vllm-build-sccache SCCACHE_REGION=us-west-2 SCCACHE_S3_NO_CREDENTIALS=1``. We also recommend setting ``SCCACHE_IDLE_TIMEOUT=0``.
+
 
 Use an existing PyTorch installation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/python_only_dev.py b/python_only_dev.py
index 1ca0f5c30b741..f70b4984025b3 100644
--- a/python_only_dev.py
+++ b/python_only_dev.py
@@ -1,92 +1,14 @@
-# enable python only development
-# copy compiled files to the current directory directly
+msg = """Old style python only build (without compilation) is deprecated, please check https://docs.vllm.ai/en/latest/getting_started/installation.html#python-only-build-without-compilation for the new way to do python only build (without compilation).
 
-import argparse
-import os
-import shutil
-import subprocess
-import sys
-import warnings
+TL;DR:
 
-parser = argparse.ArgumentParser(
-    description="Development mode for python-only code")
-parser.add_argument('-q',
-                    '--quit-dev',
-                    action='store_true',
-                    help='Set the flag to quit development mode')
-args = parser.parse_args()
+VLLM_USE_PRECOMPILED=1 pip install -e .
 
-# cannot directly `import vllm` , because it will try to
-# import from the current directory
-output = subprocess.run([sys.executable, "-m", "pip", "show", "vllm"],
-                        capture_output=True)
+or
 
-assert output.returncode == 0, "vllm is not installed"
+export VLLM_COMMIT=33f460b17a54acb3b6cc0b03f4a17876cff5eafd # use full commit hash from the main branch
+export VLLM_PRECOMPILED_WHEEL_LOCATION=https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
+pip install -e .
+""" # noqa
 
-text = output.stdout.decode("utf-8")
-
-package_path = None
-for line in text.split("\n"):
-    if line.startswith("Location: "):
-        package_path = line.split(": ")[1]
-        break
-
-assert package_path is not None, "could not find package path"
-
-cwd = os.getcwd()
-
-assert cwd != package_path, "should not import from the current directory"
-
-files_to_copy = [
-    "vllm/_C.abi3.so",
-    "vllm/_moe_C.abi3.so",
-    "vllm/vllm_flash_attn/vllm_flash_attn_c.abi3.so",
-    "vllm/vllm_flash_attn/flash_attn_interface.py",
-    "vllm/vllm_flash_attn/__init__.py",
-    # "vllm/_version.py", # not available in nightly wheels yet
-]
-
-# Try to create _version.py to avoid version related warning
-# Refer to https://github.com/vllm-project/vllm/pull/8771
-try:
-    from setuptools_scm import get_version
-    get_version(write_to="vllm/_version.py")
-except ImportError:
-    warnings.warn(
-        "To avoid warnings related to vllm._version, "
-        "you should install setuptools-scm by `pip install setuptools-scm`",
-        stacklevel=2)
-
-if not args.quit_dev:
-    for file in files_to_copy:
-        src = os.path.join(package_path, file)
-        dst = file
-        print(f"Copying {src} to {dst}")
-        shutil.copyfile(src, dst)
-
-    pre_built_vllm_path = os.path.join(package_path, "vllm")
-    tmp_path = os.path.join(package_path, "vllm_pre_built")
-    current_vllm_path = os.path.join(cwd, "vllm")
-
-    print(f"Renaming {pre_built_vllm_path} to {tmp_path} for backup")
-    shutil.copytree(pre_built_vllm_path, tmp_path)
-    shutil.rmtree(pre_built_vllm_path)
-
-    print(f"Linking {current_vllm_path} to {pre_built_vllm_path}")
-    os.symlink(current_vllm_path, pre_built_vllm_path)
-else:
-    vllm_symlink_path = os.path.join(package_path, "vllm")
-    vllm_backup_path = os.path.join(package_path, "vllm_pre_built")
-    current_vllm_path = os.path.join(cwd, "vllm")
-
-    print(f"Unlinking {current_vllm_path} to {vllm_symlink_path}")
-    assert os.path.islink(
-        vllm_symlink_path
-    ), f"not in dev mode: {vllm_symlink_path} is not a symbolic link"
-    assert current_vllm_path == os.readlink(
-        vllm_symlink_path
-    ), "current directory is not the source code of package"
-    os.unlink(vllm_symlink_path)
-
-    print(f"Recovering backup from {vllm_backup_path} to {vllm_symlink_path}")
-    os.rename(vllm_backup_path, vllm_symlink_path)
+print(msg)
diff --git a/setup.py b/setup.py
index b936589869e76..182dabe449674 100644
--- a/setup.py
+++ b/setup.py
@@ -249,6 +249,74 @@ def run(self):
             self.copy_file(file, dst_file)
 
 
+class repackage_wheel(build_ext):
+    """Extracts libraries and other files from an existing wheel."""
+    default_wheel = "https://vllm-wheels.s3.us-west-2.amazonaws.com/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+
+    def run(self) -> None:
+        wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION",
+                                   self.default_wheel)
+
+        assert _is_cuda(
+        ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
+
+        import zipfile
+
+        if os.path.isfile(wheel_location):
+            wheel_path = wheel_location
+            print(f"Using existing wheel={wheel_path}")
+        else:
+            # Download the wheel from a given URL, assume
+            # the filename is the last part of the URL
+            wheel_filename = wheel_location.split("/")[-1]
+
+            import tempfile
+
+            # create a temporary directory to store the wheel
+            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
+            wheel_path = os.path.join(temp_dir, wheel_filename)
+
+            print(f"Downloading wheel from {wheel_location} to {wheel_path}")
+
+            from urllib.request import urlretrieve
+
+            try:
+                urlretrieve(wheel_location, filename=wheel_path)
+            except Exception as e:
+                from setuptools.errors import SetupError
+
+                raise SetupError(
+                    f"Failed to get vLLM wheel from {wheel_location}") from e
+
+        with zipfile.ZipFile(wheel_path) as wheel:
+            files_to_copy = [
+                "vllm/_C.abi3.so",
+                "vllm/_moe_C.abi3.so",
+                "vllm/vllm_flash_attn/vllm_flash_attn_c.abi3.so",
+                "vllm/vllm_flash_attn/flash_attn_interface.py",
+                "vllm/vllm_flash_attn/__init__.py",
+                # "vllm/_version.py", # not available in nightly wheels yet
+            ]
+            file_members = filter(lambda x: x.filename in files_to_copy,
+                                  wheel.filelist)
+
+            for file in file_members:
+                print(f"Extracting and including {file.filename} "
+                      "from existing wheel")
+                package_name = os.path.dirname(file.filename).replace("/", ".")
+                file_name = os.path.basename(file.filename)
+
+                if package_name not in package_data:
+                    package_data[package_name] = []
+
+                wheel.extract(file)
+                if file_name.endswith(".py"):
+                    # python files shouldn't be added to package_data
+                    continue
+
+                package_data[package_name].append(file_name)
+
+
 def _is_hpu() -> bool:
     is_hpu_available = True
     try:
@@ -403,6 +471,8 @@ def get_vllm_version() -> str:
             # skip this for source tarball, required for pypi
             if "sdist" not in sys.argv:
                 version += f"{sep}cu{cuda_version_str}"
+        if envs.VLLM_USE_PRECOMPILED:
+            version += ".precompiled"
     elif _is_hip():
         # Get the HIP version
         hipcc_version = get_hipcc_rocm_version()
@@ -514,13 +584,18 @@ def _read_requirements(filename: str) -> List[str]:
 package_data = {
     "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"]
 }
-if envs.VLLM_USE_PRECOMPILED:
-    ext_modules = []
-    package_data["vllm"].append("*.so")
 
 if _no_device():
     ext_modules = []
 
+if not ext_modules:
+    cmdclass = {}
+else:
+    cmdclass = {
+        "build_ext":
+        repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
+    }
+
 setup(
     name="vllm",
     version=get_vllm_version(),
@@ -557,7 +632,7 @@ def _read_requirements(filename: str) -> List[str]:
         "audio": ["librosa", "soundfile"],  # Required for audio processing
         "video": ["decord"]  # Required for video processing
     },
-    cmdclass={"build_ext": cmake_build_ext} if len(ext_modules) > 0 else {},
+    cmdclass=cmdclass,
     package_data=package_data,
     entry_points={
         "console_scripts": [
diff --git a/vllm/envs.py b/vllm/envs.py
index c896770e5f6bc..28797ac1e4af2 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -113,7 +113,8 @@ def get_default_config_root():
 
     # If set, vllm will use precompiled binaries (*.so)
     "VLLM_USE_PRECOMPILED":
-    lambda: bool(os.environ.get("VLLM_USE_PRECOMPILED")),
+    lambda: bool(os.environ.get("VLLM_USE_PRECOMPILED")) or bool(
+        os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
 
     # CMake build type
     # If not set, defaults to "Debug" or "RelWithDebInfo"