From 2f1fd0b609d3036ba47ef41beae8ca6b5e12c364 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Mon, 5 Aug 2024 06:37:08 +0800
Subject: [PATCH] Clean up remaining Punica C information (#7027)

---
 .github/workflows/clang-format.yml | 6 ------
 cmake/utils.cmake                  | 2 +-
 format.sh                          | 6 ------
 vllm/config.py                     | 2 +-
 vllm/lora/layers.py                | 2 +-
 5 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml
index e9b6e28fa6bcb..79b85d8cad0d5 100644
--- a/.github/workflows/clang-format.yml
+++ b/.github/workflows/clang-format.yml
@@ -30,12 +30,6 @@ jobs:
       run: |
         EXCLUDES=(
           'csrc/moe/topk_softmax_kernels.cu'
-          'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
-          'csrc/punica/bgmv/bgmv_config.h'
-          'csrc/punica/bgmv/bgmv_impl.cuh'
-          'csrc/punica/bgmv/vec_dtypes.cuh'
-          'csrc/punica/punica_ops.cu'
-          'csrc/punica/type_convert.h'
         )
         find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
           | grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
diff --git a/cmake/utils.cmake b/cmake/utils.cmake
index 4869cad541135..69998b45be70a 100644
--- a/cmake/utils.cmake
+++ b/cmake/utils.cmake
@@ -181,7 +181,7 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
   #
   # The torch cmake setup hardcodes the detected architecture flags in
   # `CMAKE_CUDA_FLAGS`. Since `CMAKE_CUDA_FLAGS` is a "global" variable, it
-  # can't modified on a per-target basis, e.g. for the `punica` extension.
+  # can't modified on a per-target basis.
   # So, all the `-gencode` flags need to be extracted and removed from
   # `CMAKE_CUDA_FLAGS` for processing so they can be passed by another method.
   # Since it's not possible to use `target_compiler_options` for adding target
diff --git a/format.sh b/format.sh
index abc688c702aa6..baaebc811d405 100755
--- a/format.sh
+++ b/format.sh
@@ -242,12 +242,6 @@ echo 'vLLM isort: Done'
 # NOTE: Keep up to date with .github/workflows/clang-format.yml
 CLANG_FORMAT_EXCLUDES=(
     'csrc/moe/topk_softmax_kernels.cu'
-    'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
-    'csrc/punica/bgmv/bgmv_config.h'
-    'csrc/punica/bgmv/bgmv_impl.cuh'
-    'csrc/punica/bgmv/vec_dtypes.cuh'
-    'csrc/punica/punica_ops.cu'
-    'csrc/punica/type_convert.h'
 )
 
 # Format specified files with clang-format
diff --git a/vllm/config.py b/vllm/config.py
index 0524514f6633a..35945e34452d2 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1304,7 +1304,7 @@ class LoRAConfig:
     long_lora_scaling_factors: Optional[Tuple[float]] = None
 
     def __post_init__(self):
-        # Keep this in sync with csrc/punica/bgmv/bgmv_config.h
+        # TODO: Increase the range of rank
         possible_max_ranks = (8, 16, 32, 64)
         possible_lora_extra_vocab_size = (0, 256, 512)
         if self.max_lora_rank not in possible_max_ranks:
diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py
index 42ec99e6ea2c8..d3978ff6f4ff1 100644
--- a/vllm/lora/layers.py
+++ b/vllm/lora/layers.py
@@ -1073,7 +1073,7 @@ def create_lora_weights(
         lora_config: LoRAConfig,
         model_config: Optional[PretrainedConfig] = None,
     ) -> None:
-        # Keep this in sync with csrc/punica/bgmv/bgmv_config.h
+        # TODO: Verify if this condition can be relaxed
         if 32000 < self.base_layer.vocab_size > 128512:
             raise ValueError("When using LoRA, vocab size must be "
                              "32000 >= vocab_size <= 128512")