Skip to content

Commit

Permalink
Clean up remaining Punica C information (vllm-project#7027)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeejeelee authored and sfc-gh-mkeralapura committed Aug 12, 2024
1 parent 7f75c53 commit 2f1fd0b
Show file tree
Hide file tree
Showing 5 changed files with 3 additions and 15 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/clang-format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,6 @@ jobs:
run: |
EXCLUDES=(
'csrc/moe/topk_softmax_kernels.cu'
'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
'csrc/punica/bgmv/bgmv_config.h'
'csrc/punica/bgmv/bgmv_impl.cuh'
'csrc/punica/bgmv/vec_dtypes.cuh'
'csrc/punica/punica_ops.cu'
'csrc/punica/type_convert.h'
)
find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
| grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
Expand Down
2 changes: 1 addition & 1 deletion cmake/utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
#
# The torch cmake setup hardcodes the detected architecture flags in
# `CMAKE_CUDA_FLAGS`. Since `CMAKE_CUDA_FLAGS` is a "global" variable, it
# can't modified on a per-target basis, e.g. for the `punica` extension.
# can't modified on a per-target basis.
# So, all the `-gencode` flags need to be extracted and removed from
# `CMAKE_CUDA_FLAGS` for processing so they can be passed by another method.
# Since it's not possible to use `target_compiler_options` for adding target
Expand Down
6 changes: 0 additions & 6 deletions format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -242,12 +242,6 @@ echo 'vLLM isort: Done'
# NOTE: Keep up to date with .github/workflows/clang-format.yml
CLANG_FORMAT_EXCLUDES=(
'csrc/moe/topk_softmax_kernels.cu'
'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
'csrc/punica/bgmv/bgmv_config.h'
'csrc/punica/bgmv/bgmv_impl.cuh'
'csrc/punica/bgmv/vec_dtypes.cuh'
'csrc/punica/punica_ops.cu'
'csrc/punica/type_convert.h'
)

# Format specified files with clang-format
Expand Down
2 changes: 1 addition & 1 deletion vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1304,7 +1304,7 @@ class LoRAConfig:
long_lora_scaling_factors: Optional[Tuple[float]] = None

def __post_init__(self):
# Keep this in sync with csrc/punica/bgmv/bgmv_config.h
# TODO: Increase the range of rank
possible_max_ranks = (8, 16, 32, 64)
possible_lora_extra_vocab_size = (0, 256, 512)
if self.max_lora_rank not in possible_max_ranks:
Expand Down
2 changes: 1 addition & 1 deletion vllm/lora/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,7 @@ def create_lora_weights(
lora_config: LoRAConfig,
model_config: Optional[PretrainedConfig] = None,
) -> None:
# Keep this in sync with csrc/punica/bgmv/bgmv_config.h
# TODO: Verify if this condition can be relaxed
if 32000 < self.base_layer.vocab_size > 128512:
raise ValueError("When using LoRA, vocab size must be "
"32000 >= vocab_size <= 128512")
Expand Down

0 comments on commit 2f1fd0b

Please sign in to comment.