
Commit 78d7d30

gshtras and khluu authored
Update pre-commit.yml (#374)
* Update pre-commit.yml
* Reapplying missing format
* New codespell exclude location

Co-authored-by: Kevin H. Luu <kevin@anyscale.com>
1 parent faa1815 commit 78d7d30

File tree

5 files changed: +14 -7 lines


.github/workflows/pre-commit.yml

Lines changed: 1 addition & 1 deletion

@@ -16,4 +16,4 @@ jobs:
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
     - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
       with:
-        extra_args: --hook-stage manual
+        extra_args: --all-files --hook-stage manual

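With --all-files added to extra_args, the CI job now runs every configured hook against the whole repository rather than only the files touched by a change. Assuming pre-commit is installed locally, the same check can be reproduced by running: pre-commit run --all-files --hook-stage manual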
.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ repos:
   rev: v2.3.0
   hooks:
   - id: codespell
-    exclude: 'benchmarks/sonnet.txt|(build|tests/(lora/data|models/fixtures|prompts))/.*'
+    exclude: 'benchmarks/sonnet.txt|(build|tests/(lora/data|models/fixtures|prompts))/.*|csrc/rocm/.*|csrc/gradlib/.*'
 - repo: https://github.com/PyCQA/isort
   rev: 5.13.2
   hooks:

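As a quick reference, here is a minimal sketch of how the extended exclude pattern behaves. The sample paths are hypothetical, and the sketch assumes pre-commit matches the exclude value as a Python regular expression (re.search) against repo-relative paths:

import re

# Extended codespell exclude pattern from this commit; the sample paths are illustrative.
EXCLUDE = (r"benchmarks/sonnet.txt"
           r"|(build|tests/(lora/data|models/fixtures|prompts))/.*"
           r"|csrc/rocm/.*|csrc/gradlib/.*")

for path in ("csrc/rocm/attention.cu",
             "csrc/gradlib/gemm.cu",
             "vllm/attention/backends/rocm_flash_attn.py"):
    skipped = re.search(EXCLUDE, path) is not None
    print(f"{path}: {'skipped by codespell' if skipped else 'checked'}")

Running the snippet shows the two new csrc/ subtrees being skipped while ordinary source files are still spell-checked.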
tests/test_utils.py

Lines changed: 5 additions & 0 deletions

@@ -326,6 +326,11 @@ def measure_current_non_torch():
         # Add some extra non-torch memory 256 MiB (simulate NCCL)
         handle2 = lib.cudaMalloc(256 * 1024 * 1024)
 
+    # this is an analytic value, it is exact,
+    # we only have 256 MiB non-torch memory increase
+    measured_diff = monitored_values.values[-1] - monitored_values.values[0]
+    assert measured_diff == 256 * 1024 * 1024
+
     # Check that the memory usage is within 5% of the expected values
     # 5% tolerance is caused by cuda runtime.
     # we cannot control cuda runtime in the granularity of bytes,

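The new assertion pins the non-torch memory increase to an exact, analytically known value (the 256 MiB allocated through cudaMalloc) instead of relying only on the percentage tolerance below. A rough standalone sketch of the same idea, assuming a CUDA device, a loadable libcudart, and a recent PyTorch; the helper below is illustrative and not vLLM's profiling API:

import ctypes

import torch


def non_torch_bytes() -> int:
    # Device memory in use that the torch caching allocator does not account for.
    free, total = torch.cuda.mem_get_info()
    return (total - free) - torch.cuda.memory_reserved()


torch.cuda.init()
cudart = ctypes.CDLL("libcudart.so")  # assumed to be on the loader path
cudart.cudaMalloc.argtypes = [ctypes.POINTER(ctypes.c_void_p), ctypes.c_size_t]

before = non_torch_bytes()
ptr = ctypes.c_void_p()
cudart.cudaMalloc(ctypes.byref(ptr), 256 * 1024 * 1024)  # simulate e.g. NCCL buffers
after = non_torch_bytes()

# Analytic value: exactly the 256 MiB we allocated outside of torch.
assert after - before == 256 * 1024 * 1024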
vllm/attention/backends/rocm_flash_attn.py

Lines changed: 5 additions & 3 deletions

@@ -681,10 +681,12 @@ def forward(
                     seq_lens,
                     make_attn_mask=False)  # type: ignore
                 full_scales = (
-                    1.0 / layer._q_scale.item(), 1.0 / layer._k_scale.item(),
-                    1.0 / layer._v_scale.item(), 1.0 / layer._prob_scale.item(),
+                    1.0 / layer._q_scale.item(),
+                    1.0 / layer._k_scale.item(), 1.0 /
+                    layer._v_scale.item(), 1.0 / layer._prob_scale.item(),
                     fp8_out_scale.item()) if (
-                        fp8_out_scale and layer._q_scale and layer._prob_scale
+                        fp8_out_scale and layer._q_scale
+                        and layer._prob_scale
                         and envs.VLLM_USE_ROCM_FP8_FLASH_ATTN) else None
                 out, _ = self.attn_func(
                     query,

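This hunk only re-wraps the full_scales expression and its guarding condition to satisfy the formatter (the "Reapplying missing format" item in the commit message); the reciprocal scale values passed to the attention kernel are unchanged.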
vllm/model_executor/layers/quantization/utils/fp8_utils.py

Lines changed: 2 additions & 2 deletions

@@ -36,8 +36,8 @@ def apply_w8a8_block_fp8_linear(
 
 
 def input_to_float8(
-    x: torch.Tensor,
-    dtype: Optional[torch.dtype] = None
+        x: torch.Tensor,
+        dtype: Optional[torch.dtype] = None
 ) -> Tuple[torch.Tensor, torch.Tensor]:
     """This function quantizes input values to float8 values "
     "with tensor-wise quantization."""

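This hunk is likewise formatting-only: the arguments of input_to_float8 are re-indented without changing the signature. For context, the docstring describes tensor-wise float8 quantization; below is a minimal, independent sketch of that technique (not the function's actual implementation), assuming a PyTorch build with torch.float8_e4m3fn support:

from typing import Tuple

import torch


def tensorwise_to_float8(
        x: torch.Tensor,
        dtype: torch.dtype = torch.float8_e4m3fn
) -> Tuple[torch.Tensor, torch.Tensor]:
    # One scale for the whole tensor: map the global absolute maximum onto the
    # largest representable float8 magnitude, quantize, and return the inverse
    # scale so callers can dequantize with a single multiply.
    finfo = torch.finfo(dtype)
    amax = x.abs().max().clamp(min=1e-12)
    scale = finfo.max / amax
    x_q = (x.float() * scale).clamp(min=finfo.min, max=finfo.max).to(dtype)
    return x_q, scale.float().reciprocal()


x = torch.randn(4, 8)
x_q, inv_scale = tensorwise_to_float8(x)
x_approx = x_q.float() * inv_scale  # approximate reconstruction of x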