Skip to content

Commit 1dbba64

Browse files
authored
Merge branch 'main' into deepseek-ocr-cpu
2 parents ebcbd4b + 0976711 commit 1dbba64

File tree

89 files changed

+3516
-967
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+3516
-967
lines changed

.buildkite/test-amd.yaml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ steps:
4848
commands:
4949
- bash standalone_tests/pytorch_nightly_dependency.sh
5050

51-
- label: Async Engine, Inputs, Utils, Worker Test # 36min
52-
timeout_in_minutes: 50
51+
- label: Async Engine, Inputs, Utils, Worker Test # 10min
52+
timeout_in_minutes: 15
5353
mirror_hardwares: [amdexperimental, amdproduction]
5454
agent_pool: mi325_1
5555
# grade: Blocking
@@ -616,9 +616,9 @@ steps:
616616
- uv pip install --system torchao==0.13.0
617617
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
618618

619-
- label: LM Eval Small Models # 53min
620-
timeout_in_minutes: 75
621-
mirror_hardwares: [amdexperimental]
619+
- label: LM Eval Small Models # 15min
620+
timeout_in_minutes: 20
621+
mirror_hardwares: [amdexperimental, amdproduction]
622622
agent_pool: mi325_1
623623
# grade: Blocking
624624
source_file_dependencies:
@@ -627,8 +627,8 @@ steps:
627627
commands:
628628
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt --tp-size=1
629629

630-
- label: OpenAI API correctness # 22min
631-
timeout_in_minutes: 30
630+
- label: OpenAI API correctness # 10min
631+
timeout_in_minutes: 15
632632
mirror_hardwares: [amdexperimental, amdproduction]
633633
agent_pool: mi325_1
634634
# grade: Blocking
@@ -859,10 +859,10 @@ steps:
859859
- pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
860860
- cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work
861861

862-
- label: Multi-Modal Accuracy Eval (Small Models) # 50min
863-
mirror_hardwares: [amdexperimental]
862+
- label: Multi-Modal Accuracy Eval (Small Models) # 10min
863+
mirror_hardwares: [amdexperimental, amdproduction]
864864
agent_pool: mi325_1
865-
timeout_in_minutes: 70
865+
timeout_in_minutes: 15
866866
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
867867
source_file_dependencies:
868868
- vllm/multimodal/

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,6 @@ csrc/moe/marlin_moe_wna16/kernel_*
221221

222222
# Ignore ep_kernels_workspace folder
223223
ep_kernels_workspace/
224+
225+
# Allow tracked library source folders under submodules (e.g., benchmarks/lib)
226+
!vllm/benchmarks/lib/

csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include "scaled_mm_kernels.hpp"
22
#include "scaled_mm_sm100_fp8_dispatch.cuh"
3-
#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
43

54
namespace vllm {
65

@@ -13,11 +12,11 @@ void cutlass_scaled_mm_sm100_fp8(torch::Tensor& out, torch::Tensor const& a,
1312
if (bias) {
1413
TORCH_CHECK(bias->dtype() == out.dtype(),
1514
"currently bias dtype must match output dtype ", out.dtype());
16-
return cutlass_scaled_mm_sm100_fp8_epilogue<c3x::ScaledEpilogueBias>(
17-
out, a, b, a_scales, b_scales, *bias);
15+
return cutlass_scaled_mm_sm100_fp8_epilogue<true>(out, a, b, a_scales,
16+
b_scales, *bias);
1817
} else {
19-
return cutlass_scaled_mm_sm100_fp8_epilogue<c3x::ScaledEpilogue>(
20-
out, a, b, a_scales, b_scales);
18+
return cutlass_scaled_mm_sm100_fp8_epilogue<false>(out, a, b, a_scales,
19+
b_scales);
2120
}
2221
}
2322

0 commit comments

Comments
 (0)