Skip to content

Commit ef28ed2

Browse files
committed
WIP
1 parent f839396 commit ef28ed2

File tree

3 files changed

+7
-13
lines changed

3 files changed

+7
-13
lines changed

ggml/src/ggml-cuda/mmf.cu

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,11 @@ bool ggml_cuda_should_use_mmf(enum ggml_type type, int cc, int warp_size, const
136136
}
137137

138138
if (mul_mat_id) {
139-
if (type == GGML_TYPE_F32 && src1_ncols > 512) {
139+
if (src0_ne[1] <= 1024 && src1_ncols > 512) {
140140
return false;
141-
}
142-
if ((type == GGML_TYPE_F16 || type == GGML_TYPE_BF16) && src1_ncols > 512) {
141+
} else if(src0_ne[1] > 1024 && src1_ncols > 128) {
142+
return false;
143+
} else {
143144
return false;
144145
}
145146
} else {

ggml/src/ggml-cuda/mmf.cuh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,9 @@ static inline void mul_mat_f_switch_ids(
495495
const mmf_ids_data * ids_data) {
496496
const bool has_ids_data = ids_data && ids_data->ids_src_compact;
497497

498-
if (has_ids_data) {
498+
// Use the compact-ids kernel only for larger tiles; for small ncols_dst (<= 16)
499+
// we prefer the normal mul_mat_f path with has_ids=true.
500+
if (has_ids_data && ncols_dst > 16) {
499501
const int max_tiles = (int) ((ncols_dst + cols_per_block - 1) / cols_per_block);
500502
if (max_tiles == 0) {
501503
return;

tests/test-backend-ops.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6819,15 +6819,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
68196819
}
68206820
}
68216821

6822-
// liquid 1b-8b
6823-
for (int bs : {1, 4, 8, 32, 64, 128, 256, 512}) {
6824-
for (ggml_type type_a : {GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0, GGML_TYPE_Q4_K, GGML_TYPE_Q6_K, GGML_TYPE_IQ2_XS}) {
6825-
for (ggml_type type_b : {GGML_TYPE_F32}) {
6826-
test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, 32, 4, false, 1792, bs, 2048, 1));
6827-
}
6828-
}
6829-
}
6830-
68316822
// gpt-oss-20b
68326823
for (int bs : {1, 4, 8, 512}) {
68336824
for (ggml_type type_a : {GGML_TYPE_MXFP4}) {

0 commit comments

Comments
 (0)