Commit a7b2d20

Revert "musa: update compile flags (ggml-org#16265)"
This reverts commit 91a2a56.
1 parent 66dedda · commit a7b2d20

2 files changed: +5 -1 lines changed

ggml/src/ggml-cuda/fattn-vec.cuh

Lines changed: 2 additions & 0 deletions

@@ -535,6 +535,8 @@ void ggml_cuda_flash_attn_ext_vec_case(ggml_backend_cuda_context & ctx, ggml_ten
     float logit_softcap;
     memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float));
 
+    const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
+
     if (Q->ne[1] == 1) {
         constexpr int cols_per_block = 1;
         if (logit_softcap == 0.0f) {
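For context, the restored `cc` local caches the active device's compute capability, which the flash-attention dispatch below it uses to pick a kernel variant. Outside ggml the same value comes straight from the CUDA runtime; the following is a minimal, self-contained sketch of that query, where the `100 * major + 10 * minor` packing and the Ampere threshold are illustrative assumptions rather than ggml's exact scheme:

// Minimal sketch: query the current device's compute capability with the
// plain CUDA runtime API. ggml caches this in ggml_cuda_info(); the integer
// packing below (e.g. 8.6 -> 860) is an assumption for illustration.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int device = 0;
    cudaGetDevice(&device);

    cudaDeviceProp prop{};
    cudaGetDeviceProperties(&prop, device);

    const int cc = 100 * prop.major + 10 * prop.minor;
    printf("device %d: compute capability %d.%d (cc = %d)\n",
           device, prop.major, prop.minor, cc);

    // Dispatch code can then branch on cc, e.g. enable a path only for
    // cc >= 800 (Ampere or newer); the threshold here is an example.
    return 0;
}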

ggml/src/ggml-cuda/topk-moe.cu

Lines changed: 3 additions & 1 deletion

@@ -13,7 +13,7 @@
 
   It is intended as fusion of softmax->top-k->get_rows pipeline for MoE models
  */
-template <int n_experts, bool with_norm>
+template <size_t n_experts, bool with_norm>
 __launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_cuda(const float * logits,
                                                                   float * weights,
                                                                   int32_t * ids,
@@ -204,6 +204,8 @@ void ggml_cuda_op_topk_moe(ggml_backend_cuda_context & ctx,
 
     GGML_ASSERT(ids->nb[1] / ggml_type_size(ids->type) == (size_t) n_experts);
 
+    cudaStream_t stream = ctx.stream();
+
     const int n_expert_used = weights->ne[1];
 
     if (with_norm) {
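Two things are restored in this file: the template parameter goes back to `size_t`, which lines up with the `(size_t) n_experts` comparison in the assert above and avoids a signed/unsigned mismatch at instantiation, and the launch stream is again taken from the backend context. Below is a hedged sketch of that pattern; the kernel body, the dispatch switch, and the launch geometry are simplified stand-ins, not the real `topk_moe_cuda`:

// Illustrative sketch: a kernel templated on a compile-time expert count,
// launched on an explicit CUDA stream supplied by the caller. Only the
// template signature and the 4 * WARP_SIZE launch bound mirror the diff;
// everything else is a placeholder.
#include <cstdint>
#include <cuda_runtime.h>

constexpr int WARP_SIZE = 32;

template <size_t n_experts, bool with_norm>
__launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_sketch(const float * logits,
                                                                    float *   weights,
                                                                    int32_t * ids) {
    // Placeholder body: pass expert ids and (optionally scaled) logits through.
    const size_t i = (size_t) blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n_experts) {
        ids[i]     = (int32_t) i;
        weights[i] = with_norm ? logits[i] / n_experts : logits[i];
    }
}

// Host-side dispatch: map a runtime expert count onto a template
// instantiation and launch it on the caller's stream, analogous to how
// ggml_cuda_op_topk_moe uses ctx.stream().
void launch_topk_moe_sketch(const float * logits, float * weights, int32_t * ids,
                            size_t n_experts, cudaStream_t stream) {
    const dim3 block(4 * WARP_SIZE);
    const dim3 grid((unsigned) ((n_experts + block.x - 1) / block.x));
    switch (n_experts) {
        case 8:  topk_moe_sketch<8,  false><<<grid, block, 0, stream>>>(logits, weights, ids); break;
        case 64: topk_moe_sketch<64, false><<<grid, block, 0, stream>>>(logits, weights, ids); break;
        default: break; // unsupported counts would be rejected in real code
    }
}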
