Skip to content

Commit e97e226

Browse files
committed
resolve against #15489, sync after clearing partial sums
1 parent 8d382bc commit e97e226

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7953,7 +7953,6 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
7953 7953
if (op == GGML_OP_ADD || op == GGML_OP_RMS_NORM) {
7954 7954
vk_buffer d_A = ctx->do_add_rms_partials ? ctx->prealloc_add_rms_partials : d_X;
7955 7955
size_t a_buf_offset = ctx->do_add_rms_partials ? ctx->prealloc_size_add_rms_partials_offset : 0;
7956-
ggml_vk_sync_buffers(subctx);
7957 7956
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline,
7958 7957
{ vk_subbuffer{ d_X, x_buf_offset, x_sz },
7959 7958
vk_subbuffer{ d_Y, y_buf_offset, y_sz },
@@ -11297,6 +11296,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
11297 11296
}
11298 11297
// initialize partial sums to zero.
11299 11298
ggml_vk_buffer_memset_async(compute_ctx, ctx->prealloc_add_rms_partials, 0, 0, ctx->prealloc_size_add_rms_partials);
11299+
ggml_vk_sync_buffers(ctx, compute_ctx);
11300 11300
}
11301 11301

11302 11302
// Submit after enough work has accumulated, to overlap CPU cmdbuffer generation with GPU execution.

0 commit comments

Comments
 (0)