
Commit a5d75e1

jeffbolznv authored and walidbr committed
vulkan: Skip syncing for prealloc_y when it is reused (ggml-org#15544)
1 parent 0bcbeaa commit a5d75e1
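
This change stops emitting an unconditional barrier before converting or quantizing `src1` into the preallocated `prealloc_y` buffer. The `prealloc_y_need_sync` check moves inside the branch that actually rewrites the buffer, so when `prealloc_y_last_pipeline_used` and `prealloc_y_last_tensor_used` show that `prealloc_y` already holds the converted data for this `src1`, both the rewrite and the sync are skipped. Below is a minimal sketch of the pattern; `vk_ctx_sketch`, `prepare_prealloc_y`, `pipeline_t`, `tensor_t`, and the `write`/`sync` callables are hypothetical stand-ins for the real `ggml_backend_vk_context` fields and `ggml_vk_cpy_to_contiguous` / `ggml_vk_sync_buffers` calls, not the actual API.

```cpp
// Simplified stand-ins for the real ggml-vulkan types (hypothetical).
struct pipeline_t {};
struct tensor_t {};

struct vk_ctx_sketch {
    // Which pipeline/tensor last populated the preallocated Y buffer.
    const pipeline_t * prealloc_y_last_pipeline_used = nullptr;
    const tensor_t *   prealloc_y_last_tensor_used   = nullptr;
    // Set after a dispatch reads prealloc_y; a later write must sync first.
    bool prealloc_y_need_sync = false;
};

template <typename WriteFn, typename SyncFn>
void prepare_prealloc_y(vk_ctx_sketch & ctx, const pipeline_t * pipeline,
                        const tensor_t * src1, WriteFn && write, SyncFn && sync) {
    if (ctx.prealloc_y_last_pipeline_used != pipeline ||
        ctx.prealloc_y_last_tensor_used   != src1) {
        // Stale contents: the buffer must be rewritten, so order this write
        // after any earlier read with a barrier. Only now is the sync needed.
        if (ctx.prealloc_y_need_sync) {
            sync();
        }
        write();
        ctx.prealloc_y_last_pipeline_used = pipeline;
        ctx.prealloc_y_last_tensor_used   = src1;
    }
    // Otherwise prealloc_y still holds the converted src1 from a previous
    // dispatch: skip both the rewrite and the barrier.
}
```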

File tree: 1 file changed (+15, −20)


ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 15 additions & 20 deletions
```diff
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -5923,11 +5923,6 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx
             ggml_vk_sync_buffers(ctx, subctx);
         }
     }
-    if (y_non_contig || quantize_y) {
-        if (ctx->prealloc_y_need_sync) {
-            ggml_vk_sync_buffers(ctx, subctx);
-        }
-    }
 
     if (x_non_contig) {
         ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_0, src0, { d_Qx, qx_buf_offset, VK_WHOLE_SIZE }, { d_X, 0, VK_WHOLE_SIZE });
@@ -5939,6 +5934,9 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx
     if (y_non_contig) {
         if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
             ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
@@ -5947,6 +5945,9 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx
     if (quantize_y) {
         if (ctx->prealloc_y_last_pipeline_used != to_q8_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_quantize_q8_1(ctx, subctx, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE }, y_ne * ne12 * ne13);
             ctx->prealloc_y_last_pipeline_used = to_q8_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
@@ -6131,11 +6132,6 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx
             ggml_vk_sync_buffers(ctx, subctx);
         }
     }
-    if (y_non_contig) {
-        if (ctx->prealloc_y_need_sync) {
-            ggml_vk_sync_buffers(ctx, subctx);
-        }
-    }
 
     if (x_non_contig) {
         GGML_ASSERT(x_sz == ggml_vk_align_size(ggml_type_size(src0->type) * x_ne, ctx->device->properties.limits.minStorageBufferOffsetAlignment));
@@ -6145,6 +6141,9 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx
         GGML_ASSERT(y_sz == ggml_type_size(src1->type) * y_ne);
         if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
             ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
@@ -6577,11 +6576,6 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx
             ggml_vk_sync_buffers(ctx, subctx);
         }
     }
-    if (y_non_contig) {
-        if (ctx->prealloc_y_need_sync) {
-            ggml_vk_sync_buffers(ctx, subctx);
-        }
-    }
 
     if (x_non_contig) {
         ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_0, src0, { d_Qx, qx_buf_offset, VK_WHOLE_SIZE }, { d_X, 0, VK_WHOLE_SIZE });
@@ -6594,6 +6588,9 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx
     if (y_non_contig) {
         if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
             ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
@@ -6791,11 +6788,6 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx
             ggml_vk_sync_buffers(ctx, subctx);
         }
     }
-    if (y_non_contig) {
-        if (ctx->prealloc_y_need_sync) {
-            ggml_vk_sync_buffers(ctx, subctx);
-        }
-    }
 
     if (x_non_contig) {
         GGML_ASSERT(x_sz == ggml_vk_align_size(ggml_type_size(src0->type) * x_ne, ctx->device->properties.limits.minStorageBufferOffsetAlignment));
@@ -6805,6 +6797,9 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx
         GGML_ASSERT(y_sz == ggml_type_size(src1->type) * y_ne);
         if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
             ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
```
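
The same transformation is applied in all four paths (`ggml_vk_mul_mat_q_f16`, `ggml_vk_mul_mat_vec_q_f16`, `ggml_vk_mul_mat_id_q_f16`, `ggml_vk_mul_mat_vec_id_q_f16`): the `prealloc_y_need_sync` check moves from an unconditional block ahead of the conversion into the branch guarded by `prealloc_y_last_pipeline_used` / `prealloc_y_last_tensor_used`, so a barrier is recorded only when `prealloc_y` is actually rewritten, and a reused buffer costs no synchronization.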
