Skip to content

Commit 51f8e6a

Browse files
lhezMinh141120
authored andcommitted
opencl: add mul_mv_id_q4_0_f32_8x_flat (ggml-org#14003)
1 parent 573694e commit 51f8e6a

File tree

1 file changed

+19
-104
lines changed

1 file changed

+19
-104
lines changed

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 19 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -439,108 +439,6 @@ struct ggml_backend_opencl_context {
439439
cl_kernel kernel_timestep_embedding;
440440
cl_kernel kernel_mul_mv_id_q4_0_f32_8x_flat;
441441

442-
std::vector<ProfilingInfo> profiling_info;
443-
444-
void write_profiling_info() {
445-
FILE * fperf = fopen("cl_profiling.csv", "w");
446-
if (!fperf) {
447-
GGML_LOG_ERROR("Failed to open cl_profiling.csv\n");
448-
return;
449-
}
450-
451-
// Populate profiling info
452-
for (ProfilingInfo & info : profiling_info) {
453-
cl_ulong cmd_queued;
454-
cl_ulong cmd_submit;
455-
cl_ulong cmd_start;
456-
cl_ulong cmd_end;
457-
cl_ulong cmd_complete;
458-
459-
CL_CHECK(clWaitForEvents(1, &info.evt));
460-
CL_CHECK(clGetEventProfilingInfo(
461-
info.evt, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &cmd_queued, NULL));
462-
CL_CHECK(clGetEventProfilingInfo(
463-
info.evt, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &cmd_submit, NULL));
464-
CL_CHECK(clGetEventProfilingInfo(
465-
info.evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &cmd_start, NULL));
466-
CL_CHECK(clGetEventProfilingInfo(
467-
info.evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &cmd_end, NULL));
468-
CL_CHECK(clGetEventProfilingInfo(
469-
info.evt, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &cmd_complete, NULL));
470-
CL_CHECK(clReleaseEvent(info.evt));
471-
472-
char kernel_name[512];
473-
CL_CHECK(clGetKernelInfo(info.kernel, CL_KERNEL_FUNCTION_NAME,
474-
sizeof(kernel_name), kernel_name, NULL));
475-
info.kernel_name = kernel_name;
476-
477-
info.cmd_queued = cmd_queued;
478-
info.cmd_submit = cmd_submit;
479-
info.cmd_start = cmd_start;
480-
info.cmd_end = cmd_end;
481-
482-
info.cmd_queued_duration_ns = cmd_submit - cmd_queued;
483-
info.cmd_submit_duration_ns = cmd_start - cmd_submit;
484-
info.cmd_duration_ns = cmd_end - cmd_start;
485-
info.cmd_complete_duration_ns = cmd_complete - cmd_end;
486-
info.cmd_total_duration_ns = cmd_complete - cmd_queued;
487-
}
488-
489-
// Dump a csv
490-
float total_kernel_time = 0;
491-
fprintf(fperf, "op name, kernel name, queued duration (ms), submit duration(ms), exec duration (ms), complete duration (ms), total duration (ms), global size, local size, output size\n");
492-
for (const ProfilingInfo & info : profiling_info) {
493-
total_kernel_time += info.cmd_duration_ns/1.e6f;
494-
fprintf(fperf, "%s,%s,%f,%f,%f,%f,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n",
495-
info.op_name.c_str(), info.kernel_name.c_str(),
496-
info.cmd_queued_duration_ns/1.e6f,
497-
info.cmd_submit_duration_ns/1.e6f,
498-
info.cmd_duration_ns/1.e6f,
499-
info.cmd_complete_duration_ns/1.e6f,
500-
info.cmd_total_duration_ns/1.e6f,
501-
info.global_size[0], info.global_size[1], info.global_size[2],
502-
info.local_size[0], info.local_size[1], info.local_size[2],
503-
info.output_size[0], info.output_size[1], info.output_size[2], info.output_size[3]);
504-
}
505-
fclose(fperf);
506-
507-
GGML_LOG_INFO("ggml_opencl: total kernel time: %f\n", total_kernel_time);
508-
509-
// Dump a simple chrome trace
510-
FILE* ftrace = fopen("cl_trace.json", "w");
511-
if (!ftrace) {
512-
GGML_LOG_ERROR("Failed to open cl_trace.json\n");
513-
return;
514-
}
515-
516-
fprintf(ftrace, "[\n");
517-
for (const ProfilingInfo & info : profiling_info) {
518-
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Host\"},\n",
519-
info.kernel_name.c_str(), info.cmd_queued/1000);
520-
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Host\"},\n",
521-
info.kernel_name.c_str(), info.cmd_submit/1000);
522-
523-
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Device\"},\n",
524-
info.kernel_name.c_str(), info.cmd_start/1000);
525-
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Device\"},\n",
526-
info.kernel_name.c_str(), info.cmd_end/1000);
527-
}
528-
fclose(ftrace);
529-
}
530-
531-
void enqueue_ndrange_kernel(cl_kernel kernel, cl_uint work_dim, size_t *global_work_size, size_t *local_work_size, const ggml_tensor * tensor) {
532-
#ifdef GGML_OPENCL_PROFILING
533-
cl_event evt;
534-
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, &evt));
535-
536-
profiling_info.emplace_back();
537-
populateProfilingInfo(profiling_info.back(), evt, kernel, work_dim, global_work_size, local_work_size, tensor);
538-
#else
539-
GGML_UNUSED(tensor);
540-
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, NULL));
541-
#endif
542-
}
543-
544442
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
545443
// Transpose kernels
546444
cl_program program_transpose;
@@ -5384,7 +5282,15 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
53845282
size_t global_work_size[] = {(size_t)ne01*nth0, (size_t)ny*nth1, (size_t)ne12*ne13};
53855283
size_t local_work_size[] = {(size_t)nth0, (size_t)nth1, 1};
53865284

5387-
backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst);
5285+
#ifdef GGML_OPENCL_PROFILING
5286+
cl_event evt;
5287+
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
5288+
5289+
g_profiling_info.emplace_back();
5290+
populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst);
5291+
#else
5292+
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL));
5293+
#endif
53885294
}
53895295
}
53905296

@@ -5401,6 +5307,7 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0,
54015307
GGML_ASSERT(src2->extra);
54025308

54035309
ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context;
5310+
cl_command_queue queue = backend_ctx->queue;
54045311

54055312
ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra;
54065313
ggml_tensor_extra_cl * extra2 = (ggml_tensor_extra_cl *)src2->extra;
@@ -5506,7 +5413,15 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0,
55065413
size_t global_work_size[] = {(size_t)(ne01+ndst*nsg-1)/(ndst*nsg)*sgs, (size_t)(_ne1+nrows-1)/nrows*nsg, (size_t)ne123};
55075414
size_t local_work_size[] = {(size_t)sgs, (size_t)nsg, 1};
55085415

5509-
backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst);
5416+
#ifdef GGML_OPENCL_PROFILING
5417+
cl_event evt;
5418+
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
5419+
5420+
g_profiling_info.emplace_back();
5421+
populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst);
5422+
#else
5423+
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL));
5424+
#endif
55105425
}
55115426

55125427
static void ggml_cl_scale(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {

0 commit comments

Comments
 (0)