@@ -4222,15 +4222,19 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c
42224222 GGML_ASSERT (dst);
42234223 GGML_ASSERT (dst->extra );
42244224
4225- const int ne00 = src0 ? src0->ne [0 ] : 0 ;
4226- const cl_ulong nb01 = src0 ? src0->nb [1 ] : 0 ;
4227- const cl_ulong nb02 = src0 ? src0->nb [2 ] : 0 ;
4228- const int ne10 = src1 ? src1->ne [0 ] : 0 ;
4229- const cl_ulong nb10 = src1 ? src1->nb [0 ] : 0 ;
4230- const int ne11 = src1 ? src1->ne [1 ] : 0 ;
4231- const cl_ulong nb11 = src1 ? src1->nb [1 ] : 0 ;
4232- const cl_ulong nb1 = dst ? dst->nb [1 ] : 0 ;
4233- const cl_ulong nb2 = dst ? dst->nb [2 ] : 0 ;
4225+ const int ne00 = src0->ne [0 ];
4226+ const cl_ulong nb01 = src0->nb [1 ];
4227+ const cl_ulong nb02 = src0->nb [2 ];
4228+ const cl_ulong nb03 = src0->nb [3 ];
4229+ const int ne10 = src1->ne [0 ];
4230+ const cl_ulong nb10 = src1->nb [0 ];
4231+ const int ne11 = src1->ne [1 ];
4232+ const int ne12 = src1->ne [2 ];
4233+ const cl_ulong nb11 = src1->nb [1 ];
4234+ const cl_ulong nb12 = src1->nb [2 ];
4235+ const cl_ulong nb1 = dst->nb [1 ];
4236+ const cl_ulong nb2 = dst->nb [2 ];
4237+ const cl_ulong nb3 = dst->nb [3 ];
42344238
42354239 ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context ;
42364240
@@ -4267,14 +4271,17 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c
42674271 CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (int ), &ne00));
42684272 CL_CHECK (clSetKernelArg (kernel, 7 , sizeof (cl_ulong), &nb01));
42694273 CL_CHECK (clSetKernelArg (kernel, 8 , sizeof (cl_ulong), &nb02));
4270- CL_CHECK (clSetKernelArg (kernel, 9 , sizeof (int ), &ne10));
4271- CL_CHECK (clSetKernelArg (kernel, 10 , sizeof (cl_ulong), &nb10));
4272- CL_CHECK (clSetKernelArg (kernel, 11 , sizeof (cl_ulong), &nb11));
4273- CL_CHECK (clSetKernelArg (kernel, 12 , sizeof (cl_ulong), &nb1));
4274- CL_CHECK (clSetKernelArg (kernel, 13 , sizeof (cl_ulong), &nb2));
4275-
4276- size_t global_work_size[] = {(size_t )ne10, (size_t )ne11, 1 };
4277- size_t local_work_size[] = {1 , 1 , 1 };
4274+ CL_CHECK (clSetKernelArg (kernel, 9 , sizeof (cl_ulong), &nb03));
4275+ CL_CHECK (clSetKernelArg (kernel, 10 , sizeof (int ), &ne10));
4276+ CL_CHECK (clSetKernelArg (kernel, 11 , sizeof (cl_ulong), &nb10));
4277+ CL_CHECK (clSetKernelArg (kernel, 12 , sizeof (cl_ulong), &nb11));
4278+ CL_CHECK (clSetKernelArg (kernel, 13 , sizeof (cl_ulong), &nb12));
4279+ CL_CHECK (clSetKernelArg (kernel, 14 , sizeof (cl_ulong), &nb1));
4280+ CL_CHECK (clSetKernelArg (kernel, 15 , sizeof (cl_ulong), &nb2));
4281+ CL_CHECK (clSetKernelArg (kernel, 16 , sizeof (cl_ulong), &nb3));
4282+
4283+ size_t global_work_size[] = {(size_t )ne10*64 , (size_t )ne11, (size_t )ne12};
4284+ size_t local_work_size[] = {64 , 1 , 1 };
42784285
42794286 backend_ctx->enqueue_ndrange_kernel (kernel, 3 , global_work_size, local_work_size, dst);
42804287}
0 commit comments