Skip to content

Commit bcf937c

Browse files
Djip007 authored and
ggerganov committed
ggml : more performance with llamafile tinyblas on x86_64 (llama/10714)
* more performance with llamafile tinyblas on x86_64. - add bf16 support - change dispatch strategy (thanks: ikawrakow/ik_llama.cpp#71 ) - reduce memory bandwidth simple tinyblas dispatch and more cache friendly * tinyblas dynamic dispatching * sgemm: add M blocks. * - git 2.47 uses short ids of length 9. - show-progress is not part of GNU Wget2 * remove unstable test
1 parent b8d9095 commit bcf937c

File tree

3 files changed

+270
-264
lines changed

3 files changed

+270
-264
lines changed

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7419,14 +7419,14 @@ static void ggml_compute_forward_mul_mat(
74197419
if (src1_cont) {
74207420
for (int64_t i13 = 0; i13 < ne13; i13++)
74217421
for (int64_t i12 = 0; i12 < ne12; i12++)
7422-
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
7422+
if (!llamafile_sgemm(params,
7423+
ne01, ne11, ne00/ggml_blck_size(src0->type),
74237424
(const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
74247425
nb01/ggml_type_size(src0->type),
74257426
(const char *)src1->data + i12*nb12 + i13*nb13,
74267427
nb11/ggml_type_size(src1->type),
74277428
(char *)dst->data + i12*nb2 + i13*nb3,
74287429
nb1/ggml_type_size(dst->type),
7429-
ith, nth,
74307430
src0->type,
74317431
src1->type,
74327432
dst->type))
@@ -7471,14 +7471,14 @@ UseGgmlGemm1:;
74717471

74727472
for (int64_t i13 = 0; i13 < ne13; i13++)
74737473
for (int64_t i12 = 0; i12 < ne12; i12++)
7474-
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
7474+
if (!llamafile_sgemm(params,
7475+
ne01, ne11, ne00/ggml_blck_size(src0->type),
74757476
(const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
74767477
nb01/ggml_type_size(src0->type),
74777478
(const char *)wdata + (i12*ne11 + i13*ne12*ne11)*row_size,
74787479
row_size/ggml_type_size(vec_dot_type),
74797480
(char *)dst->data + i12*nb2 + i13*nb3,
74807481
nb1/ggml_type_size(dst->type),
7481-
ith, nth,
74827482
src0->type,
74837483
vec_dot_type,
74847484
dst->type))

0 commit comments

Comments
 (0)