Skip to content

Commit fd8f4a2

Browse files
committed
ggml-cpu: update q5_0 unroll to 4
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
1 parent 5a94a01 commit fd8f4a2

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

ggml/src/ggml-cpu/arch/s390/quants.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -289,7 +289,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
289 | 289 |
290 | 290 |   const uint8x16_t v_m = vec_splats((uint8_t)0x0F);
291 | 291 |
292 |     | - #pragma GCC unroll 8
    | 292 | + #pragma GCC unroll 4
293 | 293 |   for (; ib + 1 < nb; ib += 2) {
294 | 294 |   const block_q5_0 * GGML_RESTRICT x0 = &x[ib + 0];
295 | 295 |   const block_q5_0 * GGML_RESTRICT x1 = &x[ib + 1];
@@ -353,7 +353,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
353 | 353 |
354 | 354 |   sumf += vec_hsum(v_sum0) + vec_hsum(v_sum1);
355 | 355 |
356 |     | - #pragma GCC unroll 8
    | 356 | + #pragma GCC unroll 4
357 | 357 |   for (; ib < nb; ++ib) {
358 | 358 |   const block_q5_0 * GGML_RESTRICT x0 = &x[ib];
359 | 359 |   const block_q8_0 * GGML_RESTRICT y0 = &y[ib];

0 commit comments

Comments
 (0)