ggml-cpu: update q5_0 unroll to 4

taronaeo · taronaeo · commit fd8f4a2d0599 · 2025-08-22T02:05:47.000+08:00
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
diff --git a/ggml/src/ggml-cpu/arch/s390/quants.c b/ggml/src/ggml-cpu/arch/s390/quants.c
@@ -289,7 +289,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
 
     const uint8x16_t v_m = vec_splats((uint8_t)0x0F);
 
-    #pragma GCC unroll 8
+    #pragma GCC unroll 4
     for (; ib + 1 < nb; ib += 2) {
         const block_q5_0 * GGML_RESTRICT x0 = &x[ib + 0];
         const block_q5_0 * GGML_RESTRICT x1 = &x[ib + 1];
@@ -353,7 +353,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
 
     sumf += vec_hsum(v_sum0) + vec_hsum(v_sum1);
 
-    #pragma GCC unroll 8
+    #pragma GCC unroll 4
     for (; ib < nb; ++ib) {
         const block_q5_0 * GGML_RESTRICT x0 = &x[ib];
         const block_q8_0 * GGML_RESTRICT y0 = &y[ib];