8
8
#include " mmq.h"
9
9
#include " ggml-impl.h"
10
10
#include " ggml-cpu-impl.h"
11
+ #include " simd-mappings.h"
11
12
#include " quants.h"
12
13
#include " ggml-quants.h"
13
14
#include < algorithm>
@@ -453,7 +454,7 @@ void quantize_row_q8_K_vnni(const float * RESTRICT x, void * RESTRICT vy, int64_
453
454
454
455
// Quantize these floats
455
456
const float iscale = 127 .f / amax;
456
- y[i].d = GGML_FP32_TO_FP16 (1 / iscale);
457
+ y[i].d = GGML_CPU_FP32_TO_FP16 (1 / iscale);
457
458
const float id = ( amax != 0 .0f ) ? iscale : 0 .f ;
458
459
const __m512 vscale = _mm512_set1_ps (id);
459
460
@@ -1090,7 +1091,7 @@ struct acc_C<block_q8_0, block_q4_0, is_acc> {
1090
1091
const __m512 vd0 = _mm512_cvtph_ps (_mm256_loadu_si256 ((const __m256i *)((const char *)packed_B + offset)));
1091
1092
1092
1093
for (int m = 0 ; m < nr; ++m) {
1093
- const __m512 vd1 = _mm512_set1_ps (GGML_FP16_TO_FP32 (A[m * lda].d ));
1094
+ const __m512 vd1 = _mm512_set1_ps (GGML_CPU_FP16_TO_FP32 (A[m * lda].d ));
1094
1095
const __m512 vtile = _mm512_cvtepi32_ps (_mm512_loadu_si512 (tile + m * TILE_N));
1095
1096
1096
1097
__m512 vsum;
@@ -1113,8 +1114,8 @@ struct acc_C<block_q8_1, block_q4_1, is_acc> {
1113
1114
const __m512 vm0 = _mm512_cvtph_ps (_mm256_loadu_si256 ((const __m256i *)((const char *)packed_B + offset + TILE_N * sizeof (ggml_half))));
1114
1115
1115
1116
for (int m = 0 ; m < nr; ++m) {
1116
- const __m512 vd1 = _mm512_set1_ps (GGML_FP16_TO_FP32 (A[m * lda].d ));
1117
- const __m512 vs1 = _mm512_set1_ps (GGML_FP16_TO_FP32 (A[m * lda].s ));
1117
+ const __m512 vd1 = _mm512_set1_ps (GGML_CPU_FP16_TO_FP32 (A[m * lda].d ));
1118
+ const __m512 vs1 = _mm512_set1_ps (GGML_CPU_FP16_TO_FP32 (A[m * lda].s ));
1118
1119
const __m512 vtile = _mm512_cvtepi32_ps (_mm512_loadu_si512 (tile + m * TILE_N));
1119
1120
1120
1121
__m512 vsum;
@@ -1137,7 +1138,7 @@ struct acc_C<block_q8_0, block_q8_0, is_acc> {
1137
1138
const __m512 vd0 = _mm512_cvtph_ps (_mm256_loadu_si256 ((const __m256i *)((const char *)packed_B + offset)));
1138
1139
1139
1140
for (int m = 0 ; m < nr; ++m) {
1140
- const __m512 vd1 = _mm512_set1_ps (GGML_FP16_TO_FP32 (A[m * lda].d ));
1141
+ const __m512 vd1 = _mm512_set1_ps (GGML_CPU_FP16_TO_FP32 (A[m * lda].d ));
1141
1142
const __m512 vtile = _mm512_cvtepi32_ps (_mm512_loadu_si512 (tile + m * TILE_N));
1142
1143
1143
1144
__m512 vsum;
@@ -1437,7 +1438,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOCK_N, BLO
1437
1438
va[k] = _mm512_set1_epi32 (a_ptr[k]);
1438
1439
vcomp = _mm512_dpbusd_epi32 (vcomp, off, va[k]);
1439
1440
}
1440
- vd1 = _mm512_set1_ps (GGML_FP16_TO_FP32 (A[0 * KB + i].d ));
1441
+ vd1 = _mm512_set1_ps (GGML_CPU_FP16_TO_FP32 (A[0 * KB + i].d ));
1441
1442
}
1442
1443
1443
1444
// load b
@@ -1498,8 +1499,8 @@ struct tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, BLOCK_K>
1498
1499
for (int k = 0 ; k < 8 ; ++k) {
1499
1500
va[k] = _mm512_set1_epi32 (a_ptr[k]);
1500
1501
}
1501
- vd1 = _mm512_set1_ps (GGML_FP16_TO_FP32 (A[0 * KB + i].d ));
1502
- vs1 = _mm512_set1_ps (GGML_FP16_TO_FP32 (A[0 * KB + i].s ));
1502
+ vd1 = _mm512_set1_ps (GGML_CPU_FP16_TO_FP32 (A[0 * KB + i].d ));
1503
+ vs1 = _mm512_set1_ps (GGML_CPU_FP16_TO_FP32 (A[0 * KB + i].s ));
1503
1504
}
1504
1505
1505
1506
// load b
@@ -1571,7 +1572,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOCK_N, BLO
1571
1572
va[k] = _mm512_set1_epi32 (a_ptr[k]);
1572
1573
va[k] = _mm512_add_epi8 (va[k], off);
1573
1574
}
1574
- vd1 = _mm512_set1_ps (GGML_FP16_TO_FP32 (A[0 * KB + i].d ));
1575
+ vd1 = _mm512_set1_ps (GGML_CPU_FP16_TO_FP32 (A[0 * KB + i].d ));
1575
1576
}
1576
1577
1577
1578
// load b
0 commit comments