@@ -3506,7 +3506,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
35063506 // Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
35073507 for (int sb = 0; sb < QK_K / 128; sb++) {
35083508
3509- // Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
3509+ // Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
35103510 const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
35113511 const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
35123512 const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -4230,16 +4230,16 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
42304230 }
42314231 // For super block
42324232 for (int64_t b = 0; b < nb; b++) {
4233- // Delta values - Load the sixteen scale values from two block_q4_kx8 structures
4233+ // Delta values - Load the sixteen scale values from two block_q2_Kx8 structures
42344234 const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
42354235
4236- // dmin values - Load the sixteen dmin values from two block_q4_kx8 structures
4236+ // dmin values - Load the sixteen dmin values from two block_q2_Kx8 structures
42374237 const __m512 col_dmin_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].dmin, b_ptr_1[b].dmin);
42384238
42394239 // Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
42404240 for (int sb = 0; sb < QK_K / 128; sb++) {
42414241
4242- // Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
4242+ // Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
42434243 const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
42444244 const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
42454245 const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -4980,7 +4980,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
49804980 // Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
49814981 for (int sb = 0; sb < QK_K / 128; sb++) {
49824982
4983- // Load the eight block_q4_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
4983+ // Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
49844984 const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
49854985 const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
49864986 const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));
@@ -5654,7 +5654,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
56545654 // Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
56555655 for (int sb = 0; sb < QK_K / 128; sb++) {
56565656
5657- // Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
5657+ // Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
56585658 const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
56595659 const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
56605660 const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));
0 commit comments