Skip to content

Commit 6f99895

Browse files
committed
Update comments in repack.cpp
1 parent 3f6c61d commit 6f99895

File tree

2 files changed: 8 additions and 8 deletions.

ggml/src/ggml-cpu/arch/x86/repack.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3739,7 +3739,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
37393739
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
37403740
for (int sb = 0; sb < QK_K / 128; sb++) {
37413741

3742-
// Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
3742+
// Load the eight block_q2_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
37433743
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
37443744
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
37453745
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -4463,16 +4463,16 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
44634463
}
44644464
// For super block
44654465
for (int64_t b = 0; b < nb; b++) {
4466-
// Delta values - Load the sixteen scale values from two block_q4_kx8 structures
4466+
// Delta values - Load the sixteen scale values from two block_q2_kx8 structures
44674467
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
44684468

4469-
// dmin values - Load the sixteen dmin values from two block_q4_kx8 structures
4469+
// dmin values - Load the sixteen dmin values from two block_q2_kx8 structures
44704470
const __m512 col_dmin_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].dmin, b_ptr_1[b].dmin);
44714471

44724472
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
44734473
for (int sb = 0; sb < QK_K / 128; sb++) {
44744474

4475-
// Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
4475+
// Load the eight block_q2_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
44764476
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
44774477
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
44784478
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -5213,7 +5213,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
52135213
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
52145214
for (int sb = 0; sb < QK_K / 128; sb++) {
52155215

5216-
// Load the eight block_q4_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
5216+
// Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
52175217
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
52185218
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
52195219
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));
@@ -5887,7 +5887,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
58875887
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
58885888
for (int sb = 0; sb < QK_K / 128; sb++) {
58895889

5890-
// Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
5890+
// Load the eight block_q2_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
58915891
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
58925892
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
58935893
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));

ggml/src/ggml-cpu/repack.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1096,7 +1096,7 @@ static block_q2_Kx8 make_block_q2_Kx8(block_q2_K * in, unsigned int blck_size_in
10961096

10971097
const int end = QK_K * 2 / blck_size_interleave;
10981098

1099-
// Interleave Q4_K quants by taking 8 bytes at a time
1099+
// Interleave Q2_K quants by taking 8 bytes at a time
11001100
for (int i = 0; i < end; ++i) {
11011101
int src_id = i % 8;
11021102
int src_offset = (i / 8) * blck_size_interleave;
@@ -1107,7 +1107,7 @@ static block_q2_Kx8 make_block_q2_Kx8(block_q2_K * in, unsigned int blck_size_in
11071107
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
11081108
}
11091109

1110-
// The below logic is designed so as to unapck and rearrange scales and mins values in Q2_K
1110+
// The below logic is designed so as to unpack and rearrange scales and mins values in Q2_K
11111111
// Currently the Q2_K structure has 16 scales and 16 mins packed in 16 bytes ( 4 bits for each value)
11121112
// The output Q2_Kx8 structure has 128 bytes for storing scales and mins
11131113
// Every 16 byte is packed such that it contains scales and mins for corresponding sub blocks from Q2_K structure

0 commit comments

Comments (0)