Skip to content

Commit

Permalink
ggml : uniform 5th bit extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed May 8, 2023
1 parent 948d124 commit 0e48eb6
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
8 changes: 4 additions & 4 deletions ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ static __global__ void dequantize_block_q5_0(const void * vx, float * y) {
memcpy(&qh, x[i].qh, sizeof(qh));

for (int j = 0; j < qk/2; ++j) {
- const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
- const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
+ const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
+ const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;

const int32_t x0 = ((x[i].qs[j] & 0xf) | xh_0) - 16;
const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16;
Expand All @@ -148,8 +148,8 @@ static __global__ void dequantize_block_q5_1(const void * vx, float * y) {
memcpy(&qh, x[i].qh, sizeof(qh));

for (int j = 0; j < qk/2; ++j) {
- const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
- const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
+ const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
+ const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;

const int x0 = (x[i].qs[j] & 0xf) | xh_0;
const int x1 = (x[i].qs[j] >> 4) | xh_1;
Expand Down
16 changes: 8 additions & 8 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -1311,8 +1311,8 @@ static void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict
memcpy(&qh, x[i].qh, sizeof(qh));

for (int j = 0; j < qk/2; ++j) {
- const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
- const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
+ const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
+ const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;

const int32_t x0 = ((x[i].qs[j] & 0x0F) | xh_0) - 16;
const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16;
Expand All @@ -1338,8 +1338,8 @@ static void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict
memcpy(&qh, x[i].qh, sizeof(qh));

for (int j = 0; j < qk/2; ++j) {
- const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
- const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
+ const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
+ const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;

const int x0 = (x[i].qs[j] & 0x0F) | xh_0;
const int x1 = (x[i].qs[j] >> 4) | xh_1;
Expand Down Expand Up @@ -12086,8 +12086,8 @@ size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t *
memcpy(&qh, &y[i].qh, sizeof(qh));

for (int j = 0; j < QK5_0; j += 2) {
- const uint8_t vh0 = ((qh & (1u << (j + 0))) >> (j + 0)) << 4;
- const uint8_t vh1 = ((qh & (1u << (j + 1))) >> (j + 1)) << 4;
+ const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
+ const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12));

// cast to 16 bins
const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;
Expand Down Expand Up @@ -12116,8 +12116,8 @@ size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t *
memcpy(&qh, &y[i].qh, sizeof(qh));

for (int j = 0; j < QK5_1; j += 2) {
- const uint8_t vh0 = ((qh & (1u << (j + 0))) >> (j + 0)) << 4;
- const uint8_t vh1 = ((qh & (1u << (j + 1))) >> (j + 1)) << 4;
+ const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
+ const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12));

// cast to 16 bins
const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;
Expand Down

0 comments on commit 0e48eb6

Please sign in to comment.