Skip to content

Commit

Permalink
Added broken new q4k quant
Browse files Browse the repository at this point in the history
  • Loading branch information
LostRuins committed Jun 19, 2023
1 parent 16b9cd1 commit 266d436
Showing 1 changed file with 77 additions and 1 deletion.
78 changes: 77 additions & 1 deletion ggml-opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ static std::string program_source = MULTILINE_QUOTE(

typedef char int8_t;
typedef uchar uint8_t;
typedef short int16_t;
typedef ushort uint16_t;
typedef int int32_t;
typedef uint uint32_t;

Expand Down Expand Up @@ -420,6 +422,80 @@ void vec_dot_q4_K(__global const struct block_q4_K* x, const int ib, const int i
*result = sum;
}

__kernel void dequantize_mul_mat_vec_q4_K_fast(__global struct block_q4_K * xx, __local float* tmp, __global float* y, __global float* dst, const int ncols) {

//to rename it later, just to test now
const uint16_t kmask1 = 0x3f3f;
const uint16_t kmask2 = 0x0f0f;
const uint16_t kmask3 = 0xc0c0;

const int block_size = get_local_size(0);
const int row = get_group_id(0);
const int num_blocks_per_row = ncols / 256;
const int ib0 = row*num_blocks_per_row;

const int tid = get_local_id(0)/2; // 0...15
const int ix = get_local_id(0)%2;

const int il = tid/4; // 0...3
const int ir = tid - 4*il;// 0...3
const int n = 4;

const int im = il/2; // 0 or 1. 0 computes 0,32 + 128,160, 1 computes 64,96 + 192,224
const int in = il%2;

const int l0 = n*(2*ir + in);
const int q_offset = 32*im + l0;
const int y_offset = 64*im + l0;


uint16_t aux[4];
const uint8_t * sc = (const uint8_t *)aux;

const struct block_q4_K * x = xx;

tmp[tid] = 0;

for (int i = ix; i < num_blocks_per_row; i += 2) {

const uint8_t * q1 = x[i].qs + q_offset;
const uint8_t * q2 = q1 + 64;
const float * y1 = y + i*256 + y_offset;
const float * y2 = y1 + 128;

const float dall = vload_half(0, &x[i].d);
const float dmin = vload_half(0, &x[i].dmin);

const uint16_t * a = (const uint16_t *)x[i].scales;
aux[0] = a[im+0] & kmask1;
aux[1] = a[im+2] & kmask1;
aux[2] = ((a[im+4] >> 0) & kmask2) | ((a[im+0] & kmask3) >> 2);
aux[3] = ((a[im+4] >> 4) & kmask2) | ((a[im+2] & kmask3) >> 2);

float4 s = {0.f, 0.f, 0.f, 0.f};
float smin = 0;
for (int l = 0; l < n; ++l) {
s.x += y1[l] * (q1[l] & 0xF); s.y += y1[l+32] * (q1[l] >> 4);
s.z += y2[l] * (q2[l] & 0xF); s.w += y2[l+32] * (q2[l] >> 4);
smin += y1[l] * sc[2] + y1[l+32] * sc[3] + y2[l] * sc[6] + y2[l+32] * sc[7];
}
tmp[tid] += dall * (s.x * sc[0] + s.y * sc[1] + s.z * sc[4] + s.w * sc[5]) - dmin * smin;

}

// sum up partial sums and write back result
barrier(CLK_LOCAL_MEM_FENCE);
for (int s=block_size/2; s>0; s>>=1) {
if (tid < s) {
tmp[tid] += tmp[tid + s];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if (tid == 0) {
dst[row] = tmp[0];
}
}

void vec_dot_q5_K(__global const struct block_q5_K* x, const int ib, const int iqs, const __global float *yy, float *result) {

const int j = iqs / 64;
Expand Down Expand Up @@ -956,7 +1032,7 @@ void ggml_cl_init(void) {
CL_CHECK((convert_mul_mat_vec_f16_cl = clCreateKernel(program, "convert_mul_mat_vec_f16", &err), err));
CL_CHECK((dequantize_mul_mat_vec_q2_K_cl = clCreateKernel(program, "dequantize_mul_mat_vec_q2_K", &err), err));
CL_CHECK((dequantize_mul_mat_vec_q3_K_cl = clCreateKernel(program, "dequantize_mul_mat_vec_q3_K", &err), err));
CL_CHECK((dequantize_mul_mat_vec_q4_K_cl = clCreateKernel(program, "dequantize_mul_mat_vec_q4_K", &err), err));
CL_CHECK((dequantize_mul_mat_vec_q4_K_cl = clCreateKernel(program, "dequantize_mul_mat_vec_q4_K_fast", &err), err));
CL_CHECK((dequantize_mul_mat_vec_q5_K_cl = clCreateKernel(program, "dequantize_mul_mat_vec_q5_K", &err), err));
CL_CHECK((dequantize_mul_mat_vec_q6_K_cl = clCreateKernel(program, "dequantize_mul_mat_vec_q6_K", &err), err));

Expand Down

0 comments on commit 266d436

Please sign in to comment.