@@ -225,41 +225,6 @@ static __device__ void cpy_blck_f32_q5_1(const char * cxi, char * cdsti) {
225225 memcpy (dsti->qh , &qh, sizeof (qh));
226226}
227227
// Quantizes one block of QK6_0 consecutive floats read from cxi into a single
// block_q6_0 written at cdsti.
//
//   cxi   - source bytes, reinterpreted as QK6_0 floats
//   cdsti - destination bytes, reinterpreted as one block_q6_0
//
// The block scale d is the signed input with the largest magnitude divided by
// -32. Each input is multiplied by 1/d (or by 0 when d == 0), offset by 32.5,
// truncated and capped at 63. The low 4 bits of every quant are stored in qs
// (two quants per byte) and the remaining high 2 bits are packed into qh.
static __device__ void cpy_blck_f32_q6_0(const char * cxi, char * cdsti) {
    const float * xi   = (const float *) cxi;
    block_q6_0  * dsti = (block_q6_0 *) cdsti;

    float amax = 0.0f; // largest absolute value seen so far
    float vmax = 0.0f; // the signed input that produced amax

    for (int j = 0; j < QK6_0; ++j) {
        const float v  = xi[j];
        const float av = fabsf(v);
        if (amax < av) {
            amax = av;
            vmax = v;
        }
    }

    const float d  = vmax / -32;
    const float id = d ? 1.0f/d : 0.0f; // an all-zero block gets id == 0 instead of a division by zero

    dsti->d = d;
    // qh is built up with |= below, so it has to start out cleared.
    memset(dsti->qh, 0, QK6_0/4);

    for (int j = 0; j < QK6_0/2; ++j) {
        // Element j of the first half is paired with element j of the second half.
        // The second half starts at QK6_0/2: use this type's own block size here,
        // not the block size of another quantization type.
        const float x0 = xi[0       + j]*id;
        const float x1 = xi[QK6_0/2 + j]*id;

        const uint8_t xi0 = min(63, (int8_t)(x0 + 32.5f));
        const uint8_t xi1 = min(63, (int8_t)(x1 + 32.5f));

        // Low nibbles: first-half quant in bits 0..3, second-half quant in bits 4..7.
        dsti->qs[j] = (xi0 & 0xf) | ((xi1 & 0xf) << 4);

        // High 2 bits of both quants form a 4-bit group h. With QK6_0/4 bytes in
        // play, j % (QK6_0/4) selects the qh byte and j / (QK6_0/4) (0 or 1 inside
        // this loop) selects its lower or upper nibble.
        const uint8_t h = (xi0 >> 4) | ((xi1 >> 4) << 2);
        dsti->qh[j%(QK6_0/4)] |= (h << 4*(j/(QK6_0/4)));
    }
}

262-
263228static __device__ const int8_t iq4nl_index[241 ] = {
264229 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 16 , 16 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
265230 1 , 17 , 17 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 18 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 ,
@@ -462,17 +427,6 @@ static void ggml_cpy_f32_q5_1_cuda(
462427 (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
463428}
464429
// Host-side launcher for the F32 -> Q6_0 copy.
//
// Asserts that ne is a multiple of QK6_0, then enqueues
// cpy_f32_q<cpy_blck_f32_q6_0, QK6_0> on `stream` with ne / QK6_0 thread blocks
// of a single thread each and no dynamic shared memory. All ne*/nb* arguments
// are forwarded to the kernel unchanged.
static void ggml_cpy_f32_q6_0_cuda(
    const char * cx, char * cdst, const int ne,
    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream) {

    GGML_ASSERT(ne % QK6_0 == 0);

    // One thread block per quantized output block, one thread per thread block.
    const dim3 grid_dims(ne / QK6_0, 1, 1);
    const dim3 block_dims(1, 1, 1);

    cpy_f32_q<cpy_blck_f32_q6_0, QK6_0><<<grid_dims, block_dims, 0, stream>>>(
        cx, cdst, ne,
        ne00, ne01, ne02, nb00, nb01, nb02, nb03,
        ne10, ne11, ne12, nb10, nb11, nb12, nb13);
}

475-
476430static void ggml_cpy_f32_iq4_nl_cuda (
477431 const char * cx, char * cdst, const int ne,
478432 const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
@@ -545,8 +499,6 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
545499 ggml_cpy_f32_q4_1_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
546500 } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q5_0) {
547501 ggml_cpy_f32_q5_0_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
548- } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q6_0) {
549- ggml_cpy_f32_q6_0_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
550502 } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_IQ4_NL) {
551503 ggml_cpy_f32_iq4_nl_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
552504 } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q5_1) {
@@ -587,8 +539,6 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
587539 return (void *) cpy_f32_q<cpy_blck_f32_iq4_nl, QK4_NL>;
588540 } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q5_1) {
589541 return (void *) cpy_f32_q<cpy_blck_f32_q5_1, QK5_1>;
590- } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q6_0) {
591- return (void *) cpy_f32_q<cpy_blck_f32_q6_0, QK6_0>;
592542 } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16) {
593543 return (void *) cpy_f32_f16<cpy_1_f32_f16>;
594544 } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
0 commit comments