Disable IQ2_K CPU quantization.

Nexesenex · Nexesenex · commit b042b552c09e · 2025-10-25T13:12:09.000+02:00
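Comments out the IQ2_K quantization code (and replaces `constexpr int ntry` with a plain `int` in the IQ3_K quantizer) to allow compilation.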
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -363,7 +363,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ2_K] = {
-        .from_float               = quantize_row_iq2_k,
+        // .from_float               = quantize_row_iq2_k,
         .vec_dot                  = ggml_vec_dot_iq2_k_q8_K,
         .vec_dot_type             = GGML_TYPE_Q8_K,
         .nrows                    = 1,
diff --git a/ggml/src/ggml-cpu/quants.h b/ggml/src/ggml-cpu/quants.h
@@ -35,7 +35,7 @@ void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, i
 void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 
-void quantize_row_iq2_k (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
+// void quantize_row_iq2_k (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq3_k (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq4_k (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq5_k (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
@@ -5487,7 +5487,7 @@ inline int best_index_iq2nl(const int8_t * values, float x) {
     return x - values[idx] < values[idx+1] - x ? idx : idx + 1;
 }
 
-static void quantize_row_iq2_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {
+/* static void quantize_row_iq2_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {
 
     constexpr int kBlockSize = 16;
 
@@ -5645,7 +5645,7 @@ size_t quantize_iq2_k(const float * src, void * dst, int64_t nrows, int64_t n_pe
         qrow += nblock*sizeof(block_iq2_k);
     }
     return nrows * nblock * sizeof(block_iq2_k);
-}
+} */
 
 void dequantize_row_iq2_k(const block_iq2_k  * x, float * y, int64_t k) {
     assert(k % QK_K == 0);
@@ -5696,7 +5696,7 @@ inline int best_index_iq3nl(const int8_t * values, float x) {
 }
 static void quantize_row_iq3_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {
 
-    constexpr int ntry = 3;
+    int ntry = 3;
 
     block_iq3_k * y = (block_iq3_k *)vy;
 
diff --git a/ggml/src/ggml-quants.h b/ggml/src/ggml-quants.h
@@ -40,8 +40,8 @@ GGML_API void quantize_row_iq4_xs_ref (const float * GGML_RESTRICT x, block_iq4_
 GGML_API void quantize_row_iq3_s_ref  (const float * GGML_RESTRICT x, block_iq3_s   * GGML_RESTRICT y, int64_t k);
 GGML_API void quantize_row_iq2_s_ref  (const float * GGML_RESTRICT x, block_iq2_s   * GGML_RESTRICT y, int64_t k);
 
-GGML_API void quantize_row_iq2_k_ref  (const float * GGML_RESTRICT x, block_iq2_k  * GGML_RESTRICT y, int64_t k);
-GGML_API void quantize_row_iq3_k_ref  (const float * GGML_RESTRICT x, block_iq3_k * GGML_RESTRICT y, int64_t k);
+// GGML_API void quantize_row_iq2_k_ref  (const float * GGML_RESTRICT x, block_iq2_k  * GGML_RESTRICT y, int64_t k);
+GGML_API void quantize_row_iq3_k_ref  (const float * GGML_RESTRICT x, block_iq3_k  * GGML_RESTRICT y, int64_t k);
 GGML_API void quantize_row_iq4_k_ref  (const float * GGML_RESTRICT x, block_iq4_k   * GGML_RESTRICT y, int64_t k);
 GGML_API void quantize_row_iq5_k_ref  (const float * GGML_RESTRICT x, block_iq5_k   * GGML_RESTRICT y, int64_t k);
 GGML_API void quantize_row_iq6_k_ref  (const float * GGML_RESTRICT x, block_iq6_k   * GGML_RESTRICT y, int64_t k);
@@ -135,7 +135,7 @@ GGML_API size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTR
 GGML_API size_t quantize_q6_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
 GGML_API size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
 
-GGML_API size_t quantize_iq2_k(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
+// GGML_API size_t quantize_iq2_k(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
 GGML_API size_t quantize_iq3_k(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
 GGML_API size_t quantize_iq4_k(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
 GGML_API size_t quantize_iq5_k(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
@@ -883,7 +883,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
         .is_quantized             = true,
         .to_float                 = (ggml_to_float_t) dequantize_row_iq2_k,
         // .from_float               = quantize_row_iq2_k,
-        .from_float_ref           = (ggml_from_float_t) quantize_row_iq2_k_ref,
+        // .from_float_ref           = (ggml_from_float_t) quantize_row_iq2_k_ref,
         // .vec_dot                  = vec_dot_iq2_k_q8_k,
         // .vec_dot_type             = GGML_TYPE_Q8_K,
         // .nrows                    = 1,

Original file line number	Diff line number	Diff line change
`@@ -5487,7 +5487,7 @@ inline int best_index_iq2nl(const int8_t * values, float x) {`
`5487`	`5487`	`return x - values[idx] < values[idx+1] - x ? idx : idx + 1;`
`5488`	`5488`	`}`
`5489`	`5489`
`5490`		`-static void quantize_row_iq2_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {`
	`5490`	`+/* static void quantize_row_iq2_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {`
`5491`	`5491`
`5492`	`5492`	`constexpr int kBlockSize = 16;`
`5493`	`5493`
`@@ -5645,7 +5645,7 @@ size_t quantize_iq2_k(const float * src, void * dst, int64_t nrows, int64_t n_pe`
`5645`	`5645`	`qrow += nblock*sizeof(block_iq2_k);`
`5646`	`5646`	`}`
`5647`	`5647`	`return nrows * nblock * sizeof(block_iq2_k);`
`5648`		`-}`
	`5648`	`+} */`
`5649`	`5649`
`5650`	`5650`	`void dequantize_row_iq2_k(const block_iq2_k * x, float * y, int64_t k) {`
`5651`	`5651`	`assert(k % QK_K == 0);`
`@@ -5696,7 +5696,7 @@ inline int best_index_iq3nl(const int8_t * values, float x) {`
`5696`	`5696`	`}`
`5697`	`5697`	`static void quantize_row_iq3_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {`
`5698`	`5698`
`5699`		`- constexpr int ntry = 3;`
	`5699`	`+ int ntry = 3;`
`5700`	`5700`
`5701`	`5701`	`block_iq3_k * y = (block_iq3_k *)vy;`
`5702`	`5702`