@@ -1313,6 +1313,19 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
13131313 .nrows = 1,
13141314 .row_meta_size = 0,
13151315 },
1316+ [GGML_TYPE_IQ4_K_R4] = {
1317+ .type_name = "iq4_k_r4",
1318+ .blck_size = QK_K,
1319+ .type_size = sizeof(block_iq4_k),
1320+ .is_quantized = true,
1321+ .to_float = (ggml_to_float_t) dequantize_row_iq4_k_r4,
1322+ .from_float = quantize_row_iq4_k_r4,
1323+ .from_float_ref = (ggml_from_float_t)quantize_row_iq4_k_r4_ref,
1324+ .vec_dot = vec_dot_iq4_k_r4_q8_k,
1325+ .vec_dot_type = GGML_TYPE_Q8_K,
1326+ .nrows = 1,
1327+ .row_meta_size = 0,
1328+ },
13161329 [GGML_TYPE_IQ5_K] = {
13171330 .type_name = "iq5_k",
13181331 .blck_size = QK_K,
@@ -4114,6 +4127,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
41144127 case GGML_FTYPE_MOSTLY_IQ2_KS: wtype = GGML_TYPE_IQ2_KS; break;
41154128 case GGML_FTYPE_MOSTLY_IQ3_K: wtype = GGML_TYPE_IQ3_K; break;
41164129 case GGML_FTYPE_MOSTLY_IQ4_K: wtype = GGML_TYPE_IQ4_K; break;
4130+ case GGML_FTYPE_MOSTLY_IQ4_K_R4: wtype = GGML_TYPE_IQ4_K_R4; break;
41174131 case GGML_FTYPE_MOSTLY_IQ5_K: wtype = GGML_TYPE_IQ5_K; break;
41184132 case GGML_FTYPE_MOSTLY_IQ6_K: wtype = GGML_TYPE_IQ6_K; break;
41194133 case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
@@ -10649,6 +10663,7 @@ static void ggml_compute_forward_add(
1064910663 case GGML_TYPE_IQ2_KS:
1065010664 case GGML_TYPE_IQ3_K:
1065110665 case GGML_TYPE_IQ4_K:
10666+ case GGML_TYPE_IQ4_K_R4:
1065210667 case GGML_TYPE_IQ5_K:
1065310668 case GGML_TYPE_IQ6_K:
1065410669 case GGML_TYPE_IQ3_S:
@@ -11103,6 +11118,7 @@ static void ggml_compute_forward_add1(
1110311118 case GGML_TYPE_IQ2_KS:
1110411119 case GGML_TYPE_IQ3_K:
1110511120 case GGML_TYPE_IQ4_K:
11121+ case GGML_TYPE_IQ4_K_R4:
1110611122 case GGML_TYPE_IQ5_K:
1110711123 case GGML_TYPE_IQ6_K:
1110811124 case GGML_TYPE_IQ3_S:
@@ -11254,6 +11270,7 @@ static void ggml_compute_forward_acc(
1125411270 case GGML_TYPE_IQ2_KS:
1125511271 case GGML_TYPE_IQ3_K:
1125611272 case GGML_TYPE_IQ4_K:
11273+ case GGML_TYPE_IQ4_K_R4:
1125711274 case GGML_TYPE_IQ5_K:
1125811275 case GGML_TYPE_IQ6_K:
1125911276 case GGML_TYPE_IQ3_S:
@@ -14451,6 +14468,7 @@ static void ggml_compute_forward_out_prod(
1445114468 case GGML_TYPE_IQ2_KS:
1445214469 case GGML_TYPE_IQ3_K:
1445314470 case GGML_TYPE_IQ4_K:
14471+ case GGML_TYPE_IQ4_K_R4:
1445414472 case GGML_TYPE_IQ5_K:
1445514473 case GGML_TYPE_IQ6_K:
1445614474 case GGML_TYPE_IQ3_S:
@@ -14842,6 +14860,7 @@ static void ggml_compute_forward_set(
1484214860 case GGML_TYPE_IQ2_KS:
1484314861 case GGML_TYPE_IQ3_K:
1484414862 case GGML_TYPE_IQ4_K:
14863+ case GGML_TYPE_IQ4_K_R4:
1484514864 case GGML_TYPE_IQ5_K:
1484614865 case GGML_TYPE_IQ6_K:
1484714866 case GGML_TYPE_IQ3_S:
@@ -15127,6 +15146,7 @@ static void ggml_compute_forward_get_rows(
1512715146 case GGML_TYPE_IQ2_KS:
1512815147 case GGML_TYPE_IQ3_K:
1512915148 case GGML_TYPE_IQ4_K:
15149+ case GGML_TYPE_IQ4_K_R4:
1513015150 case GGML_TYPE_IQ5_K:
1513115151 case GGML_TYPE_IQ6_K:
1513215152 case GGML_TYPE_IQ3_S:
@@ -15739,6 +15759,7 @@ static void ggml_compute_forward_clamp(
1573915759 case GGML_TYPE_IQ2_KS:
1574015760 case GGML_TYPE_IQ3_K:
1574115761 case GGML_TYPE_IQ4_K:
15762+ case GGML_TYPE_IQ4_K_R4:
1574215763 case GGML_TYPE_IQ5_K:
1574315764 case GGML_TYPE_IQ6_K:
1574415765 case GGML_TYPE_IQ3_S:
@@ -22581,6 +22602,7 @@ size_t ggml_quantize_chunk(
2258122602 case GGML_TYPE_IQ2_KS: result = quantize_iq2_ks (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2258222603 case GGML_TYPE_IQ3_K: result = quantize_iq3_k (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2258322604 case GGML_TYPE_IQ4_K: result = quantize_iq4_k (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
22605+ case GGML_TYPE_IQ4_K_R4:result = quantize_iq4_k_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2258422606 case GGML_TYPE_IQ5_K: result = quantize_iq5_k (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2258522607 case GGML_TYPE_IQ6_K: result = quantize_iq6_k (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2258622608 case GGML_TYPE_Q4_0_4_4: result = quantize_q4_0_4x4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
0 commit comments