@@ -16,41 +16,41 @@ struct quant_option {
1616};
1717
// NOTE(review): this text is a rendered diff, not plain C++. Each line begins
// with line-number residue: a number followed by `-` marks the removed version
// of a line, a number followed by `+` marks the added version, and a doubled
// number (e.g. 2323) marks unchanged context.
//
// QUANT_OPTIONS is the table of quantization presets. Every entry is a brace
// initializer holding three values: a preset name string, an LLAMA_FTYPE_*
// constant, and a human-readable description string. The field names of
// `struct quant_option` are declared outside this excerpt - TODO confirm.
//
// The added lines replace the size / perplexity figures labelled "LLaMA-v1-7B"
// with figures labelled "Llama-3-8B".
1818static const std::vector<struct quant_option > QUANT_OPTIONS = {
19- { " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0 , " 3.56G , +0.2166 ppl @ LLaMA-v1-7B " , },
20- { " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1 , " 3.90G , +0.1585 ppl @ LLaMA-v1-7B " , },
21- { " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0 , " 4.33G , +0.0683 ppl @ LLaMA-v1-7B " , },
22- { " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1 , " 4.70G , +0.0349 ppl @ LLaMA-v1-7B " , },
19+ { " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0 , " 4.34G , +0.4685 ppl @ Llama-3-8B " , },
20+ { " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1 , " 4.78G , +0.4511 ppl @ Llama-3-8B " , },
21+ { " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0 , " 5.21G , +0.1316 ppl @ Llama-3-8B " , },
22+ { " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1 , " 5.65G , +0.1062 ppl @ Llama-3-8B " , },
2323 { " IQ2_XXS" ,LLAMA_FTYPE_MOSTLY_IQ2_XXS ," 2.06 bpw quantization" , },
2424 { " IQ2_XS" , LLAMA_FTYPE_MOSTLY_IQ2_XS , " 2.31 bpw quantization" , },
2525 { " IQ2_S" , LLAMA_FTYPE_MOSTLY_IQ2_S , " 2.5 bpw quantization" , },
2626 { " IQ2_M" , LLAMA_FTYPE_MOSTLY_IQ2_M , " 2.7 bpw quantization" , },
2727 { " IQ1_S" , LLAMA_FTYPE_MOSTLY_IQ1_S , " 1.56 bpw quantization" , },
2828 { " IQ1_M" , LLAMA_FTYPE_MOSTLY_IQ1_M , " 1.75 bpw quantization" , },
29- { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K , " 2.63G , +0.6717 ppl @ LLaMA-v1-7B " , },
30- { " Q2_K_S" , LLAMA_FTYPE_MOSTLY_Q2_K_S , " 2.16G , +9.0634 ppl @ LLaMA-v1-7B " , },
29+ { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K , " 2.96G , +3.5199 ppl @ Llama-3-8B " , },
30+ { " Q2_K_S" , LLAMA_FTYPE_MOSTLY_Q2_K_S , " 2.96G , +3.1836 ppl @ Llama-3-8B " , },
3131 { " IQ3_XXS" ,LLAMA_FTYPE_MOSTLY_IQ3_XXS ," 3.06 bpw quantization" , },
3232 { " IQ3_S" , LLAMA_FTYPE_MOSTLY_IQ3_S , " 3.44 bpw quantization" , },
3333 { " IQ3_M" , LLAMA_FTYPE_MOSTLY_IQ3_M , " 3.66 bpw quantization mix" , },
// Q3_K is an alias entry: it carries the same ftype constant as Q3_K_M.
// The removed and added versions of the Q3_K and IQ3_XS lines read the same
// here - presumably a whitespace-only realignment; verify against the raw diff.
34- { " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M , " alias for Q3_K_M" },
35- { " IQ3_XS" , LLAMA_FTYPE_MOSTLY_IQ3_XS , " 3.3 bpw quantization" , },
36- { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S , " 2.75G , +0.5551 ppl @ LLaMA-v1-7B " , },
37- { " Q3_K_M" , LLAMA_FTYPE_MOSTLY_Q3_K_M , " 3.07G , +0.2496 ppl @ LLaMA-v1-7B " , },
38- { " Q3_K_L" , LLAMA_FTYPE_MOSTLY_Q3_K_L , " 3.35G , +0.1764 ppl @ LLaMA-v1-7B " , },
34+ { " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M , " alias for Q3_K_M" },
35+ { " IQ3_XS" , LLAMA_FTYPE_MOSTLY_IQ3_XS , " 3.3 bpw quantization" , },
36+ { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S , " 3.41G , +1.6321 ppl @ Llama-3-8B " , },
37+ { " Q3_K_M" , LLAMA_FTYPE_MOSTLY_Q3_K_M , " 3.74G , +0.6569 ppl @ Llama-3-8B " , },
38+ { " Q3_K_L" , LLAMA_FTYPE_MOSTLY_Q3_K_L , " 4.03G , +0.5562 ppl @ Llama-3-8B " , },
3939 { " IQ4_NL" , LLAMA_FTYPE_MOSTLY_IQ4_NL , " 4.50 bpw non-linear quantization" , },
4040 { " IQ4_XS" , LLAMA_FTYPE_MOSTLY_IQ4_XS , " 4.25 bpw non-linear quantization" , },
// Q4_K and Q5_K are alias entries for Q4_K_M and Q5_K_M respectively (same
// ftype constants). In the F16 entry the sign in front of 0.0020 changes from
// "-" (removed line) to "+" (added line); the BF16, F32 and COPY entries read
// the same on both sides of the diff.
41- { " Q4_K" , LLAMA_FTYPE_MOSTLY_Q4_K_M , " alias for Q4_K_M" , },
42- { " Q4_K_S" , LLAMA_FTYPE_MOSTLY_Q4_K_S , " 3.59G , +0.0992 ppl @ LLaMA-v1-7B " , },
43- { " Q4_K_M" , LLAMA_FTYPE_MOSTLY_Q4_K_M , " 3.80G , +0.0532 ppl @ LLaMA-v1-7B " , },
44- { " Q5_K" , LLAMA_FTYPE_MOSTLY_Q5_K_M , " alias for Q5_K_M" , },
45- { " Q5_K_S" , LLAMA_FTYPE_MOSTLY_Q5_K_S , " 4.33G , +0.0400 ppl @ LLaMA-v1-7B " , },
46- { " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M , " 4.45G , +0.0122 ppl @ LLaMA-v1-7B " , },
47- { " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K , " 5.15G , +0.0008 ppl @ LLaMA-v1-7B " , },
48- { " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0 , " 6.70G , +0.0004 ppl @ LLaMA-v1-7B " , },
49- { " F16" , LLAMA_FTYPE_MOSTLY_F16 , " 14.00G, - 0.0020 ppl @ Mistral-7B" , },
50- { " BF16" , LLAMA_FTYPE_MOSTLY_BF16 , " 14.00G, -0.0050 ppl @ Mistral-7B" , },
51- { " F32" , LLAMA_FTYPE_ALL_F32 , " 26.00G @ 7B" , },
41+ { " Q4_K" , LLAMA_FTYPE_MOSTLY_Q4_K_M , " alias for Q4_K_M" , },
42+ { " Q4_K_S" , LLAMA_FTYPE_MOSTLY_Q4_K_S , " 4.37G , +0.2689 ppl @ Llama-3-8B " , },
43+ { " Q4_K_M" , LLAMA_FTYPE_MOSTLY_Q4_K_M , " 4.58G , +0.1754 ppl @ Llama-3-8B " , },
44+ { " Q5_K" , LLAMA_FTYPE_MOSTLY_Q5_K_M , " alias for Q5_K_M" , },
45+ { " Q5_K_S" , LLAMA_FTYPE_MOSTLY_Q5_K_S , " 5.21G , +0.1049 ppl @ Llama-3-8B " , },
46+ { " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M , " 5.33G , +0.0569 ppl @ Llama-3-8B " , },
47+ { " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K , " 6.14G , +0.0217 ppl @ Llama-3-8B " , },
48+ { " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0 , " 7.96G , +0.0026 ppl @ Llama-3-8B " , },
49+ { " F16" , LLAMA_FTYPE_MOSTLY_F16 , " 14.00G, + 0.0020 ppl @ Mistral-7B" , },
50+ { " BF16" , LLAMA_FTYPE_MOSTLY_BF16 , " 14.00G, -0.0050 ppl @ Mistral-7B" , },
51+ { " F32" , LLAMA_FTYPE_ALL_F32 , " 26.00G @ 7B" , },
// COPY and F32 carry the same constant, LLAMA_FTYPE_ALL_F32, so entry order
// matters: presumably a lookup by ftype value stops at the first matching
// entry and must find F32 rather than COPY - verify against the lookup code.
5252 // Note: keep COPY after F32 so that ftype 0 does not match the COPY entry.
53- { " COPY" , LLAMA_FTYPE_ALL_F32 , " only copy tensors, no quantizing" , },
53+ { " COPY" , LLAMA_FTYPE_ALL_F32 , " only copy tensors, no quantizing" , },
5454};
5555
// File-local string constant (unchanged context line in this diff; the leading
// 5656 is line-number residue). Presumably the metadata key under which the
// importance-matrix file name is recorded - its use is outside this excerpt,
// TODO confirm.
5656static const char * const LLM_KV_QUANTIZE_IMATRIX_FILE = " quantize.imatrix.file" ;
0 commit comments