@@ -15051,15 +15051,21 @@ size_t quantize_iq1_m(const float * restrict src, void * restrict dst, int64_t n
1505115051
1505215052// ============================ 4-bit non-linear quants
1505315053
// Lookup table used by best_index_iq4nl(), indexed by (int)x - values[0].
// An entry e < 16 is returned directly as the grid index.
// An entry e >= 16 means the two neighbours values[e-16] and values[e-15]
// must be compared against x to pick the closer one.
// NOTE(review): the 241 entries presumably span the full range of the
// 16-entry IQ4_NL value grid (values[15] - values[0] == 240) -- confirm
// against the grid definition, which is not visible here.
static const int8_t iq4nl_index[241] = {
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 16, 16,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1, 17, 17,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 18,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3, 19,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 20,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5, 21, 21,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6, 22,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7, 23, 23,  8,  8,  8,  8,
     8,  8,  8,  8,  8,  8, 24,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 25, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 26, 26,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 27, 27, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 28, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 29, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 30, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15
};

// Returns the index (0..15) into `values` selected for x via iq4nl_index.
//
// values: pointer to the 16-entry value grid; values[0] anchors the table.
// x:      the scaled input value.
//
// Inputs below values[0] (and NaN) yield 0; inputs at or beyond
// values[0] + 241 yield 15.
static inline int best_index_iq4nl(const int8_t * values, float x) {
    const int table_size = (int)(sizeof(iq4nl_index)/sizeof(iq4nl_index[0]));
    const int lo = values[0];
    // Range-check in the float domain BEFORE casting: converting a NaN or a
    // float outside int's range to int is undefined behaviour.
    if (!(x >= (float)lo)) return 0;                  // below the table, or NaN
    if (x >= (float)(lo + table_size)) return 15;     // above the table
    int ix = (int)x - lo;                             // cast is now well-defined
    if (ix < 0 || ix >= table_size) return ix < 0 ? 0 : 15;
    ix = iq4nl_index[ix];
    if (ix < 16) return ix;
    // Ambiguous bucket: pick whichever of the two neighbouring values is closer.
    return x - values[ix-16] < values[ix-15] - x ? ix-16 : ix-15;
}
1506415070
1506515071static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * restrict x,
@@ -15102,7 +15108,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
1510215108 float sumqx = 0, sumq2 = 0;
1510315109 for (int j = 0; j < block_size; ++j) {
1510415110 float al = id*xb[j];
15105- int l = best_index_int8(16, values, al);
15111+ int l = best_index_iq4nl( values, al);
1510615112 Lb[j] = l;
1510715113 float q = values[l];
1510815114 float w = weight[j];
@@ -15116,7 +15122,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
1511615122 sumqx = sumq2 = 0;
1511715123 for (int j = 0; j < block_size; ++j) {
1511815124 float al = id*xb[j];
15119- int l = best_index_int8(16, values, al);
15125+ int l = best_index_iq4nl( values, al);
1512015126 float q = values[l];
1512115127 float w = weight[j];
1512215128 sumqx += w*q*xb[j];
@@ -15147,7 +15153,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
1514715153 uint8_t * Lb = L + ib*block_size;
1514815154 const float * xb = x + ib*block_size;
1514915155 for (int j = 0; j < block_size; ++j) {
15150- Lb[j] = best_index_int8(16, values, idl*xb[j]);
15156+ Lb[j] = best_index_iq4nl( values, idl*xb[j]);
1515115157 }
1515215158 l += 32;
1515315159 uint8_t l_l = l & 0xf;
@@ -15161,7 +15167,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
1516115167 if (ntry > 0) {
1516215168 float id = scales[0] ? 1/scales[0] : 0;
1516315169 for (int j = 0; j < super_block_size; ++j) {
15164- L[j] = best_index_int8(16, values, id*x[j]);
15170+ L[j] = best_index_iq4nl( values, id*x[j]);
1516515171 }
1516615172 }
1516715173 }
0 commit comments