Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IQ3_S: a much better alternative to Q3_K #5676

Merged
merged 27 commits on Feb 24, 2024
Merged
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
10a47fa
iq4_nl: squash commits for easier rebase
Kawrakow Feb 19, 2024
5691fec
Resurrecting iq3_xs
Kawrakow Feb 20, 2024
76aff09
Minor PPL improvement via a block scale fudge factor
Kawrakow Feb 20, 2024
5be4e7a
Minor improvement via 3 neighbours
Kawrakow Feb 20, 2024
f1255c5
iq3_xs: working scalar and AVX2 dot products
Kawrakow Feb 20, 2024
76214ab
iq3_xs: ARM_NEON dot product - works but extremely slow (10 t/s)
Kawrakow Feb 20, 2024
38aa7b1
iq3_xs: working Metal implementation
Kawrakow Feb 20, 2024
2ec600b
Adding IQ3_M - IQ3_XS mix with mostly Q4_K
Kawrakow Feb 21, 2024
d83fdda
iiq3_xs: a 3.4375 bpw variant
Kawrakow Feb 22, 2024
eacff4a
iq3_xs: make CUDA work for new version
Kawrakow Feb 22, 2024
1fef4b8
iq3_xs: make scalar and AVX2 work for new version
Kawrakow Feb 22, 2024
1328331
iq3_s: make ARM_NEON work with new version
Kawrakow Feb 22, 2024
1777825
iq3_xs: make new version work on metal
Kawrakow Feb 22, 2024
87038fe
iq3_xs: tiny Metal speed improvement
Kawrakow Feb 22, 2024
4d5feeb
iq3_xs: tiny Metal speed improvement
Kawrakow Feb 22, 2024
b25f996
Fix stupid warning
Kawrakow Feb 22, 2024
272c7f7
Q3_K_XS now uses a mix of IQ3_XS and IQ3_XXS
Kawrakow Feb 22, 2024
2730225
iq3_xs: rename to iq3_s
Kawrakow Feb 22, 2024
47cf30b
iq3_s: make tests pass
Kawrakow Feb 22, 2024
cd6a0f0
Move Q3_K_XS mix to 3.25 bpw
Kawrakow Feb 23, 2024
436a146
Attempt to fix failing tests
Kawrakow Feb 23, 2024
303f3f3
Another attempt to fix the Windows builds
Kawrakow Feb 23, 2024
0d6d185
Attempt to fix ROCm
Kawrakow Feb 23, 2024
1d47de3
ROCm again
Kawrakow Feb 23, 2024
e6e61e3
iq3_s: partial fix for QK_K = 64
Kawrakow Feb 23, 2024
cbd950b
iq3_s: make it work on metal for QK_K = 64
Kawrakow Feb 23, 2024
e1b8efb
Will this fix ROCm?
Kawrakow Feb 23, 2024
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Attempt to fix failing tests
  • Loading branch information
Kawrakow committed Feb 23, 2024
commit 436a146f98d6099080dc7f008c2bb9cac6c07018
38 changes: 25 additions & 13 deletions ggml-quants.c
Original file line number Diff line number Diff line change
Expand Up @@ -8916,6 +8916,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, size_t bs, const void * r

#endif

#if defined (__AVX2__) || defined (__ARM_NEON)
static const int8_t keven_signs_q2xs[1024] = {
1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, -1, 1, -1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1,
1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, 1, 1, 1, 1, 1, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1, 1, 1, -1,
Expand Down Expand Up @@ -8950,6 +8951,7 @@ static const int8_t keven_signs_q2xs[1024] = {
1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 1, -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, 1,
1, 1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
};
#endif

void ggml_vec_dot_iq2_xxs_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
assert(n % QK_K == 0);
Expand Down Expand Up @@ -10991,7 +10993,16 @@ void quantize_row_iq3_xxs_reference(const float * restrict x, block_iq3_xxs * re
}

static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, void * restrict vy, int n,
const float * restrict quant_weights) {
const float * restrict quant_weights,
float * scales,
float * weight,
float * xval,
int8_t * L,
int8_t * Laux,
float * waux,
bool * is_on_grid,
bool * is_on_grid_aux,
uint8_t * block_signs) {

const int gindex = iq3_data_index(512);

Expand All @@ -11011,16 +11022,6 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo

block_iq3_s * y = vy;

float scales[QK_K/block_size];
float weight[block_size];
float xval[block_size];
int8_t L[block_size];
int8_t Laux[block_size];
float waux[block_size];
bool is_on_grid[block_size/4];
bool is_on_grid_aux[block_size/4];
uint8_t block_signs[block_size/8];

const int bs4 = block_size/4;
const int bs8 = block_size/8;

Expand Down Expand Up @@ -11176,9 +11177,20 @@ size_t quantize_iq3_s(const float * src, void * dst, int nrow, int n_per_row, in
(void)hist;
GGML_ASSERT(n_per_row%QK_K == 0);
int nblock = n_per_row/QK_K;
const int block_size = 32;
float scales[QK_K/block_size];
float weight[block_size];
float xval[block_size];
int8_t L[block_size];
int8_t Laux[block_size];
float waux[block_size];
bool is_on_grid[block_size/4];
bool is_on_grid_aux[block_size/4];
uint8_t block_signs[block_size/8];
char * qrow = (char *)dst;
for (int row = 0; row < nrow; ++row) {
quantize_row_iq3_s_impl(32, src, qrow, n_per_row, quant_weights);
quantize_row_iq3_s_impl(32, src, qrow, n_per_row, quant_weights,
scales, weight, xval, L, Laux, waux, is_on_grid, is_on_grid_aux, block_signs);
src += n_per_row;
qrow += nblock*sizeof(block_iq3_s);
}
Expand All @@ -11193,7 +11205,7 @@ void quantize_row_iq3_s(const float * restrict x, void * restrict vy, int k) {

/*
 * Reference row quantizer for IQ3_S: quantizes the k floats at x into the
 * block_iq3_s array y. k must be a multiple of QK_K (asserted below).
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers are missing.
 * The two calls below look like the removed and the added version of a single
 * statement, not two calls meant to run back to back -- confirm against the
 * actual file before relying on this text.
 */
void quantize_row_iq3_s_reference(const float * restrict x, block_iq3_s * restrict y, int k) {
assert(k % QK_K == 0);
// Presumably the pre-change line: calls the implementation directly with
// block_size = 32 and quant_weights = NULL.
quantize_row_iq3_s_impl(32, x, y, k, NULL);
// Presumably the post-change line: routes through quantize_iq3_s with
// nrow = 1 and n_per_row = k; both trailing pointer arguments are NULL.
// quantize_iq3_s declares the scratch arrays that it passes on to the
// implementation.
quantize_iq3_s(x, y, 1, k, NULL, NULL);
}


Expand Down
Loading