Skip to content

Commit

Permalink
Fix quantization of all 0s (ml-explore#1028)
Browse files Browse the repository at this point in the history
  • Loading branch information
angeloskath authored and Rifur13 committed Apr 24, 2024
1 parent 59b8346 commit d28a4f6
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
5 changes: 4 additions & 1 deletion mlx/ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3300,7 +3300,10 @@ std::tuple<array, array, array> quantize(
reshape(w, {w.shape(0), w.shape(1) / group_size, group_size}, s);
array w_max = max(packed_w, /* axis= */ -1, /* keepdims= */ true, s);
array w_min = min(packed_w, /* axis= */ -1, /* keepdims= */ true, s);
-  array delta = divide(subtract(w_max, w_min, s), array(n_bins, w.dtype()), s);
+  array delta = maximum(
+      divide(subtract(w_max, w_min, s), array(n_bins, w.dtype()), s),
+      array(1e-7, w.dtype()),
+      s);
array scales = squeeze(delta, -1, s);
array biases = squeeze(w_min, -1, s);

Expand Down
8 changes: 8 additions & 0 deletions python/tests/test_quantized.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ def test_quantize_dequantize(self):
eps = 1e-6
self.assertTrue((errors <= (scales[..., None] + eps)).all())

+        # test quantize/dequantize 0s
+        a = mx.zeros((256, 512))
+        for gs in [32, 64, 128]:
+            for b in [2, 4, 8]:
+                w_q, scales, biases = mx.quantize(a, gs, b)
+                a_hat = mx.dequantize(w_q, scales, biases, gs, b)
+                self.assertTrue(mx.all(a_hat == 0))

def test_qmm(self):
key = mx.random.key(0)
k1, k2 = mx.random.split(key)
Expand Down

0 comments on commit d28a4f6

Please sign in to comment.