drop quantize.py (now that models are using a single file)
prusnak committed Mar 30, 2023
1 parent 3df890a commit 9733104
Showing 2 changed files with 2 additions and 133 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -155,8 +155,8 @@ python3 -m pip install torch numpy sentencepiece
 # convert the 7B model to ggml FP16 format
 python3 convert-pth-to-ggml.py models/7B/ 1

-# quantize the model to 4-bits
-python3 quantize.py 7B
+# quantize the model to 4-bits (using method 2 = q4_0)
+./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin 2

 # run the inference
 ./main -m ./models/7B/ggml-model-q4_0.bin -n 128
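For reference, the new invocation takes the f16 input path, the q4_0 output path, and a method id (2 selects q4_0). A minimal sketch of running it over several model sizes; the loop and the 13B path are illustrative assumptions, not part of this commit:

# illustrative sketch: quantize each converted model to 4-bit q4_0 (method 2);
# assumes each model directory already holds a single ggml-model-f16.bin
for model in 7B 13B; do
  ./quantize "./models/$model/ggml-model-f16.bin" \
             "./models/$model/ggml-model-q4_0.bin" 2
done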
131 changes: 0 additions & 131 deletions quantize.py

This file was deleted.
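The deleted script was a convenience wrapper from the multi-part model era. A rough shell equivalent of what it automated, assuming the old part-file naming scheme (ggml-model-f16.bin, ggml-model-f16.bin.1, ...); this is a hypothetical reconstruction based on the commit message, not the file's actual contents:

# hypothetical reconstruction (assumption, not the deleted file's code):
# quantize every part of a possibly multi-part f16 model to q4_0.
# With single-file models the loop body runs once, making the wrapper redundant.
for f16 in ./models/7B/ggml-model-f16.bin*; do
  ./quantize "$f16" "${f16/f16/q4_0}" 2
done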
