File tree 1 file changed +1
-0
lines changed 1 file changed +1
-0
lines changed Original file line number Diff line number Diff line change @@ -21,6 +21,7 @@ export MODEL_REPO=meta-llama/Meta-Llama-3.1-8B
21
21
# Baseline run: no --quantization flag. The checkpoint path is written as one
# quoted word so it reaches generate.py as a single argument.
python generate.py --checkpoint_path "$CHECKPOINT_PATH/$MODEL_REPO/model.pth" --compile --write_result benchmark_results.txt
22
22
# Run with --quantization int8wo. The checkpoint path is written as one quoted
# word so it reaches generate.py as a single argument.
python generate.py --checkpoint_path "$CHECKPOINT_PATH/$MODEL_REPO/model.pth" --compile --quantization int8wo --write_result benchmark_results.txt
23
23
# Run with --quantization int4wo-64. The checkpoint path is written as one
# quoted word so it reaches generate.py as a single argument.
python generate.py --checkpoint_path "$CHECKPOINT_PATH/$MODEL_REPO/model.pth" --compile --quantization int4wo-64 --write_result benchmark_results.txt
24
+ # The float8 runs below were run on an H100; float8 is not supported on CUDA arch < 8.9
24
25
# Run with --quantization float8wo. The checkpoint path is written as one
# quoted word so it reaches generate.py as a single argument.
python generate.py --checkpoint_path "$CHECKPOINT_PATH/$MODEL_REPO/model.pth" --compile --quantization float8wo --write_result benchmark_results.txt
25
26
# Run with --quantization float8dq-tensor. The checkpoint path is written as
# one quoted word so it reaches generate.py as a single argument.
python generate.py --checkpoint_path "$CHECKPOINT_PATH/$MODEL_REPO/model.pth" --compile --quantization float8dq-tensor --write_result benchmark_results.txt
26
27
# Run with --quantization float8dq-wo. The checkpoint path is written as one
# quoted word so it reaches generate.py as a single argument.
python generate.py --checkpoint_path "$CHECKPOINT_PATH/$MODEL_REPO/model.pth" --compile --quantization float8dq-wo --write_result benchmark_results.txt
You can’t perform that action at this time.
0 commit comments