Skip to content

Commit 8ce094c

Browse files
authored
Freeze IPEX version for INT8 SQ support (#2221)
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
1 parent 4eaef0f commit 8ce094c

File tree

4 files changed

+26
-15
lines changed

4 files changed

+26
-15
lines changed

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ python run_clm_no_trainer.py \
4949
```
5050

5151
### LLAMA2-7b/13b/70b
52-
>Note: LLAMA requires IPEX requirements >= 2.1 to get better accuracy.
52+
>Note: LLAMA requires IPEX >= 2.1 for better accuracy, and transformers < 4.48.0.
5353
#### Quantization
5454

5555
```bash

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ accelerate
22
protobuf
33
sentencepiece != 0.1.92
44
datasets >= 1.1.3
5-
torch >= 1.10
6-
transformers < 4.48.0 # TODO: ILITV-3858
5+
torch == 2.7.0
6+
transformers
77
pytest
88
wandb
99
einops
1010
neural-compressor
1111
lm_eval <= 0.4.7
1212
peft
1313
optimum-intel
14-
intel_extension_for_pytorch
14+
intel_extension_for_pytorch == 2.7.0

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
"--approach", type=str, default="static", help="Select from ['dynamic', 'static', 'weight-only']"
3131
)
3232
parser.add_argument("--optimized", action="store_true")
33+
parser.add_argument("--autotune", action="store_true", help="Use autotune to find the best alpha for SmoothQuant.")
3334
parser.add_argument("--ipex", action="store_true", help="Use intel extension for pytorch.")
3435
parser.add_argument("--load", action="store_true", help="Load quantized model.")
3536
parser.add_argument("--accuracy", action="store_true")
@@ -204,15 +205,25 @@ def eval_func(model):
204205

205206
example_inputs = get_example_inputs(user_model, calib_dataloader)
206207

207-
from neural_compressor.torch.quantization import SmoothQuantConfig, autotune, TuningConfig
208-
tune_config = TuningConfig(config_set=SmoothQuantConfig.get_config_set_for_tuning())
209-
user_model = autotune(
210-
user_model,
211-
tune_config=tune_config,
212-
eval_fn=eval_func,
213-
run_fn=run_fn,
214-
example_inputs=example_inputs,
215-
)
208+
if args.autotune:
209+
from neural_compressor.torch.quantization import SmoothQuantConfig, autotune, TuningConfig
210+
tune_config = TuningConfig(config_set=SmoothQuantConfig.get_config_set_for_tuning())
211+
user_model = autotune(
212+
user_model,
213+
tune_config=tune_config,
214+
eval_fn=eval_func,
215+
run_fn=run_fn,
216+
example_inputs=example_inputs,
217+
)
218+
else:
219+
from neural_compressor.torch.quantization import SmoothQuantConfig, prepare, convert
220+
args.alpha = eval(args.alpha)
221+
excluded_precisions = [] if args.int8_bf16_mixed else ["bf16"]
222+
quant_config = SmoothQuantConfig(alpha=args.alpha, folding=False, excluded_precisions=excluded_precisions)
223+
224+
user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs)
225+
run_fn(user_model)
226+
user_model = convert(user_model)
216227
user_model.save(args.output_dir)
217228

218229

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_quant.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ function run_tuning {
4747
extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
4848
elif [ "${topology}" = "llama2_7b_ipex_sq" ]; then
4949
model_name_or_path="meta-llama/Llama-2-7b-hf"
50-
extra_cmd=$extra_cmd" --ipex --sq --alpha 0.8"
50+
extra_cmd=$extra_cmd" --ipex --sq --alpha 0.65"
5151
elif [ "${topology}" = "gpt_j_ipex_sq" ]; then
5252
model_name_or_path="EleutherAI/gpt-j-6b"
53-
extra_cmd=$extra_cmd" --ipex --sq --alpha 1.0"
53+
extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
5454
fi
5555

5656
python -u run_clm_no_trainer.py \

0 commit comments

Comments
 (0)