
Commit 10d82c5

ONNXRT block fallback for nlp model quantization (#704)
Signed-off-by: yuwenzho <yuwen.zhou@intel.com>
Co-authored-by: Kaihui-intel <kaihui.tang@intel.com>
1 parent a1baa04 commit 10d82c5

30 files changed (+1584 −159 lines)

examples/.config/model_params_onnxrt.json

Lines changed: 70 additions & 0 deletions

```diff
@@ -525,13 +525,27 @@
       "main_script": "main.py",
       "batch_size": 8
     },
+    "hf_xlm-roberta-base": {
+      "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static",
+      "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
+      "input_model": "/tf_dataset2/models/onnx/hf_xlm-roberta-base_dynamic/xlm-roberta-base-mrpc.onnx",
+      "main_script": "main.py",
+      "batch_size": 8
+    },
     "hf_camembert-base_dynamic": {
       "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
       "input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx",
       "main_script": "main.py",
       "batch_size": 8
     },
+    "hf_camembert-base": {
+      "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static",
+      "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
+      "input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx",
+      "main_script": "main.py",
+      "batch_size": 8
+    },
     "hf_MiniLM-L12-H384-uncased_dynamic": {
       "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
@@ -567,6 +581,13 @@
       "main_script": "main.py",
       "batch_size": 8
     },
+    "hf_albert-base-v2": {
+      "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static",
+      "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
+      "input_model": "/tf_dataset2/models/onnx/hf_albert-base-v2_dynamic/albert-base-v2-sst2.onnx",
+      "main_script": "main.py",
+      "batch_size": 8
+    },
     "hf_MiniLM-L6-H384-uncased_dynamic": {
       "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
@@ -672,13 +693,27 @@
       "main_script": "main.py",
       "batch_size": 8
     },
+    "hf_bart-large": {
+      "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static",
+      "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
+      "input_model": "/tf_dataset2/models/onnx/hf_bart-large_dynamic/bart-large-mrpc-hf.onnx",
+      "main_script": "main.py",
+      "batch_size": 8
+    },
     "hf_distilbert-base-uncased-distilled_dynamic": {
       "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset2/datasets/squad",
       "input_model": "/tf_dataset2/models/onnx/hf_distilbert-squad_dynamic/distilbert-base-uncased-distilled-squad.onnx",
       "main_script": "main.py",
       "batch_size": 1
     },
+    "hf_distilbert-base-uncased-distilled": {
+      "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_static",
+      "dataset_location": "/tf_dataset2/datasets/squad",
+      "input_model": "/tf_dataset2/models/onnx/hf_distilbert-squad_dynamic/distilbert-base-uncased-distilled-squad.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
     "hf_bert-large-uncased_dynamic": {
       "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset2/datasets/squad",
@@ -699,6 +734,41 @@
       "input_model": "/tf_dataset2/models/onnx/hf_roberta-large_dynamic/roberta-large-squad2.onnx",
       "main_script": "main.py",
       "batch_size": 1
+    },
+    "hf_roberta-large": {
+      "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_static",
+      "dataset_location": "/tf_dataset2/datasets/squad",
+      "input_model": "/tf_dataset2/models/onnx/hf_roberta-large_dynamic/roberta-large-squad2.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_gpt2_dynamic": {
+      "model_src_dir": "nlp/huggingface_model/language_modeling/quantization/ptq_dynamic",
+      "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/wiki.test.raw",
+      "input_model": "/tf_dataset2/models/onnx/gpt2/gpt2_lm_head_wikitext_model_zoo.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_gpt2": {
+      "model_src_dir": "nlp/huggingface_model/language_modeling/quantization/ptq_static",
+      "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/wiki.test.raw",
+      "input_model": "/tf_dataset2/models/onnx/gpt2/gpt2_lm_head_wikitext_model_zoo.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_distilgpt2_dynamic": {
+      "model_src_dir": "nlp/huggingface_model/language_modeling/quantization/ptq_dynamic",
+      "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/wiki.test.raw",
+      "input_model": "/tf_dataset2/models/onnx/hf_distilgpt2/distilgpt2.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_distilgpt2": {
+      "model_src_dir": "nlp/huggingface_model/language_modeling/quantization/ptq_static",
+      "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/wiki.test.raw",
+      "input_model": "/tf_dataset2/models/onnx/hf_distilgpt2/distilgpt2.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
     }
   }
 }
```
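
Each entry above names the example directory, the reference ONNX model, the dataset path, and the batch size used by the validation runs. A minimal sketch of how such an entry could be consumed follows; the helper itself is hypothetical (no harness code is part of this commit), and the top-level `"onnxrt"` key and the `examples/onnxrt/` prefix are assumptions about the file's usual layout:

```python
import json
import subprocess


def run_example(config_path: str, model_key: str) -> None:
    """Hypothetical helper: launch one example's tuning script from its config entry."""
    with open(config_path) as f:
        # The top-level "onnxrt" key is assumed, not shown in the diff above.
        params = json.load(f)["onnxrt"][model_key]

    cmd = [
        "bash", "run_tuning.sh",
        f"--input_model={params['input_model']}",
        f"--dataset_location={params['dataset_location']}",
        f"--output_model={model_key}_tuned.onnx",
    ]
    # model_src_dir is assumed to be relative to examples/onnxrt/ and to contain
    # the example's main.py and run_tuning.sh.
    subprocess.run(cmd, cwd=f"examples/onnxrt/{params['model_src_dir']}", check=True)


run_example("examples/.config/model_params_onnxrt.json", "hf_gpt2")
```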

examples/README.md

Lines changed: 50 additions & 24 deletions

```diff
@@ -1087,127 +1087,153 @@ Intel® Neural Compressor validated examples with multiple compression technique
     <td>Roberta base MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> /
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>XLM Roberta base MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a></td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Camembert base MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a></td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>MiniLM L12 H384 uncased MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>DistilBERT base uncased SST-2 (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Albert base v2 SST-2 (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
    <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a></td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>MiniLM L6 H384 uncased SST-2 (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>BERT base cased MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Electra small discriminator MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>BERT mini MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Xlnet base cased MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>BART large MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a></td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Spanbert SQuAD (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a></td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>Bert base multilingual cased SQuAD (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a></td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>DistilBert base uncased SQuAD (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a></td>
+    <td>Post-Training Dynamic / Static Quantization</td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>BERT large uncased whole word masking SQuAD (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a></td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>Roberta large SQuAD v2 (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a></td>
+    <td>Post-Training Dynamic / Static Quantization</td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
+  </tr>
+  <tr>
+    <td>GPT2 WikiText (HuggingFace)</td>
+    <td>Natural Language Processing</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static">qlinearops</a>
+    </td>
+  </tr>
+  <tr>
+    <td>DistilGPT2 WikiText (HuggingFace)</td>
+    <td>Natural Language Processing</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>SSD MobileNet V1</td>
```
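
In the table above, the `integerops` and `qlinearops` links point to the dynamic and static post-training quantization variants of each example. A minimal sketch of the two modes, assuming the Intel Neural Compressor 2.x Python API (`PostTrainingQuantConfig`, `quantization.fit`); the model path and calibration loader below are placeholders, not taken from this commit:

```python
import numpy as np
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig


class DummyCalibLoader:
    """Placeholder calibration loader; the real examples build GLUE/SQuAD/WikiText loaders in main.py."""
    batch_size = 1

    def __iter__(self):
        for _ in range(10):
            # (inputs, label) pairs; input names and shapes depend on the exported model.
            yield {"input_ids": np.ones((1, 128), dtype=np.int64)}, 0


# Dynamic PTQ ("integerops"): weights are quantized offline and activations are
# quantized at runtime, so no calibration data is required.
dynamic_conf = PostTrainingQuantConfig(approach="dynamic")
q_dynamic = quantization.fit("model.onnx", dynamic_conf)
q_dynamic.save("model_int8_dynamic.onnx")

# Static PTQ ("qlinearops"): activation ranges are calibrated ahead of time with a
# representative dataloader, producing QLinear* operators.
static_conf = PostTrainingQuantConfig(approach="static", quant_format="QOperator")
q_static = quantization.fit("model.onnx", static_conf, calib_dataloader=DummyCalibLoader())
q_static.save("model_int8_static.onnx")
```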
Lines changed: 53 additions & 0 deletions (new file)

Step-by-Step
============

This example loads a language model and confirms its accuracy and speed on the [WikiText](https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/) dataset.

# Prerequisite

## 1. Environment
```shell
pip install neural-compressor
pip install -r requirements.txt
```
> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).

## 2. Prepare Model

Supported model identifiers from [huggingface.co](https://huggingface.co/):

| Model Identifier |
|:----------------:|
| gpt2             |
| distilgpt2       |

Use the `export.py` script to convert the model to ONNX; it requires transformers==3.2.0.

```shell
python export.py --model_name_or_path=gpt2 # or other supported model identifier
```
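
For reference, a rough sketch of what such an export step can look like with `transformers` and `torch.onnx.export`; the input/output names, dynamic axes, and opset version are assumptions, not necessarily what this example's `export.py` uses:

```python
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Assumed export recipe for the GPT-2 family; the actual export.py may differ.
model_name = "gpt2"  # or "distilgpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

tokenizer = GPT2Tokenizer.from_pretrained(model_name)
input_ids = tokenizer("Hello, world", return_tensors="pt")["input_ids"]

torch.onnx.export(
    model,
    (input_ids,),
    f"{model_name}.onnx",
    input_names=["input_ids"],
    output_names=["logits"],
    dynamic_axes={
        "input_ids": {0: "batch", 1: "sequence"},
        "logits": {0: "batch", 1: "sequence"},
    },
    opset_version=13,
)
```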

## 3. Prepare Dataset
Please download the [WikiText-2 dataset](https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip).

# Run

## 1. Quantization

Quantize the model with dynamic quantization:

```bash
# --input_model: model path as *.onnx
bash run_tuning.sh --dataset_location=/path/to/wikitext-2-raw/wiki.test.raw \
                   --input_model=path/to/model \
                   --output_model=path/to/model_tune
```

## 2. Benchmark

```bash
# --input_model: model path as *.onnx
bash run_benchmark.sh --dataset_location=/path/to/wikitext-2-raw/wiki.test.raw \
                      --input_model=path/to/model \
                      --batch_size=batch_size \
                      --mode=performance # or accuracy
```
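
In these examples, `run_benchmark.sh` typically forwards its arguments to the example's `main.py` (the `main_script` named in the config above). A minimal sketch of a performance measurement, assuming the Intel Neural Compressor 2.x benchmark API (`BenchmarkConfig`, `benchmark.fit`); the dataloader is a placeholder:

```python
import numpy as np
from neural_compressor import benchmark
from neural_compressor.config import BenchmarkConfig


class DummyWikiTextLoader:
    """Placeholder loader; the real example feeds tokenized WikiText-2 batches."""
    batch_size = 1

    def __iter__(self):
        for _ in range(100):
            yield {"input_ids": np.ones((1, 128), dtype=np.int64)}, 0  # (inputs, label)


conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=1)
benchmark.fit("path/to/model_tune.onnx", conf, b_dataloader=DummyWikiTextLoader())
```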
