
Commit 10d82c5

ONNXRT block fallback for nlp model quantization (#704)
Signed-off-by: yuwenzho <yuwen.zhou@intel.com>
Co-authored-by: Kaihui-intel <kaihui.tang@intel.com>
1 parent a1baa04 commit 10d82c5

30 files changed (+1584 −159 lines)

examples/.config/model_params_onnxrt.json

Lines changed: 70 additions & 0 deletions

```diff
@@ -525,13 +525,27 @@
       "main_script": "main.py",
       "batch_size": 8
     },
+    "hf_xlm-roberta-base": {
+      "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static",
+      "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
+      "input_model": "/tf_dataset2/models/onnx/hf_xlm-roberta-base_dynamic/xlm-roberta-base-mrpc.onnx",
+      "main_script": "main.py",
+      "batch_size": 8
+    },
     "hf_camembert-base_dynamic": {
       "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
       "input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx",
       "main_script": "main.py",
       "batch_size": 8
     },
+    "hf_camembert-base": {
+      "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static",
+      "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
+      "input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx",
+      "main_script": "main.py",
+      "batch_size": 8
+    },
     "hf_MiniLM-L12-H384-uncased_dynamic": {
       "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
@@ -567,6 +581,13 @@
       "main_script": "main.py",
       "batch_size": 8
     },
+    "hf_albert-base-v2": {
+      "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static",
+      "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
+      "input_model": "/tf_dataset2/models/onnx/hf_albert-base-v2_dynamic/albert-base-v2-sst2.onnx",
+      "main_script": "main.py",
+      "batch_size": 8
+    },
     "hf_MiniLM-L6-H384-uncased_dynamic": {
       "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
@@ -672,13 +693,27 @@
       "main_script": "main.py",
       "batch_size": 8
     },
+    "hf_bart-large": {
+      "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static",
+      "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
+      "input_model": "/tf_dataset2/models/onnx/hf_bart-large_dynamic/bart-large-mrpc-hf.onnx",
+      "main_script": "main.py",
+      "batch_size": 8
+    },
     "hf_distilbert-base-uncased-distilled_dynamic": {
       "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset2/datasets/squad",
       "input_model": "/tf_dataset2/models/onnx/hf_distilbert-squad_dynamic/distilbert-base-uncased-distilled-squad.onnx",
       "main_script": "main.py",
       "batch_size": 1
     },
+    "hf_distilbert-base-uncased-distilled": {
+      "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_static",
+      "dataset_location": "/tf_dataset2/datasets/squad",
+      "input_model": "/tf_dataset2/models/onnx/hf_distilbert-squad_dynamic/distilbert-base-uncased-distilled-squad.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
     "hf_bert-large-uncased_dynamic": {
       "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic",
       "dataset_location": "/tf_dataset2/datasets/squad",
@@ -699,6 +734,41 @@
       "input_model": "/tf_dataset2/models/onnx/hf_roberta-large_dynamic/roberta-large-squad2.onnx",
       "main_script": "main.py",
       "batch_size": 1
+    },
+    "hf_roberta-large": {
+      "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_static",
+      "dataset_location": "/tf_dataset2/datasets/squad",
+      "input_model": "/tf_dataset2/models/onnx/hf_roberta-large_dynamic/roberta-large-squad2.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_gpt2_dynamic": {
+      "model_src_dir": "nlp/huggingface_model/language_modeling/quantization/ptq_dynamic",
+      "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/wiki.test.raw",
+      "input_model": "/tf_dataset2/models/onnx/gpt2/gpt2_lm_head_wikitext_model_zoo.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_gpt2": {
+      "model_src_dir": "nlp/huggingface_model/language_modeling/quantization/ptq_static",
+      "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/wiki.test.raw",
+      "input_model": "/tf_dataset2/models/onnx/gpt2/gpt2_lm_head_wikitext_model_zoo.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_distilgpt2_dynamic": {
+      "model_src_dir": "nlp/huggingface_model/language_modeling/quantization/ptq_dynamic",
+      "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/wiki.test.raw",
+      "input_model": "/tf_dataset2/models/onnx/hf_distilgpt2/distilgpt2.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_distilgpt2": {
+      "model_src_dir": "nlp/huggingface_model/language_modeling/quantization/ptq_static",
+      "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/wiki.test.raw",
+      "input_model": "/tf_dataset2/models/onnx/hf_distilgpt2/distilgpt2.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
     }
   }
 }
```
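
Each entry above names the example directory, the reference ONNX model, the dataset path, and the batch size used by the validation runs. A minimal sketch of how such an entry could be consumed follows; the helper itself is hypothetical (no harness code is part of this commit), and the top-level `"onnxrt"` key and the `examples/onnxrt/` prefix are assumptions about the file's usual layout:

```python
import json
import subprocess


def run_example(config_path: str, model_key: str) -> None:
    """Hypothetical helper: launch one example's tuning script from its config entry."""
    with open(config_path) as f:
        # The top-level "onnxrt" key is assumed, not shown in the diff above.
        params = json.load(f)["onnxrt"][model_key]

    cmd = [
        "bash", "run_tuning.sh",
        f"--input_model={params['input_model']}",
        f"--dataset_location={params['dataset_location']}",
        f"--output_model={model_key}_tuned.onnx",
    ]
    # model_src_dir is assumed to be relative to examples/onnxrt/ and to contain
    # the example's main.py and run_tuning.sh.
    subprocess.run(cmd, cwd=f"examples/onnxrt/{params['model_src_dir']}", check=True)


run_example("examples/.config/model_params_onnxrt.json", "hf_gpt2")
```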

examples/README.md

Lines changed: 50 additions & 24 deletions

```diff
@@ -1087,127 +1087,153 @@ Intel® Neural Compressor validated examples with multiple compression technique
     <td>Roberta base MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> /
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>XLM Roberta base MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a></td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Camembert base MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a></td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>MiniLM L12 H384 uncased MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>DistilBERT base uncased SST-2 (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Albert base v2 SST-2 (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
    <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a></td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>MiniLM L6 H384 uncased SST-2 (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>BERT base cased MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Electra small discriminator MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>BERT mini MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Xlnet base cased MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>BART large MRPC (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
     <td>
-    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a></td>
+    <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static">qlinearops</a>
     </td>
   </tr>
   <tr>
     <td>Spanbert SQuAD (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a></td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>Bert base multilingual cased SQuAD (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a></td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>DistilBert base uncased SQuAD (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a></td>
+    <td>Post-Training Dynamic / Static Quantization</td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>BERT large uncased whole word masking SQuAD (HuggingFace)</td>
     <td>Natural Language Processing</td>
     <td>Post-Training Dynamic / Static Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a></td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>Roberta large SQuAD v2 (HuggingFace)</td>
     <td>Natural Language Processing</td>
-    <td>Post-Training Dynamic Quantization</td>
-    <td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a></td>
+    <td>Post-Training Dynamic / Static Quantization</td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static">qlinearops</a>
+    </td>
+  </tr>
+  <tr>
+    <td>GPT2 WikiText (HuggingFace)</td>
+    <td>Natural Language Processing</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static">qlinearops</a>
+    </td>
+  </tr>
+  <tr>
+    <td>DistilGPT2 WikiText (HuggingFace)</td>
+    <td>Natural Language Processing</td>
+    <td>Post-Training Dynamic / Static Quantization</td>
+    <td>
+    <a href="./onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic">integerops</a> / <a href="./onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static">qlinearops</a>
+    </td>
   </tr>
   <tr>
     <td>SSD MobileNet V1</td>
```
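
In the table above, the `integerops` and `qlinearops` links point to the dynamic and static post-training quantization variants of each example. A minimal sketch of the two modes, assuming the Intel Neural Compressor 2.x Python API (`PostTrainingQuantConfig`, `quantization.fit`); the model path and calibration loader below are placeholders, not taken from this commit:

```python
import numpy as np
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig


class DummyCalibLoader:
    """Placeholder calibration loader; the real examples build GLUE/SQuAD/WikiText loaders in main.py."""
    batch_size = 1

    def __iter__(self):
        for _ in range(10):
            # (inputs, label) pairs; input names and shapes depend on the exported model.
            yield {"input_ids": np.ones((1, 128), dtype=np.int64)}, 0


# Dynamic PTQ ("integerops"): weights are quantized offline and activations are
# quantized at runtime, so no calibration data is required.
dynamic_conf = PostTrainingQuantConfig(approach="dynamic")
q_dynamic = quantization.fit("model.onnx", dynamic_conf)
q_dynamic.save("model_int8_dynamic.onnx")

# Static PTQ ("qlinearops"): activation ranges are calibrated ahead of time with a
# representative dataloader, producing QLinear* operators.
static_conf = PostTrainingQuantConfig(approach="static", quant_format="QOperator")
q_static = quantization.fit("model.onnx", static_conf, calib_dataloader=DummyCalibLoader())
q_static.save("model_int8_static.onnx")
```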
Lines changed: 53 additions & 0 deletions (new file)

Step-by-Step
============

This example loads a language model and confirms its accuracy and speed on the [WikiText](https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/) dataset.

# Prerequisite

## 1. Environment
```shell
pip install neural-compressor
pip install -r requirements.txt
```
> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).

## 2. Prepare Model

Supported model identifiers from [huggingface.co](https://huggingface.co/):

| Model Identifier |
|:----------------:|
| gpt2             |
| distilgpt2       |

Use the `export.py` script to convert the model to ONNX; it requires transformers==3.2.0.

```shell
python export.py --model_name_or_path=gpt2 # or other supported model identifier
```
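
For reference, a rough sketch of what such an export step can look like with `transformers` and `torch.onnx.export`; the input/output names, dynamic axes, and opset version are assumptions, not necessarily what this example's `export.py` uses:

```python
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Assumed export recipe for the GPT-2 family; the actual export.py may differ.
model_name = "gpt2"  # or "distilgpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

tokenizer = GPT2Tokenizer.from_pretrained(model_name)
input_ids = tokenizer("Hello, world", return_tensors="pt")["input_ids"]

torch.onnx.export(
    model,
    (input_ids,),
    f"{model_name}.onnx",
    input_names=["input_ids"],
    output_names=["logits"],
    dynamic_axes={
        "input_ids": {0: "batch", 1: "sequence"},
        "logits": {0: "batch", 1: "sequence"},
    },
    opset_version=13,
)
```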

## 3. Prepare Dataset
Please download the [WikiText-2 dataset](https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip).

# Run

## 1. Quantization

Quantize the model with dynamic quantization:

```bash
# --input_model: model path as *.onnx
bash run_tuning.sh --dataset_location=/path/to/wikitext-2-raw/wiki.test.raw \
                   --input_model=path/to/model \
                   --output_model=path/to/model_tune
```

## 2. Benchmark

```bash
# --input_model: model path as *.onnx
bash run_benchmark.sh --dataset_location=/path/to/wikitext-2-raw/wiki.test.raw \
                      --input_model=path/to/model \
                      --batch_size=batch_size \
                      --mode=performance # or accuracy
```
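
In these examples, `run_benchmark.sh` typically forwards its arguments to the example's `main.py` (the `main_script` named in the config above). A minimal sketch of a performance measurement, assuming the Intel Neural Compressor 2.x benchmark API (`BenchmarkConfig`, `benchmark.fit`); the dataloader is a placeholder:

```python
import numpy as np
from neural_compressor import benchmark
from neural_compressor.config import BenchmarkConfig


class DummyWikiTextLoader:
    """Placeholder loader; the real example feeds tokenized WikiText-2 batches."""
    batch_size = 1

    def __iter__(self):
        for _ in range(100):
            yield {"input_ids": np.ones((1, 128), dtype=np.int64)}, 0  # (inputs, label)


conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=1)
benchmark.fit("path/to/model_tune.onnx", conf, b_dataloader=DummyWikiTextLoader())
```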
