
Update e2e quantization example #486

Open · wants to merge 13 commits into `main`
94 changes: 94 additions & 0 deletions quantization/image_classification/trt/resnet50/README.md
@@ -0,0 +1,94 @@
# ONNX PTQ overview
The following is an end-to-end example of using the ORT quantization tool to quantize an ONNX model (specifically an image classification model) and then running and evaluating the quantized model with the TensorRT execution provider (TRT EP).

## Note
Implicit quantization (using a calibration table in TRT EP to call `setDynamicRange`) is deprecated as of TensorRT 10.1. We suggest using explicit quantization instead, i.e. the QDQ format.
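
For reference, the snippet below is a minimal, self-contained sketch of explicit (QDQ) quantization with ORT's `quantize_static` API. It is not part of the example script: the model paths, the input name `data`, and the toy random-data reader are placeholders that you would replace with your model and a real calibration reader such as the `ImageNetDataReader` used in `e2e_tensorrt_resnet_example.py`.

```python
import numpy as np
from onnxruntime.quantization import (
    CalibrationDataReader,
    QuantFormat,
    QuantType,
    quantize_static,
)

class ToyDataReader(CalibrationDataReader):
    """Feeds a few random NCHW batches; stands in for a real calibration reader."""
    def __init__(self, input_name, num_batches=8):
        self._batches = iter(
            {input_name: np.random.rand(1, 3, 224, 224).astype(np.float32)}
            for _ in range(num_batches)
        )

    def get_next(self):
        return next(self._batches, None)

quantize_static(
    model_input="resnet50-v2-7.onnx",        # placeholder FP32 model path
    model_output="resnet50-v2-7.qdq.onnx",   # explicitly quantized (QDQ) output
    calibration_data_reader=ToyDataReader("data"),  # "data" is a placeholder input name
    quant_format=QuantFormat.QDQ,            # Q/DQ nodes are inserted into the graph
    activation_type=QuantType.QInt8,
    weight_type=QuantType.QInt8,
    per_channel=True,
)
```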
## Environment setup
### Dataset
First, prepare the dataset for calibration. TensorRT recommends a calibration dataset of at least 500 images for CNN and ViT models.
Generally, the dataset used for calibration should differ from the one used for evaluation; however, to keep the sample code simple, we use the same dataset for both calibration and evaluation. We recommend the ImageNet 2012 classification dataset for this purpose.

In addition to the sample code provided below, TensorRT Model Optimizer (which leverages `torchvision.datasets`) already supports working with the ImageNet dataset.

#### Prepare ImageNet dataset
You can download the dataset either from [Kaggle](https://www.kaggle.com/c/imagenet-object-localization-challenge/data) or from the original image-net.org website: the validation [tarball](https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar) and the devkit [tarball](https://image-net.org/data/ILSVRC/2012/ILSVRC2012_devkit_t12.tar.gz).
```shell
mkdir ILSVRC2012
cd ILSVRC2012
wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar --no-check-certificate
wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_devkit_t12.tar.gz --no-check-certificate
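# One possible way to unpack them (assumed commands): the validation tarball is flat, so extract it into val/
mkdir val && tar -xf ILSVRC2012_img_val.tar -C val
tar -xzf ILSVRC2012_devkit_t12.tar.gz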
```
Untar the tarballs into the `val` and `ILSVRC2012_devkit_t12` folders respectively, as shown above.

The resulting dataset layout should look like the following; the sample code expects this layout:

```
|-- ILSVRC2012_devkit_t12
| |-- COPYING
| |-- data
| | |-- ILSVRC2012_validation_ground_truth.txt
| | `-- meta.mat
| |-- evaluation
| | |-- VOCreadrecxml.m
| | |-- VOCreadxml.m
| | |-- VOCxml2struct.m
| | |-- compute_overlap.m
| | |-- demo.val.pred.det.txt
| | |-- demo.val.pred.txt
| | |-- demo_eval.m
| | |-- eval_flat.m
| | |-- eval_localization_flat.m
| | |-- get_class2node.m
| | `-- make_hash.m
| `-- readme.txt
|-- meta.bin
|-- synset_words.txt
`-- val
|-- ILSVRC2012_val_00000001.JPEG
|-- ILSVRC2012_val_00000002.JPEG
|-- ILSVRC2012_val_00000003.JPEG
...
```

If you are using the ImageNet validation set laid out as above, run the following command to regroup the images into per-class subfolders:
```shell
cd val/
wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
```
```
|-- ILSVRC2012_devkit_t12
| |-- COPYING
| |-- data
| | |-- ILSVRC2012_validation_ground_truth.txt
| | `-- meta.mat
| |-- evaluation
| | |-- VOCreadrecxml.m
| | |-- VOCreadxml.m
| | |-- VOCxml2struct.m
| | |-- compute_overlap.m
| | |-- demo.val.pred.det.txt
| | |-- demo.val.pred.txt
| | |-- demo_eval.m
| | |-- eval_flat.m
| | |-- eval_localization_flat.m
| | |-- get_class2node.m
| | `-- make_hash.m
| `-- readme.txt
|-- meta.bin
`-- val
|-- n01440764
| |-- ILSVRC2012_val_00000293.JPEG
| |-- ILSVRC2012_val_00002138.JPEG
| |-- ILSVRC2012_val_00003014.JPEG
...
```
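
As an optional sanity check of the regrouped layout (the example script does not depend on this), the per-class folders can be loaded directly with `torchvision.datasets.ImageFolder`, assuming `torchvision` is installed:

```python
from torchvision import datasets, transforms

# Each synset folder (e.g. n01440764) becomes one class label.
val_ds = datasets.ImageFolder(
    "./ILSVRC2012/val",
    transform=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]),
)
print(f"{len(val_ds)} images across {len(val_ds.classes)} classes")
```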
Lastly, download `synset_words.txt` from https://github.com/HoldenCaulfieldRye/caffe/blob/master/data/ilsvrc12/synset_words.txt into the top-level `ILSVRC2012` folder.

## Quantize an ONNX model
```shell
python e2e_tensorrt_resnet_example.py
```
The e2e script will:
* Preprocess the images in the dataset
* Run calibration and generate the calibration table (deprecated, see the note above)
* Launch an ORT session with TRT EP and run the evaluation (top-1 and top-5 accuracy), roughly as sketched below
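
For reference, below is a minimal sketch of how such a session can be created with the TRT EP, mirroring the provider options set in `e2e_tensorrt_resnet_example.py` (minus the deprecated calibration-table option). The model path and the input name `data` are placeholders, not files produced by this example.

```python
import numpy as np
import onnxruntime as ort

# Provider options mirror the script's settings; CUDA/CPU act as fallbacks.
providers = [
    ("TensorrtExecutionProvider", {
        "trt_fp16_enable": True,
        "trt_int8_enable": True,
        "trt_engine_cache_enable": True,
    }),
    "CUDAExecutionProvider",
    "CPUExecutionProvider",
]

# Placeholder model and input names -- substitute your quantized (QDQ) model.
session = ort.InferenceSession("resnet50-v2-7.qdq.onnx", providers=providers)
dummy = np.random.rand(1, 3, 224, 224).astype(np.float32)
outputs = session.run(None, {"data": dummy})
print(outputs[0].shape)
```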
31 changes: 21 additions & 10 deletions quantization/image_classification/trt/resnet50/e2e_tensorrt_resnet_example.py
File mode changed: 100644 → 100755
@@ -9,6 +9,7 @@
import onnxruntime
from onnxruntime.quantization import CalibrationDataReader, create_calibrator, write_calibration_table

# onnxruntime.set_default_logger_severity(0)

class ImageNetDataReader(CalibrationDataReader):
def __init__(self,
@@ -126,12 +127,13 @@ def preprocess_imagenet(self, images_folder, height, width, start_index=0, size_
return: list of matrices characterizing multiple images
'''
def preprocess_images(input, channels=3, height=224, width=224):
image = input.resize((width, height), Image.ANTIALIAS)
image = input.resize((width, height), Image.Resampling.LANCZOS) # Image.ANTIALIAS was removed in Pillow 10.0.0
input_data = np.asarray(image).astype(np.float32)
if len(input_data.shape) != 2:
input_data = input_data.transpose([2, 0, 1])
else:
input_data = np.stack([input_data] * 3)
# Image normalization with the standard ImageNet statistics: the offsets below work out to mean [0.485, 0.456, 0.406] and std [0.229, 0.224, 0.225]
mean = np.array([0.079, 0.05, 0]) + 0.406
std = np.array([0.005, 0, 0.001]) + 0.224
for channel in range(input_data.shape[0]):
@@ -153,7 +155,8 @@ def preprocess_images(input, channels=3, height=224, width=224):

for image_name in batch_filenames:
image_filepath = images_folder + '/' + image_name
img = Image.open(image_filepath)
# Note: one image (ILSVRC2012_val_00019877.JPEG) has 4 channels, so convert every image to 3-channel RGB
img = Image.open(image_filepath).convert("RGB")
image_data = preprocess_images(img)
image_data = np.expand_dims(image_data, 0)
unconcatenated_batch_data.append(image_data)
@@ -163,7 +166,7 @@ def preprocess_images(input, channels=3, height=224, width=224):
return batch_data, batch_filenames, image_size_list

def get_synset_id(self, image_folder, offset, dataset_size):
ilsvrc2012_meta = scipy.io.loadmat(image_folder + "/devkit/data/meta.mat")
ilsvrc2012_meta = scipy.io.loadmat(image_folder + "/ILSVRC2012_devkit_t12/data/meta.mat")
id_to_synset = {}
for i in range(1000):
id = int(ilsvrc2012_meta["synsets"][i, 0][0][0][0])
@@ -178,7 +181,7 @@ def get_synset_id(self, image_folder, offset, dataset_size):
index = index + 1
file.close()

file = open(image_folder + "/devkit/data/ILSVRC2012_validation_ground_truth.txt", "r")
file = open(image_folder + "/ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt", "r")
id = file.read().strip().split("\n")
id = list(map(int, id))
file.close()
@@ -307,6 +310,11 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
Untar the model into the workspace
'''

###################################################################################################################
# Note: Implicit quantization (using a calibration table in TRT EP to call setDynamicRange) is deprecated as of TRT 10.1;
# we suggest using explicit quantization, i.e. the QDQ format, instead.
###################################################################################################################

# Dataset settings
model_path = "./resnet50-v2-7.onnx"
ilsvrc2012_dataset_path = "./ILSVRC2012"
@@ -318,11 +326,14 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
calibration_table_generation_enable = True # Enable/Disable INT8 calibration

# TensorRT EP INT8 settings
os.environ["ORT_TENSORRT_FP16_ENABLE"] = "1" # Enable FP16 precision
os.environ["ORT_TENSORRT_INT8_ENABLE"] = "1" # Enable INT8 precision
os.environ["ORT_TENSORRT_INT8_CALIBRATION_TABLE_NAME"] = "calibration.flatbuffers" # Calibration table name
os.environ["ORT_TENSORRT_ENGINE_CACHE_ENABLE"] = "1" # Enable engine caching
execution_provider = ["TensorrtExecutionProvider"]
execution_provider = [
('TensorrtExecutionProvider', {
'trt_int8_enable': True,
'trt_fp16_enable': True,
'trt_engine_cache_enable': True,
'trt_int8_calibration_table_name': 'calibration.flatbuffers', # implicit quantization via calibration table is deprecated in TRT 10
})
]

# Convert static batch to dynamic batch
[new_model_path, input_name] = convert_model_batch_to_dynamic(model_path)
@@ -343,7 +354,7 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
model_path=augmented_model_path,
input_name=input_name)
calibrator.collect_data(data_reader)
write_calibration_table(calibrator.compute_range())
write_calibration_table(calibrator.compute_data())

# Run prediction in Tensorrt EP
data_reader = ImageNetDataReader(ilsvrc2012_dataset_path,