modify aciq observer and use aciq laplace for last 2 detr bbox embed weights

Jiang-Stan · Jiang-Stan · commit 2ae66e81f95f · 2022-10-18T17:07:33.000+08:00
update readme
diff --git a/examples/post_training_quantization/coco2017/DETR/README.md b/examples/post_training_quantization/coco2017/DETR/README.md
@@ -17,10 +17,11 @@ Since mask is not well supported by onnx, we removed mask-related codes and assi
 
 |DETR-R50|mAPc|AP50|AP75| remarks|
 |-|-|-|-|-|
-|Float|0.421|0.623|0.443|baseline|
-|8w8f|0.332|0.588|0.320| Inputs of Add&LN not quantized.|
-|8w8f|0.395|0.607|0.409|Inputs of Add&LN not quantized. Float w&f for last bbox embed layer.|
-|8w8f|0.396|0.606|0.411|Inputs of Add&LN not quantized. Float w&f for last bbox&class embed layers.|
+|float|0.421|0.623|0.443| baseline|
+|8w8f|0.332|0.588|0.320| minmax observer|
+|8w8f|0.404|0.612|0.421| minmax observer, float w&f for last 2 bbox embed layers|
+|8w8f|0.384|0.598|0.402| minmax observer, apply aciq laplace observer for last bbox embed layer|
+|8w8f|0.398|0.609|0.420| minmax observer, apply aciq laplace observer for last 2 bbox embed layers|
 
 TRT DETR w/ fixed input shape, enable int8&fp16 QPS: 118.334 on Nvidia 2080Ti. For detailed visualization, please refer to 
 ```shell
diff --git a/examples/post_training_quantization/coco2017/DETR/main.py b/examples/post_training_quantization/coco2017/DETR/main.py
@@ -151,8 +151,6 @@ def main():
         qmodel.calc_qparams()
     qmodel.set_quant(w_quant=True, a_quant=True)
 
-    qmodel.model.bbox_embed_layers_2.set_quant(w_quant=False, a_quant=False)
-
     test_stats, coco_evaluator = evaluate(qmodel, criterion, postprocessors,
                                               data_loader_val, base_ds, device, args.output_dir)
 
diff --git a/examples/post_training_quantization/coco2017/DETR/qconfig.yaml b/examples/post_training_quantization/coco2017/DETR/qconfig.yaml
@@ -8,6 +8,10 @@ W:
     BIT: 8
   OBSERVER:
     TYPE: MINMAX
+  SPECIFIC: [{
+    "bbox_embed_layers_1": ["OBSERVER.TYPE", "aciq", "OBSERVER.ACIQ.DISTRIBUTION", "laplace"],
+    "bbox_embed_layers_2": ["OBSERVER.TYPE", "aciq", "OBSERVER.ACIQ.DISTRIBUTION", "laplace"]
+  }]
 A:
   QSCHEME: per-tensor-symmetric
   QUANTIZER:
diff --git a/sparsebit/quantization/observers/aciq.py b/sparsebit/quantization/observers/aciq.py
@@ -1,6 +1,5 @@
 import torch
 import math
-from .utils import mse_loss
 from sparsebit.quantization.observers import Observer as BaseObserver
 from sparsebit.quantization.observers import register_observer
 from sparsebit.quantization.quantizers.quant_tensor import STE
@@ -56,6 +55,8 @@ def __init__(self, config, qdesc):
             8: 11.16,
         }
         self.gaus_const = (0.5 * 0.35) * (1 + (math.pi * math.log(4)) ** 0.5)
+        self.distribution = config.OBSERVER.ACIQ.DISTRIBUTION.lower()
+        assert self.distribution in ["gaus", "laplace"]
 
     def calc_laplace_minmax(self, data, is_half_range):
         if self.is_perchannel:
@@ -115,53 +116,13 @@ def calc_minmax(self):
         data = self.get_calibration_data(c_first=True)
         is_half_range = data.min() >= 0
 
-        laplace_min_val, laplace_max_val = self.calc_laplace_minmax(data, is_half_range)
-        scale_laplace, zero_point_laplace = self.calc_qparams_with_minmax(
-            laplace_min_val, laplace_max_val
-        )
-        mse_laplace = mse_loss(
-            STE.apply(
-                data, scale_laplace, zero_point_laplace, self.qdesc, self.backend
-            ),
-            data,
-            self.is_perchannel,
-        )
-
-        gaus_min_val, gaus_max_val = self.calc_gaus_minmax(
-            data, batch_size, is_half_range
-        )
-        scale_gaus, zero_point_gaus = self.calc_qparams_with_minmax(
-            gaus_min_val, gaus_max_val
-        )
-
-        mse_gaus = mse_loss(
-            STE.apply(data, scale_gaus, zero_point_gaus, self.qdesc, self.backend),
-            data,
-            self.is_perchannel,
-        )
-
-        naive_min_val, naive_max_val = self.calc_naive_minmax(data)
-        scale_minmax, zero_point_minmax = self.calc_qparams_with_minmax(
-            naive_min_val, naive_max_val
-        )
-        mse_minmax = mse_loss(
-            STE.apply(data, scale_minmax, zero_point_minmax, self.qdesc, self.backend),
-            data,
-            self.is_perchannel,
-        )
-
-        mse_gaus_laplace = torch.minimum(mse_gaus, mse_laplace)
-        self.min_val = torch.where(
-            mse_gaus < mse_laplace, gaus_min_val, laplace_min_val
-        )
-        self.min_val = torch.where(
-            mse_minmax < mse_gaus_laplace, naive_min_val, self.min_val
-        ).to(self.device)
-        self.max_val = torch.where(
-            mse_gaus < mse_laplace, gaus_max_val, laplace_max_val
-        )
-        self.max_val = torch.where(
-            mse_minmax < mse_gaus_laplace, naive_max_val, self.max_val
-        ).to(self.device)
+        if self.distribution == "laplace":
+            min_val, max_val = self.calc_laplace_minmax(data, is_half_range)
+        else:
+            min_val, max_val = self.calc_gaus_minmax(
+                data, batch_size, is_half_range
+            )
+        self.min_val = min_val.to(self.device)
+        self.max_val = max_val.to(self.device)
 
         return self.min_val, self.max_val
diff --git a/sparsebit/quantization/quant_config.py b/sparsebit/quantization/quant_config.py
@@ -20,6 +20,8 @@
 _C.W.OBSERVER.TYPE = "MINMAX"  # "MINMAX"/"MSE"/"PERCENTILE"/"KL_HISTOGRAM"
 _C.W.OBSERVER.PERCENTILE = CN()
 _C.W.OBSERVER.PERCENTILE.ALPHA = 0.001  # alpha for percentile observer
+_C.W.OBSERVER.ACIQ = CN()
+_C.W.OBSERVER.ACIQ.DISTRIBUTION = "GAUS" #"LAPLACE"/"GAUS"
 _C.W.SPECIFIC = []
 
 _C.A = CN()
@@ -35,6 +37,8 @@
 _C.A.OBSERVER.PERCENTILE.ALPHA = 0.001  # alpha for percentile observer
 _C.A.OBSERVER.MOVING_AVERAGE = CN()
 _C.A.OBSERVER.MOVING_AVERAGE.EMA_RATIO = 0.9  # ema_ratio for moving_average observer
+_C.A.OBSERVER.ACIQ = CN()
+_C.A.OBSERVER.ACIQ.DISTRIBUTION = "GAUS" #"LAPLACE"/"GAUS"
 _C.A.OBSERVER.LAYOUT = "NCHW"  # NCHW / NLC
 _C.A.SPECIFIC = []