Skip to content

Commit addc9a1

Browse files
Add quantization on-the-fly feature
1 parent 91e0931 commit addc9a1

File tree

2 files changed

+73
-2
lines changed

2 files changed

+73
-2
lines changed

tf2mplabh3/main.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,16 @@ def main():
6767
'--overwrite', action='store_true',
6868
help="Overwrite existing ONNX or C model files. By default, existing files are not overwritten."
6969
)
70-
70+
parser.add_argument(
71+
'-quant', '--int8_quantize',
72+
default=0,
73+
help="Quantize on the fly from FP32 to INT8"
74+
)
75+
parser.add_argument(
76+
'-onnx_quant', '--onnx_quant_model',
77+
default=os.path.join(PROJECT_ROOT, "examples", "model_int8.onnx"),
78+
help="Path where to store the ONNX Model File"
79+
)
7180
args = parser.parse_args()
7281
global verbosity
7382
verbosity = args.verbosity
@@ -97,6 +106,13 @@ def main():
97106
# Convert TensorFlow model to ONNX
98107
print(color_text("[MAIN] Starting Tensorflow to ONNX Conversion", "green"), flush=True)
99108
tf2onnx_converter(args.model, args.onnx_model, args.tag, args.signature_def, verbosity)
109+
onnx_model_to_convert=args.onnx_model
110+
111+
if bool(args.int8_quantize):
112+
from .onnx_quantization import quantize_and_compare_nodes
113+
print(color_text("[MAIN] Starting ONNX FP32 to ONNX INT8 Quantization", "green"), flush=True)
114+
quantize_and_compare_nodes(args.onnx_model,args.onnx_quant_model,verbosity_level=verbosity)
115+
onnx_model_to_convert=args.onnx_quant_model
100116

101117
verbose("[MAIN] Ensuring the parent directory of the C model file exists")
102118
parent_dir = os.path.dirname(args.c_model_file)
@@ -111,7 +127,7 @@ def main():
111127
t.start()
112128
with open(args.c_model_file, "w") as c_file:
113129
process = subprocess.Popen(
114-
[args.onnx2c, args.onnx_model],
130+
[args.onnx2c, onnx_model_to_convert],
115131
stdout=c_file,
116132
stderr=subprocess.PIPE,
117133
text=True

tf2mplabh3/onnx_quantization.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from onnxruntime.quantization import quantize_dynamic, QuantType
2+
import onnx
3+
from .utils import color_text
4+
5+
# Module-level verbosity flag; updated by quantize_and_compare_nodes().
verbosity = 0


def verbose(msg):
    """Print *msg* in yellow, but only when verbose logging (level 1) is on."""
    if verbosity == 1:
        print(color_text(msg, "yellow"), flush=True)
11+
12+
def quantize_and_compare_nodes(
    model_input_path,
    model_output_path,
    nodes_to_exclude=None,
    weight_type=QuantType.QInt8,
    verbosity_level=0
):
    """
    Quantize an ONNX model and print the new nodes introduced by quantization.

    Runs onnxruntime dynamic (weight-only) quantization, then diffs the
    (name, op_type) node sets of the original and quantized graphs so the
    quantization-specific operators that were inserted can be logged.

    Args:
        model_input_path (str): Path to the original (FP32) ONNX model.
        model_output_path (str): Path to save the quantized ONNX model.
        nodes_to_exclude (list, optional): Node names to exclude from quantization.
        weight_type (QuantType, optional): Weight quantization type (default: QuantType.QInt8).
        verbosity_level (int, optional): 0 for almost no logs, 1 for full logs.
    """
    global verbosity
    verbosity = verbosity_level

    def get_node_names_and_types(model_path):
        # Load the graph and collect (name, op_type) pairs for set comparison.
        model = onnx.load(model_path)
        return [(node.name, node.op_type) for node in model.graph.node]

    # Quantize the model (dynamic quantization: no calibration data needed).
    quantize_dynamic(
        model_input=model_input_path,
        model_output=model_output_path,
        weight_type=weight_type,
        nodes_to_exclude=nodes_to_exclude or []
    )

    # Diff the node sets: anything present only in the quantized graph was
    # introduced by the quantizer (e.g. DynamicQuantizeLinear, MatMulInteger).
    original_set = set(get_node_names_and_types(model_input_path))
    quantized_set = set(get_node_names_and_types(model_output_path))
    new_nodes = quantized_set - original_set

    # Fixed: original message had a stray "(" before the log tag.
    verbose("[ONNX_QUANTIZATION] New Nodes introduced by the quantization")
    for name, op_type in new_nodes:
        verbose(f"[ONNX_QUANTIZATION] Name: {name}, OpType: {op_type}")

0 commit comments

Comments
 (0)