@@ -67,7 +67,16 @@ def main():
67
67
'--overwrite' , action = 'store_true' ,
68
68
help = "Overwrite existing ONNX or C model files. By default, existing files are not overwritten."
69
69
)
70
-
70
+ parser .add_argument (
71
+ '-quant' , '--int8_quantize' ,
72
+ default = 0 ,
73
+ help = "Quantize on the fly from FP32 to INT8"
74
+ )
75
+ parser .add_argument (
76
+ '-onnx_quant' , '--onnx_quant_model' ,
77
+ default = os .path .join (PROJECT_ROOT , "examples" , "model_int8.onnx" ),
78
+ help = "Path where to store the ONNX Model File"
79
+ )
71
80
args = parser .parse_args ()
72
81
global verbosity
73
82
verbosity = args .verbosity
@@ -97,6 +106,13 @@ def main():
97
106
# Convert TensorFlow model to ONNX
98
107
print (color_text ("[MAIN] Starting Tensorflow to ONNX Conversion" , "green" ), flush = True )
99
108
tf2onnx_converter (args .model , args .onnx_model , args .tag , args .signature_def , verbosity )
109
+ onnx_model_to_convert = args .onnx_model
110
+
111
+ if bool (args .int8_quantize ):
112
+ from .onnx_quantization import quantize_and_compare_nodes
113
+ print (color_text ("[MAIN] Starting ONNX FP32 to ONNX INT8 Quantization" , "green" ), flush = True )
114
+ quantize_and_compare_nodes (args .onnx_model ,args .onnx_quant_model ,verbosity_level = verbosity )
115
+ onnx_model_to_convert = args .onnx_quant_model
100
116
101
117
verbose ("[MAIN] Ensuring the parent directory of the C model file exists" )
102
118
parent_dir = os .path .dirname (args .c_model_file )
@@ -111,7 +127,7 @@ def main():
111
127
t .start ()
112
128
with open (args .c_model_file , "w" ) as c_file :
113
129
process = subprocess .Popen (
114
- [args .onnx2c , args . onnx_model ],
130
+ [args .onnx2c , onnx_model_to_convert ],
115
131
stdout = c_file ,
116
132
stderr = subprocess .PIPE ,
117
133
text = True
0 commit comments