@@ -83,6 +83,10 @@ def __init__(self, framework_specific_info):
             self.format = "integerops"
             if "format" in framework_specific_info and framework_specific_info["format"].lower() == "qdq":
                 logger.warning("Dynamic approach doesn't support QDQ format.")
+
+        # do not load TensorRT if backend is not TensorrtExecutionProvider
+        if self.backend != "TensorrtExecutionProvider":
+            os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"
 
         # get quantization config file according to backend
         config_file = None
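A minimal sketch of the pattern this hunk relies on: the flag has to be set before anything queries or initializes the TensorRT execution provider. That ORT_TENSORRT_UNAVAILABLE is honored downstream is an assumption taken from this diff alone, and the backend value below is a placeholder.

    import os

    backend = "CPUExecutionProvider"  # hypothetical configured backend
    if backend != "TensorrtExecutionProvider":
        # Signal that TensorRT should not be loaded for this process,
        # assuming a downstream check of this variable as the patch implies.
        os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"

    import onnxruntime as ort  # imported only after the flag is set
    print(ort.get_available_providers())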
@@ -700,9 +704,9 @@ def _detect_domain(self, model):
         # typically, NLP models have multiple inputs,
         # and the dimension of each input is usually 2 (batch_size, max_seq_len)
         if not model.is_large_model:
-            sess = ort.InferenceSession(model.model.SerializeToString(), providers=ort.get_available_providers())
+            sess = ort.InferenceSession(model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         elif model.model_path is not None:  # pragma: no cover
-            sess = ort.InferenceSession(model.model_path, providers=ort.get_available_providers())
+            sess = ort.InferenceSession(model.model_path, providers=["CPUExecutionProvider"])
         else:  # pragma: no cover
             assert False, "Please use model path instead of onnx model object to quantize."
         input_shape_lens = [len(input.shape) for input in sess.get_inputs()]
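For context, this hunk pins the session used only for domain detection to CPU, so inspecting input shapes no longer pulls in GPU or TensorRT providers. A standalone sketch (the model path is a placeholder):

    import onnxruntime as ort

    # Domain detection only needs input metadata, so a CPU-only session
    # is enough and avoids loading heavier execution providers.
    sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
    input_shape_lens = [len(inp.shape) for inp in sess.get_inputs()]
    print(input_shape_lens)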