@@ -11,6 +11,22 @@ namespace {
 
 #if NV_TENSORRT_MAJOR > 7
 // clang-format off
+
+bool add_qdq(ConversionCtx* ctx, const torch::jit::Node* n, nvinfer1::ITensor* input, nvinfer1::ITensor* scale, std::string& opName) {
+  nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale);
+  TORCHTRT_CHECK(quantize_layer, "Unable to create QuantizeLayer from node: " << *n);
+  quantize_layer->setAxis(0);
+
+  nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale);
+  TORCHTRT_CHECK(dequantize_layer, "Unable to create DequantizeLayer from node: " << *n);
+  dequantize_layer->setAxis(0);
+
+  auto qdq_out = ctx->AssociateValueAndTensor(n->outputs()[0], dequantize_layer->getOutput(0));
+  LOG_DEBUG("[" << opName << "] Output tensor shape: " << qdq_out->getDimensions());
+
+  return true;
+}
+
 auto quantization_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns()
     .pattern({"aten::fake_quantize_per_tensor_affine(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> (Tensor)",
               [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -20,18 +36,16 @@ auto quantization_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns
                 auto scale = args[1].unwrapToScalar().to<float>();
                 auto scaleTensor = tensor_to_const(ctx, torch::tensor({scale}));
                 // Add and configure a QuantizeLayer.
-                nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scaleTensor);
-                quantize_layer->setAxis(0);
-
-                // Add and configure a DequantizeLayer following the QuantizeLayer.
-                nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scaleTensor);
-                dequantize_layer->setAxis(0);
-
-                auto qdq_out = ctx->AssociateValueAndTensor(n->outputs()[0], dequantize_layer->getOutput(0));
-                LOG_DEBUG("[fake_quantize_per_tensor_affine] Output tensor shape: " << qdq_out->getDimensions());
-
-                return true;
+                std::string opName("aten::fake_quantize_per_tensor_affine");
+                return add_qdq(ctx, n, input, scaleTensor, opName);
               }})
+    .pattern({"aten::fake_quantize_per_tensor_affine.tensor_qparams(Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max) -> (Tensor)",
+              [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+                auto input = args[0].ITensorOrFreeze(ctx);
+                auto scale = args[1].ITensorOrFreeze(ctx);
+                std::string opName("aten::fake_quantize_per_tensor_affine.tensor_qparams");
+                return add_qdq(ctx, n, input, scale, opName);
+              }})
     .pattern({"aten::fake_quantize_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> (Tensor)",
               [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
                 // This aten operator is generated from torch.fake_quantize_per_channel_affine op in Pytorch python API.
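
For context, the Q/DQ pair that the new `add_qdq` helper emits models fake quantization: the input is scaled down into int8 and immediately rescaled back to float, so the network records the precision loss of quantization while staying in floating point. A minimal standalone sketch of that round trip in plain C++17, independent of TensorRT; `fake_quantize_scalar` is a hypothetical helper name here, and symmetric int8 with zero_point = 0 is assumed, matching what TensorRT's Quantize/Dequantize layers support:

#include <algorithm>
#include <cmath>
#include <cstdio>

// Hypothetical scalar model of the IQuantizeLayer -> IDequantizeLayer pair:
// quantize to int8 with the given scale (symmetric, zero_point = 0), then
// dequantize straight back to float.
float fake_quantize_scalar(float x, float scale) {
  int q = static_cast<int>(std::lround(x / scale)); // quantize
  q = std::clamp(q, -128, 127);                     // saturate to the int8 range
  return static_cast<float>(q) * scale;             // dequantize
}

int main() {
  const float scale = 0.1f;
  for (float x : {0.04f, 0.26f, 100.0f}) {
    std::printf("%.2f -> %.2f\n", x, fake_quantize_scalar(x, scale));
  }
  // Prints: 0.04 -> 0.00, 0.26 -> 0.30, 100.00 -> 12.70 (saturated at 127 * scale)
}

Factoring this logic into `add_qdq` means the per-tensor, tensor_qparams, and (per-channel, modulo the axis handling) converters only differ in how they obtain the scale tensor, which is what allows the new `.tensor_qparams` pattern above to be registered in a few lines.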