forked from apache/tvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Relay] [Quantization] WIP - Prototyping the quantized convolution op
Goal - Act as a medium of discussion for pull request apache#2351 Features - New quantized conv2D op in Relay - Python API interface to instantiate the Relay op - Infer Type implemented - Lowering of quantized_conv op to low-level Relay ops Discussion points - Does the namespace look correct? - Relay op is called 'relay.op.nn._quantize.quantized_conv2d' - Idea is that any op under the '_quantize' namespace will go through rewrite. - Should we reuse Conv2DRel and Conv2DAttrs? - Tried prototyping. Found it hard to derive from the Conv2DAttrs struct - Infer Type has a param field. This needs to come from the right datatype. Missing implementation - Lowering of quantized conv into conv+cast is incomplete. - Will work on it async. This is orthogonal to the discussion.
- Loading branch information
1 parent
9596535
commit 788b20c
Showing
10 changed files
with
661 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
/*! | ||
* \file tvm/relay/attrs/nn_quantize.h | ||
* \brief Auxiliary attributes for quantized nn operators. | ||
*/ | ||
#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_ | ||
#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_ | ||
|
||
#include <tvm/attrs.h> | ||
#include <string> | ||
|
||
namespace tvm { | ||
namespace relay { | ||
|
||
// TODO(anijain2305) - Copy of Conv2DAttrs. Should we inherit? | ||
/*! \brief Attribute for quantized conv2d operator */ | ||
struct QuantizedConv2DAttrs : public tvm::AttrsNode<QuantizedConv2DAttrs> { | ||
// Traditional conv2d attributes. | ||
Array<IndexExpr> strides; | ||
Array<IndexExpr> padding; | ||
Array<IndexExpr> dilation; | ||
int groups; | ||
IndexExpr channels; | ||
Array<IndexExpr> kernel_size; | ||
std::string data_layout; | ||
std::string kernel_layout; | ||
std::string out_layout; | ||
DataType out_dtype; | ||
|
||
// Quantization related attributes. | ||
int32_t input_zero_point; | ||
int32_t kernel_zero_point; | ||
int32_t output_zero_point; | ||
double input_scale; | ||
double kernel_scale; | ||
double output_scale; | ||
|
||
TVM_DECLARE_ATTRS(QuantizedConv2DAttrs, "relay.attrs.QuantizedConv2DAttrs") { | ||
TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1})) | ||
.describe("Specifies the strides of the convolution."); | ||
TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0})) | ||
.describe("If padding is non-zero, then the input is implicitly zero-padded" | ||
"on both sides for padding number of points"); | ||
TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1})) | ||
.describe("Specifies the dilation rate to use for dilated convolution."); | ||
TVM_ATTR_FIELD(groups).set_default(1) | ||
.describe("Controls the connections between inputs and outputs." | ||
"At groups=1, all inputs are convolved to all outputs." | ||
"At groups=2, the operation becomes equivalent to having two convolution" | ||
"layers side by side, each seeing half the input channels, and producing" | ||
"half the output channels, and both subsequently concatenated."); | ||
TVM_ATTR_FIELD(channels) | ||
.describe("The number of output channels in the convolution." | ||
" If it is not set, inferred by shape of the weight.") | ||
.set_default(NullValue<IndexExpr>()); | ||
TVM_ATTR_FIELD(kernel_size) | ||
.describe("Specifies the dimensions of the convolution window.") | ||
.set_default(NullValue<Array<IndexExpr> >()); | ||
TVM_ATTR_FIELD(data_layout).set_default("NCHW") | ||
.describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc." | ||
"'N', 'C', 'H', 'W' stands for batch, channel, height, and width" | ||
"dimensions respectively. Convolution is applied on the 'H' and" | ||
"'W' dimensions."); | ||
TVM_ATTR_FIELD(kernel_layout).set_default("OIHW") | ||
.describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc." | ||
"'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" | ||
"dimensions respectively."); | ||
TVM_ATTR_FIELD(out_layout).set_default("") | ||
.describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc." | ||
"'N', 'C', 'H', 'W' stands for batch, channel, height, and width" | ||
"dimensions respectively. Default to be same as input layout."); | ||
|
||
// use 0 bits to indicate none. | ||
TVM_ATTR_FIELD(out_dtype) | ||
.set_default(NullValue<DataType>()) | ||
.describe("Output data type, set to explicit type under mixed precision setting"); | ||
|
||
|
||
TVM_ATTR_FIELD(input_zero_point) | ||
.describe("The zero point of the input tensor."); | ||
TVM_ATTR_FIELD(kernel_zero_point) | ||
.describe("The zero point of the kernel tensor."); | ||
TVM_ATTR_FIELD(output_zero_point) | ||
.describe("The zero point of the output tensor."); | ||
TVM_ATTR_FIELD(input_scale) | ||
.describe("The scale of the input tensor."); | ||
TVM_ATTR_FIELD(kernel_scale) | ||
.describe("The scale of the kernel tensor."); | ||
TVM_ATTR_FIELD(output_scale) | ||
.describe("The scale of the output tensor."); | ||
|
||
|
||
} | ||
}; | ||
|
||
} // namespace relay | ||
} // namespace tvm | ||
#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
"""Constructor APIs""" | ||
from ...._ffi.function import _init_api | ||
|
||
# Initialise this module's API from the "relay.op.nn._quantize._make" namespace.
# NOTE(review): presumably this exposes the functions registered under that
# prefix as attributes of this module - confirm against _init_api.
_init_api("relay.op.nn._quantize._make", __name__) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
#pylint: disable=invalid-name, too-many-lines | ||
"""Neural network operations.""" | ||
from __future__ import absolute_import as _abs | ||
from . import _make_quantize | ||
|
||
|
||
def quantized_conv2d(quantized_data,
                     quantized_weight,
                     input_zero_point,
                     kernel_zero_point,
                     output_zero_point,
                     input_scale,
                     kernel_scale,
                     output_scale,
                     strides=(1, 1),
                     padding=(0, 0),
                     dilation=(1, 1),
                     groups=1,
                     channels=None,
                     kernel_size=None,
                     data_layout="NCHW",
                     kernel_layout="OIHW",
                     out_layout="",
                     out_dtype=""):
    r"""Quantized 2D convolution.

    This operator takes the quantized_weight as the convolution kernel
    and convolves it with quantized_data to produce an output.

    In the default case, where the data_layout is `NCHW`
    and kernel_layout is `OIHW`, quantized_conv2d takes in
    a quantized_data Tensor with shape
    `(batch_size, in_channels, height, width)`,
    and a quantized_weight Tensor with shape
    `(channels, in_channels, kernel_size[0], kernel_size[1])`
    to produce an output Tensor with the following rule:

    .. math::

        \mbox{out}[b, c, y, x] = \sum_{dy, dx, k}
           \mbox{quantized_data}[b, k, \mbox{strides}[0] * y  + dy,
                                 \mbox{strides}[1] * x + dx] *
           \mbox{quantized_weight}[c, k, dy, dx]

    Padding and dilation are applied to quantized_data and quantized_weight
    respectively before the computation.
    This operator accepts a data layout specification.
    Semantically, the operator will convert the layout to the canonical layout
    (`NCHW` for quantized_data and `OIHW` for quantized_weight), perform the
    computation, then convert to the out_layout.

    Parameters
    ----------
    quantized_data : tvm.relay.Expr
        The input quantized_data to the operator.

    quantized_weight : tvm.relay.Expr
        The quantized_weight expressions.

    input_zero_point : int
        The zero point of the quantized_data distribution.

    kernel_zero_point : int
        The zero point of the quantized_weight distribution.

    output_zero_point : int
        The zero point of the quantized output distribution.

    input_scale : float
        The float scalar to scale the quantized_data int8 values back to FP32.

    kernel_scale : float
        The float scalar to scale the quantized_weight int8 values back to FP32.

    output_scale : float
        The float scalar to scale the quantized output int8 values back to FP32.

    strides : tuple of int, optional
        The strides of convolution.

    padding : tuple of int, optional
        The padding of convolution on both sides of inputs before convolution.

    dilation : tuple of int, optional
        Specifies the dilation rate to be used for dilated convolution.

    groups : int, optional
        Number of groups for grouped convolution.

    channels : int, optional
        Number of output channels of this convolution.

    kernel_size : tuple of int, optional
        The spatial dimensions of the convolution kernel.

    data_layout : str, optional
        Layout of the input.

    kernel_layout : str, optional
        Layout of the quantized_weight.

    out_layout : str, optional
        Layout of the output, by default, out_layout is the same as data_layout

    out_dtype : str, optional
        Specifies the output data type for mixed precision conv2d.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    # The constructor is called positionally; the argument order below is the
    # same as in the original call.
    return _make_quantize.quantized_conv2d(quantized_data, quantized_weight,
                                           input_zero_point, kernel_zero_point, output_zero_point,
                                           input_scale, kernel_scale, output_scale,
                                           strides, padding, dilation,
                                           groups, channels, kernel_size,
                                           data_layout, kernel_layout, out_layout,
                                           out_dtype)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
#pylint: disable=unused-argument | ||
"""Automatic quantization toolkit.""" | ||
from __future__ import absolute_import | ||
|
||
from . import _quantize | ||
from .. import expr as _expr | ||
|
||
def quantize_rewrite(expr):
    """Rewrite the high-level quantized ops into low-level existing Relay ops.

    Parameters
    ----------
    expr : tvm.relay.Expr
        The input expression.

    Returns
    -------
    expr : tvm.relay.Expr
        The output expression.
    """
    # Delegates directly to the function of the same name on the _quantize
    # module.
    return _quantize.quantize_rewrite(expr)
Oops, something went wrong.