Skip to content

Commit

Permalink
[Relay] [Quantization] WIP - Prototyping the quantized convolution op
Browse files Browse the repository at this point in the history
Goal - Act as medium of discussion for pull request apache#2351

Features
- New quantized conv2D op in Relay
- Python API interface to instantiate the Relay op
- Infer Type implemented
- Lowering of quantized_conv op to low-level Relay ops

Discussion points
- Does the namespace look correct?
    - Relay op is called 'relay.op.nn._quantize.quantized_conv2d'
    - Idea is that any op under '_quantize' namespace will go through rewrite.
- Should we reuse Conv2DRel and Conv2DAttrs
    - Tried prototyping. Found it hard to derive from the Conv2DAttrs struct
    - Infer Type has a param field. This need to come from the right datatype.

Missing implementation
    - Lowering of quantized conv into conv+cast is incomplete.
    - Will work on it async. This is orthogonal to the discussion.
  • Loading branch information
anijain2305 committed Jul 8, 2019
1 parent 9596535 commit 788b20c
Show file tree
Hide file tree
Showing 10 changed files with 661 additions and 0 deletions.
116 changes: 116 additions & 0 deletions include/tvm/relay/attrs/nn_quantize.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
 * \file tvm/relay/attrs/nn_quantize.h
 * \brief Auxiliary attributes for quantized nn operators.
 */
#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_
#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_

#include <tvm/attrs.h>
#include <string>

namespace tvm {
namespace relay {

// TODO(anijain2305) - This duplicates Conv2DAttrs. Should we inherit instead?
/*! \brief Attributes used by the quantized conv2d operator. */
struct QuantizedConv2DAttrs : public tvm::AttrsNode<QuantizedConv2DAttrs> {
  // Traditional conv2d attributes.
  Array<IndexExpr> strides;
  Array<IndexExpr> padding;
  Array<IndexExpr> dilation;
  int groups;
  IndexExpr channels;
  Array<IndexExpr> kernel_size;
  std::string data_layout;
  std::string kernel_layout;
  std::string out_layout;
  DataType out_dtype;

  // Quantization related attributes.
  int32_t input_zero_point;   // Zero point of the quantized input tensor.
  int32_t kernel_zero_point;  // Zero point of the quantized kernel tensor.
  int32_t output_zero_point;  // Zero point of the quantized output tensor.
  double input_scale;         // Scale of the quantized input tensor.
  double kernel_scale;        // Scale of the quantized kernel tensor.
  double output_scale;        // Scale of the quantized output tensor.

  TVM_DECLARE_ATTRS(QuantizedConv2DAttrs, "relay.attrs.QuantizedConv2DAttrs") {
    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
        .describe("Specifies the strides of the convolution.");
    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
        .describe("If padding is non-zero, then the input is implicitly zero-padded"
                  " on both sides for padding number of points");
    TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
        .describe("Specifies the dilation rate to use for dilated convolution.");
    TVM_ATTR_FIELD(groups).set_default(1)
        .describe("Controls the connections between inputs and outputs."
                  " At groups=1, all inputs are convolved to all outputs."
                  " At groups=2, the operation becomes equivalent to having two convolution"
                  " layers side by side, each seeing half the input channels, and producing"
                  " half the output channels, and both subsequently concatenated.");
    TVM_ATTR_FIELD(channels)
        .describe("The number of output channels in the convolution."
                  " If it is not set, inferred by shape of the weight.")
        .set_default(NullValue<IndexExpr>());
    TVM_ATTR_FIELD(kernel_size)
        .describe("Specifies the dimensions of the convolution window.")
        .set_default(NullValue<Array<IndexExpr> >());
    TVM_ATTR_FIELD(data_layout).set_default("NCHW")
        .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
                  " 'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
                  " dimensions respectively. Convolution is applied on the 'H' and"
                  " 'W' dimensions.");
    TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
        .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
                  " 'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
                  " dimensions respectively.");
    TVM_ATTR_FIELD(out_layout).set_default("")
        .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
                  " 'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
                  " dimensions respectively. Default to be same as input layout.");

    // use 0 bits to indicate none.
    TVM_ATTR_FIELD(out_dtype)
        .set_default(NullValue<DataType>())
        .describe("Output data type, set to explicit type under mixed precision setting");

    TVM_ATTR_FIELD(input_zero_point)
        .describe("The zero point of the input tensor.");
    TVM_ATTR_FIELD(kernel_zero_point)
        .describe("The zero point of the kernel tensor.");
    TVM_ATTR_FIELD(output_zero_point)
        .describe("The zero point of the output tensor.");
    TVM_ATTR_FIELD(input_scale)
        .describe("The scale of the input tensor.");
    TVM_ATTR_FIELD(kernel_scale)
        .describe("The scale of the kernel tensor.");
    TVM_ATTR_FIELD(output_scale)
        .describe("The scale of the output tensor.");
  }
};

} // namespace relay
} // namespace tvm
#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_
1 change: 1 addition & 0 deletions python/tvm/relay/op/nn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@
"""Neural network related operators."""
from __future__ import absolute_import as _abs
from .nn import *
from . import _quantize
from . import _nn
20 changes: 20 additions & 0 deletions python/tvm/relay/op/nn/_make_quantize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Constructor APIs"""
from ...._ffi.function import _init_api

_init_api("relay.op.nn._quantize._make", __name__)
133 changes: 133 additions & 0 deletions python/tvm/relay/op/nn/_quantize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#pylint: disable=invalid-name, too-many-lines
"""Neural network operations."""
from __future__ import absolute_import as _abs
from . import _make_quantize


def quantized_conv2d(quantized_data,
                     quantized_weight,
                     input_zero_point,
                     kernel_zero_point,
                     output_zero_point,
                     input_scale,
                     kernel_scale,
                     output_scale,
                     strides=(1, 1),
                     padding=(0, 0),
                     dilation=(1, 1),
                     groups=1,
                     channels=None,
                     kernel_size=None,
                     data_layout="NCHW",
                     kernel_layout="OIHW",
                     out_layout="",
                     out_dtype=""):
    r"""Quantized 2D convolution.

    This operator takes the quantized_weight as the convolution kernel
    and convolves it with quantized_data to produce an output.
    In the default case, where the data_layout is `NCHW`
    and kernel_layout is `OIHW`, conv2d takes in
    a quantized_data Tensor with shape `(batch_size, in_channels, height, width)`,
    and a quantized_weight Tensor with shape
    `(channels, in_channels, kernel_size[0], kernel_size[1])`
    to produce an output Tensor with the following rule:

    .. math::

        \mbox{out}[b, c, y, x] = \sum_{dy, dx, k}
           \mbox{quantized_data}[b, k, \mbox{strides}[0] * y + dy,
           \mbox{strides}[1] * x + dx] *
           \mbox{quantized_weight}[c, k, dy, dx]

    Padding and dilation are applied to quantized_data and quantized_weight
    respectively before the computation.
    This operator accepts quantized_data layout specification.
    Semantically, the operator will convert the layout to the canonical layout
    (`NCHW` for quantized_data and `OIHW` for quantized_weight), perform the
    computation, then convert to the out_layout.

    Parameters
    ----------
    quantized_data : tvm.relay.Expr
        The input quantized_data to the operator.

    quantized_weight : tvm.relay.Expr
        The quantized_weight expressions.

    input_zero_point: int
        The zero point of the quantized_data distribution.

    kernel_zero_point: int
        The zero point of the quantized_kernel distribution.

    output_zero_point: int
        The zero point of the quantized_output distribution.

    input_scale: float
        The float scalar to scale the quantized_data int8 values back to FP32.

    kernel_scale: float
        The float scalar to scale the quantized_kernel int8 values back to FP32.

    output_scale: float
        The float scalar to scale the quantized_output int8 values back to FP32.

    strides : tuple of int, optional
        The strides of convolution.

    padding : tuple of int, optional
        The padding of convolution on both sides of inputs before convolution.

    dilation : tuple of int, optional
        Specifies the dilation rate to be used for dilated convolution.

    groups : int, optional
        Number of groups for grouped convolution.

    channels : int, optional
        Number of output channels of this convolution.

    kernel_size : tuple of int, optional
        The spatial of the convolution kernel.

    data_layout : str, optional
        Layout of the input.

    kernel_layout : str, optional
        Layout of the quantized_weight.

    out_layout : str, optional
        Layout of the output, by default, out_layout is the same as data_layout

    out_dtype : str, optional
        Specifies the output quantized_data type for mixed precision conv2d.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    # Thin wrapper: all validation and type inference happen on the C++ side.
    return _make_quantize.quantized_conv2d(quantized_data, quantized_weight,
                                           input_zero_point, kernel_zero_point,
                                           output_zero_point,
                                           input_scale, kernel_scale,
                                           output_scale,
                                           strides, padding, dilation,
                                           groups, channels, kernel_size,
                                           data_layout, kernel_layout,
                                           out_layout, out_dtype)
1 change: 1 addition & 0 deletions python/tvm/relay/quantize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@
from __future__ import absolute_import as _abs

from .quantize import *
from .quantize_rewrite import *
from ._annotate import register_annotate_function
38 changes: 38 additions & 0 deletions python/tvm/relay/quantize/quantize_rewrite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#pylint: disable=unused-argument
"""Automatic quantization toolkit."""
from __future__ import absolute_import

from . import _quantize
from .. import expr as _expr

def quantize_rewrite(expr):
    """Rewrites the high-level quantized ops into low-level existing Relay ops.

    Parameters
    ----------
    expr : tvm.relay.Expr
        The input expression.

    Returns
    -------
    expr : tvm.relay.Expr
        The output expression.
    """
    # Delegate to the C++ pass registered via FFI in the _quantize namespace.
    return _quantize.quantize_rewrite(expr)
Loading

0 comments on commit 788b20c

Please sign in to comment.