diff --git a/include/tvm/relay/attrs/nn_quantize.h b/include/tvm/relay/attrs/nn_quantize.h
new file mode 100644
index 0000000000000..07029e8d76e2d
--- /dev/null
+++ b/include/tvm/relay/attrs/nn_quantize.h
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/relay/attrs/nn_quantize.h
+ * \brief Auxiliary attributes for quantized nn operators.
+ */
+#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+
+#include <tvm/attrs.h>
+#include <string>
+
+namespace tvm {
+namespace relay {
+
+// TODO(anijain2305) - Copy of Conv2DAttrs. Should we inherit instead?
+/*! \brief Attributes for the quantized conv2d operator */
+struct QuantizedConv2DAttrs : public tvm::AttrsNode<QuantizedConv2DAttrs> {
+  // Traditional conv2d attributes.
+  Array<IndexExpr> strides;
+  Array<IndexExpr> padding;
+  Array<IndexExpr> dilation;
+  int groups;
+  IndexExpr channels;
+  Array<IndexExpr> kernel_size;
+  std::string data_layout;
+  std::string kernel_layout;
+  std::string out_layout;
+  DataType out_dtype;
+
+  // Quantization related attributes.
+  int32_t input_zero_point;
+  int32_t kernel_zero_point;
+  int32_t output_zero_point;
+  double input_scale;
+  double kernel_scale;
+  double output_scale;
+
+  TVM_DECLARE_ATTRS(QuantizedConv2DAttrs, "relay.attrs.QuantizedConv2DAttrs") {
+    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
+        .describe("Specifies the strides of the convolution.");
+    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
+        .describe("If padding is non-zero, then the input is implicitly zero-padded "
+                  "on both sides for padding number of points.");
+    TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
+        .describe("Specifies the dilation rate to use for dilated convolution.");
+    TVM_ATTR_FIELD(groups).set_default(1)
+        .describe("Controls the connections between inputs and outputs. "
+                  "At groups=1, all inputs are convolved to all outputs. "
+                  "At groups=2, the operation becomes equivalent to having two convolution "
+                  "layers side by side, each seeing half the input channels and producing "
+                  "half the output channels, both of which are subsequently concatenated.");
+    TVM_ATTR_FIELD(channels)
+        .describe("The number of output channels in the convolution."
+                  " If it is not set, it is inferred from the shape of the weight.")
+        .set_default(NullValue<IndexExpr>());
+    TVM_ATTR_FIELD(kernel_size)
+        .describe("Specifies the dimensions of the convolution window.")
+        .set_default(NullValue<Array<IndexExpr> >());
+    TVM_ATTR_FIELD(data_layout).set_default("NCHW")
+        .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc. "
+                  "'N', 'C', 'H', 'W' stand for batch, channel, height, and width "
+                  "dimensions respectively. Convolution is applied on the 'H' and "
+                  "'W' dimensions.");
+    TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
+        .describe("Dimension ordering of the weight. Can be 'OIHW', 'OIHW16o16i', etc. "
+                  "'O', 'I', 'H', 'W' stand for num_filter, input_channel, height, and width "
+                  "dimensions respectively.");
+    TVM_ATTR_FIELD(out_layout).set_default("")
+        .describe("Dimension ordering of the output. Can be 'NCHW', 'NHWC', etc. "
+                  "'N', 'C', 'H', 'W' stand for batch, channel, height, and width "
+                  "dimensions respectively. Defaults to the same layout as the input.");
+
+    // use 0 bits to indicate none.
+    TVM_ATTR_FIELD(out_dtype)
+        .set_default(NullValue<DataType>())
+        .describe("Output data type, set to an explicit type under a mixed-precision setting.");
+
+    TVM_ATTR_FIELD(input_zero_point)
+        .describe("The zero point of the input tensor.");
+    TVM_ATTR_FIELD(kernel_zero_point)
+        .describe("The zero point of the kernel tensor.");
+    TVM_ATTR_FIELD(output_zero_point)
+        .describe("The zero point of the output tensor.");
+    TVM_ATTR_FIELD(input_scale)
+        .describe("The scale of the input tensor.");
+    TVM_ATTR_FIELD(kernel_scale)
+        .describe("The scale of the kernel tensor.");
+    TVM_ATTR_FIELD(output_scale)
+        .describe("The scale of the output tensor.");
+  }
+};
+
+}  // namespace relay
+}  // namespace tvm
+#endif  // TVM_RELAY_ATTRS_NN_QUANTIZE_H_
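The scale and zero-point fields above encode the standard affine quantization mapping, real_value ~ scale * (quantized_value - zero_point). A minimal NumPy sketch of that mapping, for orientation only (the helper names are illustrative and not part of this patch):

    import numpy as np

    def quantize(real, scale, zero_point, dtype=np.int8):
        # Map real values onto the integer grid and saturate to the dtype range.
        info = np.iinfo(dtype)
        q = np.round(real / scale) + zero_point
        return np.clip(q, info.min, info.max).astype(dtype)

    def dequantize(q, scale, zero_point):
        # Recover the (approximate) real values from the integer grid.
        return scale * (q.astype(np.float32) - zero_point)

    x = np.array([-1.0, 0.0, 0.5, 1.0], dtype=np.float32)
    print(dequantize(quantize(x, scale=0.5, zero_point=0), 0.5, 0))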
+ "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" + "dimensions respectively."); + TVM_ATTR_FIELD(out_layout).set_default("") + .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Default to be same as input layout."); + + // use 0 bits to indicate none. + TVM_ATTR_FIELD(out_dtype) + .set_default(NullValue()) + .describe("Output data type, set to explicit type under mixed precision setting"); + + + TVM_ATTR_FIELD(input_zero_point) + .describe("The zero point of the input tensor."); + TVM_ATTR_FIELD(kernel_zero_point) + .describe("The zero point of the kernel tensor."); + TVM_ATTR_FIELD(output_zero_point) + .describe("The zero point of the output tensor."); + TVM_ATTR_FIELD(input_scale) + .describe("The scale of the input tensor."); + TVM_ATTR_FIELD(kernel_scale) + .describe("The scale of the kernel tensor."); + TVM_ATTR_FIELD(output_scale) + .describe("The scale of the output tensor."); + + + } +}; + +} // namespace relay +} // namespace tvm +#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_ diff --git a/python/tvm/relay/op/nn/__init__.py b/python/tvm/relay/op/nn/__init__.py index ebabbbcd9d3ad..20bc48d879184 100644 --- a/python/tvm/relay/op/nn/__init__.py +++ b/python/tvm/relay/op/nn/__init__.py @@ -18,4 +18,5 @@ """Neural network related operators.""" from __future__ import absolute_import as _abs from .nn import * +from . import _quantize from . import _nn diff --git a/python/tvm/relay/op/nn/_make_quantize.py b/python/tvm/relay/op/nn/_make_quantize.py new file mode 100644 index 0000000000000..2480c99068c4c --- /dev/null +++ b/python/tvm/relay/op/nn/_make_quantize.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Constructor APIs""" +from ...._ffi.function import _init_api + +_init_api("relay.op.nn._quantize._make", __name__) diff --git a/python/tvm/relay/op/nn/_quantize.py b/python/tvm/relay/op/nn/_quantize.py new file mode 100644 index 0000000000000..56a193ac4205f --- /dev/null +++ b/python/tvm/relay/op/nn/_quantize.py @@ -0,0 +1,133 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
diff --git a/python/tvm/relay/op/nn/_quantize.py b/python/tvm/relay/op/nn/_quantize.py
new file mode 100644
index 0000000000000..56a193ac4205f
--- /dev/null
+++ b/python/tvm/relay/op/nn/_quantize.py
@@ -0,0 +1,133 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=invalid-name, too-many-lines
+"""Quantized neural network operations."""
+from __future__ import absolute_import as _abs
+from . import _make_quantize
+
+
+def quantized_conv2d(quantized_data,
+                     quantized_weight,
+                     input_zero_point,
+                     kernel_zero_point,
+                     output_zero_point,
+                     input_scale,
+                     kernel_scale,
+                     output_scale,
+                     strides=(1, 1),
+                     padding=(0, 0),
+                     dilation=(1, 1),
+                     groups=1,
+                     channels=None,
+                     kernel_size=None,
+                     data_layout="NCHW",
+                     kernel_layout="OIHW",
+                     out_layout="",
+                     out_dtype=""):
+    r"""Quantized 2D convolution.
+
+    This operator takes quantized_weight as the convolution kernel
+    and convolves it with quantized_data to produce an output.
+
+    In the default case, where the data_layout is `NCHW`
+    and kernel_layout is `OIHW`, conv2d takes in
+    a quantized_data Tensor with shape `(batch_size, in_channels, height, width)`,
+    and a quantized_weight Tensor with shape
+    `(channels, in_channels, kernel_size[0], kernel_size[1])`
+    to produce an output Tensor with the following rule:
+
+    .. math::
+
+        \mbox{out}[b, c, y, x] = \sum_{dy, dx, k}
+           \mbox{quantized_data}[b, k, \mbox{strides}[0] * y + dy,
+           \mbox{strides}[1] * x + dx] * \mbox{quantized_weight}[c, k, dy, dx]
+
+    Padding and dilation are applied to quantized_data and quantized_weight
+    respectively before the computation. This operator accepts a layout
+    specification for quantized_data. Semantically, the operator will convert
+    the layout to the canonical layout (`NCHW` for quantized_data and `OIHW`
+    for quantized_weight), perform the computation, then convert to the
+    out_layout.
+
+    Parameters
+    ----------
+    quantized_data : tvm.relay.Expr
+        The quantized input data to the operator.
+
+    quantized_weight : tvm.relay.Expr
+        The quantized weight expression.
+
+    input_zero_point : int
+        The zero point of the quantized_data distribution.
+
+    kernel_zero_point : int
+        The zero point of the quantized_kernel distribution.
+
+    output_zero_point : int
+        The zero point of the quantized_output distribution.
+
+    input_scale : float
+        The float scalar that scales the quantized_data int8 values back to FP32.
+
+    kernel_scale : float
+        The float scalar that scales the quantized_kernel int8 values back to FP32.
+
+    output_scale : float
+        The float scalar that scales the quantized_output int8 values back to FP32.
+
+    strides : tuple of int, optional
+        The strides of the convolution.
+
+    padding : tuple of int, optional
+        The padding of the convolution on both sides of the input before convolution.
+
+    dilation : tuple of int, optional
+        Specifies the dilation rate to be used for dilated convolution.
+
+    groups : int, optional
+        Number of groups for grouped convolution.
+
+    channels : int, optional
+        Number of output channels of this convolution.
+
+    kernel_size : tuple of int, optional
+        The spatial dimensions of the convolution kernel.
+
+    data_layout : str, optional
+        Layout of the input.
+
+    kernel_layout : str, optional
+        Layout of the quantized_weight.
+
+    out_layout : str, optional
+        Layout of the output; by default, out_layout is the same as data_layout.
+
+    out_dtype : str, optional
+        Specifies the output data type for mixed-precision conv2d.
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make_quantize.quantized_conv2d(quantized_data, quantized_weight,
+                                           input_zero_point, kernel_zero_point,
+                                           output_zero_point, input_scale,
+                                           kernel_scale, output_scale,
+                                           strides, padding, dilation,
+                                           groups, channels, kernel_size,
+                                           data_layout, kernel_layout, out_layout,
+                                           out_dtype)
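A minimal construction call, mirroring the unit test at the end of this patch:

    from tvm import relay

    data = relay.var("data", shape=(1, 128, 16, 16), dtype="int8")
    weight = relay.var("weight", shape=(64, 128, 3, 3), dtype="int8")
    out = relay.op.nn._quantize.quantized_conv2d(
        data, weight,
        input_zero_point=0, kernel_zero_point=0, output_zero_point=0,
        input_scale=0.5, kernel_scale=0.5, output_scale=0.5,
        channels=64, kernel_size=(3, 3), out_dtype="int8")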
diff --git a/python/tvm/relay/quantize/__init__.py b/python/tvm/relay/quantize/__init__.py
index 45bb62e668537..d51ae80e9fb15 100644
--- a/python/tvm/relay/quantize/__init__.py
+++ b/python/tvm/relay/quantize/__init__.py
@@ -19,4 +19,5 @@
 from __future__ import absolute_import as _abs
 from .quantize import *
+from .quantize_rewrite import *
 from ._annotate import register_annotate_function
diff --git a/python/tvm/relay/quantize/quantize_rewrite.py b/python/tvm/relay/quantize/quantize_rewrite.py
new file mode 100644
index 0000000000000..c2099b65298e6
--- /dev/null
+++ b/python/tvm/relay/quantize/quantize_rewrite.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=unused-argument
+"""Automatic quantization toolkit."""
+from __future__ import absolute_import
+
+from . import _quantize
+from .. import expr as _expr
+
+def quantize_rewrite(expr):
+    """
+    Rewrites the high-level quantized ops into low-level existing Relay ops.
+
+    Parameters
+    ----------
+    expr : tvm.relay.Expr
+        The input expression.
+
+    Returns
+    -------
+    expr : tvm.relay.Expr
+        The output expression.
+    """
+    return _quantize.quantize_rewrite(expr)
diff --git a/src/relay/op/nn/quantized_convolution.cc b/src/relay/op/nn/quantized_convolution.cc
new file mode 100644
index 0000000000000..af243e237d285
--- /dev/null
+++ b/src/relay/op/nn/quantized_convolution.cc
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantized_convolution.cc
+ * \brief Quantized convolution operators
+ */
+
+#include <tvm/data_layout.h>
+#include <tvm/relay/op.h>
+#include <tvm/relay/attrs/nn_quantize.h>
+#include <vector>
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(QuantizedConv2DAttrs);
+
+// TODO(anijain2305) - Copy of Conv2DRel. Should it be shared instead?
+// That would need a separation of header and implementation.
+bool QuantizeConv2DRel(const Array<Type>& types,
+                       int num_inputs,
+                       const Attrs& attrs,
+                       const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 3);
+  const auto* data = types[0].as<TensorTypeNode>();
+  const auto* weight = types[1].as<TensorTypeNode>();
+  if (data == nullptr) return false;
+  static const Layout kNCHW("NCHW");
+  static const Layout kOIHW("OIHW");
+
+  const QuantizedConv2DAttrs* param = attrs.as<QuantizedConv2DAttrs>();
+  CHECK(param != nullptr);
+  DataType out_dtype = param->out_dtype;
+  CHECK_NE(out_dtype, NullValue<DataType>())
+      << "Quantized convolution out_dtype has to be passed\n";
+  const Layout in_layout(param->data_layout);
+  const Layout kernel_layout(param->kernel_layout);
+
+  const auto trans_in_layout = BijectiveLayoutNode::make(in_layout, kNCHW);
+  CHECK(trans_in_layout.defined())
+      << "Conv2D only supports input layouts that are convertible from NCHW."
+      << " But got " << in_layout;
+
+  const auto trans_kernel_layout = BijectiveLayoutNode::make(kernel_layout, kOIHW);
+  CHECK(trans_kernel_layout.defined())
+      << "Conv2D only supports kernel layouts that are convertible from OIHW."
+      << " But got " << kernel_layout;
+
+  Layout out_layout(param->out_layout == "" ? param->data_layout : param->out_layout);
+  const auto trans_out_layout = BijectiveLayoutNode::make(out_layout, kNCHW);
+  CHECK(trans_out_layout.defined())
+      << "Conv2D only supports output layouts that are convertible from NCHW."
+      << " But got " << out_layout;
+
+  Array<IndexExpr> dshape_nchw = trans_in_layout.ForwardShape(data->shape);
+
+  IndexExpr channels, dilated_ksize_y, dilated_ksize_x;
+  // infer the weight shape if kernel_size and channels are defined
+  if (param->kernel_size.defined() && param->channels.defined()) {
+    CHECK_EQ(param->kernel_size.size(), 2);
+    CHECK_EQ(param->dilation.size(), 2);
+    Array<IndexExpr> wshape(
+        {param->channels,
+         dshape_nchw[1] / param->groups,
+         param->kernel_size[0],
+         param->kernel_size[1]});
+    wshape = trans_kernel_layout.BackwardShape(wshape);
+    channels = param->channels;
+    dilated_ksize_y = 1 + (param->kernel_size[0] - 1) * param->dilation[0];
+    dilated_ksize_x = 1 + (param->kernel_size[1] - 1) * param->dilation[1];
+    // assign result to reporter
+    reporter->Assign(types[1], TensorTypeNode::make(wshape, data->dtype));
+  } else {
+    // use the weight to infer the conv shape.
+    if (weight == nullptr) return false;
+    auto wshape = trans_kernel_layout.ForwardShape(weight->shape);
+    if (param->kernel_size.defined()) {
+      CHECK_EQ(param->kernel_size.size(), 2);
+      // check the size
+      CHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2]) &&
+            reporter->AssertEQ(param->kernel_size[1], wshape[3]))
+          << "Conv2D: shape of weight is inconsistent with kernel_size,"
+          << " kernel_size=" << param->kernel_size
+          << " wshape=" << wshape;
+    }
+    if (param->channels.defined()) {
+      CHECK(reporter->AssertEQ(param->channels, wshape[0]))
+          << "Conv2D: shape of weight is inconsistent with channels,"
+          << " channels=" << param->channels
+          << " wshape=" << wshape;
+    }
+    CHECK(reporter->AssertEQ(dshape_nchw[1] / param->groups, wshape[1]));
+    channels = wshape[0];
+    dilated_ksize_y = 1 + (wshape[2] - 1) * param->dilation[0];
+    dilated_ksize_x = 1 + (wshape[3] - 1) * param->dilation[1];
+  }
+  // dilation
+  Array<IndexExpr> oshape({dshape_nchw[0], channels, 0, 0});
+
+  oshape.Set(2, (dshape_nchw[2] + param->padding[0] * 2 - dilated_ksize_y) / param->strides[0] + 1);
+  oshape.Set(3, (dshape_nchw[3] + param->padding[1] * 2 - dilated_ksize_x) / param->strides[1] + 1);
+  if (out_dtype.bits() == 0) {
+    out_dtype = data->dtype;
+  }
+  oshape = trans_out_layout.BackwardShape(oshape);
+  // assign output type
+  reporter->Assign(types[2], TensorTypeNode::make(oshape, out_dtype));
+  return true;
+}
+
+// Positional relay function to create the quantized conv2d operator,
+// used by frontend FFI.
+Expr MakeQuantizeConv2D(Expr quantized_data,
+                        Expr quantized_weight,
+                        int32_t input_zero_point,
+                        int32_t kernel_zero_point,
+                        int32_t output_zero_point,
+                        double input_scale,
+                        double kernel_scale,
+                        double output_scale,
+                        Array<IndexExpr> strides,
+                        Array<IndexExpr> padding,
+                        Array<IndexExpr> dilation,
+                        int groups,
+                        IndexExpr channels,
+                        Array<IndexExpr> kernel_size,
+                        std::string data_layout,
+                        std::string kernel_layout,
+                        std::string out_layout,
+                        DataType out_dtype) {
+  auto attrs = make_node<QuantizedConv2DAttrs>();
+  attrs->strides = std::move(strides);
+  attrs->padding = std::move(padding);
+  attrs->dilation = std::move(dilation);
+  attrs->groups = groups;
+  attrs->channels = std::move(channels);
+  attrs->kernel_size = std::move(kernel_size);
+  attrs->data_layout = std::move(data_layout);
+  attrs->kernel_layout = std::move(kernel_layout);
+  attrs->out_layout = std::move(out_layout);
+  attrs->out_dtype = std::move(out_dtype);
+  // Scalar attributes need no std::move.
+  attrs->input_zero_point = input_zero_point;
+  attrs->kernel_zero_point = kernel_zero_point;
+  attrs->output_zero_point = output_zero_point;
+  attrs->input_scale = input_scale;
+  attrs->kernel_scale = kernel_scale;
+  attrs->output_scale = output_scale;
+  static const Op& op = Op::Get("nn_quantized.quantized_conv2d");
+  return CallNode::make(op, {quantized_data, quantized_weight}, Attrs(attrs), {});
+}
+
+RELAY_REGISTER_OP("nn_quantized.quantized_conv2d")
+.describe(R"code(2D quantized convolution layer.
+
+This operator creates a quantized convolution kernel that is convolved
+with the quantized input to produce a tensor of quantized outputs. The
+operator is further lowered to the existing set of Relay operators.
+
+- **quantized_data**: This depends on the `layout` parameter. Input is a 4D array of shape
+            (batch_size, in_channels, height, width) if `layout` is `NCHW`.
+- **quantized_weight**: (channels, in_channels, kernel_size[0], kernel_size[1])
+- **quantized_out**: This depends on the `layout` parameter. Output is a 4D array of shape
+            (batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
+
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.QuantizedConv2DAttrs")
+.set_num_inputs(2)
+.add_argument("quantized_data", "Tensor", "The quantized input data tensor.")
+.add_argument("quantized_weight", "Tensor", "The quantized weight tensor.")
+.set_support_level(10)
+.add_type_rel("QuantizeConv2D", QuantizeConv2DRel);
+
+TVM_REGISTER_API("relay.op.nn._quantize._make.quantized_conv2d")
+.set_body_typed(MakeQuantizeConv2D);
+
+}  // namespace relay
+}  // namespace tvm
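For the shapes used in the unit test at the end of this patch, the output-shape rule in QuantizeConv2DRel works out as follows (a worked example, not part of the patch):

    # out = (in + 2 * pad - dilated_ksize) // stride + 1, per QuantizeConv2DRel.
    def out_dim(in_dim, pad, ksize, dilation, stride):
        dilated_ksize = 1 + (ksize - 1) * dilation
        return (in_dim + 2 * pad - dilated_ksize) // stride + 1

    # data (1, 128, 16, 16) with weight (64, 128, 3, 3), stride 1, no padding:
    print(out_dim(16, 0, 3, 1, 1))  # 14, so the output type is (1, 64, 14, 14)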
diff --git a/src/relay/pass/pattern_util.h b/src/relay/pass/pattern_util.h
index 5c303905968ee..c96e07ac786b9 100644
--- a/src/relay/pass/pattern_util.h
+++ b/src/relay/pass/pattern_util.h
@@ -372,6 +372,32 @@ inline Expr Copy(Expr data) {
   return CallNode::make(op, {data}, Attrs(), {});
 }
 
+inline Expr Conv2D(Expr data,
+                   Expr weight,
+                   Array<IndexExpr> strides,
+                   Array<IndexExpr> padding,
+                   Array<IndexExpr> dilation,
+                   int groups,
+                   IndexExpr channels,
+                   Array<IndexExpr> kernel_size,
+                   std::string data_layout,
+                   std::string kernel_layout,
+                   std::string out_layout,
+                   DataType out_dtype) {
+  auto attrs = make_node<Conv2DAttrs>();
+  attrs->strides = std::move(strides);
+  attrs->padding = std::move(padding);
+  attrs->dilation = std::move(dilation);
+  attrs->groups = groups;
+  attrs->channels = std::move(channels);
+  attrs->kernel_size = std::move(kernel_size);
+  attrs->data_layout = std::move(data_layout);
+  attrs->kernel_layout = std::move(kernel_layout);
+  attrs->out_layout = std::move(out_layout);
+  attrs->out_dtype = std::move(out_dtype);
+  static const Op& op = Op::Get("nn.conv2d");
+  return CallNode::make(op, {data, weight}, Attrs(attrs), {});
+}
+
 Expr MakeConcatenate(Expr data, int axis);
diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc
new file mode 100644
index 0000000000000..38022dd15f3c7
--- /dev/null
+++ b/src/relay/pass/quantize_rewrite.cc
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantize_rewrite.cc
+ * \brief Lower quantized ops to existing Relay ops.
+ */
+
+#include <tvm/relay/pass.h>
+#include <tvm/relay/op.h>
+#include <tvm/relay/attrs/nn_quantize.h>
+#include "pattern_util.h"
+
+namespace tvm {
+namespace relay {
+
+Expr QuantizedConv2DForwardRewrite(const Call& ref_call,
+                                   const Array<Expr>& new_args,
+                                   const NodeRef& ctx) {
+  // TODO(janimesh) - This is not the right calculation. It only serves as a
+  // prototype to discuss the flow of lowering of quantization ops and the
+  // namespaces.
+  CHECK_EQ(new_args.size(), 2);
+  Expr quantized_data = new_args[0];
+  Expr quantized_kernel = new_args[1];
+  const auto* param = ref_call->attrs.as<QuantizedConv2DAttrs>();
+  CHECK_EQ(param->input_zero_point, 0) << "Only symmetric quantization is supported yet";
+  CHECK_EQ(param->kernel_zero_point, 0) << "Only symmetric quantization is supported yet";
+  CHECK_EQ(param->output_zero_point, 0) << "Only symmetric quantization is supported yet";
+  // TODO(janimesh) - The out_dtype should be something else, like "int32".
+  Expr int8_conv = Conv2D(quantized_data,
+                          quantized_kernel,
+                          param->strides,
+                          param->padding,
+                          param->dilation,
+                          param->groups,
+                          param->channels,
+                          param->kernel_size,
+                          param->data_layout,
+                          param->kernel_layout,
+                          param->out_layout,
+                          Int(32));
+  // TODO(janimesh) - The out_dtype should come from outside.
+  int8_conv = Cast(int8_conv, param->out_dtype);
+  // TODO(janimesh) - Look at the literature and use the right scale
+  // calculations.
+  return int8_conv;
+}
+
+RELAY_REGISTER_OP("nn_quantized.quantized_conv2d")
+.set_attr<FForwardRewrite>("FQuantizeForwardRewrite", QuantizedConv2DForwardRewrite);
+
+TVM_REGISTER_API("relay._quantize.quantize_rewrite")
+.set_body_typed<Expr(Expr)>([](const Expr& e) {
+  Expr ret = ForwardRewrite(e, "FQuantizeForwardRewrite", nullptr, nullptr);
+  return ret;
+});
+
+}  // namespace relay
+}  // namespace tvm
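The TODOs above leave the scale handling as a placeholder: the rewrite casts the int32 accumulator straight to out_dtype. With symmetric quantization (all zero points zero), a complete lowering would instead requantize the accumulator. A NumPy sketch of the standard calculation a finished pass would implement (not what this prototype does):

    import numpy as np

    def requantize(acc_int32, input_scale, kernel_scale, output_scale,
                   output_zero_point=0, dtype=np.int8):
        # One unit of the int32 accumulator represents
        # input_scale * kernel_scale in real terms.
        real = acc_int32.astype(np.float64) * input_scale * kernel_scale
        q = np.round(real / output_scale) + output_zero_point
        info = np.iinfo(dtype)
        return np.clip(q, info.min, info.max).astype(dtype)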
diff --git a/tests/python/unittest/test_quantized_ops.py b/tests/python/unittest/test_quantized_ops.py
new file mode 100644
index 0000000000000..d0b8cc74ffa10
--- /dev/null
+++ b/tests/python/unittest/test_quantized_ops.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import tvm
+from tvm import relay
+from tvm.relay.testing import create_workload
+
+def test_quantized_conv2d():
+    quantized_data = relay.var("quantized_data", shape=(1, 128, 16, 16), dtype='int8')
+    quantized_weight = relay.var("weight", shape=(64, 128, 3, 3), dtype='int8')
+    quantized_output = relay.op.nn._quantize.quantized_conv2d(
+        quantized_data, quantized_weight,
+        input_zero_point=0,
+        kernel_zero_point=0,
+        output_zero_point=0,
+        input_scale=0.5,
+        kernel_scale=0.5,
+        output_scale=0.5,
+        channels=64,
+        kernel_size=(3, 3),
+        out_dtype="int8")
+    func = relay.Function(relay.ir_pass.free_vars(quantized_output),
+                          quantized_output)
+    print("###### Original graph starts ######")
+    print(func)
+    print("###### Original graph ends ######")
+    func = relay.ir_pass.infer_type(func)
+    print("###### TypeInferred graph starts ######")
+    print(func)
+    print("###### TypeInferred graph ends ######")
+    func = relay.quantize.quantize_rewrite(func)
+    func = relay.ir_pass.infer_type(func)
+    print("###### Lowered graph starts ######")
+    print(func)
+    print("###### Lowered graph ends ######")
+
+if __name__ == "__main__":
+    test_quantized_conv2d()
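Given the rewrite pass above, the lowered graph printed by the test should contain roughly the following (a hand-written sketch of the expected Relay text, not captured output):

    %0 = nn.conv2d(%quantized_data, %weight, channels=64, kernel_size=[3, 3], out_dtype="int32")
    cast(%0, dtype="int8")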