diff --git a/include/tvm/relay/attrs/nn_quantize.h b/include/tvm/relay/attrs/nn_quantize.h
new file mode 100644
index 0000000000000..07029e8d76e2d
--- /dev/null
+++ b/include/tvm/relay/attrs/nn_quantize.h
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/relay/attrs/nn_quantize.h
+ * \brief Auxiliary attributes for quantized nn operators.
+ */
+#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+
+#include <tvm/attrs.h>
+#include <string>
+
+namespace tvm {
+namespace relay {
+
+// TODO(anijain2305) - Copy of Conv2DAttrs. Should we inherit instead?
+/*! \brief Attributes for the quantized conv2d operator */
+struct QuantizedConv2DAttrs : public tvm::AttrsNode<QuantizedConv2DAttrs> {
+  // Traditional conv2d attributes.
+  Array<IndexExpr> strides;
+  Array<IndexExpr> padding;
+  Array<IndexExpr> dilation;
+  int groups;
+  IndexExpr channels;
+  Array<IndexExpr> kernel_size;
+  std::string data_layout;
+  std::string kernel_layout;
+  std::string out_layout;
+  DataType out_dtype;
+
+  // Quantization related attributes.
+  int32_t input_zero_point;
+  int32_t kernel_zero_point;
+  int32_t output_zero_point;
+  double input_scale;
+  double kernel_scale;
+  double output_scale;
+
+  TVM_DECLARE_ATTRS(QuantizedConv2DAttrs, "relay.attrs.QuantizedConv2DAttrs") {
+    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
+        .describe("Specifies the strides of the convolution.");
+    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
+        .describe("If padding is non-zero, then the input is implicitly zero-padded "
+                  "on both sides for padding number of points.");
+    TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
+        .describe("Specifies the dilation rate to use for dilated convolution.");
+    TVM_ATTR_FIELD(groups).set_default(1)
+        .describe("Controls the connections between inputs and outputs. "
+                  "At groups=1, all inputs are convolved to all outputs. "
+                  "At groups=2, the operation becomes equivalent to having two convolution "
+                  "layers side by side, each seeing half the input channels and producing "
+                  "half the output channels, both of which are subsequently concatenated.");
+    TVM_ATTR_FIELD(channels)
+        .describe("The number of output channels in the convolution."
+                  " If it is not set, it is inferred from the shape of the weight.")
+        .set_default(NullValue<IndexExpr>());
+    TVM_ATTR_FIELD(kernel_size)
+        .describe("Specifies the dimensions of the convolution window.")
+        .set_default(NullValue<Array<IndexExpr> >());
+    TVM_ATTR_FIELD(data_layout).set_default("NCHW")
+        .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc. "
+                  "'N', 'C', 'H', 'W' stand for batch, channel, height, and width "
+                  "dimensions respectively. Convolution is applied on the 'H' and "
+                  "'W' dimensions.");
+    TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
+        .describe("Dimension ordering of the weight. Can be 'OIHW', 'OIHW16o16i', etc. "
+                  "'O', 'I', 'H', 'W' stand for num_filter, input_channel, height, and width "
+                  "dimensions respectively.");
+    TVM_ATTR_FIELD(out_layout).set_default("")
+        .describe("Dimension ordering of the output. Can be 'NCHW', 'NHWC', etc. "
+                  "'N', 'C', 'H', 'W' stand for batch, channel, height, and width "
+                  "dimensions respectively. Defaults to the same layout as the input.");
+
+    // use 0 bits to indicate none.
+    TVM_ATTR_FIELD(out_dtype)
+        .set_default(NullValue<DataType>())
+        .describe("Output data type, set to an explicit type under a mixed-precision setting.");
+
+    TVM_ATTR_FIELD(input_zero_point)
+        .describe("The zero point of the input tensor.");
+    TVM_ATTR_FIELD(kernel_zero_point)
+        .describe("The zero point of the kernel tensor.");
+    TVM_ATTR_FIELD(output_zero_point)
+        .describe("The zero point of the output tensor.");
+    TVM_ATTR_FIELD(input_scale)
+        .describe("The scale of the input tensor.");
+    TVM_ATTR_FIELD(kernel_scale)
+        .describe("The scale of the kernel tensor.");
+    TVM_ATTR_FIELD(output_scale)
+        .describe("The scale of the output tensor.");
+  }
+};
+
+}  // namespace relay
+}  // namespace tvm
+#endif  // TVM_RELAY_ATTRS_NN_QUANTIZE_H_
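The scale and zero-point fields above encode the standard affine quantization mapping, real_value ~ scale * (quantized_value - zero_point). A minimal NumPy sketch of that mapping, for orientation only (the helper names are illustrative and not part of this patch):

    import numpy as np

    def quantize(real, scale, zero_point, dtype=np.int8):
        # Map real values onto the integer grid and saturate to the dtype range.
        info = np.iinfo(dtype)
        q = np.round(real / scale) + zero_point
        return np.clip(q, info.min, info.max).astype(dtype)

    def dequantize(q, scale, zero_point):
        # Recover the (approximate) real values from the integer grid.
        return scale * (q.astype(np.float32) - zero_point)

    x = np.array([-1.0, 0.0, 0.5, 1.0], dtype=np.float32)
    print(dequantize(quantize(x, scale=0.5, zero_point=0), 0.5, 0))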
+ "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" + "dimensions respectively."); + TVM_ATTR_FIELD(out_layout).set_default("") + .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Default to be same as input layout."); + + // use 0 bits to indicate none. + TVM_ATTR_FIELD(out_dtype) + .set_default(NullValue()) + .describe("Output data type, set to explicit type under mixed precision setting"); + + + TVM_ATTR_FIELD(input_zero_point) + .describe("The zero point of the input tensor."); + TVM_ATTR_FIELD(kernel_zero_point) + .describe("The zero point of the kernel tensor."); + TVM_ATTR_FIELD(output_zero_point) + .describe("The zero point of the output tensor."); + TVM_ATTR_FIELD(input_scale) + .describe("The scale of the input tensor."); + TVM_ATTR_FIELD(kernel_scale) + .describe("The scale of the kernel tensor."); + TVM_ATTR_FIELD(output_scale) + .describe("The scale of the output tensor."); + + + } +}; + +} // namespace relay +} // namespace tvm +#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_ diff --git a/python/tvm/relay/op/nn/__init__.py b/python/tvm/relay/op/nn/__init__.py index ebabbbcd9d3ad..20bc48d879184 100644 --- a/python/tvm/relay/op/nn/__init__.py +++ b/python/tvm/relay/op/nn/__init__.py @@ -18,4 +18,5 @@ """Neural network related operators.""" from __future__ import absolute_import as _abs from .nn import * +from . import _quantize from . import _nn diff --git a/python/tvm/relay/op/nn/_make_quantize.py b/python/tvm/relay/op/nn/_make_quantize.py new file mode 100644 index 0000000000000..2480c99068c4c --- /dev/null +++ b/python/tvm/relay/op/nn/_make_quantize.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Constructor APIs""" +from ...._ffi.function import _init_api + +_init_api("relay.op.nn._quantize._make", __name__) diff --git a/python/tvm/relay/op/nn/_quantize.py b/python/tvm/relay/op/nn/_quantize.py new file mode 100644 index 0000000000000..56a193ac4205f --- /dev/null +++ b/python/tvm/relay/op/nn/_quantize.py @@ -0,0 +1,133 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
diff --git a/python/tvm/relay/op/nn/_quantize.py b/python/tvm/relay/op/nn/_quantize.py
new file mode 100644
index 0000000000000..56a193ac4205f
--- /dev/null
+++ b/python/tvm/relay/op/nn/_quantize.py
@@ -0,0 +1,133 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=invalid-name, too-many-lines
+"""Quantized neural network operations."""
+from __future__ import absolute_import as _abs
+from . import _make_quantize
+
+
+def quantized_conv2d(quantized_data,
+                     quantized_weight,
+                     input_zero_point,
+                     kernel_zero_point,
+                     output_zero_point,
+                     input_scale,
+                     kernel_scale,
+                     output_scale,
+                     strides=(1, 1),
+                     padding=(0, 0),
+                     dilation=(1, 1),
+                     groups=1,
+                     channels=None,
+                     kernel_size=None,
+                     data_layout="NCHW",
+                     kernel_layout="OIHW",
+                     out_layout="",
+                     out_dtype=""):
+    r"""Quantized 2D convolution.
+
+    This operator takes quantized_weight as the convolution kernel
+    and convolves it with quantized_data to produce an output.
+
+    In the default case, where the data_layout is `NCHW`
+    and kernel_layout is `OIHW`, conv2d takes in
+    a quantized_data Tensor with shape `(batch_size, in_channels, height, width)`,
+    and a quantized_weight Tensor with shape
+    `(channels, in_channels, kernel_size[0], kernel_size[1])`
+    to produce an output Tensor with the following rule:
+
+    .. math::
+
+        \mbox{out}[b, c, y, x] = \sum_{dy, dx, k}
+           \mbox{quantized_data}[b, k, \mbox{strides}[0] * y + dy,
+           \mbox{strides}[1] * x + dx] * \mbox{quantized_weight}[c, k, dy, dx]
+
+    Padding and dilation are applied to quantized_data and quantized_weight
+    respectively before the computation. This operator accepts a layout
+    specification for quantized_data. Semantically, the operator will convert
+    the layout to the canonical layout (`NCHW` for quantized_data and `OIHW`
+    for quantized_weight), perform the computation, then convert to the
+    out_layout.
+
+    Parameters
+    ----------
+    quantized_data : tvm.relay.Expr
+        The quantized input data to the operator.
+
+    quantized_weight : tvm.relay.Expr
+        The quantized weight expression.
+
+    input_zero_point : int
+        The zero point of the quantized_data distribution.
+
+    kernel_zero_point : int
+        The zero point of the quantized_kernel distribution.
+
+    output_zero_point : int
+        The zero point of the quantized_output distribution.
+
+    input_scale : float
+        The float scalar that scales the quantized_data int8 values back to FP32.
+
+    kernel_scale : float
+        The float scalar that scales the quantized_kernel int8 values back to FP32.
+
+    output_scale : float
+        The float scalar that scales the quantized_output int8 values back to FP32.
+
+    strides : tuple of int, optional
+        The strides of the convolution.
+
+    padding : tuple of int, optional
+        The padding of the convolution on both sides of the input before convolution.
+
+    dilation : tuple of int, optional
+        Specifies the dilation rate to be used for dilated convolution.
+
+    groups : int, optional
+        Number of groups for grouped convolution.
+
+    channels : int, optional
+        Number of output channels of this convolution.
+
+    kernel_size : tuple of int, optional
+        The spatial dimensions of the convolution kernel.
+
+    data_layout : str, optional
+        Layout of the input.
+
+    kernel_layout : str, optional
+        Layout of the quantized_weight.
+
+    out_layout : str, optional
+        Layout of the output; by default, out_layout is the same as data_layout.
+
+    out_dtype : str, optional
+        Specifies the output data type for mixed-precision conv2d.
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make_quantize.quantized_conv2d(quantized_data, quantized_weight,
+                                           input_zero_point, kernel_zero_point,
+                                           output_zero_point, input_scale,
+                                           kernel_scale, output_scale,
+                                           strides, padding, dilation,
+                                           groups, channels, kernel_size,
+                                           data_layout, kernel_layout, out_layout,
+                                           out_dtype)
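A minimal construction call, mirroring the unit test at the end of this patch:

    from tvm import relay

    data = relay.var("data", shape=(1, 128, 16, 16), dtype="int8")
    weight = relay.var("weight", shape=(64, 128, 3, 3), dtype="int8")
    out = relay.op.nn._quantize.quantized_conv2d(
        data, weight,
        input_zero_point=0, kernel_zero_point=0, output_zero_point=0,
        input_scale=0.5, kernel_scale=0.5, output_scale=0.5,
        channels=64, kernel_size=(3, 3), out_dtype="int8")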
diff --git a/python/tvm/relay/quantize/__init__.py b/python/tvm/relay/quantize/__init__.py
index 45bb62e668537..d51ae80e9fb15 100644
--- a/python/tvm/relay/quantize/__init__.py
+++ b/python/tvm/relay/quantize/__init__.py
@@ -19,4 +19,5 @@
 from __future__ import absolute_import as _abs
 from .quantize import *
+from .quantize_rewrite import *
 from ._annotate import register_annotate_function
diff --git a/python/tvm/relay/quantize/quantize_rewrite.py b/python/tvm/relay/quantize/quantize_rewrite.py
new file mode 100644
index 0000000000000..c2099b65298e6
--- /dev/null
+++ b/python/tvm/relay/quantize/quantize_rewrite.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=unused-argument
+"""Automatic quantization toolkit."""
+from __future__ import absolute_import
+
+from . import _quantize
+from .. import expr as _expr
+
+def quantize_rewrite(expr):
+    """
+    Rewrites the high-level quantized ops into low-level existing Relay ops.
+
+    Parameters
+    ----------
+    expr : tvm.relay.Expr
+        The input expression.
+
+    Returns
+    -------
+    expr : tvm.relay.Expr
+        The output expression.
+    """
+    return _quantize.quantize_rewrite(expr)
diff --git a/src/relay/op/nn/quantized_convolution.cc b/src/relay/op/nn/quantized_convolution.cc
new file mode 100644
index 0000000000000..af243e237d285
--- /dev/null
+++ b/src/relay/op/nn/quantized_convolution.cc
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantized_convolution.cc
+ * \brief Quantized convolution operators
+ */
+
+#include <tvm/data_layout.h>
+#include <tvm/relay/op.h>
+#include <tvm/relay/attrs/nn_quantize.h>
+#include <vector>
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(QuantizedConv2DAttrs);
+
+// TODO(anijain2305) - Copy of Conv2DRel. Should it be shared instead?
+// That would need a separation of header and implementation.
+bool QuantizeConv2DRel(const Array<Type>& types,
+                       int num_inputs,
+                       const Attrs& attrs,
+                       const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 3);
+  const auto* data = types[0].as<TensorTypeNode>();
+  const auto* weight = types[1].as<TensorTypeNode>();
+  if (data == nullptr) return false;
+  static const Layout kNCHW("NCHW");
+  static const Layout kOIHW("OIHW");
+
+  const QuantizedConv2DAttrs* param = attrs.as<QuantizedConv2DAttrs>();
+  CHECK(param != nullptr);
+  DataType out_dtype = param->out_dtype;
+  CHECK_NE(out_dtype, NullValue<DataType>())
+      << "Quantized convolution out_dtype has to be passed\n";
+  const Layout in_layout(param->data_layout);
+  const Layout kernel_layout(param->kernel_layout);
+
+  const auto trans_in_layout = BijectiveLayoutNode::make(in_layout, kNCHW);
+  CHECK(trans_in_layout.defined())
+      << "Conv2D only supports input layouts that are convertible from NCHW."
+      << " But got " << in_layout;
+
+  const auto trans_kernel_layout = BijectiveLayoutNode::make(kernel_layout, kOIHW);
+  CHECK(trans_kernel_layout.defined())
+      << "Conv2D only supports kernel layouts that are convertible from OIHW."
+      << " But got " << kernel_layout;
+
+  Layout out_layout(param->out_layout == "" ? param->data_layout : param->out_layout);
+  const auto trans_out_layout = BijectiveLayoutNode::make(out_layout, kNCHW);
+  CHECK(trans_out_layout.defined())
+      << "Conv2D only supports output layouts that are convertible from NCHW."
+      << " But got " << out_layout;
+
+  Array<IndexExpr> dshape_nchw = trans_in_layout.ForwardShape(data->shape);
+
+  IndexExpr channels, dilated_ksize_y, dilated_ksize_x;
+  // infer the weight shape if kernel_size and channels are defined
+  if (param->kernel_size.defined() && param->channels.defined()) {
+    CHECK_EQ(param->kernel_size.size(), 2);
+    CHECK_EQ(param->dilation.size(), 2);
+    Array<IndexExpr> wshape(
+        {param->channels,
+         dshape_nchw[1] / param->groups,
+         param->kernel_size[0],
+         param->kernel_size[1]});
+    wshape = trans_kernel_layout.BackwardShape(wshape);
+    channels = param->channels;
+    dilated_ksize_y = 1 + (param->kernel_size[0] - 1) * param->dilation[0];
+    dilated_ksize_x = 1 + (param->kernel_size[1] - 1) * param->dilation[1];
+    // assign result to reporter
+    reporter->Assign(types[1], TensorTypeNode::make(wshape, data->dtype));
+  } else {
+    // use the weight to infer the conv shape.
+    if (weight == nullptr) return false;
+    auto wshape = trans_kernel_layout.ForwardShape(weight->shape);
+    if (param->kernel_size.defined()) {
+      CHECK_EQ(param->kernel_size.size(), 2);
+      // check the size
+      CHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2]) &&
+            reporter->AssertEQ(param->kernel_size[1], wshape[3]))
+          << "Conv2D: shape of weight is inconsistent with kernel_size,"
+          << " kernel_size=" << param->kernel_size
+          << " wshape=" << wshape;
+    }
+    if (param->channels.defined()) {
+      CHECK(reporter->AssertEQ(param->channels, wshape[0]))
+          << "Conv2D: shape of weight is inconsistent with channels,"
+          << " channels=" << param->channels
+          << " wshape=" << wshape;
+    }
+    CHECK(reporter->AssertEQ(dshape_nchw[1] / param->groups, wshape[1]));
+    channels = wshape[0];
+    dilated_ksize_y = 1 + (wshape[2] - 1) * param->dilation[0];
+    dilated_ksize_x = 1 + (wshape[3] - 1) * param->dilation[1];
+  }
+  // dilation
+  Array<IndexExpr> oshape({dshape_nchw[0], channels, 0, 0});
+
+  oshape.Set(2, (dshape_nchw[2] + param->padding[0] * 2 - dilated_ksize_y) / param->strides[0] + 1);
+  oshape.Set(3, (dshape_nchw[3] + param->padding[1] * 2 - dilated_ksize_x) / param->strides[1] + 1);
+  if (out_dtype.bits() == 0) {
+    out_dtype = data->dtype;
+  }
+  oshape = trans_out_layout.BackwardShape(oshape);
+  // assign output type
+  reporter->Assign(types[2], TensorTypeNode::make(oshape, out_dtype));
+  return true;
+}
+
+// Positional relay function to create the quantized conv2d operator,
+// used by frontend FFI.
+Expr MakeQuantizeConv2D(Expr quantized_data,
+                        Expr quantized_weight,
+                        int32_t input_zero_point,
+                        int32_t kernel_zero_point,
+                        int32_t output_zero_point,
+                        double input_scale,
+                        double kernel_scale,
+                        double output_scale,
+                        Array<IndexExpr> strides,
+                        Array<IndexExpr> padding,
+                        Array<IndexExpr> dilation,
+                        int groups,
+                        IndexExpr channels,
+                        Array<IndexExpr> kernel_size,
+                        std::string data_layout,
+                        std::string kernel_layout,
+                        std::string out_layout,
+                        DataType out_dtype) {
+  auto attrs = make_node<QuantizedConv2DAttrs>();
+  attrs->strides = std::move(strides);
+  attrs->padding = std::move(padding);
+  attrs->dilation = std::move(dilation);
+  attrs->groups = groups;
+  attrs->channels = std::move(channels);
+  attrs->kernel_size = std::move(kernel_size);
+  attrs->data_layout = std::move(data_layout);
+  attrs->kernel_layout = std::move(kernel_layout);
+  attrs->out_layout = std::move(out_layout);
+  attrs->out_dtype = std::move(out_dtype);
+  // Scalar attributes need no std::move.
+  attrs->input_zero_point = input_zero_point;
+  attrs->kernel_zero_point = kernel_zero_point;
+  attrs->output_zero_point = output_zero_point;
+  attrs->input_scale = input_scale;
+  attrs->kernel_scale = kernel_scale;
+  attrs->output_scale = output_scale;
+  static const Op& op = Op::Get("nn_quantized.quantized_conv2d");
+  return CallNode::make(op, {quantized_data, quantized_weight}, Attrs(attrs), {});
+}
+
+RELAY_REGISTER_OP("nn_quantized.quantized_conv2d")
+.describe(R"code(2D quantized convolution layer.
+
+This operator creates a quantized convolution kernel that is convolved
+with the quantized input to produce a tensor of quantized outputs. The
+operator is further lowered to the existing set of Relay operators.
+
+- **quantized_data**: This depends on the `layout` parameter. Input is a 4D array of shape
+            (batch_size, in_channels, height, width) if `layout` is `NCHW`.
+- **quantized_weight**: (channels, in_channels, kernel_size[0], kernel_size[1])
+- **quantized_out**: This depends on the `layout` parameter. Output is a 4D array of shape
+            (batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
+
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.QuantizedConv2DAttrs")
+.set_num_inputs(2)
+.add_argument("quantized_data", "Tensor", "The quantized input data tensor.")
+.add_argument("quantized_weight", "Tensor", "The quantized weight tensor.")
+.set_support_level(10)
+.add_type_rel("QuantizeConv2D", QuantizeConv2DRel);
+
+TVM_REGISTER_API("relay.op.nn._quantize._make.quantized_conv2d")
+.set_body_typed(MakeQuantizeConv2D);
+
+}  // namespace relay
+}  // namespace tvm
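For the shapes used in the unit test at the end of this patch, the output-shape rule in QuantizeConv2DRel works out as follows (a worked example, not part of the patch):

    # out = (in + 2 * pad - dilated_ksize) // stride + 1, per QuantizeConv2DRel.
    def out_dim(in_dim, pad, ksize, dilation, stride):
        dilated_ksize = 1 + (ksize - 1) * dilation
        return (in_dim + 2 * pad - dilated_ksize) // stride + 1

    # data (1, 128, 16, 16) with weight (64, 128, 3, 3), stride 1, no padding:
    print(out_dim(16, 0, 3, 1, 1))  # 14, so the output type is (1, 64, 14, 14)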
diff --git a/src/relay/pass/pattern_util.h b/src/relay/pass/pattern_util.h
index 5c303905968ee..c96e07ac786b9 100644
--- a/src/relay/pass/pattern_util.h
+++ b/src/relay/pass/pattern_util.h
@@ -372,6 +372,32 @@ inline Expr Copy(Expr data) {
   return CallNode::make(op, {data}, Attrs(), {});
 }
 
+inline Expr Conv2D(Expr data,
+                   Expr weight,
+                   Array<IndexExpr> strides,
+                   Array<IndexExpr> padding,
+                   Array<IndexExpr> dilation,
+                   int groups,
+                   IndexExpr channels,
+                   Array<IndexExpr> kernel_size,
+                   std::string data_layout,
+                   std::string kernel_layout,
+                   std::string out_layout,
+                   DataType out_dtype) {
+  auto attrs = make_node<Conv2DAttrs>();
+  attrs->strides = std::move(strides);
+  attrs->padding = std::move(padding);
+  attrs->dilation = std::move(dilation);
+  attrs->groups = groups;
+  attrs->channels = std::move(channels);
+  attrs->kernel_size = std::move(kernel_size);
+  attrs->data_layout = std::move(data_layout);
+  attrs->kernel_layout = std::move(kernel_layout);
+  attrs->out_layout = std::move(out_layout);
+  attrs->out_dtype = std::move(out_dtype);
+  static const Op& op = Op::Get("nn.conv2d");
+  return CallNode::make(op, {data, weight}, Attrs(attrs), {});
+}
+
 Expr MakeConcatenate(Expr data, int axis);
diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc
new file mode 100644
index 0000000000000..38022dd15f3c7
--- /dev/null
+++ b/src/relay/pass/quantize_rewrite.cc
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantize_rewrite.cc
+ * \brief Lower quantized ops to existing Relay ops.
+ */
+
+#include <tvm/relay/pass.h>
+#include <tvm/relay/op.h>
+#include <tvm/relay/attrs/nn_quantize.h>
+#include "pattern_util.h"
+
+namespace tvm {
+namespace relay {
+
+Expr QuantizedConv2DForwardRewrite(const Call& ref_call,
+                                   const Array<Expr>& new_args,
+                                   const NodeRef& ctx) {
+  // TODO(janimesh) - This is not the right calculation. It only serves as a
+  // prototype to discuss the flow of lowering of quantization ops and the
+  // namespaces.
+  CHECK_EQ(new_args.size(), 2);
+  Expr quantized_data = new_args[0];
+  Expr quantized_kernel = new_args[1];
+  const auto* param = ref_call->attrs.as<QuantizedConv2DAttrs>();
+  CHECK_EQ(param->input_zero_point, 0) << "Only symmetric quantization is supported yet";
+  CHECK_EQ(param->kernel_zero_point, 0) << "Only symmetric quantization is supported yet";
+  CHECK_EQ(param->output_zero_point, 0) << "Only symmetric quantization is supported yet";
+  // TODO(janimesh) - The out_dtype should be something else, like "int32".
+  Expr int8_conv = Conv2D(quantized_data,
+                          quantized_kernel,
+                          param->strides,
+                          param->padding,
+                          param->dilation,
+                          param->groups,
+                          param->channels,
+                          param->kernel_size,
+                          param->data_layout,
+                          param->kernel_layout,
+                          param->out_layout,
+                          Int(32));
+  // TODO(janimesh) - The out_dtype should come from outside.
+  int8_conv = Cast(int8_conv, param->out_dtype);
+  // TODO(janimesh) - Look at the literature and use the right scale
+  // calculations.
+  return int8_conv;
+}
+
+RELAY_REGISTER_OP("nn_quantized.quantized_conv2d")
+.set_attr<FForwardRewrite>("FQuantizeForwardRewrite", QuantizedConv2DForwardRewrite);
+
+TVM_REGISTER_API("relay._quantize.quantize_rewrite")
+.set_body_typed<Expr(Expr)>([](const Expr& e) {
+  Expr ret = ForwardRewrite(e, "FQuantizeForwardRewrite", nullptr, nullptr);
+  return ret;
+});
+
+}  // namespace relay
+}  // namespace tvm
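The TODOs above leave the scale handling as a placeholder: the rewrite casts the int32 accumulator straight to out_dtype. With symmetric quantization (all zero points zero), a complete lowering would instead requantize the accumulator. A NumPy sketch of the standard calculation a finished pass would implement (not what this prototype does):

    import numpy as np

    def requantize(acc_int32, input_scale, kernel_scale, output_scale,
                   output_zero_point=0, dtype=np.int8):
        # One unit of the int32 accumulator represents
        # input_scale * kernel_scale in real terms.
        real = acc_int32.astype(np.float64) * input_scale * kernel_scale
        q = np.round(real / output_scale) + output_zero_point
        info = np.iinfo(dtype)
        return np.clip(q, info.min, info.max).astype(dtype)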
diff --git a/tests/python/unittest/test_quantized_ops.py b/tests/python/unittest/test_quantized_ops.py
new file mode 100644
index 0000000000000..d0b8cc74ffa10
--- /dev/null
+++ b/tests/python/unittest/test_quantized_ops.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import tvm
+from tvm import relay
+from tvm.relay.testing import create_workload
+
+def test_quantized_conv2d():
+    quantized_data = relay.var("quantized_data", shape=(1, 128, 16, 16), dtype='int8')
+    quantized_weight = relay.var("weight", shape=(64, 128, 3, 3), dtype='int8')
+    quantized_output = relay.op.nn._quantize.quantized_conv2d(
+        quantized_data, quantized_weight,
+        input_zero_point=0,
+        kernel_zero_point=0,
+        output_zero_point=0,
+        input_scale=0.5,
+        kernel_scale=0.5,
+        output_scale=0.5,
+        channels=64,
+        kernel_size=(3, 3),
+        out_dtype="int8")
+    func = relay.Function(relay.ir_pass.free_vars(quantized_output),
+                          quantized_output)
+    print("###### Original graph starts ######")
+    print(func)
+    print("###### Original graph ends ######")
+    func = relay.ir_pass.infer_type(func)
+    print("###### TypeInferred graph starts ######")
+    print(func)
+    print("###### TypeInferred graph ends ######")
+    func = relay.quantize.quantize_rewrite(func)
+    func = relay.ir_pass.infer_type(func)
+    print("###### Lowered graph starts ######")
+    print(func)
+    print("###### Lowered graph ends ######")
+
+if __name__ == "__main__":
+    test_quantized_conv2d()
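Given the rewrite pass above, the lowered graph printed by the test should contain roughly the following (a hand-written sketch of the expected Relay text, not captured output):

    %0 = nn.conv2d(%quantized_data, %weight, channels=64, kernel_size=[3, 3], out_dtype="int32")
    cast(%0, dtype="int8")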