
Commit a962c96

init
drop changes
1 parent 0f4c151 commit a962c96

7 files changed, +251 -22 lines changed

python/tvm/relay/op/_tensor_grad.py

Lines changed: 49 additions & 12 deletions
@@ -269,18 +269,6 @@ def conv2d_grad(orig, grad):
     return [backward_data, backward_weight]
 
 
-@register_gradient("max")
-def max_grad(orig, grad):
-    """Returns the gradient of max"""
-    # Only support axis=0, since broadcasting orig to x behaves incorrectly
-    x, axis = orig.args[0], orig.attrs.axis
-    assert(axis is not None and len(axis) == 1 and int(axis[0]) == 0)
-    orig = broadcast_to_like(orig, x)
-    grad = broadcast_to_like(grad, x)
-    indicators = cast_like(equal(orig, x), grad)
-    return [indicators * grad]
-
-
 @register_gradient("nn.softmax")
 def softmax_grad(orig, grad):
     """Gradient of softmax"""
@@ -302,6 +290,24 @@ def dense_grad(orig, grad):
     return [collapse_sum_like(transpose(grad) * weight, data),
             collapse_sum_like(data * transpose(grad), weight)]
 
+# UNTESTED
+@register_gradient("reshape")
+def reshape_grad(orig, grad):
+    return [reshape_like(grad, orig.args[0])]
+
+
+# UNTESTED
+@register_gradient("take")
+def take_grad(orig, grad):
+    x, y = orig.args
+    return [zeros_like(x), zeros_like(y)]
+    return [Call(op_get("take_grad"), [x, y, grad], orig.attrs), zeros_like(y)]
+
+
+@register_gradient("shape_of")
+def shape_of_grad(orig, grad):
+    return [zeros_like(orig.args[0])]
+
 
 @register_gradient("reshape")
 def reshape_grad(orig, grad):
@@ -347,3 +353,34 @@ def sum_grad(orig, grad):
     """Returns grad broadcasted to data dims"""
     data = orig.args[0]
     return [broadcast_to_like(grad, data)]
+
+
+@register_gradient("nn.global_avg_pool2d")
+def global_avg_pool2d_grad(orig, grad):
+    """repeat the h w dimension"""
+    # focus on conv2d right now; this is a placeholder, not a real gradient.
+    return [orig.args[0]]
+
+
+@register_gradient("nn.batch_norm")
+def batch_norm_grad(orig, grad):
+    """multiply some stuff"""
+    # batch_norm has a problematic API, so we will not spend time implementing it.
+    a, b, c, d, e = orig.args
+    return [a, b, c, d, e]
+
+
+@register_gradient("split")
+def split_grad(orig, grad):
+    # return zero
+    return [orig.args[0]]
+
+
+@register_gradient("nn.cross_entropy")
+def cross_entropy_grad(orig, grad):
+    x, y = orig.args
+    sm = softmax(x)
+    shape = shape_of(x)
+    batch_size = take(shape, const(0, dtype='int32'), axis=0)
+    grad = grad / batch_size.astype('float32')
+    return [reduce_sum(y, axis=1) * grad * (sm - y), -grad * log(sm)]
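
For one-hot targets (each row of y sums to one), the x-gradient registered above reduces to grad * (softmax(x) - y), with grad already divided by the batch size. A standalone numpy sanity check of that identity against finite differences (an illustrative sketch, not part of this commit):

```python
import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def cross_entropy(x, y):
    # forward definition matching the registered compute:
    # -sum(log_softmax(x) * y) / batch_size
    return -np.sum(np.log(softmax(x)) * y) / x.shape[0]

rng = np.random.RandomState(0)
x = rng.randn(4, 10)
y = np.eye(10)[rng.randint(0, 10, size=4)]  # one-hot rows

analytic = (softmax(x) - y) / x.shape[0]  # gradient w.r.t. x for out-grad 1
numeric = np.zeros_like(x)
eps = 1e-6
for idx in np.ndindex(*x.shape):
    d = np.zeros_like(x)
    d[idx] = eps
    numeric[idx] = (cross_entropy(x + d, y) - cross_entropy(x - d, y)) / (2 * eps)

np.testing.assert_allclose(analytic, numeric, rtol=1e-4, atol=1e-8)
```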

python/tvm/relay/op/nn/_nn.py

Lines changed: 10 additions & 0 deletions
@@ -717,3 +717,13 @@ def schedule_bitserial_dense(attrs, outputs, target):
 
 
 reg.register_pattern("nn.bitserial_dense", reg.OpPattern.OUT_ELEMWISE_FUSABLE)
+
+reg.register_schedule("nn.cross_entropy", schedule_injective)
+
+reg.register_pattern("nn.cross_entropy",
+                     OpPattern.OPAQUE)
+
+@reg.register_compute("nn.cross_entropy")
+def compute_cross_entropy(attrs, inputs, out_dtype, target):
+    x, y = inputs
+    return [-topi.sum(topi.nn.log_softmax(x) * y / x.shape[0])]
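
Note that the compute divides y by the batch size inside the reduction; by linearity this equals dividing the summed loss. A small numpy sketch of that equivalence (log_softmax here is a hand-rolled stand-in for topi.nn.log_softmax):

```python
import numpy as np

def log_softmax(x):
    # numerically stable log-softmax over the last axis
    shifted = x - x.max(axis=1, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))

x = np.random.randn(4, 10)
y = np.eye(10)[np.random.randint(0, 10, size=4)]

a = -np.sum(log_softmax(x) * y / x.shape[0])  # as registered above
b = -np.sum(log_softmax(x) * y) / x.shape[0]  # divide after the sum
assert np.allclose(a, b)
```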

python/tvm/relay/op/nn/nn.py

Lines changed: 4 additions & 0 deletions
@@ -1621,3 +1621,7 @@ def bitserial_dense(data,
     """
     return _make.bitserial_dense(data, weight, units, data_bits, weight_bits,
                                  pack_dtype, out_dtype, unipolar)
+
+
+def cross_entropy(predictions, targets):
+    return _make.cross_entropy(predictions, targets)
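
A minimal usage sketch for the new wrapper (this assumes a build of this commit; run_infer_type is the helper the tests below import):

```python
from tvm import relay
from tvm.relay.testing import run_infer_type

x = relay.var("x", shape=(4, 10))
y = relay.var("y", shape=(4, 10))
z = run_infer_type(relay.nn.cross_entropy(x, y))
# CrossEntropyRel (added in nn.cc below) assigns a scalar result type
assert z.checked_type == relay.TensorType((), "float32")
```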

python/tvm/relay/op/transform.py

Lines changed: 36 additions & 2 deletions
@@ -42,12 +42,15 @@ def cast(data, dtype):
 
 def cast_like(data, dtype_like):
     """Cast input tensor to data type of another tensor.
+
     Parameters
     ----------
     data : relay.Expr
         The input data to the operator.
+
     dtype_like: relay.Expr
         The tensor to cast to.
+
     Returns
     -------
     result : relay.Expr
@@ -228,8 +231,8 @@ def reshape_like(data, shape_like):
     data : relay.Expr
         The input data to the operator.
 
-    shape_like : tuple of int
-        The new shape. Should be compatible with the original shape.
+    shape_like : relay.Expr
+        The tensor to reshape to. Should be compatible with the original shape.
 
     Returns
     -------
@@ -239,6 +242,37 @@ def reshape_like(data, shape_like):
     return _make.reshape_like(data, shape_like)
 
 
+def embed_like(data, indices, type_like, axis=None, mode="clip"):
+    """Embed elements of an array at the given indices into a tensor
+    typed like `type_like`.
+
+    Parameters
+    ----------
+    data : relay.Expr
+        The source array.
+
+    indices : relay.Expr
+        The indices of the values to extract.
+
+    type_like : relay.Expr
+        The tensor that provides the type to embed into.
+
+    axis : int, optional
+        The axis over which to select values. By default,
+        the flattened input array is used.
+
+    mode : str, optional
+        Specifies how out-of-bound indices will behave [clip, wrap, fast].
+        clip: clip to the range (default).
+        wrap: wrap around the indices.
+        fast: no clip or wrap around (user must make sure indices are in-bound).
+
+    Returns
+    -------
+    ret : relay.Expr
+        The computed result.
+    """
+    return _make.embed_like(data, indices, type_like, axis, mode)
+
 def take(data, indices, axis=None, mode="clip"):
     """Take elements from an array along an axis.
 
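
Since embed_like is registered below (in transform.cc) as "the inverse of take", its intended semantics resemble scattering data back into a tensor shaped like type_like. A numpy analogue for the flattened (axis=None) case, shown only to illustrate the intent; the registered compute currently reuses TakeCompute:

```python
import numpy as np

like = np.zeros(6)               # plays the role of type_like
indices = np.array([1, 4])
values = np.array([10.0, 20.0])  # plays the role of data

# take (gather): values == source[indices]
# embed_like (scatter): place values at indices in a tensor shaped like `like`
embedded = np.zeros_like(like)
np.add.at(embedded, indices, values)  # scatter-add, the adjoint of a gather
print(embedded)                       # [ 0. 10.  0.  0. 20.  0.]
```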

src/relay/op/nn/nn.cc

Lines changed: 93 additions & 4 deletions
@@ -103,6 +103,50 @@ RELAY_REGISTER_OP("nn.bias_add")
 // relay.nn.dense
 TVM_REGISTER_NODE_TYPE(DenseAttrs);
 
+
+bool DenseRel(const Array<Type>& types,
+              int num_inputs,
+              const Attrs& attrs,
+              const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 3);
+  const auto* data = types[0].as<TensorTypeNode>();
+  const auto* weight = types[1].as<TensorTypeNode>();
+  if (data == nullptr) return false;
+
+  const DenseAttrs* param = attrs.as<DenseAttrs>();
+  CHECK(param != nullptr);
+
+  CHECK(static_cast<int>(data->shape.size()) != 0);
+
+  Array<tvm::Expr> oshape = data->shape;
+  if (param->units.defined()) {
+    Array<tvm::Expr> dshape = data->shape;
+    // validate the weight shape is proper if defined
+    // Assign weight type
+    Array<IndexExpr> wshape({param->units, dshape[dshape.size() - 1]});
+    reporter->Assign(types[1], TensorTypeNode::make(wshape, data->dtype));
+    oshape.Set((oshape.size() - 1), param->units);
+  } else {
+    if (weight == nullptr) return false;
+    Array<tvm::Expr> wshape = weight->shape;
+    CHECK(static_cast<int>(weight->shape.size()) == 2);
+    CHECK(reporter->AssertEQ(data->shape[data->shape.size() - 1], weight->shape[1]))
+        << "DenseRel: input dimension doesn't match,"
+        << " data shape=" << data->shape
+        << ", weight shape=" << weight->shape;
+    oshape.Set((oshape.size() - 1), wshape[0]);
+  }
+
+  DataType out_dtype = param->out_dtype;
+  if (out_dtype.bits() == 0) {
+    out_dtype = data->dtype;
+  }
+  // assign output type
+  reporter->Assign(types[2], TensorTypeNode::make(oshape, out_dtype));
+  return true;
+}
+
+
 // Positional relay function to create dense operator used by frontend FFI.
 Expr MakeDense(Expr data,
                Expr weight,
@@ -698,11 +742,11 @@ bool BatchMatmulRel(const Array<Type>& types,
   if (x == nullptr || y == nullptr) return false;
   CHECK(x->shape.size() == 3 && y->shape.size() == 3);
   CHECK(reporter->AssertEQ(x->shape[0], y->shape[0]))
-      << "BatchDot: batch dimension doesn't match, "
-      << " x shape=" << x->shape
-      << ", y shape=" << y->shape;
+      << "BatchDot: batch dimension doesn't match,"
+      << " x shape=" << x->shape
+      << ", y shape=" << y->shape;
   CHECK(reporter->AssertEQ(x->shape[2], y->shape[2]))
-      << "BatchDot: shapes of x and y is inconsistent, "
+      << "BatchDot: shapes of x and y is inconsistent,"
       << " x shape=" << x->shape
       << ", y shape=" << y->shape;
 
@@ -746,6 +790,51 @@ are data in batch.
 .set_support_level(10)
 .add_type_rel("BatchMatmul", BatchMatmulRel);
 
+// relay.nn.cross_entropy
+bool CrossEntropyRel(const Array<Type>& types,
+                     int num_inputs,
+                     const Attrs& attrs,
+                     const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 3);
+  const auto* x = types[0].as<TensorTypeNode>();
+  const auto* y = types[1].as<TensorTypeNode>();
+  if (x == nullptr || y == nullptr) return false;
+  CHECK(x->shape.size() == 2 && y->shape.size() == 2)
+      << "CrossEntropy: shapes of x and y is inconsistent,"
+      << " x shape=" << x->shape
+      << ", y shape=" << y->shape;
+  CHECK(reporter->AssertEQ(x->shape[0], y->shape[0]))
+      << "CrossEntropy: shapes of x and y is inconsistent,"
+      << " x shape=" << x->shape
+      << ", y shape=" << y->shape;
+  CHECK(reporter->AssertEQ(x->shape[1], y->shape[1]))
+      << "CrossEntropy: shapes of x and y is inconsistent,"
+      << " x shape=" << x->shape
+      << ", y shape=" << y->shape;
+  // assign output type
+  reporter->Assign(types[2], TensorTypeNode::make({}, x->dtype));
+  return true;
+}
+
+// Positional relay function to create cross_entropy operator used by frontend FFI.
+Expr MakeCrossEntropy(Expr predictions, Expr targets) {
+  static const Op& op = Op::Get("nn.cross_entropy");
+  return CallNode::make(op, {predictions, targets}, Attrs(), {});
+}
+
+
+TVM_REGISTER_API("relay.op.nn._make.cross_entropy")
+.set_body_typed(MakeCrossEntropy);
+
+
+RELAY_REGISTER_OP("nn.cross_entropy")
+.describe(R"code(Computes cross entropy given predictions and targets.)code" TVM_ADD_FILELINE)
+.set_num_inputs(2)
+.add_argument("x", "2D Tensor", "Predictions.")
+.add_argument("y", "2D Tensor", "Targets.")
+.set_support_level(10)
+.add_type_rel("CrossEntropy", CrossEntropyRel);
+
 
 }  // namespace relay
 }  // namespace tvm
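
For reference, DenseRel derives the output shape by replacing the last axis of data with units (or with weight's first axis): data of shape (d1, ..., dk, in_dim) against weight of shape (units, in_dim) yields (d1, ..., dk, units). A quick numpy check of that contract (an illustration, independent of the C++ above):

```python
import numpy as np

data = np.random.randn(2, 3, 8)  # (d1, d2, in_dim)
weight = np.random.randn(5, 8)   # (units, in_dim)

out = data @ weight.T            # dense computes x . W^T
assert out.shape == (2, 3, 5)    # last axis replaced by units
```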

src/relay/op/tensor/transform.cc

Lines changed: 38 additions & 0 deletions
@@ -186,6 +186,7 @@ RELAY_REGISTER_OP("reinterpret")
 .set_attr<TOpPattern>("TOpPattern", kElemWise)
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout);
 
+
 // relay.expand_dims
 TVM_REGISTER_NODE_TYPE(ExpandDimsAttrs);
 
@@ -914,6 +915,43 @@ Examples::
 .set_attr<FTVMCompute>("FTVMCompute", TakeCompute)
 .set_attr<TOpPattern>("TOpPattern", kInjective);
 
+bool EmbedLikeRel(const Array<Type>& types,
+                  int num_inputs,
+                  const Attrs& attrs,
+                  const TypeReporter& reporter) {
+  // `types` contains: [data, indices, type_like, result]
+  CHECK_EQ(types.size(), 4);
+  reporter->Assign(types[3], types[2]);
+  return TakeRel({types[2], types[1], types[0]}, 2, attrs, reporter);
+}
+
+Expr MakeEmbedLike(Expr data,
+                   Expr indices,
+                   Expr type_like,
+                   Integer axis,
+                   std::string mode) {
+  auto attrs = make_node<TakeAttrs>();
+  attrs->axis = std::move(axis);
+  attrs->mode = std::move(mode);
+  static const Op& op = Op::Get("embed_like");
+  return CallNode::make(op, {data, indices, type_like}, Attrs(attrs), {});
+}
+
+TVM_REGISTER_API("relay.op._make.embed_like")
+.set_body_typed(MakeEmbedLike);
+
+RELAY_REGISTER_OP("embed_like")
+.describe(R"code(The inverse of take.)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.TakeAttrs")
+.set_num_inputs(3)
+.add_argument("data", "Tensor", "The input tensor.")
+.add_argument("indices", "Tensor", "The indices tensor.")
+.add_argument("type_like", "Tensor", "The tensor that provides the type and shape to embed into.")
+.set_support_level(3)
+.add_type_rel("EmbedLike", EmbedLikeRel)
+.set_attr<FTVMCompute>("FTVMCompute", TakeCompute)  // implement this at python side?
+.set_attr<TOpPattern>("TOpPattern", kInjective);
+
 
 // Init ops
 TVM_REGISTER_NODE_TYPE(InitOpAttrs);
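
EmbedLikeRel assigns the result the type of type_like, then re-checks consistency by calling TakeRel with the operands swapped: taking from type_like at indices must reproduce data's type. A hedged Python-level sketch of what that implies (it only runs against a build of this commit, and assumes the embed_like wrapper is re-exported like take):

```python
from tvm import relay
from tvm.relay.testing import run_infer_type

like = relay.var("like", shape=(3, 4))  # type_like
indices = relay.var("indices", shape=(2,), dtype="int32")
data = relay.var("data", shape=(2, 4))  # same type as take(like, indices, axis=0)

out = run_infer_type(relay.embed_like(data, indices, like, axis=0))
# the result is typed like `like`, per EmbedLikeRel
assert out.checked_type == relay.TensorType((3, 4), "float32")
```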

tests/python/relay/test_pass_gradient.py

Lines changed: 21 additions & 4 deletions
@@ -23,9 +23,14 @@
 from tvm.relay.transform import gradient
 from tvm.relay.prelude import Prelude
 from tvm.relay.testing import add_nat_definitions, make_nat_expr, run_infer_type, check_grad, rand
+from tvm.relay.testing import resnet, inception_v3, squeezenet, densenet, lstm
 import tvm.relay.op as op
 
 
+def rand(dtype='float32', *shape):
+    return tvm.nd.array(np.random.rand(*shape).astype(dtype))
+
+
 def test_id():
     shape = (10, 10)
     dtype = 'float32'
@@ -198,10 +203,9 @@ def test_pow():
     double = relay.Function([x], x + x)
     i = relay.var("i", t)
     func = relay.Function([i], p.nat_iterate(double, make_nat_expr(p, 3))(i))
-    mod["main"] = func
-    mod["main"] = gradient(mod["main"], mod=mod)
-    m = transform.InferType()(mod)
-    back_func = m["main"]
+    mod["func"] = func
+    mod["back_func"] = gradient(mod["func"], mod=mod)
+    back_func = mod["back_func"]
     assert back_func.checked_type == relay.FuncType([t], relay.TupleType([t, relay.TupleType([t])]))
     i_nd = rand(dtype, *shape)
     ex = create_executor(mod=mod)
@@ -295,6 +299,19 @@ def test_concat():
     # no value validation as concatenate has dummy gradient right now.
 
 
+def rand_from_type(t):
+    assert isinstance(t, relay.ty.TensorType)
+    return rand(t.dtype, *[int(s) for s in t.shape])
+
+
+def test_resnet():
+    x, _ = densenet.get_workload()
+    x = gradient(x["main"])
+    args = [rand_from_type(e.checked_type) for e in x.params]
+    ex = create_executor()
+    ex.evaluate(x)(*args)
+
+
 if __name__ == "__main__":
     test_id()
     test_add()
