Add op remainder #9869

Open
wants to merge 15 commits into master
1 change: 1 addition & 0 deletions docs/source/oneflow.rst
@@ -285,6 +285,7 @@ Pointwise Ops
negative
pow
reciprocal
remainder
round
rsqrt
selu
1 change: 1 addition & 0 deletions docs/source/tensor.rst
@@ -311,6 +311,7 @@ Tensor class reference
Tensor.repeat_interleave
Tensor.requires_grad
Tensor.requires_grad_
Tensor.remainder
Tensor.reshape
Tensor.reshape_as
Tensor.retain_grad
2 changes: 2 additions & 0 deletions oneflow/api/python/framework/tensor_functions.cpp
@@ -264,6 +264,7 @@ DIRECT_PASS_FUNC(PyTensorObject_div_, functional::div_)
DIRECT_PASS_FUNC(PyTensorObject_mul, functional::mul)
DIRECT_PASS_FUNC(PyTensorObject_mul_, functional::mul_)
DIRECT_PASS_FUNC(PyTensorObject_fmod, functional::fmod)
DIRECT_PASS_FUNC(PyTensorObject_remainder, functional::remainder)
DIRECT_PASS_FUNC(PyTensorObject_logical_and, functional::logical_and)
DIRECT_PASS_FUNC(PyTensorObject_logical_or, functional::logical_or)
DIRECT_PASS_FUNC(PyTensorObject_logical_xor, functional::logical_xor)
@@ -965,6 +966,7 @@ PyMethodDef PyTensorObject_extra_methods[] = {
{"mul", (PyCFunction)PyTensorObject_mul, METH_VARARGS | METH_KEYWORDS, NULL},
{"mul_", (PyCFunction)PyTensorObject_mul_, METH_VARARGS | METH_KEYWORDS, NULL},
{"fmod", (PyCFunction)PyTensorObject_fmod, METH_VARARGS | METH_KEYWORDS, NULL},
{"remainder", (PyCFunction)PyTensorObject_remainder, METH_VARARGS | METH_KEYWORDS, NULL},
{"logical_and", (PyCFunction)PyTensorObject_logical_and, METH_VARARGS | METH_KEYWORDS, NULL},
{"logical_or", (PyCFunction)PyTensorObject_logical_or, METH_VARARGS | METH_KEYWORDS, NULL},
{"logical_xor", (PyCFunction)PyTensorObject_logical_xor, METH_VARARGS | METH_KEYWORDS, NULL},
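
With the DIRECT_PASS_FUNC entry and the method-table row above, `remainder` becomes callable as a tensor method. A minimal sketch of the expected usage (values follow the floor-mod semantics documented later in this PR):

import oneflow as flow

x = flow.tensor([5., 7.])
print(x.remainder(3.))  # expected: tensor([2., 1.], dtype=oneflow.float32)
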
75 changes: 75 additions & 0 deletions oneflow/core/autograd/gradient_funcs/broadcast_binary_ops.cpp
@@ -421,5 +421,80 @@ class BroadcastFMod : public BroadcastBinaryGrad {

REGISTER_OP_EXPR_GRAD_FUNCTION("broadcast_fmod", BroadcastFMod);

class BroadcastFloorMod : public BroadcastBinaryGrad {
public:
Maybe<void> Apply(const BroadcastBinaryCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
const auto& out_shape = *(JUST(VectorAt(out_grads, 0))->shape());
in_grads->resize(2);
if (ctx->x_requires_grad || ctx->y_requires_grad) {
const auto& x = JUST(VectorAt(ctx->SavedTensors(), ctx->x_index));
const auto& y = JUST(VectorAt(ctx->SavedTensors(), ctx->y_index));
auto broad_x_ = x;
auto broad_y_ = y;
if (ctx->broadcast_x) {
const auto& x_shape = *(x->shape());
const Shape& left_extended_x_shape =
CreateLeftExtendedShape(ShapeView(x_shape), out_shape.NumAxes());
if (left_extended_x_shape == out_shape) {
broad_x_ = JUST(functional::ReshapeLike(x, JUST(VectorAt(out_grads, 0))));
} else {
const AxisVector& broadcast_axis_vec = left_extended_x_shape.Axes4BroadcastTo(out_shape);
const std::vector<int32_t> x_axis =
std::vector<int32_t>{broadcast_axis_vec.begin(), broadcast_axis_vec.end()};
broad_x_ = JUST(functional::BroadcastLike(x, JUST(VectorAt(out_grads, 0)), x_axis));
}
}
if (ctx->broadcast_y) {
const auto& y_shape = *(y->shape());
const Shape& left_extended_y_shape =
CreateLeftExtendedShape(ShapeView(y_shape), out_shape.NumAxes());
if (left_extended_y_shape == out_shape) {
broad_y_ = JUST(functional::ReshapeLike(y, JUST(VectorAt(out_grads, 0))));
} else {
const AxisVector& broadcast_axis_vec = left_extended_y_shape.Axes4BroadcastTo(out_shape);
const std::vector<int32_t> y_axis =
std::vector<int32_t>{broadcast_axis_vec.begin(), broadcast_axis_vec.end()};
broad_y_ = JUST(functional::BroadcastLike(y, JUST(VectorAt(out_grads, 0)), y_axis));
}
}
if (ctx->x_requires_grad) {
if (ctx->broadcast_x) {
JUST(VectorAt(*in_grads, 0)) =
JUST(functional::BroadcastReduceSumLike(JUST(VectorAt(out_grads, 0)), x));
} else {
JUST(VectorAt(*in_grads, 0)) = JUST(VectorAt(out_grads, 0));
}
}
if (ctx->y_requires_grad) {
auto result = JUST(functional::FloorDiv(broad_x_, broad_y_));
result = JUST(functional::Mul(JUST(VectorAt(out_grads, 0)), result));
JUST(functional::ScalarMul(result, Scalar(-1.f), true));
if (ctx->broadcast_y) {
in_grads->at(1) = JUST(functional::BroadcastReduceSumLike(result, y));
} else {
in_grads->at(1) = result;
}
}
}
return Maybe<void>::Ok();
}

protected:
Maybe<void> SaveTensorForBackward(BroadcastBinaryCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs) const override {
if (ctx->x_requires_grad && ctx->broadcast_x) {
ctx->x_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));
}
if (ctx->y_requires_grad) {
ctx->x_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));
ctx->y_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 1)));
}
return Maybe<void>::Ok();
}
};

REGISTER_OP_EXPR_GRAD_FUNCTION("broadcast_floor_mod", BroadcastFloorMod);

} // namespace one
} // namespace oneflow
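
Since remainder(x, y) = x - floor(x / y) * y, the gradient with respect to x is the upstream gradient passed through unchanged, and the gradient with respect to y is -floor(x / y) times the upstream gradient, reduced back to y's shape when broadcasting occurred; that is what BroadcastFloorMod::Apply computes above. A quick numerical sketch, assuming the usual requires_grad/backward autograd API:

import oneflow as flow

x = flow.tensor([5., 7.], requires_grad=True)
y = flow.tensor([2., 3.], requires_grad=True)
z = flow.remainder(x, y)  # z = x - floor(x / y) * y
z.sum().backward()
print(x.grad)  # expected: tensor([1., 1.]),   since dz/dx = 1
print(y.grad)  # expected: tensor([-2., -2.]), since dz/dy = -floor(x / y)
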
50 changes: 50 additions & 0 deletions oneflow/core/autograd/gradient_funcs/scalar_floor_mod.cpp
@@ -0,0 +1,50 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct ScalarFloorModGradCaptureState : public AutoGradCaptureState {
bool requires_grad = true;
};

class ScalarFloorModGrad : public OpExprGradFunction<ScalarFloorModGradCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

Maybe<void> Capture(ScalarFloorModGradCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
CHECK_EQ_OR_RETURN(inputs.size(), 1); // NOLINT(maybe-need-error-msg)
ctx->requires_grad = inputs.at(0)->requires_grad();
return Maybe<void>::Ok();
}

Maybe<void> Apply(const ScalarFloorModGradCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
in_grads->resize(1);
if (ctx->requires_grad) { in_grads->at(0) = out_grads.at(0); }
return Maybe<void>::Ok();
}
};

REGISTER_OP_EXPR_GRAD_FUNCTION("scalar_floor_mod", ScalarFloorModGrad);

} // namespace one
} // namespace oneflow
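
For a scalar divisor the floor(x / s) * s term is piecewise constant in x, so the derivative is the identity almost everywhere: ScalarFloorModGrad::Apply simply forwards out_grads[0]. A sketch of the expected behavior:

import oneflow as flow

x = flow.tensor([5., -3.], requires_grad=True)
z = flow.remainder(x, 2.)  # dispatches to scalar_floor_mod
z.sum().backward()
print(x.grad)  # expected: tensor([1., 1.])
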
10 changes: 10 additions & 0 deletions oneflow/core/functional/functional_api.yaml
@@ -569,6 +569,16 @@
]
bind_python: true

- name: "remainder"
signature:
[
"Tensor (Tensor input, Tensor other) => BroadcastRemainder",
"Tensor (Tensor input, Scalar other, *, Bool inplace=False) => ScalarRemainder",
"Tensor (Tensor input, Scalar other) => ScalarRemainder",
"Tensor (Scalar input, Tensor other) => ScalarTensorRemainder",
]
bind_python: true

- name: "log"
signature: "Tensor (Tensor x) => Log"
bind_python: True
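
The remainder signatures above map each call pattern to one of three functors. A sketch of the expected dispatch, with hand-computed results in comments:

import oneflow as flow

x = flow.tensor([5., 7., 3.])
print(flow.remainder(x, flow.tensor([2., 3., 2.])))  # Tensor % Tensor -> BroadcastRemainder:    tensor([1., 1., 1.])
print(flow.remainder(x, 2.))                         # Tensor % Scalar -> ScalarRemainder:       tensor([1., 1., 1.])
print(flow.remainder(1.5, x))                        # Scalar % Tensor -> ScalarTensorRemainder: tensor([1.5000, 1.5000, 1.5000])
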
9 changes: 9 additions & 0 deletions oneflow/core/functional/impl/binary_functor.cpp
@@ -423,6 +423,14 @@ class BroadcastFModFunctor : public BinaryFunctor {
}
};

class BroadcastRemainderFunctor : public BinaryFunctor {
public:
BroadcastRemainderFunctor() {
op_ =
CHECK_JUST(one::OpBuilder("broadcast_floor_mod").Input("x").Input("y").Output("z").Build());
}
};

class BroadcastEqualFunctor : public BinaryFunctor {
public:
BroadcastEqualFunctor() {
@@ -654,6 +662,7 @@ ONEFLOW_FUNCTION_LIBRARY(m) {
m.add_functor<impl::ScalarMulByTensorFunctor>("ScalarMulByTensor");
m.add_functor<impl::ScalarDivByTensorFunctor>("ScalarDivByTensor");
m.add_functor<impl::BroadcastFModFunctor>("BroadcastFMod");
m.add_functor<impl::BroadcastRemainderFunctor>("BroadcastRemainder");
m.add_functor<impl::FloorDivFunctor>("FloorDiv");
m.add_functor<impl::TruncDivFunctor>("TruncDiv");
m.add_functor<impl::BroadcastIsCloseFunctor>("IsClose");
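
BroadcastRemainderFunctor reuses the existing broadcast_floor_mod op rather than introducing a new kernel, so the tensor/tensor path broadcasts like the other binary functors in this file. A sketch:

import oneflow as flow

a = flow.tensor([[5.], [7.]])  # shape (2, 1)
b = flow.tensor([2., 3.])      # shape (2,)
print(flow.remainder(a, b))
# expected, after broadcasting to (2, 2):
# tensor([[1., 2.],
#         [1., 1.]], dtype=oneflow.float32)
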
15 changes: 15 additions & 0 deletions oneflow/core/functional/impl/math_functor.cpp
@@ -345,6 +345,19 @@ class ScalarFModFunctor : public ScalarMathBaseFunctor {
ScalarFModFunctor() : ScalarMathBaseFunctor(/*op_name=*/"scalar_fmod") {}
};

class ScalarRemainderFunctor : public ScalarMathBaseFunctor {
public:
ScalarRemainderFunctor() : ScalarMathBaseFunctor(/*op_name=*/"scalar_floor_mod") {}
};

class ScalarTensorRemainderFunctor {
public:
Maybe<Tensor> operator()(const Scalar& scalar, const std::shared_ptr<one::Tensor>& x) const {
auto zeros_tensor = JUST(functional::ZerosLike(x));
auto tensor_fill_scalar = JUST(functional::ScalarAdd(scalar, zeros_tensor, /*alpha*/ 1));
return JUST(functional::BroadcastRemainder(tensor_fill_scalar, x));
}
};

class ReduceMaxFunctor {
public:
ReduceMaxFunctor() {
@@ -4161,6 +4174,8 @@ ONEFLOW_FUNCTION_LIBRARY(m) {
m.add_functor<MaximumFunctor>("Maximum");
m.add_functor<MaximumFunctor>("Max");
m.add_functor<ScalarFModFunctor>("ScalarFMod");
m.add_functor<ScalarRemainderFunctor>("ScalarRemainder");
m.add_functor<ScalarTensorRemainderFunctor>("ScalarTensorRemainder");
m.add_functor<ScalarFloorDivFunctor>("ScalarFloorDiv");
m.add_functor<ScalarTruncDivFunctor>("ScalarTruncDiv");
m.add_functor<ScalarLogicalEqualFunctor, ScalarLogicalEqual2Functor>("ScalarLogicalEqual");
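
ScalarTensorRemainderFunctor handles a scalar dividend by materializing it as a tensor shaped like x (ZerosLike followed by ScalarAdd) and deferring to BroadcastRemainder. Expressed at the Python level, roughly:

import oneflow as flow

t = flow.tensor([4., 5., 6.])
lhs = flow.zeros_like(t) + 1.5   # the tensor the functor builds internally
print(flow.remainder(lhs, t))    # expected: tensor([1.5000, 1.5000, 1.5000])
print(flow.remainder(1.5, t))    # same result via the scalar overload
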
23 changes: 21 additions & 2 deletions oneflow/ir/include/OneFlow/OneFlowUserOps.td
@@ -8287,8 +8287,8 @@ def OneFlow_ReshapeLikeOp : OneFlow_BaseOp<"reshape_like", [NoSideEffect, Declar
#endif // GET_ONEFLOW_RESHAPE_OP_DEFINITIONS

// Group: SCALAR
// clip_by_scalar, clip_by_scalar_grad, clip_by_scalar_max, clip_by_scalar_max_grad, clip_by_scalar_min, clip_by_scalar_min_grad, scalar_add, scalar_add_by_tensor, scalar_div_by_tensor, scalar_floordiv, scalar_truncdiv, scalar_fmod, scalar_floor_mod, scalar_logical_and, scalar_logical_equal, scalar_logical_greater, scalar_logical_greater_equal, scalar_logical_inplace_greater, scalar_logical_less, scalar_logical_less_equal, scalar_logical_not_equal, scalar_logical_or, scalar_logical_xor, scalar_mul, scalar_mul_by_tensor, scalar_pow, scalar_pow_grad, scalar_reverse_pow, scalar_reverse_pow_grad, scalar_sub_by_tensor, scalar_bitwise_and, scalar_bitwise_or, scalar_bitwise_xor
// Total: 33

#ifdef GET_ONEFLOW_SCALAR_OP_DEFINITIONS

@@ -8506,6 +8506,25 @@ def OneFlow_ScalarFmodOp : OneFlow_BaseOp<"scalar_fmod", [NoSideEffect, DeclareO
let has_data_type_infer_fn = 1;
}

def OneFlow_ScalarFloorModOp : OneFlow_BaseOp<"scalar_floor_mod", [NoSideEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
let input = (ins
OneFlow_Tensor:$in
);
let output = (outs
OneFlow_Tensor:$out
);
let attrs = (ins
DefaultValuedAttr<BoolAttr, "false">:$has_int_operand,
DefaultValuedAttr<BoolAttr, "false">:$has_float_operand,
DefaultValuedAttr<SI64Attr, "0">:$int_operand,
DefaultValuedAttr<F64Attr, "0.">:$float_operand
);
let has_logical_tensor_desc_infer_fn = 1;
let has_physical_tensor_desc_infer_fn = 1;
let has_get_sbp_fn = 1;
let has_data_type_infer_fn = 1;
}

def OneFlow_ScalarLogicalAndOp : OneFlow_BaseOp<"scalar_logical_and", [NoSideEffect, NoGrad, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
let input = (ins
OneFlow_Tensor:$in
15 changes: 8 additions & 7 deletions oneflow/user/kernels/scalar_math_kernels.cpp
@@ -141,13 +141,14 @@ class ScalarReverseMathKernel final : public user_op::OpKernel {
bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; }
};

#define SCALAR_MATH_SEQ \
OF_PP_MAKE_TUPLE_SEQ("scalar_add", ep::primitive::BinaryOp::kAdd) \
OF_PP_MAKE_TUPLE_SEQ("scalar_mul", ep::primitive::BinaryOp::kMul) \
OF_PP_MAKE_TUPLE_SEQ("scalar_div", ep::primitive::BinaryOp::kDiv) \
OF_PP_MAKE_TUPLE_SEQ("scalar_floordiv", ep::primitive::BinaryOp::kFloorDiv) \
OF_PP_MAKE_TUPLE_SEQ("scalar_truncdiv", ep::primitive::BinaryOp::kTruncDiv) \
OF_PP_MAKE_TUPLE_SEQ("scalar_fmod", ep::primitive::BinaryOp::kFmod) \
OF_PP_MAKE_TUPLE_SEQ("scalar_floor_mod", ep::primitive::BinaryOp::kFloorMod) \
OF_PP_MAKE_TUPLE_SEQ("scalar_pow", ep::primitive::BinaryOp::kPow)

#define REGISTER_UNARY_MATH_SCALAR_ELEMWISE_USER_KERNEL(op_name, binary_op) \
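
Adding scalar_floor_mod to SCALAR_MATH_SEQ maps it onto ep::primitive::BinaryOp::kFloorMod, registering kernels for the same dtype set as the other scalar math ops, so integer inputs take the int_operand attribute path. A sketch of the expected integer semantics (floored, not truncated):

import oneflow as flow

print(flow.remainder(flow.tensor([-7, 7]), 3))
# expected: tensor([2, 1], dtype=oneflow.int64), i.e. the result keeps the divisor's sign
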
1 change: 1 addition & 0 deletions oneflow/user/ops/scalar_math_op.cpp
@@ -58,6 +58,7 @@ IMPLEMENT_SCALAR_MATH_OP_FUNCS(ScalarAdd, GetSbp4ScalarMath)
IMPLEMENT_SCALAR_MATH_OP_FUNCS(ScalarFloordiv, GetSbp4ScalarMath)
IMPLEMENT_SCALAR_MATH_OP_FUNCS(ScalarTruncdiv, GetSbp4ScalarMath)
IMPLEMENT_SCALAR_MATH_OP_FUNCS(ScalarFmod, GetSbp4ScalarMath)
IMPLEMENT_SCALAR_MATH_OP_FUNCS(ScalarFloorMod, GetSbp4ScalarMath)
IMPLEMENT_SCALAR_MATH_OP_FUNCS(ScalarMul, GetSbp4ScalarMul)
IMPLEMENT_SCALAR_MATH_OP_FUNCS(ScalarDiv, GetSbp4ScalarMul)
IMPLEMENT_SCALAR_MATH_OP_FUNCS(ScalarPow, GetSbp4ScalarMath)
1 change: 1 addition & 0 deletions python/oneflow/__init__.py
@@ -165,6 +165,7 @@ def is_deprecated(func_or_class):
from oneflow._C import reduce_all as all
from oneflow._C import reduce_any as any
from oneflow._C import reduce_nansum as nansum
from oneflow._C import remainder
from oneflow._C import logsumexp
from oneflow._C import rsqrt
from oneflow._C import sqrt
55 changes: 51 additions & 4 deletions python/oneflow/framework/docstr/math_ops.py
@@ -906,13 +906,19 @@
r"""
fmod(input, other, *, out=None) -> Tensor

Applies C++'s `std::fmod <https://en.cppreference.com/w/cpp/numeric/math/fmod>`_ entrywise.
The result has the same sign as the dividend :attr:`input` and its absolute value
is less than that of :attr:`other`.

This function may be defined in terms of :func:`oneflow.div` as

.. code:: python

    oneflow.fmod(a, b) == a - a.div(b, rounding_mode="trunc") * b

Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
:ref:`type promotion <type-promotion-doc>`, and integer and float inputs.

Args:
input (Tensor): the dividend
@@ -934,6 +940,47 @@
""",
)

add_docstr(
oneflow.remainder,
r"""
remainder(input, other, *, out=None) -> Tensor

Computes `Python's modulus operation <https://docs.python.org/3/reference/expressions.html#binary-arithmetic-operations>`_
entrywise. The result has the same sign as the divisor :attr:`other` and its absolute value
is less than that of :attr:`other`.

This function may be defined in terms of :func:`oneflow.div` as

.. code:: python

    oneflow.remainder(a, b) == a - a.div(b, rounding_mode="floor") * b

Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
:ref:`type promotion <type-promotion-doc>`, and integer and float inputs.


Args:
input (Tensor or Scalar): the dividend
other (Tensor or Scalar): the divisor

Keyword args:
out (Tensor, optional): the output tensor.

Example::

>>> import oneflow as flow
>>> flow.remainder(flow.tensor([-3., -2, -1, 1, 2, 3], dtype=flow.float32), 2.)
tensor([1., -0., 1., 1., 0., 1.], dtype=oneflow.float32)
>>> flow.remainder(flow.tensor([1, 2, 3, 4, 5.], dtype=flow.float32), 1.5)
tensor([1.0000, 0.5000, 0.0000, 1.0000, 0.5000], dtype=oneflow.float32)
>>> flow.remainder(flow.tensor([1, 2, 3, 4., -5]), flow.tensor([4, 2, 1, 3., 1]))
tensor([1., 0., 0., 1., -0.], dtype=oneflow.float32)
>>> flow.remainder(1.5, flow.tensor([1, 2, 3, 4, 5.], dtype=flow.float32))
tensor([0.5000, 1.5000, 1.5000, 1.5000, 1.5000], dtype=oneflow.float32)

""",
)

add_docstr(
oneflow.log,
r"""
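
The sign convention is the practical difference between the two docstrings above: fmod truncates toward zero, so the result follows the dividend's sign, while remainder floors, so the result follows the divisor's sign. Side by side:

import oneflow as flow

t = flow.tensor([-3., -1., 3.])
print(flow.fmod(t, 2.))       # expected: tensor([-1., -1., 1.])  (sign of dividend)
print(flow.remainder(t, 2.))  # expected: tensor([1., 1., 1.])    (sign of divisor)
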
10 changes: 10 additions & 0 deletions python/oneflow/framework/docstr/tensor.py
@@ -1771,6 +1771,16 @@
""",
)

add_docstr(
oneflow.Tensor.remainder,
"""
Tensor.remainder(other) -> Tensor

See :func:`oneflow.remainder`

""",
)

add_docstr(
oneflow.Tensor.logical_and,
"""