Cleanup KernelUtil #6212

Merged
merged 3 commits on Sep 8, 2021
115 changes: 0 additions & 115 deletions oneflow/core/kernel/kernel_util.cpp
@@ -248,14 +248,6 @@ void IncreaseIndex(const int64_t* shape, DimVector& index) {
}
}

template<typename T, T (*reduce_core_func)(const T, const T)>
void MatrixRowReduce(const int64_t row_num, const int64_t col_num, const T* x, T* y) {
FOR_RANGE(int64_t, i, 0, row_num) {
y[i] = x[i * col_num];
FOR_RANGE(int64_t, j, 1, col_num) { y[i] = reduce_core_func(y[i], x[i * col_num + j]); }
}
}
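
For context, the removed MatrixRowReduce treated x as a row-major row_num × col_num matrix and folded each row into one scalar with the supplied reduction; RowMax and RowSum below were its only call sites. A minimal standalone sketch of the same pattern (RowReduce and AddCore are illustrative names, not OneFlow code):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Same row-reduce pattern: seed y[i] with the first element of row i, then
// fold the rest of the row through the reduction function.
template<typename T, T (*reduce)(T, T)>
void RowReduce(int64_t row_num, int64_t col_num, const T* x, T* y) {
  for (int64_t i = 0; i < row_num; ++i) {
    y[i] = x[i * col_num];
    for (int64_t j = 1; j < col_num; ++j) { y[i] = reduce(y[i], x[i * col_num + j]); }
  }
}

template<typename T> T AddCore(T a, T b) { return a + b; }

int main() {
  std::vector<double> x = {1, 2, 3, 4, 5, 6};  // 2 x 3, row-major
  std::vector<double> y(2);
  RowReduce<double, AddCore<double>>(2, 3, x.data(), y.data());
  std::cout << y[0] << " " << y[1] << "\n";  // prints: 6 15
}
```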

} // namespace

void AutoMemcpy(DeviceCtx* ctx, void* dst, const void* src, size_t sz,
@@ -308,21 +300,6 @@ KU_IF_METHOD Axpy(DeviceCtx* ctx, const int n, const T* alpha, const T* x, const
const int incy) {
Derived::Axpy(ctx, n, *alpha, x, incx, y, incy);
}
KU_IF_METHOD Max(DeviceCtx* ctx, const int64_t n, const T* x, T* max_ptr) {
*max_ptr = *std::max_element(x, x + n);
}
KU_IF_METHOD Max(DeviceCtx* ctx, const int64_t n, const T* x, T* max_ptr, T* temp_storage,
size_t temp_storage_bytes) {
Max(ctx, n, x, max_ptr);
}
KU_IF_METHOD Sum(DeviceCtx* ctx, const int64_t n, const T* x, T* sum_ptr) {
*sum_ptr = 0;
for (int64_t i = 0; i < n; ++i) { *sum_ptr += x[i]; }
}
KU_IF_METHOD Sum(DeviceCtx* ctx, const int64_t n, const T* x, T* sum_ptr, T* temp_storage,
size_t temp_storage_bytes) {
Sum(ctx, n, x, sum_ptr);
}
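
These deleted CPU fallbacks were thin wrappers over the standard library, and the temp_storage overloads existed only to mirror the GPU signature, ignoring their extra arguments. A sketch of the equivalent behavior (CpuMax and CpuSum are illustrative names, not OneFlow code):

```cpp
#include <algorithm>
#include <cstdint>
#include <numeric>

// Max: maximum over n elements (undefined for n == 0, matching the original).
template<typename T>
void CpuMax(int64_t n, const T* x, T* max_ptr) {
  *max_ptr = *std::max_element(x, x + n);
}

// Sum: left-to-right accumulation, the same evaluation order as the original loop.
template<typename T>
void CpuSum(int64_t n, const T* x, T* sum_ptr) {
  *sum_ptr = std::accumulate(x, x + n, T(0));
}
```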
KU_IF_METHOD CopyColsRegion(DeviceCtx* ctx, const int64_t row_num, const int64_t col_num,
const T* x, const int64_t x_col_offset, const int64_t x_lda, T* y,
const int64_t y_col_offset, const int64_t y_lda) {
@@ -332,14 +309,6 @@ KU_IF_METHOD CopyColsRegion(DeviceCtx* ctx, const int64_t row_num, const int64_t
}
}
}
KU_IF_METHOD RowMax(DeviceCtx* ctx, const int64_t row_num, const int64_t col_num, const T* x,
T* y) {
MatrixRowReduce<T, ReduceCoreMax>(row_num, col_num, x, y);
}
KU_IF_METHOD RowSum(DeviceCtx* ctx, const int64_t row_num, const int64_t col_num, const T* x,
T* y) {
MatrixRowReduce<T, ReduceCoreAdd>(row_num, col_num, x, y);
}
KU_IF_METHOD Transpose(DeviceCtx* ctx, const int32_t num_axis, const ShapeView& x_shape,
const ShapeView& y_shape, const PbRf<int32_t>& permutation,
const int64_t elem_cnt, const T* x, T* y) {
@@ -370,9 +339,6 @@ KU_IF_METHOD Transpose(DeviceCtx* ctx, const int32_t num_axis, const ShapeView&
}
}
KU_IF_METHOD Set(DeviceCtx* ctx, const T value, T* addr) { *addr = value; }
KU_IF_METHOD Replicate(DeviceCtx* ctx, const int64_t n, T* y, const T* x) {
for (int64_t i = 0; i < n; ++i) { y[i] = *x; }
}

#define KU_FLOATING_METHOD \
template<typename T> \
@@ -382,98 +348,17 @@ KU_FLOATING_METHOD Dot(DeviceCtx* ctx, const int n, const T* x, const int incx,
const int incy, T* result) {
*result = cblas_dot<T>(n, x, incx, y, incy);
}
KU_FLOATING_METHOD Copy(DeviceCtx* ctx, const int n, const T* x, const int incx, T* y,
const int incy) {
cblas_copy<T>(n, x, incx, y, incy);
}
KU_FLOATING_METHOD Axpy(DeviceCtx* ctx, const int n, const T alpha, const T* x, const int incx,
T* y, const int incy) {
cblas_axpy<T>(n, alpha, x, incx, y, incy);
}
KU_FLOATING_METHOD Scal(DeviceCtx* ctx, const int n, const T alpha, T* x, const int incx) {
cblas_scal<T>(n, alpha, x, incx);
}
KU_FLOATING_METHOD Scal(DeviceCtx* ctx, const int n, const T* alpha, T* x, const int incx) {
Scal(ctx, n, *alpha, x, incx);
}
KU_FLOATING_METHOD Gemv(DeviceCtx* ctx, const enum CBLAS_TRANSPOSE trans, int m, int n,
const T alpha, const T* a, int lda, const T* x, const int incx,
const T beta, T* y, const int incy) {
// Use column-major order to match cuBLAS (which is column-major only)
cblas_gemv<T>(CBLAS_ORDER::CblasColMajor, trans, n, m, alpha, a, lda, x, incx, beta, y, incy);
}
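
The dimension swap here is the usual row-major/column-major bridge: a row-major m × n buffer read in column-major order with dimensions n × m is exactly the transpose of the original matrix, and fixing the order to column-major keeps the CPU call convention aligned with cuBLAS, which supports only column-major layout. A dependency-free sketch of the indexing fact (illustrative only, no BLAS required):

```cpp
#include <iostream>

int main() {
  const int n = 3;                 // columns of the row-major 2 x 3 matrix A
  const double a[] = {1, 2, 3,
                      4, 5, 6};    // row-major: A(i,j) = a[i*n + j]
  // The same buffer read column-major with dims 3 x 2 (leading dimension 3)
  // gives A_cm(j,i) = a[i*n + j] = A(i,j), i.e. A_cm is A^T. Swapping the
  // m/n arguments in the cblas_gemv call accounts for exactly this.
  std::cout << a[1 * n + 2] << "\n";  // 6: A(1,2), equivalently A_cm(2,1)
}
```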
KU_FLOATING_METHOD Gemm(DeviceCtx* ctx, const enum CBLAS_ORDER order,
const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
const int m, const int n, const int k, const T alpha, const T* a,
const int lda, const T* b, const int ldb, const T beta, T* c,
const int ldc) {
cblas_gemm<T>(order, trans_a, trans_b, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
KU_FLOATING_METHOD BatchedGemm(DeviceCtx* ctx, const enum CBLAS_ORDER order,
const enum CBLAS_TRANSPOSE trans_a,
const enum CBLAS_TRANSPOSE trans_b, int batch_size, int m, int n,
int k, const T alpha, const T* a, const T* b, const T beta, T* c,
T** buf) {
const int a_stride = m * k;
const int b_stride = k * n;
const int c_stride = m * n;
FOR_RANGE(int32_t, i, 0, batch_size) {
KernelUtil<DeviceType::kCPU, T>::OFGemm(ctx, trans_a, trans_b, m, n, k, alpha, a + i * a_stride,
b + i * b_stride, beta, c + i * c_stride);
}
}
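
BatchedGemm on CPU is strided batching: the batch is assumed packed contiguously, so slice i starts at fixed element offsets m·k, k·n, and m·n into a, b, and c, and each slice goes through one ordinary GEMM. A sketch of just the offset arithmetic (StridedBatchedGemm and the gemm callback are illustrative, not the OneFlow API):

```cpp
#include <cstdint>

// Strided batching: slice i of each operand sits at a fixed element offset from
// slice 0, so a loop of single GEMMs covers the whole batch. Gemm is any
// callable taking (a_slice, b_slice, c_slice).
template<typename T, typename Gemm>
void StridedBatchedGemm(int batch_size, int m, int n, int k,
                        const T* a, const T* b, T* c, Gemm gemm) {
  const int64_t a_stride = int64_t(m) * k;  // elements in one A slice
  const int64_t b_stride = int64_t(k) * n;  // elements in one B slice
  const int64_t c_stride = int64_t(m) * n;  // elements in one C slice
  for (int i = 0; i < batch_size; ++i) {
    gemm(a + i * a_stride, b + i * b_stride, c + i * c_stride);
  }
}
```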

KU_FLOATING_METHOD Exp(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = std::exp(x[i]); }
}
KU_FLOATING_METHOD Div(DeviceCtx* ctx, const int64_t n, T* x, const T* alpha) {
for (int64_t i = 0; i < n; ++i) { x[i] = x[i] / (*alpha); }
}
KU_FLOATING_METHOD Div(DeviceCtx* ctx, const int64_t n, T* x, const T alpha) {
for (int64_t i = 0; i < n; ++i) { x[i] = x[i] / alpha; }
}
KU_FLOATING_METHOD Mul(DeviceCtx* ctx, const int64_t n, const T* x, const T* y, T* z) {
for (int64_t i = 0; i < n; ++i) { z[i] = x[i] * y[i]; }
}
KU_FLOATING_METHOD Div(DeviceCtx* ctx, const int64_t n, const T* x, const T* y, T* z) {
for (int64_t i = 0; i < n; ++i) { z[i] = x[i] / y[i]; }
}
KU_FLOATING_METHOD Square(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = x[i] * x[i]; }
}
KU_FLOATING_METHOD Sqrt(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = std::sqrt(x[i]); }
}
KU_FLOATING_METHOD Reciprocal(DeviceCtx* ctx, const int n, const T* x, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = static_cast<T>(1.0) / x[i]; }
}
KU_FLOATING_METHOD Rsqrt(DeviceCtx* ctx, const int64_t n, T* x, const float epsilon) {
for (int64_t i = 0; i < n; ++i) { x[i] = 1.0 / std::sqrt(x[i] + epsilon); }
}
KU_FLOATING_METHOD Rsqrt(DeviceCtx* ctx, const int64_t n, const T* x, T* y, const float epsilon) {
for (int64_t i = 0; i < n; ++i) { y[i] = 1.0 / std::sqrt(x[i] + epsilon); }
}
KU_FLOATING_METHOD Powx(DeviceCtx* ctx, const int64_t n, const T* x, const float power, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = std::pow(x[i], power); }
}
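
Everything from Exp down to Powx follows one shape: a single pass applying a scalar function to each of n elements. A generic sketch of that shared pattern (ElementWise is an illustrative helper, not part of KernelUtil):

```cpp
#include <cmath>
#include <cstdint>

// Shared element-wise pattern: apply a scalar function f to each of n inputs.
template<typename T, typename F>
void ElementWise(int64_t n, const T* x, T* y, F f) {
  for (int64_t i = 0; i < n; ++i) { y[i] = f(x[i]); }
}

// Example instantiation, Rsqrt with its epsilon guard against division by zero:
//   ElementWise<float>(n, x, y, [](float v) { return 1.0f / std::sqrt(v + 1e-5f); });
```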

KU_FLOATING_METHOD Sigmoid(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
T half = static_cast<T>(0.5);
for (int64_t i = 0; i != n; ++i) { y[i] = half * std::tanh(half * x[i]) + half; }
}
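
The tanh form of Sigmoid is the standard identity σ(x) = ½·tanh(x/2) + ½; it is equivalent to 1/(1 + e^{-x}) and avoids routing large-magnitude inputs through exp, since tanh saturates cleanly. The algebra, for reference:

```latex
% Identity used by the Sigmoid kernel above:
% with tanh(x/2) = (e^x - 1)/(e^x + 1),
\[
\tfrac{1}{2}\tanh\!\Big(\tfrac{x}{2}\Big) + \tfrac{1}{2}
  = \frac{1}{2}\cdot\frac{e^{x}-1}{e^{x}+1} + \frac{1}{2}
  = \frac{e^{x}}{e^{x}+1}
  = \frac{1}{1+e^{-x}}
  = \sigma(x).
\]
```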
KU_FLOATING_METHOD SigmoidBackward(DeviceCtx* ctx, const int64_t n, const T* x, const T* y,
const T* dy, T* dx) {
for (int64_t i = 0; i != n; ++i) { dx[i] = y[i] * (1 - y[i]) * dy[i]; }
}
KU_FLOATING_METHOD Relu(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
T zero = GetZeroVal<T>();
for (int64_t i = 0; i != n; ++i) { y[i] = std::max(x[i], zero); }
}
KU_FLOATING_METHOD ReluBackward(DeviceCtx* ctx, const int64_t n, const T* x, const T* y,
const T* dy, T* dx) {
T zero = GetZeroVal<T>();
for (int64_t i = 0; i != n; ++i) { dx[i] = (y[i] > zero) * dy[i]; }
}
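
Both backward kernels reuse the forward output rather than the input: σ′(x) = σ(x)(1 − σ(x)) gives the y[i] * (1 - y[i]) factor in SigmoidBackward, and in ReluBackward the comparison (y[i] > zero) converts to 0 or 1 in T, masking the gradient where the activation was clamped. A sketch of that mask trick (ReluGrad is an illustrative name):

```cpp
#include <cstdint>

// The mask trick from ReluBackward: (y > 0) converts to T as 0 or 1, so the
// incoming gradient passes through exactly where the forward output was positive.
template<typename T>
void ReluGrad(int64_t n, const T* y, const T* dy, T* dx) {
  for (int64_t i = 0; i < n; ++i) { dx[i] = (y[i] > T(0)) * dy[i]; }
}
```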
KU_FLOATING_METHOD Addition(DeviceCtx* ctx, const int64_t n, T* out, const T* in_0) {
for (int64_t i = 0; i != n; ++i) { out[i] = in_0[i]; }
}