Cleanup KernelUtil #6212

Merged
merged 3 commits on Sep 8, 2021
115 changes: 0 additions & 115 deletions oneflow/core/kernel/kernel_util.cpp
@@ -248,14 +248,6 @@ void IncreaseIndex(const int64_t* shape, DimVector& index) {
}
}

template<typename T, T (*reduce_core_func)(const T, const T)>
void MatrixRowReduce(const int64_t row_num, const int64_t col_num, const T* x, T* y) {
FOR_RANGE(int64_t, i, 0, row_num) {
y[i] = x[i * col_num];
FOR_RANGE(int64_t, j, 1, col_num) { y[i] = reduce_core_func(y[i], x[i * col_num + j]); }
}
}
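
For context, the removed MatrixRowReduce treated x as a row-major row_num × col_num matrix and folded each row into one scalar with the supplied reduction; RowMax and RowSum below were its only call sites. A minimal standalone sketch of the same pattern (RowReduce and AddCore are illustrative names, not OneFlow code):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Same row-reduce pattern: seed y[i] with the first element of row i, then
// fold the rest of the row through the reduction function.
template<typename T, T (*reduce)(T, T)>
void RowReduce(int64_t row_num, int64_t col_num, const T* x, T* y) {
  for (int64_t i = 0; i < row_num; ++i) {
    y[i] = x[i * col_num];
    for (int64_t j = 1; j < col_num; ++j) { y[i] = reduce(y[i], x[i * col_num + j]); }
  }
}

template<typename T> T AddCore(T a, T b) { return a + b; }

int main() {
  std::vector<double> x = {1, 2, 3, 4, 5, 6};  // 2 x 3, row-major
  std::vector<double> y(2);
  RowReduce<double, AddCore<double>>(2, 3, x.data(), y.data());
  std::cout << y[0] << " " << y[1] << "\n";  // prints: 6 15
}
```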

} // namespace

void AutoMemcpy(DeviceCtx* ctx, void* dst, const void* src, size_t sz,
@@ -308,21 +300,6 @@ KU_IF_METHOD Axpy(DeviceCtx* ctx, const int n, const T* alpha, const T* x, const
const int incy) {
Derived::Axpy(ctx, n, *alpha, x, incx, y, incy);
}
KU_IF_METHOD Max(DeviceCtx* ctx, const int64_t n, const T* x, T* max_ptr) {
*max_ptr = *std::max_element(x, x + n);
}
KU_IF_METHOD Max(DeviceCtx* ctx, const int64_t n, const T* x, T* max_ptr, T* temp_storage,
size_t temp_storage_bytes) {
Max(ctx, n, x, max_ptr);
}
KU_IF_METHOD Sum(DeviceCtx* ctx, const int64_t n, const T* x, T* sum_ptr) {
*sum_ptr = 0;
for (int64_t i = 0; i < n; ++i) { *sum_ptr += x[i]; }
}
KU_IF_METHOD Sum(DeviceCtx* ctx, const int64_t n, const T* x, T* sum_ptr, T* temp_storage,
size_t temp_storage_bytes) {
Sum(ctx, n, x, sum_ptr);
}
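
These deleted CPU fallbacks were thin wrappers over the standard library, and the temp_storage overloads existed only to mirror the GPU signature, ignoring their extra arguments. A sketch of the equivalent behavior (CpuMax and CpuSum are illustrative names, not OneFlow code):

```cpp
#include <algorithm>
#include <cstdint>
#include <numeric>

// Max: maximum over n elements (undefined for n == 0, matching the original).
template<typename T>
void CpuMax(int64_t n, const T* x, T* max_ptr) {
  *max_ptr = *std::max_element(x, x + n);
}

// Sum: left-to-right accumulation, the same evaluation order as the original loop.
template<typename T>
void CpuSum(int64_t n, const T* x, T* sum_ptr) {
  *sum_ptr = std::accumulate(x, x + n, T(0));
}
```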
KU_IF_METHOD CopyColsRegion(DeviceCtx* ctx, const int64_t row_num, const int64_t col_num,
const T* x, const int64_t x_col_offset, const int64_t x_lda, T* y,
const int64_t y_col_offset, const int64_t y_lda) {
@@ -332,14 +309,6 @@ KU_IF_METHOD CopyColsRegion(DeviceCtx* ctx, const int64_t row_num, const int64_t
}
}
}
KU_IF_METHOD RowMax(DeviceCtx* ctx, const int64_t row_num, const int64_t col_num, const T* x,
T* y) {
MatrixRowReduce<T, ReduceCoreMax>(row_num, col_num, x, y);
}
KU_IF_METHOD RowSum(DeviceCtx* ctx, const int64_t row_num, const int64_t col_num, const T* x,
T* y) {
MatrixRowReduce<T, ReduceCoreAdd>(row_num, col_num, x, y);
}
KU_IF_METHOD Transpose(DeviceCtx* ctx, const int32_t num_axis, const ShapeView& x_shape,
const ShapeView& y_shape, const PbRf<int32_t>& permutation,
const int64_t elem_cnt, const T* x, T* y) {
@@ -370,9 +339,6 @@ KU_IF_METHOD Transpose(DeviceCtx* ctx, const int32_t num_axis, const ShapeView&
}
}
KU_IF_METHOD Set(DeviceCtx* ctx, const T value, T* addr) { *addr = value; }
KU_IF_METHOD Replicate(DeviceCtx* ctx, const int64_t n, T* y, const T* x) {
for (int64_t i = 0; i < n; ++i) { y[i] = *x; }
}

#define KU_FLOATING_METHOD \
template<typename T> \
@@ -382,98 +348,17 @@ KU_FLOATING_METHOD Dot(DeviceCtx* ctx, const int n, const T* x, const int incx,
const int incy, T* result) {
*result = cblas_dot<T>(n, x, incx, y, incy);
}
KU_FLOATING_METHOD Copy(DeviceCtx* ctx, const int n, const T* x, const int incx, T* y,
const int incy) {
cblas_copy<T>(n, x, incx, y, incy);
}
KU_FLOATING_METHOD Axpy(DeviceCtx* ctx, const int n, const T alpha, const T* x, const int incx,
T* y, const int incy) {
cblas_axpy<T>(n, alpha, x, incx, y, incy);
}
KU_FLOATING_METHOD Scal(DeviceCtx* ctx, const int n, const T alpha, T* x, const int incx) {
cblas_scal<T>(n, alpha, x, incx);
}
KU_FLOATING_METHOD Scal(DeviceCtx* ctx, const int n, const T* alpha, T* x, const int incx) {
Scal(ctx, n, *alpha, x, incx);
}
KU_FLOATING_METHOD Gemv(DeviceCtx* ctx, const enum CBLAS_TRANSPOSE trans, int m, int n,
const T alpha, const T* a, int lda, const T* x, const int incx,
const T beta, T* y, const int incy) {
// Use column-major order to match cuBLAS (which is column-major only)
cblas_gemv<T>(CBLAS_ORDER::CblasColMajor, trans, n, m, alpha, a, lda, x, incx, beta, y, incy);
}
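
The dimension swap here is the usual row-major/column-major bridge: a row-major m × n buffer read in column-major order with dimensions n × m is exactly the transpose of the original matrix, and fixing the order to column-major keeps the CPU call convention aligned with cuBLAS, which supports only column-major layout. A dependency-free sketch of the indexing fact (illustrative only, no BLAS required):

```cpp
#include <iostream>

int main() {
  const int n = 3;                 // columns of the row-major 2 x 3 matrix A
  const double a[] = {1, 2, 3,
                      4, 5, 6};    // row-major: A(i,j) = a[i*n + j]
  // The same buffer read column-major with dims 3 x 2 (leading dimension 3)
  // gives A_cm(j,i) = a[i*n + j] = A(i,j), i.e. A_cm is A^T. Swapping the
  // m/n arguments in the cblas_gemv call accounts for exactly this.
  std::cout << a[1 * n + 2] << "\n";  // 6: A(1,2), equivalently A_cm(2,1)
}
```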
KU_FLOATING_METHOD Gemm(DeviceCtx* ctx, const enum CBLAS_ORDER order,
const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
const int m, const int n, const int k, const T alpha, const T* a,
const int lda, const T* b, const int ldb, const T beta, T* c,
const int ldc) {
cblas_gemm<T>(order, trans_a, trans_b, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
KU_FLOATING_METHOD BatchedGemm(DeviceCtx* ctx, const enum CBLAS_ORDER order,
const enum CBLAS_TRANSPOSE trans_a,
const enum CBLAS_TRANSPOSE trans_b, int batch_size, int m, int n,
int k, const T alpha, const T* a, const T* b, const T beta, T* c,
T** buf) {
const int a_stride = m * k;
const int b_stride = k * n;
const int c_stride = m * n;
FOR_RANGE(int32_t, i, 0, batch_size) {
KernelUtil<DeviceType::kCPU, T>::OFGemm(ctx, trans_a, trans_b, m, n, k, alpha, a + i * a_stride,
b + i * b_stride, beta, c + i * c_stride);
}
}
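
BatchedGemm on CPU is strided batching: the batch is assumed packed contiguously, so slice i starts at fixed element offsets m·k, k·n, and m·n into a, b, and c, and each slice goes through one ordinary GEMM. A sketch of just the offset arithmetic (StridedBatchedGemm and the gemm callback are illustrative, not the OneFlow API):

```cpp
#include <cstdint>

// Strided batching: slice i of each operand sits at a fixed element offset from
// slice 0, so a loop of single GEMMs covers the whole batch. Gemm is any
// callable taking (a_slice, b_slice, c_slice).
template<typename T, typename Gemm>
void StridedBatchedGemm(int batch_size, int m, int n, int k,
                        const T* a, const T* b, T* c, Gemm gemm) {
  const int64_t a_stride = int64_t(m) * k;  // elements in one A slice
  const int64_t b_stride = int64_t(k) * n;  // elements in one B slice
  const int64_t c_stride = int64_t(m) * n;  // elements in one C slice
  for (int i = 0; i < batch_size; ++i) {
    gemm(a + i * a_stride, b + i * b_stride, c + i * c_stride);
  }
}
```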

KU_FLOATING_METHOD Exp(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = std::exp(x[i]); }
}
KU_FLOATING_METHOD Div(DeviceCtx* ctx, const int64_t n, T* x, const T* alpha) {
for (int64_t i = 0; i < n; ++i) { x[i] = x[i] / (*alpha); }
}
KU_FLOATING_METHOD Div(DeviceCtx* ctx, const int64_t n, T* x, const T alpha) {
for (int64_t i = 0; i < n; ++i) { x[i] = x[i] / alpha; }
}
KU_FLOATING_METHOD Mul(DeviceCtx* ctx, const int64_t n, const T* x, const T* y, T* z) {
for (int64_t i = 0; i < n; ++i) { z[i] = x[i] * y[i]; }
}
KU_FLOATING_METHOD Div(DeviceCtx* ctx, const int64_t n, const T* x, const T* y, T* z) {
for (int64_t i = 0; i < n; ++i) { z[i] = x[i] / y[i]; }
}
KU_FLOATING_METHOD Square(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = x[i] * x[i]; }
}
KU_FLOATING_METHOD Sqrt(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = std::sqrt(x[i]); }
}
KU_FLOATING_METHOD Reciprocal(DeviceCtx* ctx, const int n, const T* x, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = static_cast<T>(1.0) / x[i]; }
}
KU_FLOATING_METHOD Rsqrt(DeviceCtx* ctx, const int64_t n, T* x, const float epsilon) {
for (int64_t i = 0; i < n; ++i) { x[i] = 1.0 / std::sqrt(x[i] + epsilon); }
}
KU_FLOATING_METHOD Rsqrt(DeviceCtx* ctx, const int64_t n, const T* x, T* y, const float epsilon) {
for (int64_t i = 0; i < n; ++i) { y[i] = 1.0 / std::sqrt(x[i] + epsilon); }
}
KU_FLOATING_METHOD Powx(DeviceCtx* ctx, const int64_t n, const T* x, const float power, T* y) {
for (int64_t i = 0; i < n; ++i) { y[i] = std::pow(x[i], power); }
}
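
Everything from Exp down to Powx follows one shape: a single pass applying a scalar function to each of n elements. A generic sketch of that shared pattern (ElementWise is an illustrative helper, not part of KernelUtil):

```cpp
#include <cmath>
#include <cstdint>

// Shared element-wise pattern: apply a scalar function f to each of n inputs.
template<typename T, typename F>
void ElementWise(int64_t n, const T* x, T* y, F f) {
  for (int64_t i = 0; i < n; ++i) { y[i] = f(x[i]); }
}

// Example instantiation, Rsqrt with its epsilon guard against division by zero:
//   ElementWise<float>(n, x, y, [](float v) { return 1.0f / std::sqrt(v + 1e-5f); });
```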

KU_FLOATING_METHOD Sigmoid(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
T half = static_cast<T>(0.5);
for (int64_t i = 0; i != n; ++i) { y[i] = half * std::tanh(half * x[i]) + half; }
}
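
The tanh form of Sigmoid is the standard identity σ(x) = ½·tanh(x/2) + ½; it is equivalent to 1/(1 + e^{-x}) and avoids routing large-magnitude inputs through exp, since tanh saturates cleanly. The algebra, for reference:

```latex
% Identity used by the Sigmoid kernel above:
% with tanh(x/2) = (e^x - 1)/(e^x + 1),
\[
\tfrac{1}{2}\tanh\!\Big(\tfrac{x}{2}\Big) + \tfrac{1}{2}
  = \frac{1}{2}\cdot\frac{e^{x}-1}{e^{x}+1} + \frac{1}{2}
  = \frac{e^{x}}{e^{x}+1}
  = \frac{1}{1+e^{-x}}
  = \sigma(x).
\]
```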
KU_FLOATING_METHOD SigmoidBackward(DeviceCtx* ctx, const int64_t n, const T* x, const T* y,
const T* dy, T* dx) {
for (int64_t i = 0; i != n; ++i) { dx[i] = y[i] * (1 - y[i]) * dy[i]; }
}
KU_FLOATING_METHOD Relu(DeviceCtx* ctx, const int64_t n, const T* x, T* y) {
T zero = GetZeroVal<T>();
for (int64_t i = 0; i != n; ++i) { y[i] = std::max(x[i], zero); }
}
KU_FLOATING_METHOD ReluBackward(DeviceCtx* ctx, const int64_t n, const T* x, const T* y,
const T* dy, T* dx) {
T zero = GetZeroVal<T>();
for (int64_t i = 0; i != n; ++i) { dx[i] = (y[i] > zero) * dy[i]; }
}
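
Both backward kernels reuse the forward output rather than the input: σ′(x) = σ(x)(1 − σ(x)) gives the y[i] * (1 - y[i]) factor in SigmoidBackward, and in ReluBackward the comparison (y[i] > zero) converts to 0 or 1 in T, masking the gradient where the activation was clamped. A sketch of that mask trick (ReluGrad is an illustrative name):

```cpp
#include <cstdint>

// The mask trick from ReluBackward: (y > 0) converts to T as 0 or 1, so the
// incoming gradient passes through exactly where the forward output was positive.
template<typename T>
void ReluGrad(int64_t n, const T* y, const T* dy, T* dx) {
  for (int64_t i = 0; i < n; ++i) { dx[i] = (y[i] > T(0)) * dy[i]; }
}
```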
KU_FLOATING_METHOD Addition(DeviceCtx* ctx, const int64_t n, T* out, const T* in_0) {
for (int64_t i = 0; i != n; ++i) { out[i] = in_0[i]; }
}