Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clean up WITH_MLU #52546

Merged
merged 8 commits into from
Apr 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
option(WITH_XPU_KP "Compile PaddlePaddle with BAIDU XPU compiler " OFF)
option(WITH_XPU_XFT "Compile PaddlePaddle with BAIDU XPU-XFT" OFF)
option(WITH_MLU "Compile PaddlePaddle with CAMBRICON MLU" OFF)
option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF)
option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
Expand Down Expand Up @@ -81,9 +80,6 @@ endif()
if(WITH_GPU AND WITH_ROCM)
message(FATAL_ERROR "Error when compile CUDA and ROCM at the same time")
endif()
if(WITH_GPU AND WITH_MLU)
message(FATAL_ERROR "Error when compile GPU and MLU at the same time")
endif()

if(WITH_GPU AND NOT APPLE)
enable_language(CUDA)
Expand Down Expand Up @@ -430,14 +426,6 @@ if(NOT WITH_XPU AND WITH_XPU_BKCL)
CACHE STRING "Disable BKCL when compiling without XPU" FORCE)
endif()

if(NOT WITH_MLU AND WITH_CNCL)
message(
WARNING "Disable CNCL when compiling without MLU. Force WITH_MLU=OFF.")
set(WITH_MLU
OFF
CACHE STRING "Disable CNCL when compiling without MLU" FORCE)
endif()

if(WITH_NCCL)
add_definitions("-DPADDLE_WITH_NCCL")
include(nccl)
Expand Down Expand Up @@ -469,10 +457,6 @@ if(WITH_GPU)
endif()
endif()

if(WITH_MLU)
include(neuware)
endif()

if(WITH_ROCM)
include(hip)
include(miopen) # set miopen libraries, must before configure
Expand Down
5 changes: 0 additions & 5 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,6 @@ if(WITH_IPU)
add_definitions(-DPADDLE_WITH_IPU)
endif()

if(WITH_MLU)
message(STATUS "Compile with MLU!")
add_definitions(-DPADDLE_WITH_MLU)
endif()

if(WITH_GPU)
add_definitions(-DPADDLE_WITH_CUDA)
add_definitions(-DEIGEN_USE_GPU)
Expand Down
34 changes: 0 additions & 34 deletions cmake/neuware.cmake

This file was deleted.

23 changes: 0 additions & 23 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ function(op_library TARGET)
set(MKLDNN_FILE)
set(op_common_deps operator op_registry math_function layer
common_infer_shape_functions)
if(WITH_MLU)
set(op_common_deps ${op_common_deps} mlu_baseop)
endif()

# Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build.
set(options UNITY)
Expand Down Expand Up @@ -169,12 +166,6 @@ function(op_library TARGET)
list(APPEND xpu_kp_cc_srcs ${TARGET}.kps)
endif()
endif()
if(WITH_MLU)
string(REPLACE "_op" "_op_mlu" MLU_FILE "${TARGET}")
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MLU_FILE}.cc)
list(APPEND mlu_cc_srcs ${MLU_FILE}.cc)
endif()
endif()
else()
foreach(src ${op_library_SRCS})
if(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$")
Expand All @@ -201,8 +192,6 @@ function(op_library TARGET)
list(APPEND xpu_kp_cc_srcs ${src})
elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.kps$")
list(APPEND xpu_kp_cc_srcs ${src})
elseif(WITH_MLU AND ${src} MATCHES ".*_op_mlu.cc$")
list(APPEND mlu_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cc$")
list(APPEND cc_srcs ${src})
elseif((WITH_ROCM OR WITH_GPU) AND ${src} MATCHES ".*\\.kps$")
Expand Down Expand Up @@ -519,18 +508,6 @@ function(op_library TARGET)
endforeach()
endif()

# pybind USE_OP_DEVICE_KERNEL for MLU
if(WITH_MLU AND ${mlu_cc_srcs_len} GREATER 0)
foreach(mlu_src ${mlu_cc_srcs})
set(op_name "")
find_register(${mlu_src} "REGISTER_OP_MLU_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MLU);\n")
set(pybind_flag 1)
endif()
endforeach()
endif()

# pybind USE_OP_DEVICE_KERNEL for MKLDNN
if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
# Append first implemented MKLDNN activation operator
Expand Down
5 changes: 0 additions & 5 deletions cmake/third_party.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -356,11 +356,6 @@ if(WITH_XPU)
list(APPEND third_party_deps extern_xpu)
endif()

if(WITH_MLU)
include(external/concurrentqueue) # download, build, install concurrentqueue
list(APPEND third_party_deps extern_concurrentqueue)
endif()

if(WITH_PSLIB)
include(external/pslib) # download, build, install pslib
list(APPEND third_party_deps extern_pslib)
Expand Down
5 changes: 0 additions & 5 deletions paddle/fluid/framework/dlpack_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,6 @@ struct DLDeviceVisitor
"platform::NPUPinnedPlace is not supported"));
}

inline ::DLDevice operator()(const platform::MLUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::MLUPlace is not supported"));
}

inline ::DLDevice operator()(const platform::CustomPlace &place) const {
PADDLE_THROW(platform::errors::Unimplemented(
"platform::CustomPlace is not supported"));
Expand Down
11 changes: 0 additions & 11 deletions paddle/fluid/framework/executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -516,17 +516,6 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
#else
PADDLE_THROW(
platform::errors::Unimplemented("No IPU gc found in CPU/IPU paddle"));
#endif
} else if (platform::is_mlu_place(place_)) {
#ifdef PADDLE_WITH_MLU
if (IsFastEagerDeletionModeEnabled()) {
gc.reset(new MLUUnsafeFastGarbageCollector(place_, max_memory_size));
} else {
gc.reset(new MLUDefaultStreamGarbageCollector(place_, max_memory_size));
}
#else
PADDLE_THROW(
platform::errors::Unimplemented("No MLU gc found in CPU/MLU paddle"));
#endif
} else if (platform::is_custom_place(place_)) {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
Expand Down
50 changes: 0 additions & 50 deletions paddle/fluid/framework/garbage_collector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,56 +125,6 @@ void CUDAPinnedGarbageCollector::ClearCallback(
}
#endif

#ifdef PADDLE_WITH_MLU
MLUDefaultStreamGarbageCollector::MLUDefaultStreamGarbageCollector(
const platform::MLUPlace &place, size_t max_memory_size)
: GarbageCollector(place, max_memory_size) {}

void MLUDefaultStreamGarbageCollector::Wait() const {
static_cast<platform::MLUDeviceContext *>(this->dev_ctx_)
->WaitStreamCallback();
}

void MLUDefaultStreamGarbageCollector::ClearCallback(
const std::function<void()> &callback) {
static_cast<platform::MLUDeviceContext *>(this->dev_ctx_)
->AddStreamCallback(callback);
}
MLUUnsafeFastGarbageCollector::MLUUnsafeFastGarbageCollector(
const platform::MLUPlace &place, size_t max_memory_size)
: GarbageCollector(place, max_memory_size) {}

void MLUUnsafeFastGarbageCollector::ClearCallback(
const std::function<void()> &callback) {
callback();
}

MLUStreamGarbageCollector::MLUStreamGarbageCollector(
const platform::MLUPlace &place, size_t max_memory_size)
: GarbageCollector(place, max_memory_size) {
platform::MLUDeviceGuard guard(place.device);
PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueCreate(&stream_));
callback_manager_.reset(
new platform::StreamCallbackManager<mluStream>(stream_));
}

MLUStreamGarbageCollector::~MLUStreamGarbageCollector() {
auto place = this->dev_ctx_->GetPlace();
platform::MLUDeviceGuard guard(place.device);
PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueSync(stream_));
PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueDestroy(stream_));
}

mluStream MLUStreamGarbageCollector::stream() const { return stream_; }

void MLUStreamGarbageCollector::Wait() const { callback_manager_->Wait(); }

void MLUStreamGarbageCollector::ClearCallback(
const std::function<void()> &callback) {
callback_manager_->AddCallback(callback);
}
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
CustomDefaultStreamGarbageCollector::CustomDefaultStreamGarbageCollector(
const platform::CustomPlace &place, size_t max_memory_size)
Expand Down
43 changes: 0 additions & 43 deletions paddle/fluid/framework/garbage_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@

#include "gflags/gflags.h"
#include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/platform/device/mlu/device_context.h"
#endif
#include "paddle/fluid/platform/stream_callback_manager.h"

namespace paddle {
Expand Down Expand Up @@ -139,46 +136,6 @@ class CUDAPinnedGarbageCollector : public GarbageCollector {
};
#endif

#ifdef PADDLE_WITH_MLU
class MLUDefaultStreamGarbageCollector : public GarbageCollector {
public:
MLUDefaultStreamGarbageCollector(const platform::MLUPlace &place,
size_t max_memory_size);

void Wait() const override;

protected:
void ClearCallback(const std::function<void()> &callback) override;
};

class MLUUnsafeFastGarbageCollector : public GarbageCollector {
public:
MLUUnsafeFastGarbageCollector(const platform::MLUPlace &place,
size_t max_memory_size);

protected:
void ClearCallback(const std::function<void()> &callback) override;
};
class MLUStreamGarbageCollector : public GarbageCollector {
public:
MLUStreamGarbageCollector(const platform::MLUPlace &place,
size_t max_memory_size);

~MLUStreamGarbageCollector();

void Wait() const override;

mluStream stream() const;

protected:
void ClearCallback(const std::function<void()> &callback) override;

private:
mluStream stream_;
std::unique_ptr<platform::StreamCallbackManager<mluStream>> callback_manager_;
};
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
class CustomDefaultStreamGarbageCollector : public GarbageCollector {
public:
Expand Down
9 changes: 0 additions & 9 deletions paddle/fluid/framework/op_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -376,9 +376,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
#define REGISTER_OP_NPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, NPU, ::paddle::platform::NPUPlace, __VA_ARGS__)

#define REGISTER_OP_MLU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, MLU, ::paddle::platform::MLUPlace, __VA_ARGS__)

#define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class, \
customized_name, \
customized_type_value, \
Expand Down Expand Up @@ -421,12 +418,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
__VA_ARGS__)

#define REGISTER_OP_MLU_KERNEL_FUNCTOR(op_type, ...) \
REGISTER_OP_KERNEL_EX( \
op_type, MLU, ::paddle::platform::MLUPlace, DEFAULT_TYPE, \
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
__VA_ARGS__)

#define REGISTER_OP_IPU_KERNEL_FUNCTOR(op_type, ...) \
REGISTER_OP_KERNEL_EX( \
op_type, IPU, ::paddle::platform::IPUPlace, DEFAULT_TYPE, \
Expand Down
24 changes: 0 additions & 24 deletions paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,6 @@ class DenseTensor;
#include "paddle/fluid/platform/mkldnn_op_list.h"
#endif

#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/platform/device/mlu/mlu_info.h"
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#endif
Expand Down Expand Up @@ -770,16 +766,6 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
#else
auto dev_id = place.device;
platform::SetXPUDeviceId(dev_id);
#endif
} else if (platform::is_mlu_place(place)) {
#ifndef PADDLE_WITH_MLU
PADDLE_THROW(platform::errors::Unavailable(
"Cannot run operator on place %s, please recompile paddle or "
"reinstall Paddle with MLU support.",
place));
#else
auto dev_id = place.device;
platform::SetMLUDeviceId(dev_id);
#endif
} else if (platform::is_custom_place(place)) {
#ifndef PADDLE_WITH_CUSTOM_DEVICE
Expand Down Expand Up @@ -2301,16 +2287,6 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
}
#endif

#ifdef PADDLE_WITH_MLU
if (kernel_iter == kernels.end() &&
platform::is_mlu_place(expected_kernel_key.place_)) {
VLOG(3) << "missing MLU kernel: " << type_
<< ", expected_kernel_key:" << expected_kernel_key
<< ", fallbacking to CPU one!";
expected_kernel_key.place_ = platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
if (kernel_iter == kernels.end() &&
platform::is_custom_place(expected_kernel_key.place_)) {
Expand Down
13 changes: 0 additions & 13 deletions paddle/fluid/framework/parallel_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -522,19 +522,6 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
PADDLE_THROW(platform::errors::PermissionDenied(
"Paddle can't use CUDA device since it's not compiled with CUDA,"
"Please recompile or reinstall Paddle with GPU support."));
#endif
} else if (platform::is_mlu_place(place)) {
#ifdef PADDLE_WITH_MLU
if (IsFastEagerDeletionModeEnabled()) {
gc.reset(new MLUUnsafeFastGarbageCollector(place, max_memory_size));
} else {
gc.reset(new MLUStreamGarbageCollector(place, max_memory_size));
}
VLOG(10) << "Created " << i << "-th GarbageCollector at " << place;
#else
PADDLE_THROW(platform::errors::PermissionDenied(
"Paddle can't use MLU device since it's not compiled with MLU,"
"Please recompile or reinstall Paddle with MLU support."));
#endif
} else if (platform::is_xpu_place(place)) {
#if defined(PADDLE_WITH_XPU)
Expand Down
Loading