PaddlePaddle · luotao1 · Apr 7, 2023 · Apr 5, 2023 · Apr 5, 2023 · Apr 5, 2023
@@ -53,7 +53,6 @@ option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
 option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
 option(WITH_XPU_KP "Compile PaddlePaddle with BAIDU XPU compiler " OFF)
 option(WITH_XPU_XFT "Compile PaddlePaddle with BAIDU XPU-XFT" OFF)
-option(WITH_MLU "Compile PaddlePaddle with CAMBRICON MLU" OFF)
 option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
 option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF)
 option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
@@ -81,9 +80,6 @@ endif()
 if(WITH_GPU AND WITH_ROCM)
   message(FATAL_ERROR "Error when compile CUDA and ROCM at the same time")
 endif()
-if(WITH_GPU AND WITH_MLU)
-  message(FATAL_ERROR "Error when compile GPU and MLU at the same time")
-endif()
 
 if(WITH_GPU AND NOT APPLE)
   enable_language(CUDA)
@@ -430,14 +426,6 @@ if(NOT WITH_XPU AND WITH_XPU_BKCL)
       CACHE STRING "Disable BKCL when compiling without XPU" FORCE)
 endif()
 
-if(NOT WITH_MLU AND WITH_CNCL)
-  message(
-    WARNING "Disable CNCL when compiling without MLU. Force WITH_MLU=OFF.")
-  set(WITH_MLU
-      OFF
-      CACHE STRING "Disable CNCL when compiling without MLU" FORCE)
-endif()
-
 if(WITH_NCCL)
   add_definitions("-DPADDLE_WITH_NCCL")
   include(nccl)
@@ -469,10 +457,6 @@ if(WITH_GPU)
   endif()
 endif()
 
-if(WITH_MLU)
-  include(neuware)
-endif()
-
 if(WITH_ROCM)
   include(hip)
   include(miopen) # set miopen libraries, must before configure

diff --git a/cmake/configure.cmake b/cmake/configure.cmake
@@ -116,11 +116,6 @@ if(WITH_IPU)
   add_definitions(-DPADDLE_WITH_IPU)
 endif()
 
-if(WITH_MLU)
-  message(STATUS "Compile with MLU!")
-  add_definitions(-DPADDLE_WITH_MLU)
-endif()
-
 if(WITH_GPU)
   add_definitions(-DPADDLE_WITH_CUDA)
   add_definitions(-DEIGEN_USE_GPU)

diff --git a/cmake/neuware.cmake b/cmake/neuware.cmake
diff --git a/cmake/operators.cmake b/cmake/operators.cmake
@@ -74,9 +74,6 @@ function(op_library TARGET)
   set(MKLDNN_FILE)
   set(op_common_deps operator op_registry math_function layer
                      common_infer_shape_functions)
-  if(WITH_MLU)
-    set(op_common_deps ${op_common_deps} mlu_baseop)
-  endif()
 
   # Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build.
   set(options UNITY)
@@ -172,12 +169,6 @@ function(op_library TARGET)
         list(APPEND xpu_kp_cc_srcs ${TARGET}.kps)
       endif()
     endif()
-    if(WITH_MLU)
-      string(REPLACE "_op" "_op_mlu" MLU_FILE "${TARGET}")
-      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MLU_FILE}.cc)
-        list(APPEND mlu_cc_srcs ${MLU_FILE}.cc)
-      endif()
-    endif()
   else()
     foreach(src ${op_library_SRCS})
       if(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$")
@@ -204,8 +195,6 @@ function(op_library TARGET)
         list(APPEND xpu_kp_cc_srcs ${src})
       elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.kps$")
         list(APPEND xpu_kp_cc_srcs ${src})
-      elseif(WITH_MLU AND ${src} MATCHES ".*_op_mlu.cc$")
-        list(APPEND mlu_cc_srcs ${src})
       elseif(${src} MATCHES ".*\\.cc$")
         list(APPEND cc_srcs ${src})
       elseif((WITH_ROCM OR WITH_GPU) AND ${src} MATCHES ".*\\.kps$")
@@ -523,18 +512,6 @@ function(op_library TARGET)
     endforeach()
   endif()
 
-  # pybind USE_OP_DEVICE_KERNEL for MLU
-  if(WITH_MLU AND ${mlu_cc_srcs_len} GREATER 0)
-    foreach(mlu_src ${mlu_cc_srcs})
-      set(op_name "")
-      find_register(${mlu_src} "REGISTER_OP_MLU_KERNEL" op_name)
-      if(NOT ${op_name} EQUAL "")
-        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MLU);\n")
-        set(pybind_flag 1)
-      endif()
-    endforeach()
-  endif()
-
   # pybind USE_OP_DEVICE_KERNEL for MKLDNN
   if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
     # Append first implemented MKLDNN activation operator

diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
@@ -356,11 +356,6 @@ if(WITH_XPU)
   list(APPEND third_party_deps extern_xpu)
 endif()
 
-if(WITH_MLU)
-  include(external/concurrentqueue) # download, build, install concurrentqueue
-  list(APPEND third_party_deps extern_concurrentqueue)
-endif()
-
 if(WITH_PSLIB)
   include(external/pslib) # download, build, install pslib
   list(APPEND third_party_deps extern_pslib)

diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc
@@ -99,11 +99,6 @@ struct DLDeviceVisitor
         "platform::NPUPinnedPlace is not supported"));
   }
 
-  inline ::DLDevice operator()(const platform::MLUPlace &place) const {
-    PADDLE_THROW(
-        platform::errors::Unimplemented("platform::MLUPlace is not supported"));
-  }
-
   inline ::DLDevice operator()(const platform::CustomPlace &place) const {
     PADDLE_THROW(platform::errors::Unimplemented(
         "platform::CustomPlace is not supported"));

diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
@@ -516,17 +516,6 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
 #else
       PADDLE_THROW(
           platform::errors::Unimplemented("No IPU gc found in CPU/IPU paddle"));
-#endif
-    } else if (platform::is_mlu_place(place_)) {
-#ifdef PADDLE_WITH_MLU
-      if (IsFastEagerDeletionModeEnabled()) {
-        gc.reset(new MLUUnsafeFastGarbageCollector(place_, max_memory_size));
-      } else {
-        gc.reset(new MLUDefaultStreamGarbageCollector(place_, max_memory_size));
-      }
-#else
-      PADDLE_THROW(
-          platform::errors::Unimplemented("No MLU gc found in CPU/MLU paddle"));
 #endif
     } else if (platform::is_custom_place(place_)) {
 #ifdef PADDLE_WITH_CUSTOM_DEVICE

diff --git a/paddle/fluid/framework/garbage_collector.cc b/paddle/fluid/framework/garbage_collector.cc
@@ -125,56 +125,6 @@ void CUDAPinnedGarbageCollector::ClearCallback(
 }
 #endif
 
-#ifdef PADDLE_WITH_MLU
-MLUDefaultStreamGarbageCollector::MLUDefaultStreamGarbageCollector(
-    const platform::MLUPlace &place, size_t max_memory_size)
-    : GarbageCollector(place, max_memory_size) {}
-
-void MLUDefaultStreamGarbageCollector::Wait() const {
-  static_cast<platform::MLUDeviceContext *>(this->dev_ctx_)
-      ->WaitStreamCallback();
-}
-
-void MLUDefaultStreamGarbageCollector::ClearCallback(
-    const std::function<void()> &callback) {
-  static_cast<platform::MLUDeviceContext *>(this->dev_ctx_)
-      ->AddStreamCallback(callback);
-}
-MLUUnsafeFastGarbageCollector::MLUUnsafeFastGarbageCollector(
-    const platform::MLUPlace &place, size_t max_memory_size)
-    : GarbageCollector(place, max_memory_size) {}
-
-void MLUUnsafeFastGarbageCollector::ClearCallback(
-    const std::function<void()> &callback) {
-  callback();
-}
-
-MLUStreamGarbageCollector::MLUStreamGarbageCollector(
-    const platform::MLUPlace &place, size_t max_memory_size)
-    : GarbageCollector(place, max_memory_size) {
-  platform::MLUDeviceGuard guard(place.device);
-  PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueCreate(&stream_));
-  callback_manager_.reset(
-      new platform::StreamCallbackManager<mluStream>(stream_));
-}
-
-MLUStreamGarbageCollector::~MLUStreamGarbageCollector() {
-  auto place = this->dev_ctx_->GetPlace();
-  platform::MLUDeviceGuard guard(place.device);
-  PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueSync(stream_));
-  PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueDestroy(stream_));
-}
-
-mluStream MLUStreamGarbageCollector::stream() const { return stream_; }
-
-void MLUStreamGarbageCollector::Wait() const { callback_manager_->Wait(); }
-
-void MLUStreamGarbageCollector::ClearCallback(
-    const std::function<void()> &callback) {
-  callback_manager_->AddCallback(callback);
-}
-#endif
-
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
 CustomDefaultStreamGarbageCollector::CustomDefaultStreamGarbageCollector(
     const platform::CustomPlace &place, size_t max_memory_size)

diff --git a/paddle/fluid/framework/garbage_collector.h b/paddle/fluid/framework/garbage_collector.h
@@ -22,9 +22,6 @@
 
 #include "gflags/gflags.h"
 #include "paddle/fluid/platform/device_context.h"
-#ifdef PADDLE_WITH_MLU
-#include "paddle/fluid/platform/device/mlu/device_context.h"
-#endif
 #include "paddle/fluid/platform/stream_callback_manager.h"
 
 namespace paddle {
@@ -139,46 +136,6 @@ class CUDAPinnedGarbageCollector : public GarbageCollector {
 };
 #endif
 
-#ifdef PADDLE_WITH_MLU
-class MLUDefaultStreamGarbageCollector : public GarbageCollector {
- public:
-  MLUDefaultStreamGarbageCollector(const platform::MLUPlace &place,
-                                   size_t max_memory_size);
-
-  void Wait() const override;
-
- protected:
-  void ClearCallback(const std::function<void()> &callback) override;
-};
-
-class MLUUnsafeFastGarbageCollector : public GarbageCollector {
- public:
-  MLUUnsafeFastGarbageCollector(const platform::MLUPlace &place,
-                                size_t max_memory_size);
-
- protected:
-  void ClearCallback(const std::function<void()> &callback) override;
-};
-class MLUStreamGarbageCollector : public GarbageCollector {
- public:
-  MLUStreamGarbageCollector(const platform::MLUPlace &place,
-                            size_t max_memory_size);
-
-  ~MLUStreamGarbageCollector();
-
-  void Wait() const override;
-
-  mluStream stream() const;
-
- protected:
-  void ClearCallback(const std::function<void()> &callback) override;
-
- private:
-  mluStream stream_;
-  std::unique_ptr<platform::StreamCallbackManager<mluStream>> callback_manager_;
-};
-#endif
-
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
 class CustomDefaultStreamGarbageCollector : public GarbageCollector {
  public:

diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
@@ -376,9 +376,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
 #define REGISTER_OP_NPU_KERNEL(op_type, ...) \
   REGISTER_OP_KERNEL(op_type, NPU, ::paddle::platform::NPUPlace, __VA_ARGS__)
 
-#define REGISTER_OP_MLU_KERNEL(op_type, ...) \
-  REGISTER_OP_KERNEL(op_type, MLU, ::paddle::platform::MLUPlace, __VA_ARGS__)
-
 #define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class,  \
                               customized_name,                     \
                               customized_type_value,               \
@@ -421,12 +418,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
       ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
       __VA_ARGS__)
 
-#define REGISTER_OP_MLU_KERNEL_FUNCTOR(op_type, ...)                  \
-  REGISTER_OP_KERNEL_EX(                                              \
-      op_type, MLU, ::paddle::platform::MLUPlace, DEFAULT_TYPE,       \
-      ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
-      __VA_ARGS__)
-
 #define REGISTER_OP_IPU_KERNEL_FUNCTOR(op_type, ...)                  \
   REGISTER_OP_KERNEL_EX(                                              \
       op_type, IPU, ::paddle::platform::IPUPlace, DEFAULT_TYPE,       \

diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
@@ -57,10 +57,6 @@ class DenseTensor;
 #include "paddle/fluid/platform/mkldnn_op_list.h"
 #endif
 
-#ifdef PADDLE_WITH_MLU
-#include "paddle/fluid/platform/device/mlu/mlu_info.h"
-#endif
-
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #endif
@@ -770,16 +766,6 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
 #else
       auto dev_id = place.device;
       platform::SetXPUDeviceId(dev_id);
-#endif
-    } else if (platform::is_mlu_place(place)) {
-#ifndef PADDLE_WITH_MLU
-      PADDLE_THROW(platform::errors::Unavailable(
-          "Cannot run operator on place %s, please recompile paddle or "
-          "reinstall Paddle with MLU support.",
-          place));
-#else
-      auto dev_id = place.device;
-      platform::SetMLUDeviceId(dev_id);
 #endif
     } else if (platform::is_custom_place(place)) {
 #ifndef PADDLE_WITH_CUSTOM_DEVICE
@@ -2315,16 +2301,6 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
     kernel_iter = kernels.find(expected_kernel_key);
   }
 #endif
-#ifdef PADDLE_WITH_MLU
-  if (kernel_iter == kernels.end() &&
-      platform::is_mlu_place(expected_kernel_key.place_)) {
-    VLOG(3) << "missing MLU kernel: " << type_
-            << ", expected_kernel_key:" << expected_kernel_key
-            << ", fallbacking to CPU one!";
-    expected_kernel_key.place_ = platform::CPUPlace();
-    kernel_iter = kernels.find(expected_kernel_key);
-  }
-#endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
   if (kernel_iter == kernels.end() &&
       platform::is_custom_place(expected_kernel_key.place_)) {

diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
@@ -522,19 +522,6 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
       PADDLE_THROW(platform::errors::PermissionDenied(
           "Paddle can't use CUDA device since it's not compiled with CUDA,"
           "Please recompile or reinstall Paddle with GPU support."));
-#endif
-    } else if (platform::is_mlu_place(place)) {
-#ifdef PADDLE_WITH_MLU
-      if (IsFastEagerDeletionModeEnabled()) {
-        gc.reset(new MLUUnsafeFastGarbageCollector(place, max_memory_size));
-      } else {
-        gc.reset(new MLUStreamGarbageCollector(place, max_memory_size));
-      }
-      VLOG(10) << "Created " << i << "-th GarbageCollector at " << place;
-#else
-      PADDLE_THROW(platform::errors::PermissionDenied(
-          "Paddle can't use MLU device since it's not compiled with MLU,"
-          "Please recompile or reinstall Paddle with MLU support."));
 #endif
     } else if (platform::is_xpu_place(place)) {
 #if defined(PADDLE_WITH_XPU)