diff --git a/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc b/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc
index 38c9ce3d8091e4..693365c9f47ca4 100644
--- a/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc
@@ -150,7 +150,7 @@ DeviceContext* StreamAnalyzer::ParseDeviceContext(
   DeviceContext* dev_ctx = nullptr;
 
-  // only gpu/npu need update. xpu not need, because xpu memcpy op kernel is
+  // only gpu need update. xpu not need, because xpu memcpy op kernel is
   // synchronous.
   if (platform::is_gpu_place(place_) || platform::is_custom_place(place_)) {
     VLOG(6) << "Parse DeviceContext for " << op_type
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 527e843d05bb8e..931372a0d9a433 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -1039,8 +1039,8 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
 #elif defined(PADDLE_WITH_XPU)
   return GetAllocator(place)->Allocate(size);
 #else
-  PADDLE_THROW(platform::errors::PreconditionNotMet(
-      "Not compiled with GPU or XPU or NPU."));
+  PADDLE_THROW(
+      platform::errors::PreconditionNotMet("Not compiled with GPU or XPU."));
 #endif
 }
diff --git a/paddle/fluid/operators/beam_search_decode_op.h b/paddle/fluid/operators/beam_search_decode_op.h
index a6d807b028c1b7..07a1c46ac09232 100644
--- a/paddle/fluid/operators/beam_search_decode_op.h
+++ b/paddle/fluid/operators/beam_search_decode_op.h
@@ -95,7 +95,7 @@ struct BeamSearchDecodeFunctor {
   } else {
     BeamSearchDecoder<T> beam_search_decoder(beam_size_, end_id_);
-    // Check if the tensor is on GPU or NPU. If so, use the CPU copy instead
+    // Check if the tensor is on GPU. If so, use the CPU copy instead
     if (tensor_on_gpu_ || tensor_on_npu_) {
       beam_search_decoder.Backtrace(
           step_ids_, step_scores_, id_tensor_, score_tensor_);
diff --git a/paddle/fluid/operators/collective/c_embedding_op.cc b/paddle/fluid/operators/collective/c_embedding_op.cc
index aee2b0c86b81ee..2efd5b46bdc09c 100644
--- a/paddle/fluid/operators/collective/c_embedding_op.cc
+++ b/paddle/fluid/operators/collective/c_embedding_op.cc
@@ -79,7 +79,7 @@ class CEmbeddingOpMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor) The input represents embedding tensors, "
              "which is a learnable parameter.");
     AddInput("Ids",
-             "An input with type int32 or int64 in CPU and GPU, int32 in NPU "
+             "An input with type int32 or int64 in CPU and GPU, "
              "contains the ids to be looked up in W.");
     AddOutput("Out", "The lookup results, which have the same type as W.");
diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h
index 5cea4fa9e05734..40c82619db4a3c 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_op.h
+++ b/paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -622,13 +622,12 @@ class ReduceBaseOp : public framework::OperatorWithKernel {
     // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_MKLDNN
 
     if (input_data_type == framework::proto::VarType::FP16) {
-      PADDLE_ENFORCE_EQ(
-          platform::is_gpu_place(ctx.GetPlace()) ||
-              platform::is_xpu_place(ctx.GetPlace()) ||
-              platform::is_custom_place(ctx.GetPlace()),
-          true,
-          platform::errors::InvalidArgument(
-              "float16 can only be used on GPU or NPU or XPU place"));
+      PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()) ||
+                            platform::is_xpu_place(ctx.GetPlace()) ||
+                            platform::is_custom_place(ctx.GetPlace()),
+                        true,
+                        platform::errors::InvalidArgument(
+                            "float16 can only be used on GPU or XPU place"));
     }
     return phi::KernelKey(input_data_type, ctx.GetPlace());
   }
diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc
index 633ef748be698a..2fb7883cb3f71e 100644
--- a/paddle/fluid/operators/softmax_op.cc
+++ b/paddle/fluid/operators/softmax_op.cc
@@ -47,7 +47,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
               platform::is_custom_place(ctx.GetPlace()),
           true,
           platform::errors::InvalidArgument(
-              "float16 can only be used on GPU/NPU/XPU and custom place"));
+              "float16 can only be used on GPU/XPU and custom place"));
     }
     return phi::KernelKey(
         ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
@@ -130,7 +130,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
           platform::is_xpu_place(ctx.GetPlace()) ||
           platform::is_custom_place(ctx.GetPlace())))
       PADDLE_THROW(platform::errors::InvalidArgument(
-          "float16 can only be used on GPU/NPU/XPU and custom place"));
+          "float16 can only be used on GPU/XPU and custom place"));
     }
     return phi::KernelKey(
         ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
diff --git a/paddle/fluid/platform/device_event_base.h b/paddle/fluid/platform/device_event_base.h
index f56688de09a326..e2de1e5a9abe32 100644
--- a/paddle/fluid/platform/device_event_base.h
+++ b/paddle/fluid/platform/device_event_base.h
@@ -65,7 +65,7 @@ class DeviceEvent {
                           MaxDeviceTypes,
                           type_id_));
 #ifndef PADDLE_WITH_CUSTOM_DEVICE
-    // TODO(Aurelius84): only support CPU/CUDA/NPU.
+    // TODO(Aurelius84): only support CPU/CUDA.
     PADDLE_ENFORCE_LT(type_id_,
                       3,
                       platform::errors::Unavailable(
diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h
index 65132bc68fa0db..6f8ae115bd12ac 100644
--- a/paddle/fluid/pybind/tensor_py.h
+++ b/paddle/fluid/pybind/tensor_py.h
@@ -434,7 +434,7 @@ void SetTensorFromPyArrayT(
     }
 #else
     PADDLE_THROW(platform::errors::PermissionDenied(
-        "Cannot use IPUPlace in CPU/GPU/XPU/NPU version, "
+        "Cannot use IPUPlace in CPU/GPU/XPU version, "
         "Please recompile or reinstall Paddle with IPU support."));
 #endif
   } else if (paddle::platform::is_custom_place(place)) {
@@ -1106,7 +1106,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor,
     return py_arr;
 #else
     PADDLE_THROW(platform::errors::PermissionDenied(
-        "Cannot use CustomPlace in CPU/GPU/XPU/NPU version, "
+        "Cannot use CustomPlace in CPU/GPU/XPU version, "
         "Please recompile or reinstall Paddle with CustomPlace "
         "support."));
 #endif
diff --git a/paddle/phi/backends/device_memory_aligment.h b/paddle/phi/backends/device_memory_aligment.h
index 72562f0c001f04..06d9b450a83abd 100644
--- a/paddle/phi/backends/device_memory_aligment.h
+++ b/paddle/phi/backends/device_memory_aligment.h
@@ -41,7 +41,7 @@ inline size_t Alignment(size_t size,
     alignment = alignment;
 #else
     PADDLE_THROW(phi::errors::PreconditionNotMet(
-        "Fluid is not compiled with CUDA/XPU/NPU."));
+        "Fluid is not compiled with CUDA/XPU."));
 #endif
   }
 }
diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py
index 928c93df0b40af..4c47cbfcc1d0bb 100644
--- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py
+++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py
@@ -323,6 +323,6 @@ def cvt_to_device(x, dev_id, blocking=True):
         place = paddle.XPUPlace(dev_id)
     else:
         raise OSError(
-            "Only supported compiled paddle with gpu/rocm, npu and xpu , but current verison is compiled with cpu."
+            "Only supported compiled paddle with gpu/rocm and xpu , but current verison is compiled with cpu."
         )
     return x._copy_to(place, blocking)
diff --git a/python/paddle/distributed/launch/main.py b/python/paddle/distributed/launch/main.py
index 02099c743933e5..a2c3e34f6dee51 100644
--- a/python/paddle/distributed/launch/main.py
+++ b/python/paddle/distributed/launch/main.py
@@ -52,7 +52,7 @@ def launch():
     - ``--job_id``: The job unique id, it affects the log files' name. e.g., ``--job_id=job1``. Default ``--job_id=default``.
 
-    - ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu/npu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
+    - ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
 
     - ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
diff --git a/python/paddle/distributed/passes/auto_parallel_fp16.py b/python/paddle/distributed/passes/auto_parallel_fp16.py
index 3216c1b4082768..6a763ce15030f7 100644
--- a/python/paddle/distributed/passes/auto_parallel_fp16.py
+++ b/python/paddle/distributed/passes/auto_parallel_fp16.py
@@ -685,7 +685,7 @@ def _insert_memcopy(block, idx, src_var, dist_context, direction="D2H"):
         world_process_group.ranks,
     )
 
-    # TODO to support CUDAPinned/NPU/XPU Places
+    # TODO to support CUDAPinned/XPU Places
     if direction == "D2H":
         dst_place_type = 0
     else:
diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py
index f8e00eabecf5ec..c7e550f7aa1175 100644
--- a/python/paddle/optimizer/adamw.py
+++ b/python/paddle/optimizer/adamw.py
@@ -181,9 +181,7 @@ def __init__(
                 not core.is_compiled_with_cuda()
                 and not core.is_compiled_with_xpu()
             ):
-                raise NotImplementedError(
-                    "'lr_ratio' is unimplemented in CPU, and NPU"
-                )
+                raise NotImplementedError("'lr_ratio' is unimplemented in CPU.")
 
         if parameters is not None:
             # paddle.Tensor is also iterable, so here we don't check whether
diff --git a/tools/get_pr_ut.py b/tools/get_pr_ut.py
index 8bbe39b3b76592..4da57036b68c81 100644
--- a/tools/get_pr_ut.py
+++ b/tools/get_pr_ut.py
@@ -340,10 +340,8 @@ def get_pr_ut(self):
                     file_list.append(filename)
                 else:
                     filterFiles.append(filename)
-            elif (
-                ('/xpu/' in filename.lower())
-                or ('/npu/' in filename.lower())
-                or ('/ipu/' in filename.lower())
+            elif ('/xpu/' in filename.lower()) or (
+                '/ipu/' in filename.lower()
             ):
                 filterFiles.append(filename)
             else: