
Commit e6ec98f

[Phi] Move softmax with cross entropy kernel into phi (#40832)
* add cross_entropy_with_softmax phi kernel
* remove softmax_with_cross_entropy kernel
* add softmax_with_cross_entropy grad kernel
* remove original op kernel
* refine cross entropy impl
* fix pointer error
* revert kernel cu change
* fix xpu failed
* fix cinn failed
* fix npu failed
* add forward sig
* add check_nan_inf for pt kernel
* remove repeat cmake item
* fix unittest error
1 parent d65a7a4 commit e6ec98f

22 files changed: +1867 -1339 lines
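For context on what the migrated kernels compute: in the soft-label case the fused op applies softmax to the logits along the class axis and then evaluates loss_i = -sum_j label[i][j] * log(prob[i][j]), which matches the Eigen expression in cross_entropy.cc below. A minimal standalone C++ sketch of that math (illustrative only, not Paddle code):

// Standalone sketch (not Paddle code) of the soft-label math the fused kernel
// computes: prob = softmax(logits) per row, loss_i = -sum_j label[i][j] * log(prob[i][j]).
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int batch = 2, classes = 3;
  std::vector<float> logits = {1.0f, 2.0f, 3.0f, 0.5f, 0.5f, 0.5f};
  std::vector<float> labels = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f};  // soft labels, each row sums to 1
  std::vector<float> prob(batch * classes);
  std::vector<float> loss(batch, 0.0f);

  for (int i = 0; i < batch; ++i) {
    // Numerically stable softmax over the class axis.
    float max_v = *std::max_element(logits.begin() + i * classes,
                                    logits.begin() + (i + 1) * classes);
    float sum = 0.0f;
    for (int j = 0; j < classes; ++j) {
      prob[i * classes + j] = std::exp(logits[i * classes + j] - max_v);
      sum += prob[i * classes + j];
    }
    // Cross entropy against the soft labels.
    for (int j = 0; j < classes; ++j) {
      prob[i * classes + j] /= sum;
      loss[i] -= labels[i * classes + j] * std::log(prob[i * classes + j]);
    }
  }
  for (int i = 0; i < batch; ++i) std::printf("loss[%d] = %f\n", i, loss[i]);
  return 0;
}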

paddle/fluid/framework/new_executor/standalone_executor_test.cc

Lines changed: 3 additions & 1 deletion
@@ -35,7 +35,7 @@ USE_OP_ITSELF(elementwise_add);
 USE_OP_ITSELF(sigmoid);
 USE_OP_ITSELF(tanh);
 USE_OP_ITSELF(elementwise_mul);
-USE_OP(softmax_with_cross_entropy);
+USE_OP_ITSELF(softmax_with_cross_entropy);
 USE_OP_ITSELF(reduce_mean);
 USE_OP_ITSELF(reduce_sum);
 USE_OP_ITSELF(reduce_sum_grad);
@@ -83,6 +83,8 @@ PD_DECLARE_KERNEL(max_raw, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(sgd, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(slice, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(slice_grad, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(cross_entropy_with_softmax, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(cross_entropy_with_softmax_grad, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(sqrt, GPU, ALL_LAYOUT);

 DECLARE_double(eager_delete_tensor_gb);

paddle/fluid/framework/phi_utils.cc

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ phi::KernelKey TransOpKernelTypeToPhiKernelKey(
   } else if (kernel_type.library_type_ == LibraryType::kKP) {
     backend = phi::Backend::KPS;
   } else {
-    // do
+    // do nothing
   }
   paddle::experimental::DataLayout layout = kernel_type.data_layout_;
   paddle::experimental::DataType dtype =

paddle/fluid/imperative/prepared_operator.cc

Lines changed: 5 additions & 0 deletions
@@ -484,6 +484,11 @@ static void PreparedOpRunPtImpl(
     pt_kernel(&pt_kernel_context);
   }

+  if (FLAGS_check_nan_inf) {
+    framework::details::CheckOpHasNanOrInfInDygraph<VarType>(
+        op.Type(), outs, dev_ctx->GetPlace());
+  }
+
   if (FLAGS_benchmark) {
     dev_ctx->Wait();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
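The new FLAGS_check_nan_inf branch brings the NaN/Inf output check to the phi kernel path in dygraph. Conceptually such a check scans each output tensor for non-finite values and fails loudly; a minimal sketch of the idea (illustrative names, not the actual framework::details::CheckOpHasNanOrInfInDygraph implementation):

// Illustrative-only sketch of a flag-guarded NaN/Inf output check:
// scan each output buffer and fail loudly on non-finite values.
#include <cmath>
#include <stdexcept>
#include <string>
#include <vector>

void CheckHasNanOrInf(const std::string& op_type, const std::vector<float>& output) {
  for (float v : output) {
    if (std::isnan(v) || std::isinf(v)) {
      throw std::runtime_error("Output of operator '" + op_type +
                               "' contains NaN or Inf");
    }
  }
}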

paddle/fluid/operators/math/cross_entropy.cc

Lines changed: 29 additions & 28 deletions
@@ -14,6 +14,7 @@ limitations under the License. */

 #include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/framework/convert_utils.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"

 namespace paddle {
 namespace platform {
@@ -89,38 +90,38 @@ struct HardLabelCrossEntropyCPUFunctorImpl {
   const int axis_dim_;
 };

-template <typename T>
-class CrossEntropyFunctor<platform::CPUDeviceContext, T> {
- public:
-  void operator()(const platform::CPUDeviceContext& ctx, framework::Tensor* out,
-                  const framework::Tensor* prob,
-                  const framework::Tensor* labels, const bool softLabel,
-                  const int ignore_index, const int axis_dim) {
-    if (softLabel) {
-      const int batch_size = prob->dims()[0];
-      const int num_classes = prob->dims()[1];
-      const int num_remain = num_classes / axis_dim;
-
-      Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
-      auto in = EigenMatrix<T>::From(*prob);
-      auto lbl = EigenMatrix<T>::From(*labels);
-      auto loss = EigenMatrix<T>::From(*out);
-
-      loss.device(*ctx.eigen_device()) =
-          -((lbl * in.log().unaryExpr(math::TolerableValue<T>()))
-                .reshape(batch_axis_remain)
-                .sum(Eigen::DSizes<int, 1>(1)));
-    } else {
-      HardLabelCrossEntropyCPUFunctorImpl<T> functor_impl(
-          out, prob, labels, ignore_index, axis_dim);
-      framework::VisitIntDataType(
-          framework::TransToProtoVarType(labels->dtype()), functor_impl);
-    }
+template <typename DeviceContext, typename T>
+void CrossEntropyFunctor<DeviceContext, T>::operator()(
+    const DeviceContext& ctx, framework::Tensor* out,
+    const framework::Tensor* prob, const framework::Tensor* labels,
+    const bool softLabel, const int ignore_index, const int axis_dim) {
+  if (softLabel) {
+    const int batch_size = prob->dims()[0];
+    const int num_classes = prob->dims()[1];
+    const int num_remain = num_classes / axis_dim;
+
+    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
+    auto in = EigenMatrix<T>::From(*prob);
+    auto lbl = EigenMatrix<T>::From(*labels);
+    auto loss = EigenMatrix<T>::From(*out);
+
+    loss.device(*ctx.eigen_device()) =
+        -((lbl * in.log().unaryExpr(math::TolerableValue<T>()))
+              .reshape(batch_axis_remain)
+              .sum(Eigen::DSizes<int, 1>(1)));
+  } else {
+    HardLabelCrossEntropyCPUFunctorImpl<T> functor_impl(out, prob, labels,
+                                                        ignore_index, axis_dim);
+    framework::VisitIntDataType(framework::TransToProtoVarType(labels->dtype()),
+                                functor_impl);
   }
-};
+}

 template class CrossEntropyFunctor<platform::CPUDeviceContext, float>;
 template class CrossEntropyFunctor<platform::CPUDeviceContext, double>;
+
+template class CrossEntropyFunctor<phi::CPUContext, float>;
+template class CrossEntropyFunctor<phi::CPUContext, double>;
 } // namespace math
 } // namespace operators
 } // namespace paddle
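The CPU functor above changes from a class specialization on platform::CPUDeviceContext into a single member-function definition templated on the device context, followed by explicit instantiations for both the fluid and phi context types, so one definition serves both runtimes. A self-contained sketch of this C++ pattern (stand-in types, not Paddle's):

// Illustration of the refactor pattern: one templated member-function
// definition in the .cc, explicitly instantiated per device-context/type pair.
#include <cstdio>

struct FluidCPUContext {};  // stands in for platform::CPUDeviceContext
struct PhiCPUContext {};    // stands in for phi::CPUContext

// Declared in the header, templated on the context type.
template <typename DeviceContext, typename T>
struct CrossEntropyLikeFunctor {
  void operator()(const DeviceContext& ctx, T value);
};

// Defined once in the source file for any context type...
template <typename DeviceContext, typename T>
void CrossEntropyLikeFunctor<DeviceContext, T>::operator()(const DeviceContext&,
                                                           T value) {
  std::printf("value = %f\n", static_cast<double>(value));
}

// ...and explicitly instantiated for each context/type pair callers need, so
// both the old fluid context and the new phi context link against one definition.
template struct CrossEntropyLikeFunctor<FluidCPUContext, float>;
template struct CrossEntropyLikeFunctor<FluidCPUContext, double>;
template struct CrossEntropyLikeFunctor<PhiCPUContext, float>;
template struct CrossEntropyLikeFunctor<PhiCPUContext, double>;

int main() {
  CrossEntropyLikeFunctor<PhiCPUContext, float>()(PhiCPUContext{}, 0.5f);
  return 0;
}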

paddle/fluid/operators/math/cross_entropy.cu

Lines changed: 33 additions & 30 deletions
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"

 namespace paddle {
 namespace operators {
@@ -93,46 +94,48 @@ struct HardLabelCrossEntropyCUDAFunctorImpl {
   gpuStream_t stream_;
 };

-template <typename T>
-class CrossEntropyFunctor<platform::CUDADeviceContext, T> {
- public:
-  void operator()(const platform::CUDADeviceContext& ctx,
-                  framework::Tensor* out, const framework::Tensor* prob,
-                  const framework::Tensor* labels, const bool softLabel,
-                  const int ignore_index, const int axis_dim) {
-    const T* prob_data = prob->data<T>();
-    T* loss_data = out->mutable_data<T>(ctx.GetPlace());
-
-    int batch_size = prob->dims()[0];
-    int class_num = prob->dims()[1];
+template <typename DeviceContext, typename T>
+void CrossEntropyFunctor<DeviceContext, T>::operator()(
+    const DeviceContext& ctx, framework::Tensor* out,
+    const framework::Tensor* prob, const framework::Tensor* labels,
+    const bool softLabel, const int ignore_index, const int axis_dim) {
+  const T* prob_data = prob->data<T>();
+  T* loss_data = out->mutable_data<T>(ctx.GetPlace());
+
+  int batch_size = prob->dims()[0];
+  int class_num = prob->dims()[1];
 #ifdef __HIPCC__
-    constexpr int kMaxBlockDim = 256;
+  constexpr int kMaxBlockDim = 256;
 #else
-    constexpr int kMaxBlockDim = 512;
+  constexpr int kMaxBlockDim = 512;
 #endif

-    if (softLabel) {
-      const T* label_data = labels->data<T>();
-      int block = class_num > kMaxBlockDim
-                      ? kMaxBlockDim
-                      : pow(2, static_cast<int>(std::log2(class_num)));
-
-      SoftCrossEntropyKernel<T><<<batch_size, block, 0, ctx.stream()>>>(
-          loss_data, prob_data, label_data, class_num);
-    } else {
-      HardLabelCrossEntropyCUDAFunctorImpl<T> functor(
-          loss_data, prob_data, labels->data(), batch_size, class_num,
-          ignore_index, kMaxBlockDim, ctx.stream());
-      framework::VisitDataType(framework::TransToProtoVarType(labels->dtype()),
-                               functor);
-    }
+  if (softLabel) {
+    const T* label_data = labels->data<T>();
+    int block = class_num > kMaxBlockDim
+                    ? kMaxBlockDim
+                    : pow(2, static_cast<int>(std::log2(class_num)));
+
+    SoftCrossEntropyKernel<T><<<batch_size, block, 0, ctx.stream()>>>(
+        loss_data, prob_data, label_data, class_num);
+  } else {
+    HardLabelCrossEntropyCUDAFunctorImpl<T> functor(
+        loss_data, prob_data, labels->data(), batch_size, class_num,
+        ignore_index, kMaxBlockDim, ctx.stream());
+    framework::VisitDataType(framework::TransToProtoVarType(labels->dtype()),
+                             functor);
   }
-};
+}

 template class CrossEntropyFunctor<platform::CUDADeviceContext, float>;
 template class CrossEntropyFunctor<platform::CUDADeviceContext, double>;
 template class CrossEntropyFunctor<platform::CUDADeviceContext,
                                    platform::float16>;
+
+template class CrossEntropyFunctor<phi::GPUContext, float>;
+template class CrossEntropyFunctor<phi::GPUContext, double>;
+template class CrossEntropyFunctor<phi::GPUContext, platform::float16>;
+
 } // namespace math
 } // namespace operators
 } // namespace paddle
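In the soft-label launch above, one thread block is issued per batch row, and the block size is the largest power of two not exceeding class_num, capped at kMaxBlockDim (256 under HIP, 512 otherwise). A small standalone sketch of that rounding rule (PickBlockDim is an illustrative helper name, not a Paddle function):

#include <cmath>
#include <cstdio>

// Largest power of two <= class_num, capped at max_block_dim; mirrors the
// pow(2, floor(log2(class_num))) expression in the kernel launch above.
int PickBlockDim(int class_num, int max_block_dim) {
  if (class_num > max_block_dim) return max_block_dim;
  return 1 << static_cast<int>(std::log2(static_cast<double>(class_num)));
}

int main() {
  std::printf("%d %d %d\n",
              PickBlockDim(10, 512),     // 8
              PickBlockDim(512, 512),    // 512
              PickBlockDim(4096, 512));  // capped at 512
  return 0;
}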

paddle/fluid/operators/math/softmax.cu

Lines changed: 26 additions & 14 deletions
@@ -29,9 +29,9 @@ using DataLayout = platform::DataLayout;
 template <typename T>
 using CudnnDataType = platform::CudnnDataType<T>;

-template <typename T>
-void SoftmaxCUDNNFunctor<T>::operator()(
-    const platform::CUDADeviceContext& context, const framework::Tensor* X,
+template <typename T, typename DeviceContext>
+void SoftmaxCUDNNFunctor<T, DeviceContext>::operator()(
+    const DeviceContext& context, const framework::Tensor* X,
     framework::Tensor* Y) {
   // ------------------- cudnn descriptors ---------------------
   ScopedTensorDescriptor xDesc;
@@ -69,9 +69,9 @@ void SoftmaxCUDNNFunctor<T>::operator()(
 #endif
 }

-template <typename T>
-void SoftmaxGradCUDNNFunctor<T>::operator()(
-    const platform::CUDADeviceContext& context, const framework::Tensor* Y,
+template <typename T, typename DeviceContext>
+void SoftmaxGradCUDNNFunctor<T, DeviceContext>::operator()(
+    const DeviceContext& context, const framework::Tensor* Y,
     const framework::Tensor* YGrad, framework::Tensor* XGrad) {
   // ------------------- cudnn descriptors ---------------------
   ScopedTensorDescriptor yDesc;
@@ -116,19 +116,31 @@ void SoftmaxGradCUDNNFunctor<T>::operator()(
 #endif
 }

-template class SoftmaxCUDNNFunctor<float>;
-template class SoftmaxCUDNNFunctor<platform::float16>;
-template class SoftmaxGradCUDNNFunctor<float>;
-template class SoftmaxGradCUDNNFunctor<platform::float16>;
+template class SoftmaxCUDNNFunctor<float, platform::CUDADeviceContext>;
+template class SoftmaxCUDNNFunctor<platform::float16,
+                                   platform::CUDADeviceContext>;
+template class SoftmaxGradCUDNNFunctor<float, platform::CUDADeviceContext>;
+template class SoftmaxGradCUDNNFunctor<platform::float16,
+                                       platform::CUDADeviceContext>;
+template class SoftmaxCUDNNFunctor<float, phi::GPUContext>;
+template class SoftmaxCUDNNFunctor<platform::float16, phi::GPUContext>;
+template class SoftmaxGradCUDNNFunctor<float, phi::GPUContext>;
+template class SoftmaxGradCUDNNFunctor<platform::float16, phi::GPUContext>;
 #if CUDNN_VERSION_MIN(8, 1, 0)
-template class SoftmaxCUDNNFunctor<platform::bfloat16>;
-template class SoftmaxGradCUDNNFunctor<platform::bfloat16>;
+template class SoftmaxCUDNNFunctor<platform::bfloat16,
+                                   platform::CUDADeviceContext>;
+template class SoftmaxGradCUDNNFunctor<platform::bfloat16,
+                                       platform::CUDADeviceContext>;
+template class SoftmaxCUDNNFunctor<platform::bfloat16, phi::GPUContext>;
+template class SoftmaxGradCUDNNFunctor<platform::bfloat16, phi::GPUContext>;
 #endif

 // MIOPEN do not support double
 #ifndef PADDLE_WITH_HIP
-template class SoftmaxCUDNNFunctor<double>;
-template class SoftmaxGradCUDNNFunctor<double>;
+template class SoftmaxCUDNNFunctor<double, platform::CUDADeviceContext>;
+template class SoftmaxGradCUDNNFunctor<double, platform::CUDADeviceContext>;
+template class SoftmaxCUDNNFunctor<double, phi::GPUContext>;
+template class SoftmaxGradCUDNNFunctor<double, phi::GPUContext>;
 #endif

 template class SoftmaxFunctor<platform::CUDADeviceContext, platform::float16,

paddle/fluid/operators/math/softmax.h

Lines changed: 6 additions & 7 deletions
@@ -36,19 +36,18 @@ class SoftmaxGradFunctor {
 };

 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-template <typename T>
+template <typename T, typename DeviceContext>
 class SoftmaxCUDNNFunctor {
  public:
-  void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor* X, framework::Tensor* Y);
+  void operator()(const DeviceContext& context, const framework::Tensor* X,
+                  framework::Tensor* Y);
 };

-template <typename T>
+template <typename T, typename DeviceContext>
 class SoftmaxGradCUDNNFunctor {
  public:
-  void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor* Y, const framework::Tensor* y_grad,
-                  framework::Tensor* x_grad);
+  void operator()(const DeviceContext& context, const framework::Tensor* Y,
+                  const framework::Tensor* y_grad, framework::Tensor* x_grad);
 };

 #endif

paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc

Lines changed: 2 additions & 2 deletions
@@ -58,7 +58,7 @@ class SequenceSoftmaxCUDNNKernel : public framework::OpKernel<T> {
           phi::make_ddim({1UL, end_pos - start_pos});
       x_i.Resize(dims_i);
       out_i.Resize(dims_i);
-      math::SoftmaxCUDNNFunctor<T>()(
+      math::SoftmaxCUDNNFunctor<T, platform::CUDADeviceContext>()(
           ctx.template device_context<platform::CUDADeviceContext>(), &x_i,
           &out_i);
     }
@@ -93,7 +93,7 @@ class SequenceSoftmaxGradCUDNNKernel : public framework::OpKernel<T> {
       out_i.Resize(dims_i);
       out_grad_i.Resize(dims_i);
       x_grad_i.Resize(dims_i);
-      math::SoftmaxGradCUDNNFunctor<T>()(
+      math::SoftmaxGradCUDNNFunctor<T, platform::CUDADeviceContext>()(
          ctx.template device_context<platform::CUDADeviceContext>(), &out_i,
          &out_grad_i, &x_grad_i);
     }

paddle/fluid/operators/softmax_with_cross_entropy_op.cc

Lines changed: 2 additions & 7 deletions
@@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/softmax_with_cross_entropy_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
+#include "paddle/phi/kernels/funcs/axis_utils.h"

 namespace paddle {
 namespace operators {
@@ -335,12 +336,6 @@ REGISTER_OPERATOR(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp,
 REGISTER_OPERATOR(softmax_with_cross_entropy_grad,
                   ops::SoftmaxWithCrossEntropyOpGrad,
                   ops::SoftmaxWithCrossEntropyGradInplaceInferer);
-REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy,
-                       ops::SoftmaxWithCrossEntropyKernel<float>,
-                       ops::SoftmaxWithCrossEntropyKernel<double>);
-REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy_grad,
-                       ops::SoftmaxWithCrossEntropyGradKernel<float>,
-                       ops::SoftmaxWithCrossEntropyGradKernel<double>);

 REGISTER_OP_VERSION(softmax_with_cross_entropy)
 #if defined(PADDLE_WITH_ASCEND_CL) || defined(PADDLE_WITH_MLU)
