Commit 296bfbc

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop
2 parents: d0218fb + 687902f


55 files changed: +1482, -566 lines

paddle/fluid/inference/api/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
@@ -56,8 +56,10 @@ cc_test(test_paddle_inference_api SRCS api_tester.cc DEPS paddle_inference_api)
 
 if(WITH_TESTING)
   if (NOT APPLE AND NOT WIN32)
-    inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared
-        ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
+    if (WITH_GPU)
+      inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared
+          ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
+    endif()
   elseif(WIN32)
     inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps}
         ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 10 additions & 4 deletions
@@ -299,7 +299,9 @@ inference_analysis_api_test(test_analyzer_pyramid_dnn ${PYRAMID_DNN_INSTALL_DIR}
 set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie")
 download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_model.tar.gz" aa59192dd41ed377f9f168e3a1309fa6 "Ernie_data.txt.tar.gz" 5396e63548edad7ca561e7e26a9476d1)
 download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz" 73beea65abda2edb61c1662cd3180c62)
-inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)
+if (WITH_GPU)
+  inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)
+endif()
 inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR} analyzer_ernie_int8_tester.cc)
 
 # Ernie large

@@ -551,7 +553,9 @@ endif()
 # bert, max_len=20, embedding_dim=128
 set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert_emb128")
 download_model_and_data_without_verify(${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz")
-inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc)
+if (WITH_GPU)
+  inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc)
+endif()
 
 # multiple models prediction
 set(MMP_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/multi_model_prediction")

@@ -741,13 +745,15 @@ set_tests_properties(lite_resnet50_test PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_mobilenet_transpose PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_resnet50 PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_ner PROPERTIES TIMEOUT 120)
-set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_ernie_int8 PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_googlenet PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_small_dam PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_transformer PROPERTIES TIMEOUT 120)
-set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_mobilenet_depthwise_conv PROPERTIES TIMEOUT 120)
+if (WITH_GPU)
+  set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120)
+  set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120)
+endif()
 if(WITH_GPU AND TENSORRT_FOUND)
   set_tests_properties(trt_mobilenet_test PROPERTIES TIMEOUT 120)
   if(WITH_MKLDNN)

paddle/fluid/operators/erfinv_op.cc

Lines changed: 1 addition & 14 deletions
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/operators/erfinv_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 
 namespace paddle {
 namespace operators {

@@ -85,16 +85,3 @@ REGISTER_OPERATOR(
     paddle::operators::ErfinvInplaceInferer);
 
 REGISTER_OPERATOR(erfinv_grad, paddle::operators::ErfinvGradOp);
-
-REGISTER_OP_CPU_KERNEL(
-    erfinv,
-    paddle::operators::ErfinvKernel<paddle::platform::CPUDeviceContext, float>,
-    paddle::operators::ErfinvKernel<paddle::platform::CPUDeviceContext,
-                                    double>);
-
-REGISTER_OP_CPU_KERNEL(
-    erfinv_grad,
-    paddle::operators::ErfinvGradKernel<paddle::platform::CPUDeviceContext,
-                                        float>,
-    paddle::operators::ErfinvGradKernel<paddle::platform::CPUDeviceContext,
-                                        double>);

paddle/fluid/operators/erfinv_op.h

Lines changed: 0 additions & 65 deletions
This file was deleted.
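The deleted header (and the REGISTER_OP_CPU_KERNEL registrations removed from erfinv_op.cc above) reflect Paddle's migration of device kernels out of the fluid operators and into the phi library; the same pattern applies to the eye kernels removed below. A minimal sketch of how such a CPU kernel registration typically looks on the phi side, assuming kernels named phi::ErfinvKernel and phi::ErfinvGradKernel exist there (this registration is not part of the diff shown here):

// Hypothetical phi-side registration sketch (not part of this commit):
// the CPU erfinv kernels removed from erfinv_op.cc would be registered
// with the phi kernel registry roughly like this.
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/erfinv_grad_kernel.h"
#include "paddle/phi/kernels/erfinv_kernel.h"

PD_REGISTER_KERNEL(erfinv, CPU, ALL_LAYOUT, phi::ErfinvKernel, float, double) {}
PD_REGISTER_KERNEL(
    erfinv_grad, CPU, ALL_LAYOUT, phi::ErfinvGradKernel, float, double) {}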

paddle/fluid/operators/eye_op.cc

Lines changed: 1 addition & 7 deletions
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/eye_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 
 namespace paddle {
 namespace operators {

@@ -82,14 +82,8 @@ Return an identity tensor whose shape is [num_rows, num_columns].
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-using CPU = paddle::platform::CPUDeviceContext;
 
 REGISTER_OPERATOR(
     eye, ops::EyeOp, ops::EyeOpMaker, ops::EyeOpVarTypeInference,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-
-REGISTER_OP_CPU_KERNEL(eye, ops::EyeKernel<CPU, float>,
-                       ops::EyeKernel<CPU, double>,
-                       ops::EyeKernel<CPU, int64_t>, ops::EyeKernel<CPU, int>,
-                       ops::EyeKernel<CPU, paddle::platform::float16>);

paddle/fluid/operators/eye_op.cu

Lines changed: 0 additions & 24 deletions
This file was deleted.

paddle/fluid/operators/eye_op.h

Lines changed: 0 additions & 61 deletions
This file was deleted.

paddle/fluid/operators/eye_op_npu.cc

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/eye_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
 
 namespace paddle {

paddle/fluid/operators/log_softmax_op.cc

Lines changed: 15 additions & 3 deletions
@@ -31,9 +31,17 @@ class LogSoftmaxOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    return framework::OpKernelType(
-        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
-        ctx.device_context());
+    auto input_data_type =
+        framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
+
+#ifdef PADDLE_WITH_MKLDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
 

@@ -48,6 +56,10 @@ class LogSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
              "The dimension index of Input(x) to perform log_softmax,"
              "default -1 for last dimension")
         .SetDefault(-1);
+    AddAttr<bool>("use_mkldnn",
+                  "(bool, default false) Only used in mkldnn kernel")
+        .SetDefault(false)
+        .AsExtra();
     AddComment(R"DOC(
 LogSoftmax Operator.
 
New file (log_softmax MKLDNN kernel)

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"

namespace paddle {
namespace operators {

using framework::Tensor;

template <typename T>
class LogSoftmaxMKLDNNHandler
    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::logsoftmax_forward> {
 public:
  LogSoftmaxMKLDNNHandler(const dnnl::engine mkldnn_engine,
                          platform::Place cpu_place, const Tensor* x,
                          const int axis)
      : platform::MKLDNNHandlerNoCachingT<T, dnnl::logsoftmax_forward>(
            mkldnn_engine, cpu_place) {
    const auto logsoftmax_tz = phi::vectorize(x->dims());
    const auto md = dnnl::memory::desc(
        logsoftmax_tz, platform::MKLDNNGetDataType<T>(), x->format());

    this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_inference,
                                            md, axis);
  }
};

template <typename T>
class LogSoftmaxMKLDNNKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();

    const Tensor* x = ctx.Input<Tensor>("X");
    Tensor* out = ctx.Output<Tensor>("Out");

    int axis = ctx.Attr<int>("axis");
    axis = axis >= 0 ? axis : x->dims().size() + axis;

    LogSoftmaxMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), x, axis);

    auto src_memory_p = handler.AcquireSrcMemory(x);
    auto dst_memory_p = handler.AcquireDstMemory(out);

    auto logsoftmax_p = handler.AcquireForwardPrimitive();

    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
    logsoftmax_p->execute(astream, {{DNNL_ARG_SRC, *src_memory_p},
                                    {DNNL_ARG_DST, *dst_memory_p}});
    astream.wait();

    out->set_layout(framework::DataLayout::kMKLDNN);
    out->set_format(x->format());
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_KERNEL(log_softmax, MKLDNN, ::paddle::platform::CPUPlace,
                   ops::LogSoftmaxMKLDNNKernel<float>,
                   ops::LogSoftmaxMKLDNNKernel<paddle::platform::bfloat16>);
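Taken together with the use_mkldnn attribute and the MKLDNN branch added to GetExpectedKernelType above, this new kernel is what runs when oneDNN dispatch is enabled on CPU. A minimal, hypothetical usage sketch with the Paddle Inference C++ API (the model directory is a placeholder, not part of this commit):

// Hypothetical usage sketch (not part of this commit): enabling oneDNN in the
// inference config routes CPU ops that have MKLDNN kernels, such as the
// log_softmax kernel registered above, through oneDNN.
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");  // placeholder model directory
  config.DisableGpu();             // run on CPU
  config.EnableMKLDNN();           // turn on oneDNN kernels
  auto predictor = paddle_infer::CreatePredictor(config);
  // ... prepare input tensors and call predictor->Run() as usual ...
  return predictor != nullptr ? 0 : 1;
}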
