From efd410c86f70066d1c7e0888d2a295364e54423e Mon Sep 17 00:00:00 2001
From: huangjiyi <43315610+huangjiyi@users.noreply.github.com>
Date: Mon, 15 May 2023 21:34:33 +0800
Subject: [PATCH] move dequantize kernel to phi (#53739)

* update

* fix bug

* fix output type def
---
 .../operators/mkldnn/dequantize_mkldnn_op.cc  | 96 -------------------
 paddle/phi/kernels/dequantize_kernel.h        | 28 ++++++
 .../phi/kernels/onednn/dequantize_kernel.cc   | 86 +++++++++++++++++
 paddle/phi/ops/compat/dequantize_sig.cc       | 26 +++++
 4 files changed, 140 insertions(+), 96 deletions(-)
 delete mode 100644 paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
 create mode 100644 paddle/phi/kernels/dequantize_kernel.h
 create mode 100644 paddle/phi/kernels/onednn/dequantize_kernel.cc
 create mode 100644 paddle/phi/ops/compat/dequantize_sig.cc

diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
deleted file mode 100644
index 6ed5aefd66d1b..0000000000000
--- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/dequantize_op.h"
-
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/phi/backends/onednn/onednn_reuse.h"
-#include "paddle/phi/core/errors.h"
-
-namespace paddle {
-namespace operators {
-
-using dnnl::memory;
-using dnnl::primitive;
-using dnnl::reorder;
-using dnnl::stream;
-
-template <typename T>
-class DeQuantOpKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<phi::DenseTensor>("Input");
-    const auto quantization_scale = ctx.Attr<float>("Scale");
-    const auto quantization_shift = ctx.Attr<float>("Shift");
-    const bool with_shift = quantization_shift != 0.0f;
-    auto* out = ctx.Output<phi::DenseTensor>("Output");
-
-    PADDLE_ENFORCE(quantization_scale != 0.0f,
-                   phi::errors::InvalidArgument(
-                       "Dequantization scale must be different than 0.0f"));
-
-    PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0,
-                   phi::errors::InvalidArgument(
-                       "Dequantization shift must be lower or equal to ",
-                       "255 and greater or equal to 0, but got %f",
-                       quantization_shift));
-
-    auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
-
-    auto x_tz = phi::vectorize(x->dims());
-    auto x_type = phi::funcs::ToOneDNNDataType(x->dtype());
-    auto out_type = phi::funcs::ToOneDNNDataType(out->dtype());
-
-    dnnl::primitive_attr attrs;
-    static constexpr int32_t mask = 0;  // same shift and scale for whole tensor
-
-    const float reorder_scale = 1. / quantization_scale;
-    attrs.set_output_scales(mask, {reorder_scale});
-
-    if (with_shift) {
-      attrs.set_zero_points(
-          DNNL_ARG_SRC, mask, {static_cast<int32_t>(quantization_shift)});
-    }
-
-    phi::funcs::ReorderOneDNNHandler reorder_handler(
-        x_tz, x->dtype(), x_type, out->dtype(), out_type, dev_ctx.GetEngine());
-
-    auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
-        x->mem_desc(), phi::funcs::to_void_cast<T>(x->data<T>()));
-    auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-        out, x->mem_desc(), dev_ctx.GetPlace());
-
-    auto reorder_p = reorder_handler.AcquireReorder(
-        reorder_dst_memory_p, reorder_src_memory_p, attrs);
-
-    auto& astream = phi::OneDNNContext::tls().get_stream();
-    reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
-    astream.wait();
-
-    out->set_mem_desc(reorder_dst_memory_p->get_desc());
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_KERNEL(dequantize,
-                   MKLDNN,
-                   ::phi::CPUPlace,
-                   ops::DeQuantOpKernel<uint8_t>,
-                   ops::DeQuantOpKernel<int8_t>,
-                   ops::DeQuantOpKernel<paddle::platform::bfloat16>);
diff --git a/paddle/phi/kernels/dequantize_kernel.h b/paddle/phi/kernels/dequantize_kernel.h
new file mode 100644
index 0000000000000..62f76bba897ec
--- /dev/null
+++ b/paddle/phi/kernels/dequantize_kernel.h
@@ -0,0 +1,28 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void DeQuantKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   const float quantization_scale,
+                   const float quantization_shift,
+                   DenseTensor* out);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/onednn/dequantize_kernel.cc b/paddle/phi/kernels/onednn/dequantize_kernel.cc
new file mode 100644
index 0000000000000..9c04ec528f6c5
--- /dev/null
+++ b/paddle/phi/kernels/onednn/dequantize_kernel.cc
@@ -0,0 +1,86 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/dequantize_kernel.h"
+
+#include "paddle/phi/backends/onednn/onednn_context.h"
+#include "paddle/phi/backends/onednn/onednn_helper.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void DeQuantKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   const float quantization_scale,
+                   const float quantization_shift,
+                   DenseTensor* out) {
+  const bool with_shift = quantization_shift != 0.0f;
+
+  PADDLE_ENFORCE(quantization_scale != 0.0f,
+                 phi::errors::InvalidArgument(
+                     "Dequantization scale must be different than 0.0f"));
+
+  PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0,
+                 phi::errors::InvalidArgument(
+                     "Dequantization shift must be lower or equal to ",
+                     "255 and greater or equal to 0, but got %f",
+                     quantization_shift));
+
+  auto x_tz = phi::vectorize(x.dims());
+  auto x_type = phi::funcs::ToOneDNNDataType(x.dtype());
+  auto out_type = phi::funcs::ToOneDNNDataType(out->dtype());
+
+  dnnl::primitive_attr attrs;
+  static constexpr int32_t mask = 0;  // same shift and scale for whole tensor
+
+  const float reorder_scale = 1. / quantization_scale;
+  attrs.set_output_scales(mask, {reorder_scale});
+
+  if (with_shift) {
+    attrs.set_zero_points(
+        DNNL_ARG_SRC, mask, {static_cast<int32_t>(quantization_shift)});
+  }
+
+  phi::funcs::ReorderOneDNNHandler reorder_handler(
+      x_tz, x.dtype(), x_type, out->dtype(), out_type, dev_ctx.GetEngine());
+
+  auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
+      x.mem_desc(), phi::funcs::to_void_cast<T>(x.data<T>()));
+  auto reorder_dst_memory_p =
+      reorder_handler.AcquireDstMemory(out, x.mem_desc(), dev_ctx.GetPlace());
+
+  auto reorder_p = reorder_handler.AcquireReorder(
+      reorder_dst_memory_p, reorder_src_memory_p, attrs);
+
+  auto& astream = phi::OneDNNContext::tls().get_stream();
+  reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
+  astream.wait();
+
+  out->set_mem_desc(reorder_dst_memory_p->get_desc());
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(dequantize,
+                   OneDNN,
+                   ONEDNN,
+                   phi::DeQuantKernel,
+                   uint8_t,
+                   int8_t,
+                   phi::dtype::bfloat16) {
+  kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);
+}
diff --git a/paddle/phi/ops/compat/dequantize_sig.cc b/paddle/phi/ops/compat/dequantize_sig.cc
new file mode 100644
index 0000000000000..18c76541033d8
--- /dev/null
+++ b/paddle/phi/ops/compat/dequantize_sig.cc
@@ -0,0 +1,26 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/core/compat/op_utils.h"
+
+namespace phi {
+
+KernelSignature DeQuantOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "dequantize", {"Input"}, {"Scale", "Shift"}, {"Output"});
+}
+
+}  // namespace phi
+
+PD_REGISTER_ARG_MAPPING_FN(dequantize, phi::DeQuantOpArgumentMapping);