From efd410c86f70066d1c7e0888d2a295364e54423e Mon Sep 17 00:00:00 2001
From: huangjiyi <43315610+huangjiyi@users.noreply.github.com>
Date: Mon, 15 May 2023 21:34:33 +0800
Subject: [PATCH] move dequantize kernel to phi (#53739)

* update

* fix bug

* fix output type def
---
 .../operators/mkldnn/dequantize_mkldnn_op.cc  | 96 -------------------
 paddle/phi/kernels/dequantize_kernel.h        | 28 ++++++
 .../phi/kernels/onednn/dequantize_kernel.cc   | 86 +++++++++++++++++
 paddle/phi/ops/compat/dequantize_sig.cc       | 26 +++++
 4 files changed, 140 insertions(+), 96 deletions(-)
 delete mode 100644 paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
 create mode 100644 paddle/phi/kernels/dequantize_kernel.h
 create mode 100644 paddle/phi/kernels/onednn/dequantize_kernel.cc
 create mode 100644 paddle/phi/ops/compat/dequantize_sig.cc

diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
deleted file mode 100644
index 6ed5aefd66d1b..0000000000000
--- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/dequantize_op.h"
-
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/phi/backends/onednn/onednn_reuse.h"
-#include "paddle/phi/core/errors.h"
-
-namespace paddle {
-namespace operators {
-
-using dnnl::memory;
-using dnnl::primitive;
-using dnnl::reorder;
-using dnnl::stream;
-
-template <typename T>
-class DeQuantOpKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<phi::DenseTensor>("Input");
-    const auto quantization_scale = ctx.Attr<float>("Scale");
-    const auto quantization_shift = ctx.Attr<float>("Shift");
-    const bool with_shift = quantization_shift != 0.0f;
-    auto* out = ctx.Output<phi::DenseTensor>("Output");
-
-    PADDLE_ENFORCE(quantization_scale != 0.0f,
-                   phi::errors::InvalidArgument(
-                       "Dequantization scale must be different than 0.0f"));
-
-    PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0,
-                   phi::errors::InvalidArgument(
-                       "Dequantization shift must be lower or equal to ",
-                       "255 and greater or equal to 0, but got %f",
-                       quantization_shift));
-
-    auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
-
-    auto x_tz = phi::vectorize(x->dims());
-    auto x_type = phi::funcs::ToOneDNNDataType(x->dtype());
-    auto out_type = phi::funcs::ToOneDNNDataType(out->dtype());
-
-    dnnl::primitive_attr attrs;
-    static constexpr int32_t mask = 0;  // same shift and scale for whole tensor
-
-    const float reorder_scale = 1. / quantization_scale;
-    attrs.set_output_scales(mask, {reorder_scale});
-
-    if (with_shift) {
-      attrs.set_zero_points(
-          DNNL_ARG_SRC, mask, {static_cast<int32_t>(quantization_shift)});
-    }
-
-    phi::funcs::ReorderOneDNNHandler reorder_handler(
-        x_tz, x->dtype(), x_type, out->dtype(), out_type, dev_ctx.GetEngine());
-
-    auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
-        x->mem_desc(), phi::funcs::to_void_cast<T>(x->data<T>()));
-    auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-        out, x->mem_desc(), dev_ctx.GetPlace());
-
-    auto reorder_p = reorder_handler.AcquireReorder(
-        reorder_dst_memory_p, reorder_src_memory_p, attrs);
-
-    auto& astream = phi::OneDNNContext::tls().get_stream();
-    reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
-    astream.wait();
-
-    out->set_mem_desc(reorder_dst_memory_p->get_desc());
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_KERNEL(dequantize,
-                   MKLDNN,
-                   ::phi::CPUPlace,
-                   ops::DeQuantOpKernel<uint8_t>,
-                   ops::DeQuantOpKernel<int8_t>,
-                   ops::DeQuantOpKernel<paddle::platform::bfloat16>);
diff --git a/paddle/phi/kernels/dequantize_kernel.h b/paddle/phi/kernels/dequantize_kernel.h
new file mode 100644
index 0000000000000..62f76bba897ec
--- /dev/null
+++ b/paddle/phi/kernels/dequantize_kernel.h
@@ -0,0 +1,28 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void DeQuantKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   const float quantization_scale,
+                   const float quantization_shift,
+                   DenseTensor* out);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/onednn/dequantize_kernel.cc b/paddle/phi/kernels/onednn/dequantize_kernel.cc
new file mode 100644
index 0000000000000..9c04ec528f6c5
--- /dev/null
+++ b/paddle/phi/kernels/onednn/dequantize_kernel.cc
@@ -0,0 +1,86 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/dequantize_kernel.h"
+
+#include "paddle/phi/backends/onednn/onednn_context.h"
+#include "paddle/phi/backends/onednn/onednn_helper.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void DeQuantKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   const float quantization_scale,
+                   const float quantization_shift,
+                   DenseTensor* out) {
+  const bool with_shift = quantization_shift != 0.0f;
+
+  PADDLE_ENFORCE(quantization_scale != 0.0f,
+                 phi::errors::InvalidArgument(
+                     "Dequantization scale must be different than 0.0f"));
+
+  PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0,
+                 phi::errors::InvalidArgument(
+                     "Dequantization shift must be lower or equal to ",
+                     "255 and greater or equal to 0, but got %f",
+                     quantization_shift));
+
+  auto x_tz = phi::vectorize(x.dims());
+  auto x_type = phi::funcs::ToOneDNNDataType(x.dtype());
+  auto out_type = phi::funcs::ToOneDNNDataType(out->dtype());
+
+  dnnl::primitive_attr attrs;
+  static constexpr int32_t mask = 0;  // same shift and scale for whole tensor
+
+  const float reorder_scale = 1. / quantization_scale;
+  attrs.set_output_scales(mask, {reorder_scale});
+
+  if (with_shift) {
+    attrs.set_zero_points(
+        DNNL_ARG_SRC, mask, {static_cast<int32_t>(quantization_shift)});
+  }
+
+  phi::funcs::ReorderOneDNNHandler reorder_handler(
+      x_tz, x.dtype(), x_type, out->dtype(), out_type, dev_ctx.GetEngine());
+
+  auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
+      x.mem_desc(), phi::funcs::to_void_cast<T>(x.data<T>()));
+  auto reorder_dst_memory_p =
+      reorder_handler.AcquireDstMemory(out, x.mem_desc(), dev_ctx.GetPlace());
+
+  auto reorder_p = reorder_handler.AcquireReorder(
+      reorder_dst_memory_p, reorder_src_memory_p, attrs);
+
+  auto& astream = phi::OneDNNContext::tls().get_stream();
+  reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
+  astream.wait();
+
+  out->set_mem_desc(reorder_dst_memory_p->get_desc());
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(dequantize,
+                   OneDNN,
+                   ONEDNN,
+                   phi::DeQuantKernel,
+                   uint8_t,
+                   int8_t,
+                   phi::dtype::bfloat16) {
+  kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);
+}
diff --git a/paddle/phi/ops/compat/dequantize_sig.cc b/paddle/phi/ops/compat/dequantize_sig.cc
new file mode 100644
index 0000000000000..18c76541033d8
--- /dev/null
+++ b/paddle/phi/ops/compat/dequantize_sig.cc
@@ -0,0 +1,26 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/core/compat/op_utils.h"
+
+namespace phi {
+
+KernelSignature DeQuantOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "dequantize", {"Input"}, {"Scale", "Shift"}, {"Output"});
+}
+
+}  // namespace phi
+
+PD_REGISTER_ARG_MAPPING_FN(dequantize, phi::DeQuantOpArgumentMapping);