Skip to content

Commit

Permalink
move dequantize kernel to phi (PaddlePaddle#53739)
Browse files Browse the repository at this point in the history
* update

* fix bug

* fix output type def
  • Loading branch information
huangjiyi authored May 15, 2023
1 parent 56fded1 commit efd410c
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 96 deletions.
96 changes: 0 additions & 96 deletions paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc

This file was deleted.

28 changes: 28 additions & 0 deletions paddle/phi/kernels/dequantize_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/phi/core/dense_tensor.h"

namespace phi {

template <typename T, typename Context>
void DeQuantKernel(const Context& dev_ctx,
const DenseTensor& x,
const float quantization_scale,
const float quantization_shift,
DenseTensor* out);

} // namespace phi
86 changes: 86 additions & 0 deletions paddle/phi/kernels/onednn/dequantize_kernel.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/dequantize_kernel.h"

#include "paddle/phi/backends/onednn/onednn_context.h"
#include "paddle/phi/backends/onednn/onednn_helper.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void DeQuantKernel(const Context& dev_ctx,
const DenseTensor& x,
const float quantization_scale,
const float quantization_shift,
DenseTensor* out) {
const bool with_shift = quantization_shift != 0.0f;

PADDLE_ENFORCE(quantization_scale != 0.0f,
phi::errors::InvalidArgument(
"Dequantization scale must be different than 0.0f"));

PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0,
phi::errors::InvalidArgument(
"Dequantization shift must be lower or equal to ",
"255 and greater or equal to 0, but got %f",
quantization_shift));

auto x_tz = phi::vectorize<int64_t>(x.dims());
auto x_type = phi::funcs::ToOneDNNDataType(x.dtype());
auto out_type = phi::funcs::ToOneDNNDataType(out->dtype());

dnnl::primitive_attr attrs;
static constexpr int32_t mask = 0; // same shift and scale for whole tensor

const float reorder_scale = 1. / quantization_scale;
attrs.set_output_scales(mask, {reorder_scale});

if (with_shift) {
attrs.set_zero_points(
DNNL_ARG_SRC, mask, {static_cast<int32_t>(quantization_shift)});
}

phi::funcs::ReorderOneDNNHandler reorder_handler(
x_tz, x.dtype(), x_type, out->dtype(), out_type, dev_ctx.GetEngine());

auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
x.mem_desc(), phi::funcs::to_void_cast(x.data<T>()));
auto reorder_dst_memory_p =
reorder_handler.AcquireDstMemory(out, x.mem_desc(), dev_ctx.GetPlace());

auto reorder_p = reorder_handler.AcquireReorder(
reorder_dst_memory_p, reorder_src_memory_p, attrs);

auto& astream = phi::OneDNNContext::tls().get_stream();
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();

out->set_mem_desc(reorder_dst_memory_p->get_desc());
}

} // namespace phi

PD_REGISTER_KERNEL(dequantize,
OneDNN,
ONEDNN,
phi::DeQuantKernel,
uint8_t,
int8_t,
phi::dtype::bfloat16) {
kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);
}
26 changes: 26 additions & 0 deletions paddle/phi/ops/compat/dequantize_sig.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/core/compat/op_utils.h"

namespace phi {

KernelSignature DeQuantOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature(
"dequantize", {"Input"}, {"Scale", "Shift"}, {"Output"});
}

} // namespace phi

PD_REGISTER_ARG_MAPPING_FN(dequantize, phi::DeQuantOpArgumentMapping);

0 comments on commit efd410c

Please sign in to comment.