Skip to content

Commit 1def310

Browse files
authored
[Precision Depth Alignment] align paddle.nn.functional.interpolate forward (#76246)
* add antialias * fix acc * impl antialias * fix common * use new kernel * fix * fix * use AreaPixelComputeScale for cpu grad * fix large tensor issue * fix cov and test * fix AreaPixelComputeScale * unified PreCalculatorForLinearInterpInputIndex * fix test * disable test_weight_decay * fix
1 parent f9fa267 commit 1def310

File tree

17 files changed

+1399
-328
lines changed

17 files changed

+1399
-328
lines changed

paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,12 @@ bool BilinearInterpOpInferSymbolicShape(
772772
return BicubicInterpOpInferSymbolicShape(op, infer_context);
773773
}
774774

775+
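// TODO(zrr1999): add test, then re-enable this definition. NOTE(review): multiary_infer_sym.h already has OP_DECLARE_INFER_SYMBOLIC_SHAPE(InterpAntialias), so that declaration currently has no definition in this file - confirm nothing references it yet.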
776+
// bool InterpAntialiasOpInferSymbolicShape(
777+
// pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) {
778+
// return BicubicInterpOpInferSymbolicShape(op, infer_context);
779+
// }
780+
775781
bool BoxCoderOpInferSymbolicShape(
776782
pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) {
777783
const symbol::ShapeOrDataDimExprs &prior_box_shape_or_data =

paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ OP_DECLARE_INFER_SYMBOLIC_SHAPE(BatchNorm_)
3333
OP_DECLARE_INFER_SYMBOLIC_SHAPE(BicubicInterp)
3434
OP_DECLARE_INFER_SYMBOLIC_SHAPE(Bilinear)
3535
OP_DECLARE_INFER_SYMBOLIC_SHAPE(BilinearInterp)
36+
OP_DECLARE_INFER_SYMBOLIC_SHAPE(InterpAntialias)
3637
OP_DECLARE_INFER_SYMBOLIC_SHAPE(BoxCoder)
3738
OP_DECLARE_INFER_SYMBOLIC_SHAPE(CheckFiniteAndUnscale)
3839
OP_DECLARE_INFER_SYMBOLIC_SHAPE(CheckFiniteAndUnscale_)

paddle/phi/kernels/cpu/interpolate_grad_kernel.cc

Lines changed: 20 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,8 @@ static void BicubicInterpolationGrad(const DenseTensor& output_grad,
206206
std::array<MT, 4> x_coeffs;
207207
std::array<MT, 4> y_coeffs;
208208

209-
funcs::get_cubic_upsample_coefficients<MT>(x_coeffs.data(), x_t);
210-
funcs::get_cubic_upsample_coefficients<MT>(y_coeffs.data(), y_t);
209+
funcs::GetCubicUpsampleCoefficients<MT>(x_coeffs.data(), x_t);
210+
funcs::GetCubicUpsampleCoefficients<MT>(y_coeffs.data(), y_t);
211211

212212
for (int i = 0; i < n; i++) { // loop for batches
213213
for (int j = 0; j < c; j++) { // loop for channels
@@ -592,27 +592,18 @@ static void Interpolate2DCPUBwd(
592592
return;
593593
}
594594

595-
float ratio_h = 0.f;
596-
float ratio_w = 0.f;
597-
if (out_h > 1) {
598-
float new_scale_h = 0.f;
599-
new_scale_h = static_cast<float>(
600-
(scale_h > 0) ? (1.f / scale_h)
601-
: static_cast<float>(in_h) / static_cast<float>(out_h));
602-
ratio_h =
603-
static_cast<float>(align_corners ? (static_cast<float>(in_h) - 1.f) /
604-
(static_cast<float>(out_h) - 1.f)
605-
: new_scale_h);
595+
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
596+
double ratio_h =
597+
funcs::AreaPixelComputeScale<float>(in_h, out_h, align_corners, scale_h);
598+
double ratio_w =
599+
funcs::AreaPixelComputeScale<float>(in_w, out_w, align_corners, scale_w);
600+
601+
// TODO(zrr1999): to align xpu
602+
if (out_h <= 1) {
603+
ratio_h = 0;
606604
}
607-
if (out_w > 1) {
608-
float new_scale_w = 0.f;
609-
new_scale_w = static_cast<float>(
610-
(scale_w > 0) ? (1.f / scale_w)
611-
: static_cast<float>(in_w) / static_cast<float>(out_w));
612-
ratio_w =
613-
static_cast<float>(align_corners ? (static_cast<float>(in_w) - 1.f) /
614-
(static_cast<float>(out_w) - 1.f)
615-
: new_scale_w);
605+
if (out_w <= 1) {
606+
ratio_w = 0;
616607
}
617608

618609
if ("bilinear" == interp_method) {
@@ -778,38 +769,13 @@ static void Interpolate3DCPUBwd(
778769
return;
779770
}
780771

781-
float ratio_d = 0.f;
782-
float ratio_h = 0.f;
783-
float ratio_w = 0.f;
784-
if (out_d > 1) {
785-
float new_scale_d = 0.f;
786-
new_scale_d = static_cast<float>(
787-
(scale_d > 0) ? (1.f / scale_d)
788-
: static_cast<float>(in_d) / static_cast<float>(out_d));
789-
ratio_d =
790-
static_cast<float>(align_corners ? (static_cast<float>(in_d) - 1.f) /
791-
(static_cast<float>(out_d) - 1.f)
792-
: new_scale_d);
793-
}
794-
if (out_h > 1) {
795-
float new_scale_h = 0.f;
796-
new_scale_h = static_cast<float>(
797-
(scale_h > 0) ? (1.f / scale_h)
798-
: static_cast<float>(in_h) / static_cast<float>(out_h));
799-
ratio_h = (align_corners) ? static_cast<float>(in_h - 1) /
800-
(static_cast<float>(out_h) - 1)
801-
: static_cast<float>(new_scale_h);
802-
}
803-
if (out_w > 1) {
804-
float new_scale_w = 0.f;
805-
new_scale_w = static_cast<float>(
806-
(scale_w > 0) ? (1.f / scale_w)
807-
: static_cast<float>(in_w) / static_cast<float>(out_w));
808-
ratio_w =
809-
static_cast<float>(align_corners ? (static_cast<float>(in_w) - 1.f) /
810-
(static_cast<float>(out_w) - 1.f)
811-
: new_scale_w);
812-
}
772+
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
773+
double ratio_d =
774+
funcs::AreaPixelComputeScale<float>(in_d, out_d, align_corners, scale_d);
775+
double ratio_h =
776+
funcs::AreaPixelComputeScale<float>(in_h, out_h, align_corners, scale_h);
777+
double ratio_w =
778+
funcs::AreaPixelComputeScale<float>(in_w, out_w, align_corners, scale_w);
813779

814780
if ("trilinear" == interp_method) {
815781
TrilinearInterpolationGrad<T>(output_grad,

paddle/phi/kernels/cpu/interpolate_kernel.cc

Lines changed: 17 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ namespace phi {
2626
// Returns the weighted sum x0*c0 + x1*c1 + x2*c2 + x3*c3, where the four
// weights c0..c3 are filled in for `t` by the cubic-coefficient helper in
// `funcs`.
// The two helper calls below are the removed ("29-") and added ("29+") sides
// of one diff line; they differ only in the helper's name.
template <typename T>
2727
static inline T cubic_interp(T x0, T x1, T x2, T x3, T t) {
2828
std::array<T, 4> coeffs;
29-
funcs::get_cubic_upsample_coefficients<T>(coeffs.data(), t);
29+
funcs::GetCubicUpsampleCoefficients<T>(coeffs.data(), t);
3030

3131
return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3];
3232
}
@@ -754,25 +754,17 @@ static void Interpolate2DCPUFwd(
754754
return;
755755
}
756756

757-
float ratio_h = 0.f;
758-
float ratio_w = 0.f;
759-
if (out_h > 1) {
760-
float new_scale_h = 0.f;
761-
new_scale_h = (scale_h > 0)
762-
? static_cast<float>(1. / scale_h)
763-
: static_cast<float>(in_h) / static_cast<float>(out_h);
764-
ratio_h = (align_corners)
765-
? static_cast<float>(in_h - 1) / static_cast<float>(out_h - 1)
766-
: static_cast<float>(new_scale_h);
757+
float ratio_h =
758+
funcs::AreaPixelComputeScale<float>(in_h, out_h, align_corners, scale_h);
759+
float ratio_w =
760+
funcs::AreaPixelComputeScale<float>(in_w, out_w, align_corners, scale_w);
761+
762+
// TODO(zrr1999): to align xpu
763+
if (out_h <= 1) {
764+
ratio_h = 0;
767765
}
768-
if (out_w > 1) {
769-
float new_scale_w = 0.f;
770-
new_scale_w = (scale_w > 0)
771-
? static_cast<float>(1. / scale_w)
772-
: static_cast<float>(in_w) / static_cast<float>(out_w);
773-
ratio_w = (align_corners)
774-
? static_cast<float>(in_w - 1) / static_cast<float>(out_w - 1)
775-
: static_cast<float>(new_scale_w);
766+
if (out_w <= 1) {
767+
ratio_w = 0;
776768
}
777769

778770
if ("bilinear" == interp_method) {
@@ -953,36 +945,12 @@ static void Interpolate3DCPUFwd(
953945
return;
954946
}
955947

956-
float ratio_d = 0.f;
957-
float ratio_h = 0.f;
958-
float ratio_w = 0.f;
959-
if (out_d > 1) {
960-
float new_scale_d = 0.f;
961-
new_scale_d = (scale_d > 0)
962-
? static_cast<float>(1. / scale_d)
963-
: static_cast<float>(in_d) / static_cast<float>(out_d);
964-
ratio_d = (align_corners)
965-
? static_cast<float>(in_d - 1) / static_cast<float>(out_d - 1)
966-
: static_cast<float>(new_scale_d);
967-
}
968-
if (out_h > 1) {
969-
float new_scale_h = 0.f;
970-
new_scale_h = (scale_h > 0)
971-
? static_cast<float>(1. / scale_h)
972-
: static_cast<float>(in_h) / static_cast<float>(out_h);
973-
ratio_h = (align_corners)
974-
? static_cast<float>(in_h - 1) / static_cast<float>(out_h - 1)
975-
: static_cast<float>(new_scale_h);
976-
}
977-
if (out_w > 1) {
978-
float new_scale_w = 0.f;
979-
new_scale_w = (scale_w > 0)
980-
? static_cast<float>(1. / scale_w)
981-
: static_cast<float>(in_w) / static_cast<float>(out_w);
982-
ratio_w = (align_corners)
983-
? static_cast<float>(in_w - 1) / static_cast<float>(out_w - 1)
984-
: static_cast<float>(new_scale_w);
985-
}
948+
float ratio_d =
949+
funcs::AreaPixelComputeScale<float>(in_d, out_d, align_corners, scale_d);
950+
float ratio_h =
951+
funcs::AreaPixelComputeScale<float>(in_h, out_h, align_corners, scale_h);
952+
float ratio_w =
953+
funcs::AreaPixelComputeScale<float>(in_w, out_w, align_corners, scale_w);
986954

987955
if ("trilinear" == interp_method) {
988956
TrilinearInterpolation<T>(x,

paddle/phi/kernels/funcs/interpolate_function.h

Lines changed: 79 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,30 +26,60 @@
2626
namespace phi {
2727
namespace funcs {
2828

29+
// Computes the ratio used to map an output coordinate back onto the input
// axis.
//
//   align_corners == true : (input_size - 1) / (output_size - 1) when
//                           output_size > 1, otherwise 0.
//   align_corners == false: 1 / scale when scale > 0; otherwise
//                           input_size / output_size when output_size > 0;
//                           otherwise 0.
//
// The integer operands are converted to T before dividing, so the quotient
// is never truncated.
template <typename T>
inline T AreaPixelComputeScale(int64_t input_size,
                               int64_t output_size,
                               bool align_corners,
                               const T scale) {
  if (align_corners) {
    // A single output sample has no span to stretch over.
    return output_size > 1
               ? static_cast<T>(input_size - 1) / (output_size - 1)
               : static_cast<T>(0);
  }
  // An explicit positive scale factor wins over the size-derived ratio.
  if (scale > 0.) {
    return static_cast<T>(1.0) / scale;
  }
  return output_size > 0 ? static_cast<T>(input_size) / output_size
                         : static_cast<T>(0);
}
48+
49+
template <typename T>
50+
HOSTDEVICE inline T AreaPixelComputeSourceIndex(T scale,
51+
int64_t dst_index,
52+
bool align_corners,
53+
T align_type_value = 0.5) {
54+
if (align_corners) {
55+
return scale * dst_index;
56+
} else {
57+
return scale * (dst_index + align_type_value) - align_type_value;
58+
}
59+
}
60+
2961
// Evaluates the polynomial ((A + 2) * x - (A + 3)) * x * x + 1 for the given
// `x` and coefficient `A`.
// Two versions of the return statement appear below: the removed lines
// ("31-"/"32-") wrap every integer constant in static_cast<T>, the added line
// ("63+") uses plain int literals. Both spell the same polynomial.
template <typename T>
3062
HOSTDEVICE inline T CubicConvolution1(T x, T A) {
31-
return ((A + static_cast<T>(2)) * x - (A + static_cast<T>(3))) * x * x +
32-
static_cast<T>(1);
63+
return ((A + 2) * x - (A + 3)) * x * x + 1;
3364
}
3465

3566
// Evaluates the polynomial ((A * x - 5 * A) * x + 8 * A) * x - 4 * A for the
// given `x` and coefficient `A`.
// Two versions of the return statement appear below: the removed lines
// ("37-"/"38-") wrap every integer constant in static_cast<T>, the added line
// ("68+") uses plain int literals. Both spell the same polynomial.
template <typename T>
3667
HOSTDEVICE inline T CubicConvolution2(T x, T A) {
37-
return ((A * x - static_cast<T>(5) * A) * x + static_cast<T>(8) * A) * x -
38-
static_cast<T>(4) * A;
68+
return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A;
3969
}
4070

4171
// Fills coeffs[0..3] with four cubic weights for the offset `t`, using the
// fixed coefficient A = -0.75:
//   coeffs[0] = CubicConvolution2(t + 1)
//   coeffs[1] = CubicConvolution1(t)
//   coeffs[2] = CubicConvolution1(1 - t)
//   coeffs[3] = CubicConvolution2((1 - t) + 1)
// Lines preceded by an "NN-" marker are the removed side of the diff (old
// name get_cubic_upsample_coefficients, constants wrapped in static_cast<T>);
// lines preceded by an "NN+" marker are the added side (new name, bare
// `1.0` literals).
// NOTE(review): on the added lines `1.0` is a double literal, so when T is
// float the sums `x1 + 1.0`, `1.0 - t` and `x2 + 1.0` are evaluated in double
// and then converted back to T - confirm this is intended for device code.
template <typename T>
42-
HOSTDEVICE inline void get_cubic_upsample_coefficients(T coeffs[4], T t) {
72+
HOSTDEVICE inline void GetCubicUpsampleCoefficients(T coeffs[4], T t) {
4373
T A = static_cast<T>(-0.75);
4474

4575
T x1 = t;
46-
coeffs[0] = CubicConvolution2<T>(x1 + static_cast<T>(1.0), A);
76+
coeffs[0] = CubicConvolution2<T>(x1 + 1.0, A);
4777
coeffs[1] = CubicConvolution1<T>(x1, A);
4878

4979
// opposite coefficients
50-
T x2 = static_cast<T>(1.0) - t;
80+
T x2 = 1.0 - t;
5181
coeffs[2] = CubicConvolution1<T>(x2, A);
52-
coeffs[3] = CubicConvolution2<T>(x2 + static_cast<T>(1.0), A);
82+
coeffs[3] = CubicConvolution2<T>(x2 + 1.0, A);
5383
}
5484

5585
inline void ExtractNCDWH(const DDim& dims,
@@ -197,5 +227,46 @@ struct FastDivModForInterpolate {
197227

198228
#endif
199229

230+
namespace antialias {
231+
232+
// taken from
233+
// https://github.com/pytorch/pytorch/blob/a527e816935957a164d74dd7c5069310b2857695/
234+
// aten/src/ATen/native/cuda/UpSample.cuh#L207-L305
235+
struct BilinearFilterFunctor {
236+
template <typename T>
237+
HOSTDEVICE T operator()(T x) const {
238+
if (x < 0) {
239+
x = -x;
240+
}
241+
if (x < 1) {
242+
return 1 - x;
243+
}
244+
return 0;
245+
}
246+
247+
static constexpr int size = 2;
248+
};
249+
struct BicubicFilterFunctor {
250+
template <typename T>
251+
HOSTDEVICE T operator()(T x) const {
252+
// https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
253+
const T a = -0.5;
254+
if (x < 0) {
255+
x = -x;
256+
}
257+
if (x < 1) {
258+
return ((a + 2) * x - (a + 3)) * x * x + 1;
259+
}
260+
if (x < 2) {
261+
return (((x - 5) * x + 8) * x - 4) * a;
262+
}
263+
return 0;
264+
}
265+
266+
static constexpr int size = 4;
267+
};
268+
269+
} // namespace antialias
270+
200271
} // namespace funcs
201272
} // namespace phi

0 commit comments

Comments
 (0)