Skip to content

Commit 079f156

Browse files
titaiwangmsg authored and uschmue committed
Align AvgPool ceil_mode on last value to torch (#16752)
Fix #16203 Prior to this PR, if `ceil_mode` was on, the calculation of a value would divide by the kernel size even if the remaining pixels were fewer than the kernel size, which caused a difference in this operator between ORT and torch. However, this fix only applies to the change in #15597, which only supports AvgPool since opset 19. The older opset versions remain the same, as they use the mlas files. Also, the PR fixes the shape mismatch caused by the sliding window starting from the padding. More detail: onnx/onnx#6650 (And this PR is also validated with the tests added in onnx/onnx#6650)
1 parent 19d6dee commit 079f156

File tree

4 files changed

+49
-9
lines changed

4 files changed

+49
-9
lines changed

onnxruntime/core/providers/cpu/nn/pool_attributes.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -150,30 +150,30 @@ struct PoolAttributes {
150150
case AutoPadType::VALID:
151151
*pad_head = 0;
152152
*pad_tail = 0;
153-
*out_size = ComputeOutputSize(in_size, stride, kernel, 0, dilation);
153+
*out_size = ComputeOutputSize(in_size, stride, kernel, 0, 0, dilation);
154154
break;
155155
case AutoPadType::SAME_LOWER: {
156156
int64_t legacy_target_size = (in_size + stride - 1) / stride;
157157
int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
158158
*pad_head = (pad_needed + 1) / 2;
159159
*pad_tail = pad_needed - *pad_head;
160-
*out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation);
160+
*out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation);
161161
break;
162162
}
163163
case AutoPadType::SAME_UPPER: {
164164
int64_t legacy_target_size = (in_size + stride - 1) / stride;
165165
int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
166166
*pad_head = pad_needed / 2;
167167
*pad_tail = pad_needed - *pad_head;
168-
*out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation);
168+
*out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation);
169169
break;
170170
}
171171
default: {
172172
ORT_THROW("Unsupported AutoPad Type.");
173173
}
174174
}
175175
} else {
176-
*out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head + *pad_tail, dilation);
176+
*out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation);
177177
}
178178
}
179179
#if defined(_MSC_VER) && !defined(__clang__)
@@ -184,13 +184,21 @@ struct PoolAttributes {
184184
int64_t ComputeOutputSize(int64_t in_size,
185185
int64_t stride,
186186
int64_t kernel,
187-
int64_t pad_needed,
187+
int64_t pad_head,
188+
int64_t pad_tail,
188189
int64_t dilation) const {
189-
if (ceil_mode == 0) {
190-
return static_cast<int64_t>(static_cast<float>(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1);
190+
int64_t numerator = in_size + pad_head + pad_tail - dilation * (kernel - 1) - 1;
191+
int64_t out_size = numerator / stride + 1;
192+
193+
if (ceil_mode == 1) {
194+
out_size = static_cast<int64_t>(std::ceil(static_cast<float>(numerator) / stride)) + 1;
195+
// Ensure that the last pooling starts inside the image (at least 1 pixel)
196+
// Reference: https://github.com/onnx/onnx/pull/5741
197+
if ((out_size - 1) * stride >= in_size + pad_head) {
198+
--out_size;
199+
}
191200
}
192-
return static_cast<int64_t>(
193-
std::ceil(static_cast<float>(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1));
201+
return out_size;
194202
}
195203
#if defined(_MSC_VER) && !defined(__clang__)
196204
#pragma warning(pop)

onnxruntime/core/providers/cpu/nn/pool_functors.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ struct AveragePool1DTask final {
406406
for (int64_t ph = 0; ph < pooled_height; ++ph) {
407407
int64_t hstart = ph * stride_h - pads[0];
408408
int64_t hend = hstart + kernel_shape[0] * dilation_h;
409+
hend = std::min(hend, height + pads[1]);
409410
y_d[ph] = 0;
410411
int total_elements = 0;
411412
for (int64_t h = hstart; h < hend; h += dilation_h) {
@@ -461,9 +462,11 @@ struct AveragePool2DTask final {
461462
for (int64_t ph = 0; ph < pooled_height; ++ph) {
462463
int64_t hstart = ph * stride_h - pads[0];
463464
int64_t hend = hstart + kernel_shape[0] * dilation_h;
465+
hend = std::min(hend, height + pads[1]);
464466
for (int64_t pw = 0; pw < pooled_width; ++pw) {
465467
int64_t wstart = pw * stride_w - pads[1];
466468
int64_t wend = wstart + kernel_shape[1] * dilation_w;
469+
wend = std::min(wend, width + pads[3]);
467470
const int64_t pool_index = ph * pooled_width + pw;
468471
y_d[pool_index] = 0;
469472
int total_elements = 0;
@@ -532,12 +535,15 @@ struct AveragePool3DTask {
532535
for (int64_t ph = 0; ph < pooled_height; ++ph) {
533536
int64_t hstart = ph * stride_h - pads[0];
534537
int64_t hend = hstart + kernel_shape[0] * dilation_h;
538+
hend = std::min(hend, height + pads[1]);
535539
for (int64_t pw = 0; pw < pooled_width; ++pw) {
536540
int64_t wstart = pw * stride_w - pads[1];
537541
int64_t wend = wstart + kernel_shape[1] * dilation_w;
542+
wend = std::min(wend, width + pads[3]);
538543
for (int64_t pd = 0; pd < pooled_depth; ++pd) {
539544
int64_t dstart = pd * stride_d - pads[2];
540545
int64_t dend = dstart + kernel_shape[2] * dilation_d;
546+
dend = std::min(dend, depth + pads[5]);
541547
const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
542548
y_d[pool_index] = 0;
543549
int total_elements = 0;

onnxruntime/test/onnx/TestCase.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -961,6 +961,7 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
961961
{"reduce_prod_empty_set", "unknown version", {}},
962962
{"reduce_sum_empty_set", "unknown version", {}},
963963
{"reduce_sum_square_empty_set_expanded", "unknown version", {}},
964+
{"averagepool_3d_dilations_large_count_include_pad_is_1_ceil_mode_is_True", "TODO(titaiwang): enable this in the next ONNX release."},
964965
#ifdef ENABLE_TRAINING_CORE
965966
{"adagrad", "not a registered function/op", {}}, // Op not registered.
966967
{"adagrad_multiple", "not a registered function/op", {}}, // Op not registered.

onnxruntime/test/providers/cpu/nn/pool_op_test.cc

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,31 @@ TEST(PoolTest, AveragePool_19_dilation_2d) {
10301030
kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider});
10311031
}
10321032

1033+
TEST(PoolTest, AveragePool_19_ceil_count_include_pad_1d) {
1034+
// TODO: Unskip when fixed #41968513
1035+
if (DefaultDmlExecutionProvider().get() != nullptr) {
1036+
GTEST_SKIP() << "Skipping because of the following error: MLOperatorAuthorImpl.cpp(2100): The parameter is incorrect.";
1037+
}
1038+
1039+
OpTester test("AveragePool", 19);
1040+
1041+
test.AddAttribute("auto_pad", "");
1042+
test.AddAttribute("strides", std::vector<int64_t>{3});
1043+
test.AddAttribute("pads", vector<int64_t>{3, 3});
1044+
test.AddAttribute("kernel_shape", vector<int64_t>{7});
1045+
test.AddAttribute("ceil_mode", (int64_t)1);
1046+
test.AddAttribute("count_include_pad", (int64_t)1);
1047+
1048+
std::vector<float> x_vals = {2.0903f, 4.6493f, 1.6320f, -3.2051f, 4.6975f, 4.7296f, 3.3653f, -1.5815f, -2.3832f, 0.9628f, -1.5899f, -2.6820f, 5.7529f, 7.7346f, -0.8910f, -2.0151f, 0.1313f, -0.5374f};
1049+
std::vector<int64_t> x_dims = {1, 2, 9};
1050+
std::vector<int64_t> expected_dims = {1, 2, 4};
1051+
std::vector<float> expected_vals = {0.73807144f, 2.5655572f, 0.8032287f, -0.09990001f, 0.34911433f, 1.0389f, 1.4536142f, -0.40353334f};
1052+
1053+
test.AddInput<float>("X", x_dims, x_vals);
1054+
test.AddOutput<float>("Y", expected_dims, expected_vals);
1055+
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider});
1056+
}
1057+
10331058
TEST(PoolTest, GlobalAveragePool) {
10341059
OpTester test("GlobalAveragePool");
10351060

0 commit comments

Comments (0)