Closed
Description
Please try the module below to reproduce the problem.
Positions of the form (0, 8k - 1), e.g. (0, 7), (0, 15) and (0, 23), always produce an `inf` result.
// Reproducer: adds the two bf16 inputs element-wise, copies the sum into an
// f32 tensor, raises every element to the power 2.0, and reduces along
// dimension 2 with arith.addf.
module {
  func.func @entry(%arg0: tensor<16x32x32xbf16>, %arg1: tensor<16x32x32xbf16>) -> tensor<16x32xf32> attributes {llvm.emit_c_interface} {
    // Step 1: element-wise sum of the inputs, kept in bf16.
    %sum_init = tensor.empty() : tensor<16x32x32xbf16>
    %sum = linalg.add ins(%arg0, %arg1 : tensor<16x32x32xbf16>, tensor<16x32x32xbf16>) outs(%sum_init : tensor<16x32x32xbf16>) -> tensor<16x32x32xbf16>

    // Step 2: copy the bf16 sum into an f32 destination (ins and outs element
    // types differ).
    %sum_f32_init = tensor.empty() : tensor<16x32x32xf32>
    %sum_f32 = linalg.copy ins(%sum : tensor<16x32x32xbf16>) outs(%sum_f32_init : tensor<16x32x32xf32>) -> tensor<16x32x32xf32>

    // Step 3: element-wise power with a splat exponent of 2.0.
    %exponent = arith.constant dense<2.000000e+00> : tensor<16x32x32xf32>
    %squared_init = tensor.empty() : tensor<16x32x32xf32>
    %squared = linalg.powf ins(%sum_f32, %exponent : tensor<16x32x32xf32>, tensor<16x32x32xf32>) outs(%squared_init : tensor<16x32x32xf32>) -> tensor<16x32x32xf32>

    // Step 4: add-reduce over dimension 2, yielding a 16x32 result.
    // NOTE(review): %acc comes from tensor.empty and is never filled before it
    // is used as the reduction's outs operand - confirm whether a
    // zero-initializing linalg.fill was intended here.
    %acc = tensor.empty() : tensor<16x32xf32>
    %reduced = linalg.reduce { arith.addf } ins(%squared : tensor<16x32x32xf32>) outs(%acc : tensor<16x32xf32>) dimensions = [2]
    return %reduced : tensor<16x32xf32>
  }
}
benchgc result
(0, 7): ref: 30.5999756 res: inf abs_diff: inf rel_diff: inf
(0, 8): ref: 46.1308594 res: 554.3241577 abs_diff: 508.1932983 rel_diff: 11.0163412
(0, 9): ref: 54.8237228 res: 224.7723694 abs_diff: 169.9486389 rel_diff: 3.0999105
(0, 10): ref: 62.6171188 res: 266.4804688 abs_diff: 203.8633423 rel_diff: 3.2557125
(0, 11): ref: 49.8187370 res: 331.9580688 abs_diff: 282.1393433 rel_diff: 5.6633177
(0, 12): ref: 91.7294464 res: 290.1029053 abs_diff: 198.3734589 rel_diff: 2.1625929
(0, 13): ref: 74.9132080 res: 92.7775345 abs_diff: 17.8643265 rel_diff: 0.2384670
(0, 14): ref: 68.6141815 res: 311.5630493 abs_diff: 242.9488678 rel_diff: 3.5407968
(0, 15): ref: 64.4217758 res: inf abs_diff: inf rel_diff: inf
(0, 23): ref: 47.1075172 res: inf abs_diff: inf rel_diff: inf