-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Flang] Add Maxloc to fir simplify intrinsics pass #75463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-fir-hlfir Author: David Green (davemgreen) Changes: This takes the code from D144103 and extends it to maxloc, allowing the simplifyMinMaxlocReduction method to work with both the min and max intrinsics by switching the comparison condition and the limit/initial value. Patch is 27.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75463.diff 2 Files Affected:
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index 8ecf7fb44f15d0..c19d7bd2d7200f 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -99,8 +99,8 @@ class SimplifyIntrinsicsPass
void simplifyLogicalDim1Reduction(fir::CallOp call,
const fir::KindMapping &kindMap,
GenReductionBodyTy genBodyFunc);
- void simplifyMinlocReduction(fir::CallOp call,
- const fir::KindMapping &kindMap);
+ void simplifyMinMaxlocReduction(fir::CallOp call,
+ const fir::KindMapping &kindMap, bool isMax);
void simplifyReductionBody(fir::CallOp call, const fir::KindMapping &kindMap,
GenReductionBodyTy genBodyFunc,
fir::FirOpBuilder &builder,
@@ -357,12 +357,11 @@ using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
mlir::Value, llvm::SmallVector<mlir::Value, Fortran::common::maxRank> &)>;
-static void
-genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
- InitValGeneratorTy initVal,
- MinlocBodyOpGeneratorTy genBody, unsigned rank,
- mlir::Type elementType, mlir::Location loc, bool hasMask,
- mlir::Type maskElemType, mlir::Value resultArr) {
+static void genMinMaxlocReductionLoop(
+ fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
+ InitValGeneratorTy initVal, MinlocBodyOpGeneratorTy genBody, unsigned rank,
+ mlir::Type elementType, mlir::Location loc, bool hasMask,
+ mlir::Type maskElemType, mlir::Value resultArr) {
mlir::IndexType idxTy = builder.getIndexType();
@@ -751,20 +750,23 @@ static mlir::FunctionType genRuntimeMinlocType(fir::FirOpBuilder &builder,
{boxRefType, boxType, boxType}, {});
}
-static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
- mlir::func::FuncOp &funcOp, unsigned rank,
- int maskRank, mlir::Type elementType,
- mlir::Type maskElemType,
- mlir::Type resultElemTy) {
- auto init = [](fir::FirOpBuilder builder, mlir::Location loc,
- mlir::Type elementType) {
+static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
+ mlir::func::FuncOp &funcOp, bool isMax,
+ unsigned rank, int maskRank,
+ mlir::Type elementType,
+ mlir::Type maskElemType,
+ mlir::Type resultElemTy) {
+ auto init = [isMax](fir::FirOpBuilder builder, mlir::Location loc,
+ mlir::Type elementType) {
if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {
const llvm::fltSemantics &sem = ty.getFloatSemantics();
return builder.createRealConstant(
- loc, elementType, llvm::APFloat::getLargest(sem, /*Negative=*/false));
+ loc, elementType, llvm::APFloat::getLargest(sem, /*Negative=*/isMax));
}
unsigned bits = elementType.getIntOrFloatBitWidth();
- int64_t maxInt = llvm::APInt::getSignedMaxValue(bits).getSExtValue();
+ int64_t maxInt = (isMax ? llvm::APInt::getSignedMinValue(bits)
+ : llvm::APInt::getSignedMaxValue(bits))
+ .getSExtValue();
return builder.createIntegerConstant(loc, elementType, maxInt);
};
@@ -797,18 +799,24 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
}
auto genBodyOp =
- [&rank, &resultArr](
- fir::FirOpBuilder builder, mlir::Location loc, mlir::Type elementType,
- mlir::Value elem1, mlir::Value elem2,
- llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices)
+ [&rank, &resultArr,
+ isMax](fir::FirOpBuilder builder, mlir::Location loc,
+ mlir::Type elementType, mlir::Value elem1, mlir::Value elem2,
+ llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices)
-> mlir::Value {
mlir::Value cmp;
if (elementType.isa<mlir::FloatType>()) {
cmp = builder.create<mlir::arith::CmpFOp>(
- loc, mlir::arith::CmpFPredicate::OLT, elem1, elem2);
+ loc,
+ isMax ? mlir::arith::CmpFPredicate::OGT
+ : mlir::arith::CmpFPredicate::OLT,
+ elem1, elem2);
} else if (elementType.isa<mlir::IntegerType>()) {
cmp = builder.create<mlir::arith::CmpIOp>(
- loc, mlir::arith::CmpIPredicate::slt, elem1, elem2);
+ loc,
+ isMax ? mlir::arith::CmpIPredicate::sgt
+ : mlir::arith::CmpIPredicate::slt,
+ elem1, elem2);
} else {
llvm_unreachable("unsupported type");
}
@@ -875,9 +883,8 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
// bit of a hack - maskRank is set to -1 for absent mask arg, so don't
// generate high level mask or element by element mask.
bool hasMask = maskRank > 0;
-
- genMinlocReductionLoop(builder, funcOp, init, genBodyOp, rank, elementType,
- loc, hasMask, maskElemType, resultArr);
+ genMinMaxlocReductionLoop(builder, funcOp, init, genBodyOp, rank, elementType,
+ loc, hasMask, maskElemType, resultArr);
}
/// Generate function type for the simplified version of RTNAME(DotProduct)
@@ -1150,8 +1157,8 @@ void SimplifyIntrinsicsPass::simplifyLogicalDim1Reduction(
intElementType);
}
-void SimplifyIntrinsicsPass::simplifyMinlocReduction(
- fir::CallOp call, const fir::KindMapping &kindMap) {
+void SimplifyIntrinsicsPass::simplifyMinMaxlocReduction(
+ fir::CallOp call, const fir::KindMapping &kindMap, bool isMax) {
mlir::Operation::operand_range args = call.getArgs();
@@ -1217,11 +1224,11 @@ void SimplifyIntrinsicsPass::simplifyMinlocReduction(
auto typeGenerator = [rank](fir::FirOpBuilder &builder) {
return genRuntimeMinlocType(builder, rank);
};
- auto bodyGenerator = [rank, maskRank, inputType, logicalElemType,
- outType](fir::FirOpBuilder &builder,
- mlir::func::FuncOp &funcOp) {
- genRuntimeMinlocBody(builder, funcOp, rank, maskRank, inputType,
- logicalElemType, outType);
+ auto bodyGenerator = [rank, maskRank, inputType, logicalElemType, outType,
+ isMax](fir::FirOpBuilder &builder,
+ mlir::func::FuncOp &funcOp) {
+ genRuntimeMinMaxlocBody(builder, funcOp, isMax, rank, maskRank, inputType,
+ logicalElemType, outType);
};
mlir::func::FuncOp newFunc =
@@ -1367,7 +1374,11 @@ void SimplifyIntrinsicsPass::runOnOperation() {
return;
}
if (funcName.starts_with(RTNAME_STRING(Minloc))) {
- simplifyMinlocReduction(call, kindMap);
+ simplifyMinMaxlocReduction(call, kindMap, false);
+ return;
+ }
+ if (funcName.starts_with(RTNAME_STRING(Maxloc))) {
+ simplifyMinMaxlocReduction(call, kindMap, true);
return;
}
}
diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
index 39483a9cc18fe8..7dc758a522cd85 100644
--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -2348,3 +2348,250 @@ func.func @_QPtestminloc_doesntwork1d_unknownmask(%arg0: !fir.ref<!fir.array<10x
// CHECK-NOT: fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
// CHECK: fir.call @_FortranAMinlocInteger4({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
// CHECK-NOT: fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+
+// -----
+// Check Maxloc simplifies similarly to minloc
+func.func @_QPtestmaxloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}, %arg1: !fir.ref<!fir.array<10x!fir.logical<4>>> {fir.bindc_name = "b"}) -> !fir.array<1xi32> {
+ %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+ %c10 = arith.constant 10 : index
+ %c10_0 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+ %1 = fir.alloca !fir.array<1xi32> {bindc_name = "testmaxloc_works1d", uniq_name = "_QFtestmaxloc_works1dEtestmaxloc_works1d"}
+ %2 = fir.shape %c1 : (index) -> !fir.shape<1>
+ %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.array<1xi32>
+ %4 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+ %6 = fir.shape %c10_0 : (index) -> !fir.shape<1>
+ %7 = fir.embox %arg1(%6) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
+ %c4 = arith.constant 4 : index
+ %false = arith.constant false
+ %8 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+ %c0 = arith.constant 0 : index
+ %9 = fir.shape %c0 : (index) -> !fir.shape<1>
+ %10 = fir.embox %8(%9) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+ fir.store %10 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ %11 = fir.address_of(@_QQclXea5bcf7f706678e1796661f8916f3379) : !fir.ref<!fir.char<1,55>>
+ %c5_i32 = arith.constant 5 : i32
+ %12 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+ %13 = fir.convert %5 : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+ %14 = fir.convert %c4 : (index) -> i32
+ %15 = fir.convert %11 : (!fir.ref<!fir.char<1,55>>) -> !fir.ref<i8>
+ %16 = fir.convert %7 : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
+ %17 = fir.call @_FortranAMaxlocInteger4(%12, %13, %14, %15, %c5_i32, %16, %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+ %18 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ %c0_1 = arith.constant 0 : index
+ %19:3 = fir.box_dims %18, %c0_1 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+ %20 = fir.box_addr %18 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+ %21 = fir.shape_shift %19#0, %19#1 : (index, index) -> !fir.shapeshift<1>
+ %22 = fir.array_load %20(%21) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.array<?xi32>
+ %c1_2 = arith.constant 1 : index
+ %c0_3 = arith.constant 0 : index
+ %23 = arith.subi %c1, %c1_2 : index
+ %24 = fir.do_loop %arg2 = %c0_3 to %23 step %c1_2 unordered iter_args(%arg3 = %3) -> (!fir.array<1xi32>) {
+ %26 = fir.array_fetch %22, %arg2 : (!fir.array<?xi32>, index) -> i32
+ %27 = fir.array_update %arg3, %26, %arg2 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32>
+ fir.result %27 : !fir.array<1xi32>
+ }
+ fir.array_merge_store %3, %24 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref<!fir.array<1xi32>>
+ fir.freemem %20 : !fir.heap<!fir.array<?xi32>>
+ %25 = fir.load %1 : !fir.ref<!fir.array<1xi32>>
+ return %25 : !fir.array<1xi32>
+}
+
+// CHECK-LABEL: func.func @_QPtestmaxloc_works1d(
+// CHECK-SAME: %[[INARR:.*]]: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME: %[[MASK:.*]]: !fir.ref<!fir.array<10x!fir.logical<4>>> {fir.bindc_name = "b"}) -> !fir.array<1xi32> {
+// CHECK: %[[OUTARR:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK: %[[SIZE10_0:.*]] = arith.constant 10 : index
+// CHECK: %[[SIZE10_1:.*]] = arith.constant 10 : index
+// CHECK: %[[INARR_SHAPE:.*]] = fir.shape %[[SIZE10_0]] : (index) -> !fir.shape<1>
+// CHECK: %[[BOX_INARR:.*]] = fir.embox %[[INARR]](%[[INARR_SHAPE]]) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+// CHECK: %[[MASK_SHAPE:.*]] = fir.shape %[[SIZE10_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[BOX_MASK:.*]] = fir.embox %[[MASK]](%[[MASK_SHAPE]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
+// CHECK: %[[REF_BOX_OUTARR_NONE:.*]] = fir.convert %[[OUTARR]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK: %[[BOX_INARR_NONE:.*]] = fir.convert %[[BOX_INARR]] : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+// CHECK: %[[BOX_MASK_NONE:.*]] = fir.convert %[[BOX_MASK]] : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
+// CHECK: fir.call @_FortranAMaxlocInteger4x1_Logical4x1_i32_contract_simplified(%[[REF_BOX_OUTARR_NONE]], %[[BOX_INARR_NONE]], %[[BOX_MASK_NONE]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+
+// CHECK-LABEL: func.func private @_FortranAMaxlocInteger4x1_Logical4x1_i32_contract_simplified(
+// CHECK-SAME: %[[REF_BOX_OUTARR_NONE:.*]]: !fir.ref<!fir.box<none>>,
+// CHECK-SAME: %[[BOX_INARR_NONE:.*]]: !fir.box<none>,
+// CHECK-SAME: %[[BOX_MASK_NONE:.*]]: !fir.box<none>) attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK: %[[FLAG_ALLOC:.*]] = fir.alloca i32
+// CHECK: %[[INIT_OUT_IDX:.*]] = arith.constant 0 : i32
+// CHECK: %[[OUTARR_SIZE:.*]] = arith.constant 1 : index
+// CHECK: %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32>
+// CHECK: %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1>
+// CHECK: %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<1xi32>>>
+// CHECK: %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
+// CHECK: %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
+// CHECK: %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
+// CHECK: fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref<i32>
+// CHECK: %[[CINDEX_0:.*]] = arith.constant 0 : index
+// CHECK: %[[BOX_INARR:.*]] = fir.convert %[[BOX_INARR_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK: %[[FLAG_SET:.*]] = arith.constant 1 : i32
+// CHECK: %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32
+// CHECK: fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK: %[[MAX:.*]] = arith.constant -2147483648 : i32
+// CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM_INDEX0:.*]] = arith.constant 0 : index
+// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX0]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[CINDEX_1]] : index
+// CHECK: %[[DOLOOP:.*]] = fir.do_loop %[[ITER:.*]] = %[[CINDEX_0]] to %[[EXTENT]] step %[[CINDEX_1]] iter_args(%[[MIN:.*]] = %[[MAX]]) -> (i32) {
+// CHECK: %[[MASK_ITEM:.*]] = fir.coordinate_of %[[BOX_MASK]], %[[ITER]] : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[MASK_ITEMVAL:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (!fir.logical<4>) -> i1
+// CHECK: %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) {
+// CHECK: %[[FLAG_SET2:.*]] = arith.constant 1 : i32
+// CHECK: fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK: %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
+// CHECK: %[[NEW_MIN:.*]] = arith.cmpi sgt, %[[INARR_ITEMVAL]], %[[MIN]] : i32
+// CHECK: %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (i32) {
+// CHECK: %[[ONE:.*]] = arith.constant 1 : i32
+// CHECK: %[[OUTARR_IDX:.*]] = arith.constant 0 : index
+// CHECK: %[[OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
+// CHECK: %[[ITER_I32:.*]] = fir.convert %[[ITER]] : (index) -> i32
+// CHECK: %[[FORTRAN_IDX:.*]] = arith.addi %[[ITER_I32]], %[[ONE]] : i32
+// CHECK: fir.store %[[FORTRAN_IDX]] to %[[OUTARR_ITEM]] : !fir.ref<i32>
+// CHECK: fir.result %[[INARR_ITEMVAL]] : i32
+// CHECK: } else {
+// CHECK: fir.result %[[MIN]] : i32
+// CHECK: }
+// CHECK: fir.result %[[IF_NEW_MIN:.*]] : i32
+// CHECK: } else {
+// CHECK: fir.result %[[MIN]] : i32
+// CHECK: }
+// CHECK: fir.result %[[IF_MASK:.*]] : i32
+// CHECK: }
+// CHECK: %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK: %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_SET]] : i32
+// CHECK: fir.if %[[FLAG_WAS_SET]] {
+// CHECK: %[[TEST_MAX:.*]] = arith.constant -2147483648 : i32
+// CHECK: %[[INIT_NOT_CHANGED:.*]] = arith.cmpi eq, %[[TEST_MAX]], %[[DO_LOOP:.*]] : i32
+// CHECK: fir.if %[[INIT_NOT_CHANGED]] {
+// CHECK: %[[FLAG_OUTARR_IDX:.*]] = arith.constant 0 : index
+// CHECK: %[[FLAG_OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
+// CHECK: fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM]] : !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
+// CHECK: fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
+// CHECK: return
+// CHECK: }
+
+// -----
+// Check Maxloc simplifies correctly for 1D case with scalar mask and f64 input
+
+func.func @_QPtestmaxloc_works1d_scalarmask_f64(%arg0: !fir.ref<!fir.array<10xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) -> !fir.array<1xi32> {
+ %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+ %c10 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+ %1 = fir.alloca !fir.array<1xi32> {bindc_name = "testmaxloc_works1d_scalarmask_f64", uniq_name = "_QFtestmaxloc_works1d_scalarmask_f64Etestminloc_works1d_scalarmask_f64"}
+ %2 = fir.shape %c1 : (index) -> !fir.shape<1>
+ %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.array<1xi32>
+ %4 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf64>>
+ %6 = fir.embox %arg1 : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+ %c4 = arith.constant 4 : index
+ %false = arith.constant false
+ %7 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+ %c0 = arith.constant 0 : index
+ %8 = fir.shape %c0 : (index) -> !fir.shape<1>
+ %9 = fir.embox %7(%8) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+ fir.store %9 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ %10 = fir.address_of(@_QQclX66951c28c5b8bab5cdb25c1ac762b978) : !fir.ref<!fir.char<1,65>>
+ %c6_i32 = ar...
[truncated]
|
|
This takes the code from D144103 and extends it to maxloc, allowing the simplifyMinMaxlocReduction method to work with both the min and max intrinsics by switching the comparison condition and the limit/initial value.
7c1fd8d
to
11f082e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This takes the code from D144103 and extends it to maxloc, allowing the simplifyMinMaxlocReduction method to work with both the min and max intrinsics by switching the comparison condition and the limit/initial value.