-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[flang][fir] Lower do concurrent
loop nests to fir.do_concurrent
#132904
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-fir-hlfir Author: Kareem Ergawy (ergawy) ChangesAdds support for lowering To that end, this PR emits the allocations for the iteration variables within the block of the For example, given the following input: do concurrent(i=1:10, j=11:20)
end do the changes in this PR emit the following MLIR: fir.do_concurrent {
%22 = fir.alloca i32 {bindc_name = "i"}
%23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%24 = fir.alloca i32 {bindc_name = "j"}
%25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) {
%26 = fir.convert %arg1 : (index) -> i32
fir.store %26 to %23#<!-- -->0 : !fir.ref<i32>
%27 = fir.convert %arg2 : (index) -> i32
fir.store %27 to %25#<!-- -->0 : !fir.ref<i32>
}
} Patch is 26.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132904.diff 7 Files Affected:
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6e6e88a32517c..622547e32c03f 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -130,7 +130,7 @@ struct IncrementLoopInfo {
mlir::Value loopVariable = nullptr;
// Data members for structured loops.
- fir::DoLoopOp doLoop = nullptr;
+ mlir::Operation *loopOp = nullptr;
// Data members for unstructured loops.
bool hasRealControl = false;
@@ -2267,8 +2267,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get(
builder->getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ ua,
/*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
- if (has_attrs)
- info.doLoop.setLoopAnnotationAttr(la);
+ if (has_attrs) {
+ if (auto loopOp = mlir::dyn_cast<fir::DoLoopOp>(info.loopOp))
+ loopOp.setLoopAnnotationAttr(la);
+
+ if (auto doConcurrentOp =
+ mlir::dyn_cast<fir::DoConcurrentLoopOp>(info.loopOp))
+ doConcurrentOp.setLoopAnnotationAttr(la);
+ }
}
/// Generate FIR to begin a structured or unstructured increment loop nest.
@@ -2277,96 +2283,77 @@ class FirConverter : public Fortran::lower::AbstractConverter {
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
mlir::Location loc = toLocation();
- mlir::Operation *boundsAndStepIP = nullptr;
mlir::arith::IntegerOverflowFlags iofBackup{};
- for (IncrementLoopInfo &info : incrementLoopNestInfo) {
- mlir::Value lowerValue;
- mlir::Value upperValue;
- mlir::Value stepValue;
-
- {
- mlir::OpBuilder::InsertionGuard guard(*builder);
+ llvm::SmallVector<mlir::Value> nestLBs;
+ llvm::SmallVector<mlir::Value> nestUBs;
+ llvm::SmallVector<mlir::Value> nestSts;
+ llvm::SmallVector<mlir::Value> nestReduceOperands;
+ llvm::SmallVector<mlir::Attribute> nestReduceAttrs;
+ bool genDoConcurrent = false;
- // Set the IP before the first loop in the nest so that all nest bounds
- // and step values are created outside the nest.
- if (boundsAndStepIP)
- builder->setInsertionPointAfter(boundsAndStepIP);
+ for (IncrementLoopInfo &info : incrementLoopNestInfo) {
+ genDoConcurrent = info.isStructured() && info.isUnordered;
+ if (!genDoConcurrent)
info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
info.isUnordered);
- if (!getLoweringOptions().getIntegerWrapAround()) {
- iofBackup = builder->getIntegerOverflowFlags();
- builder->setIntegerOverflowFlags(
- mlir::arith::IntegerOverflowFlags::nsw);
- }
- lowerValue = genControlValue(info.lowerExpr, info);
- upperValue = genControlValue(info.upperExpr, info);
- bool isConst = true;
- stepValue = genControlValue(info.stepExpr, info,
- info.isStructured() ? nullptr : &isConst);
- if (!getLoweringOptions().getIntegerWrapAround())
- builder->setIntegerOverflowFlags(iofBackup);
- boundsAndStepIP = stepValue.getDefiningOp();
-
- // Use a temp variable for unstructured loops with non-const step.
- if (!isConst) {
- info.stepVariable =
- builder->createTemporary(loc, stepValue.getType());
- boundsAndStepIP =
- builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
+
+ if (!getLoweringOptions().getIntegerWrapAround()) {
+ iofBackup = builder->getIntegerOverflowFlags();
+ builder->setIntegerOverflowFlags(
+ mlir::arith::IntegerOverflowFlags::nsw);
+ }
+
+ nestLBs.push_back(genControlValue(info.lowerExpr, info));
+ nestUBs.push_back(genControlValue(info.upperExpr, info));
+ bool isConst = true;
+ nestSts.push_back(genControlValue(
+ info.stepExpr, info, info.isStructured() ? nullptr : &isConst));
+
+ if (!getLoweringOptions().getIntegerWrapAround())
+ builder->setIntegerOverflowFlags(iofBackup);
+
+ // Use a temp variable for unstructured loops with non-const step.
+ if (!isConst) {
+ mlir::Value stepValue = nestSts.back();
+ info.stepVariable = builder->createTemporary(loc, stepValue.getType());
+ builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
+ }
+
+ if (genDoConcurrent && nestReduceOperands.empty()) {
+ // Create DO CONCURRENT reduce operands and attributes
+ for (const auto &reduceSym : info.reduceSymList) {
+ const fir::ReduceOperationEnum reduceOperation = reduceSym.first;
+ const Fortran::semantics::Symbol *sym = reduceSym.second;
+ fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
+ nestReduceOperands.push_back(fir::getBase(exv));
+ auto reduceAttr =
+ fir::ReduceAttr::get(builder->getContext(), reduceOperation);
+ nestReduceAttrs.push_back(reduceAttr);
}
}
+ }
+ for (auto [info, lowerValue, upperValue, stepValue] :
+ llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) {
// Structured loop - generate fir.do_loop.
if (info.isStructured()) {
+ if (info.isUnordered)
+ continue;
+
+ // The loop variable is a doLoop op argument.
mlir::Type loopVarType = info.getLoopVariableType();
- mlir::Value loopValue;
- if (info.isUnordered) {
- llvm::SmallVector<mlir::Value> reduceOperands;
- llvm::SmallVector<mlir::Attribute> reduceAttrs;
- // Create DO CONCURRENT reduce operands and attributes
- for (const auto &reduceSym : info.reduceSymList) {
- const fir::ReduceOperationEnum reduce_operation = reduceSym.first;
- const Fortran::semantics::Symbol *sym = reduceSym.second;
- fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
- reduceOperands.push_back(fir::getBase(exv));
- auto reduce_attr =
- fir::ReduceAttr::get(builder->getContext(), reduce_operation);
- reduceAttrs.push_back(reduce_attr);
- }
- // The loop variable value is explicitly updated.
- info.doLoop = builder->create<fir::DoLoopOp>(
- loc, lowerValue, upperValue, stepValue, /*unordered=*/true,
- /*finalCountValue=*/false, /*iterArgs=*/std::nullopt,
- llvm::ArrayRef<mlir::Value>(reduceOperands), reduceAttrs);
- builder->setInsertionPointToStart(info.doLoop.getBody());
- loopValue = builder->createConvert(loc, loopVarType,
- info.doLoop.getInductionVar());
- } else {
- // The loop variable is a doLoop op argument.
- info.doLoop = builder->create<fir::DoLoopOp>(
- loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
- /*finalCountValue=*/true,
- builder->createConvert(loc, loopVarType, lowerValue));
- builder->setInsertionPointToStart(info.doLoop.getBody());
- loopValue = info.doLoop.getRegionIterArgs()[0];
- }
+ auto loopOp = builder->create<fir::DoLoopOp>(
+ loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
+ /*finalCountValue=*/true,
+ builder->createConvert(loc, loopVarType, lowerValue));
+ info.loopOp = loopOp;
+ builder->setInsertionPointToStart(loopOp.getBody());
+ mlir::Value loopValue = loopOp.getRegionIterArgs()[0];
+
// Update the loop variable value in case it has non-index references.
builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);
- if (info.maskExpr) {
- Fortran::lower::StatementContext stmtCtx;
- mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
- stmtCtx.finalizeAndReset();
- mlir::Value maskCondCast =
- builder->createConvert(loc, builder->getI1Type(), maskCond);
- auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
- /*withElseRegion=*/false);
- builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
- }
- if (info.hasLocalitySpecs())
- handleLocalitySpecs(info);
-
addLoopAnnotationAttr(info, dirs);
continue;
}
@@ -2430,6 +2417,60 @@ class FirConverter : public Fortran::lower::AbstractConverter {
builder->restoreInsertionPoint(insertPt);
}
}
+
+ if (genDoConcurrent) {
+ auto loopWrapperOp = builder->create<fir::DoConcurrentOp>(loc);
+ builder->setInsertionPointToStart(
+ builder->createBlock(&loopWrapperOp.getRegion()));
+
+ for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) {
+ info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
+ info.isUnordered);
+ }
+
+ builder->setInsertionPointToEnd(loopWrapperOp.getBody());
+ auto loopOp = builder->create<fir::DoConcurrentLoopOp>(
+ loc, nestLBs, nestUBs, nestSts, nestReduceOperands,
+ nestReduceAttrs.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(builder->getContext(), nestReduceAttrs),
+ nullptr);
+
+ llvm::SmallVector<mlir::Type> loopBlockArgTypes(
+ incrementLoopNestInfo.size(), builder->getIndexType());
+ llvm::SmallVector<mlir::Location> loopBlockArgLocs(
+ incrementLoopNestInfo.size(), loc);
+ mlir::Region &loopRegion = loopOp.getRegion();
+ mlir::Block *loopBlock = builder->createBlock(
+ &loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs);
+ builder->setInsertionPointToStart(loopBlock);
+
+ for (auto [info, blockArg] :
+ llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) {
+ info.loopOp = loopOp;
+ mlir::Value loopValue =
+ builder->createConvert(loc, info.getLoopVariableType(), blockArg);
+ builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);
+
+ if (info.maskExpr) {
+ Fortran::lower::StatementContext stmtCtx;
+ mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
+ stmtCtx.finalizeAndReset();
+ mlir::Value maskCondCast =
+ builder->createConvert(loc, builder->getI1Type(), maskCond);
+ auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
+ /*withElseRegion=*/false);
+ builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
+ }
+ }
+
+ IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back();
+
+ if (innermostInfo.hasLocalitySpecs())
+ handleLocalitySpecs(innermostInfo);
+
+ addLoopAnnotationAttr(innermostInfo, dirs);
+ }
}
/// Generate FIR to end a structured or unstructured increment loop nest.
@@ -2446,29 +2487,31 @@ class FirConverter : public Fortran::lower::AbstractConverter {
it != rend; ++it) {
IncrementLoopInfo &info = *it;
if (info.isStructured()) {
- // End fir.do_loop.
+ // End fir.do_concurent.loop.
if (info.isUnordered) {
- builder->setInsertionPointAfter(info.doLoop);
+ builder->setInsertionPointAfter(info.loopOp->getParentOp());
continue;
}
+
+ // End fir.do_loop.
// Decrement tripVariable.
- builder->setInsertionPointToEnd(info.doLoop.getBody());
+ auto doLoopOp = mlir::cast<fir::DoLoopOp>(info.loopOp);
+ builder->setInsertionPointToEnd(doLoopOp.getBody());
llvm::SmallVector<mlir::Value, 2> results;
results.push_back(builder->create<mlir::arith::AddIOp>(
- loc, info.doLoop.getInductionVar(), info.doLoop.getStep(),
- iofAttr));
+ loc, doLoopOp.getInductionVar(), doLoopOp.getStep(), iofAttr));
// Step loopVariable to help optimizations such as vectorization.
// Induction variable elimination will clean up as necessary.
mlir::Value step = builder->createConvert(
- loc, info.getLoopVariableType(), info.doLoop.getStep());
+ loc, info.getLoopVariableType(), doLoopOp.getStep());
mlir::Value loopVar =
builder->create<fir::LoadOp>(loc, info.loopVariable);
results.push_back(
builder->create<mlir::arith::AddIOp>(loc, loopVar, step, iofAttr));
builder->create<fir::ResultOp>(loc, results);
- builder->setInsertionPointAfter(info.doLoop);
+ builder->setInsertionPointAfter(doLoopOp);
// The loop control variable may be used after the loop.
- builder->create<fir::StoreOp>(loc, info.doLoop.getResult(1),
+ builder->create<fir::StoreOp>(loc, doLoopOp.getResult(1),
info.loopVariable);
continue;
}
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index b7f8a8d3a9d56..8e7f47a8f2a9b 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -279,6 +279,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
if (auto cufKernelOp = getRegion().getParentOfType<cuf::KernelOp>())
return &cufKernelOp.getRegion().front();
+ if (auto doConcurentOp = getRegion().getParentOfType<fir::DoConcurrentOp>())
+ return doConcurentOp.getBody();
+
return getEntryBlock();
}
diff --git a/flang/test/Lower/do_concurrent.f90 b/flang/test/Lower/do_concurrent.f90
index ef93d2d6b035b..cc113f59c35e3 100644
--- a/flang/test/Lower/do_concurrent.f90
+++ b/flang/test/Lower/do_concurrent.f90
@@ -14,6 +14,9 @@ subroutine sub1(n)
implicit none
integer :: n, m, i, j, k
integer, dimension(n) :: a
+!CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFsub1En"}
+!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Ea"}
+
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
@@ -29,10 +32,30 @@ subroutine sub1(n)
!CHECK: %[[UB3:.*]] = arith.constant 10 : i32
!CHECK: %[[UB3_CVT:.*]] = fir.convert %[[UB3]] : (i32) -> index
-!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
-!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
-!CHECK: fir.do_loop %{{.*}} = %[[LB3_CVT]] to %[[UB3_CVT]] step %{{.*}} unordered
+!CHECK: fir.do_concurrent
+!CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i"}
+!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I]]
+!CHECK: %[[J:.*]] = fir.alloca i32 {bindc_name = "j"}
+!CHECK: %[[J_DECL:.*]]:2 = hlfir.declare %[[J]]
+!CHECK: %[[K:.*]] = fir.alloca i32 {bindc_name = "k"}
+!CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[K]]
+
+!CHECK: fir.do_concurrent.loop (%[[I_IV:.*]], %[[J_IV:.*]], %[[K_IV:.*]]) =
+!CHECK-SAME: (%[[LB1_CVT]], %[[LB2_CVT]], %[[LB3_CVT]]) to
+!CHECK-SAME: (%[[UB1_CVT]], %[[UB2_CVT]], %[[UB3_CVT]]) step
+!CHECK-SAME: (%{{.*}}, %{{.*}}, %{{.*}}) {
+!CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32
+!CHECK: fir.store %[[I_IV_CVT]] to %[[I_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[J_IV_CVT:.*]] = fir.convert %[[J_IV]] : (index) -> i32
+!CHECK: fir.store %[[J_IV_CVT]] to %[[J_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[K_IV_CVT:.*]] = fir.convert %[[K_IV]] : (index) -> i32
+!CHECK: fir.store %[[K_IV_CVT]] to %[[K_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[I_VAL:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[I_VAL_CVT:.*]] = fir.convert %[[I_VAL]] : (i32) -> i64
+!CHECK: %[[A_ELEM:.*]] = hlfir.designate %[[A_DECL]]#0 (%[[I_VAL_CVT]])
+!CHECK: hlfir.assign %[[N_VAL]] to %[[A_ELEM]] : i32, !fir.ref<i32>
do concurrent(i=1:n, j=1:bar(n*m, n/m), k=5:10)
a(i) = n
end do
@@ -45,14 +68,17 @@ subroutine sub2(n)
integer, dimension(n) :: a
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
-!CHECK: %[[UB1:.*]] = fir.load %5#0 : !fir.ref<i32>
+!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index
-!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
+!CHECK: fir.do_concurrent
+!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB1_CVT]]) to (%[[UB1_CVT]]) step (%{{.*}})
+
!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index
-!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
+!CHECK: fir.do_concurrent
+!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB2_CVT]]) to (%[[UB2_CVT]]) step (%{{.*}})
do concurrent(i=1:n)
do concurrent(j=1:bar(n*m, n/m))
a(i) = n
@@ -60,7 +86,6 @@ subroutine sub2(n)
end do
end subroutine
-
!CHECK-LABEL: unstructured
subroutine unstructured(inner_step)
integer(4) :: i, j, inner_step
diff --git a/flang/test/Lower/do_concurrent_local_default_init.f90 b/flang/test/Lower/do_concurrent_local_default_init.f90
index 7652e4fcd0402..207704ac1a990 100644
--- a/flang/test/Lower/do_concurrent_local_default_init.f90
+++ b/flang/test/Lower/do_concurrent_local_default_init.f90
@@ -29,7 +29,7 @@ subroutine test_default_init()
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>> {fir.bindc_name = "p"}) {
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>
! CHECK: %[[VAL_7:.*]] = fir.box_elesize %[[VAL_6]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>) -> index
-! CHECK: fir.do_loop
+! CHECK: fir.do_concurrent.loop
! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>> {bindc_name = "p", pinned, uniq_name = "_QFtest_ptrEp"}
! CHECK: %[[VAL_17:.*]] = fir.zero_bits !fir.ptr<!fir.array<?x!fir.char<1,?>>>
! CHECK: %[[VAL_18:.*]] = arith.constant 0 : index
@@ -43,7 +43,7 @@ subroutine test_default_init()
! CHECK: }
! CHECK-LABEL: func.func @_QPtest_default_init(
-! CHECK: fir.do_loop
+! CHECK: fir.do_concurrent.loop
! CHECK: %[[VAL_26:.*]] = fir.alloca !fir.type<_QFtest_default_initTt{i:i32}> {bindc_name = "a", pinned, uniq_name = "_QFtest_default_initEa"}
! CHECK: %[[VAL_27:.*]] = fir.embox %[[VAL_26]] : (!fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>
! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_27]] : (!fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<none>
diff --git a/flang/test/Lower/loops.f90 b/flang/test/Lower/loops.f90
index ea65ba3e4d66d..60df27a591dc3 100644
--- a/flang/test/Lower/loops.f90
+++ b/flang/test/Lower/loops.f90
@@ -2,15 +2,6 @@
! CHECK-LABEL: loop_test
subroutine loop_test
- ! CHECK: %[[VAL_2:.*]] = fir.alloca i16 {bindc_name = "i"}
- ! CHECK: %[[VAL_3:.*]] = fir.alloca i16 {bindc_name = "i"}
- ! CHECK: %[[VAL_4:.*]] = fir.alloca i16 {bindc_name = "i"}
- ! CHECK: %[[VAL_5:.*]] = fir.alloca i8 {bindc_name = "k"}
- ! CHECK: %[[VAL_6:.*]] = fir.alloca i8 {bindc_name = "j"}
- ! CHECK: %[[VAL_7:.*]] = fir.alloca i8 {bindc_name = "i"}
- ! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "k"}
- ! CHECK: %[[VAL_9:.*]] = fir.alloca i32 {bindc_name = "j"}
- ! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i"}
! CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<5x5x5xi32> {bindc_name = "a", uniq_name = "_QFloop_testEa"}
! CHECK...
[truncated]
|
c85c0ef
to
421e550
Compare
421e550
to
252093c
Compare
Please have a look when you have time 🙏! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for working on this Kareem. Just a few questions.
Could this be made any simpler by using hlfir::genLoopNest
? No worries if it isn't a good fit.
if (auto doConcurentOp = getRegion().getParentOfType<fir::DoConcurrentOp>()) | ||
return doConcurentOp.getBody(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need this for any allocation in the do concurrent? We did not have this so far so I'm just wondering.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We do it this way for the new fir.do_concurrent
and fir.do_concurrent.loop
ops to keep all info related to the loop nest closely located in the IR. The allocations needed here are for the nest iteration variables. See: https://discourse.llvm.org/t/modeling-do-concurrent-loops-in-the-fir-dialect/84950/6?u=bob.belcher and related discussion and https://github.com/llvm/llvm-project/blob/main/flang/include/flang/Optimizer/Dialect/FIROps.td#L3457.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok for the iteration variables but what about any temporary that will be allocated during the body lowering. Is it ok to add them at the same place as the iteration variables?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These will be emitted in the body of the loop itself (for every iteration), just like what would happen now when targeting the old fir.do_loop ... unorderd
nest. I think it would be ok to move them to the alloca block only if we serialize to fir.do_loop ... unordered
nest (as we do in DoConcurrentConversion
in SimplifyFIROperations.cpp
) or if we are targeting OpenMP, for example, we can share them for each chunk of the parallel loop that we map to. But in the general case, I think it would not be ok to move them out at this point.
Let me know if I misunderstood what you meant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think they are created in the loop body. The code calls getAllocaBlock()
and currently, it will probably return the entry block of the function. With your change it will return the do concurrent body. So instead of having the alloca for temporary in the function entry block you will have them in the do concurrent. If this is fine for you then it's ok but this change from the current behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right, I got confused here. This is still fine since the temp allocations will be emitted in the fir.do_concurrent
wrapper op not the fir.do_concurrent.loop
op. When serialized (i.e. converted to fir.do_loop ... unordered
nest), these allocations are moved to the alloca block (see https://github.com/llvm/llvm-project/blob/main/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp#L167-L176). For OpenMP, I call these "loop-local values", these values are privatized by the do concurrent to OpenMP pass (see #127635).
I didn't know about this util, thanks for pointing it out. I think it won't work out of the box since |
58bcb4b
to
2122290
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM once @clementval is happy
LGTM. Just a last question with the alloca insertion point. |
2122290
to
f86a5c6
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
Adds support for lowering `do concurrent` nests from PFT to the new `fir.do_concurrent` MLIR op as well as its special terminator `fir.do_concurrent.loop` which models the actual loop nest. To that end, this PR emits the allocations for the iteration variables within the block of the `fir.do_concurrent` op and creates a region for the `fir.do_concurrent.loop` op that accepts arguments equal in number to the number of the input `do concurrent` iteration ranges. For example, given the following input: ```fortran do concurrent(i=1:10, j=11:20) end do ``` the changes in this PR emit the following MLIR: ```mlir fir.do_concurrent { %22 = fir.alloca i32 {bindc_name = "i"} %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) %24 = fir.alloca i32 {bindc_name = "j"} %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) { %26 = fir.convert %arg1 : (index) -> i32 fir.store %26 to %23#0 : !fir.ref<i32> %27 = fir.convert %arg2 : (index) -> i32 fir.store %27 to %25#0 : !fir.ref<i32> } } ```
2a30b19
to
f1061b4
Compare
Sorry for the delayed follow up here, I am on vacation for a few weeks. If @clementval is ok with the PR now, I will merge it once the CI is green. |
Good for me. |
…urrent` (#132904)" This reverts commit 04b87e1. The reasons for reverting is that the following: 1. I still need need to upstream some part of the do concurrent to OpenMP pass from our downstream implementation and taking this in downstream will make things more difficult. 2. I still need to work on a solution for modeling locality specifiers on `hlfir.do_concurrent` ops. I would prefer to do that and merge the entire stack together instead of having a partial solution.
…urrent` (#132904)" (#135904) This reverts commit 04b87e1. The reasons for reverting is that the following: 1. I still need need to upstream some part of the do concurrent to OpenMP pass from our downstream implementation and taking this in downstream will make things more difficult. 2. I still need to work on a solution for modeling locality specifiers on `hlfir.do_concurrent` ops. I would prefer to do that and merge the entire stack together instead of having a partial solution. After merging the revert I will reopen the origianl PR and keep it updated against main until I finish the above.
…urrent` (llvm#132904)" This reverts commit 04b87e1.
…lvm#132904) Adds support for lowering `do concurrent` nests from PFT to the new `fir.do_concurrent` MLIR op as well as its special terminator `fir.do_concurrent.loop` which models the actual loop nest. To that end, this PR emits the allocations for the iteration variables within the block of the `fir.do_concurrent` op and creates a region for the `fir.do_concurrent.loop` op that accepts arguments equal in number to the number of the input `do concurrent` iteration ranges. For example, given the following input: ```fortran do concurrent(i=1:10, j=11:20) end do ``` the changes in this PR emit the following MLIR: ```mlir fir.do_concurrent { %22 = fir.alloca i32 {bindc_name = "i"} %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) %24 = fir.alloca i32 {bindc_name = "j"} %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) { %26 = fir.convert %arg1 : (index) -> i32 fir.store %26 to %23#0 : !fir.ref<i32> %27 = fir.convert %arg2 : (index) -> i32 fir.store %27 to %25#0 : !fir.ref<i32> } } ```
…urrent` (llvm#132904)" (llvm#135904) This reverts commit 04b87e1. The reasons for reverting is that the following: 1. I still need need to upstream some part of the do concurrent to OpenMP pass from our downstream implementation and taking this in downstream will make things more difficult. 2. I still need to work on a solution for modeling locality specifiers on `hlfir.do_concurrent` ops. I would prefer to do that and merge the entire stack together instead of having a partial solution. After merging the revert I will reopen the origianl PR and keep it updated against main until I finish the above.
Adds support for lowering
do concurrent
nests from PFT to the newfir.do_concurrent
MLIR op as well as its special terminatorfir.do_concurrent.loop
which models the actual loop nest.To that end, this PR emits the allocations for the iteration variables within the block of the
fir.do_concurrent
op and creates a region for thefir.do_concurrent.loop
op that accepts arguments equal in number to the number of the inputdo concurrent
iteration ranges.For example, given the following input:
the changes in this PR emit the following MLIR: