-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[flang][OpenMP][MLIR] Basic support for delayed privatization code-gen #81833
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-openmp Author: Kareem Ergawy (ergawy) ChangesAdds basic support for emitting delayed privatizers from flang. So far, This is a follow-up to both #81414 & #81452, only the latest commit (with the same title as the PR) is relevant to this PR.Patch is 48.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81833.diff 15 Files Affected:
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 796933a4eb5f68..55bc33e76e5f6e 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -16,6 +16,7 @@
#include "flang/Common/Fortran.h"
#include "flang/Lower/LoweringOptions.h"
#include "flang/Lower/PFTDefs.h"
+#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Semantics/symbol.h"
#include "mlir/IR/Builders.h"
@@ -296,6 +297,9 @@ class AbstractConverter {
return loweringOptions;
}
+ virtual Fortran::lower::SymbolBox
+ lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
private:
/// Options controlling lowering behavior.
const Fortran::lower::LoweringOptions &loweringOptions;
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 76e127207d764e..09557888c52885 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1070,7 +1070,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/// Find the symbol in one level up of symbol map such as for host-association
/// in OpenMP code or return null.
Fortran::lower::SymbolBox
- lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) {
+ lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) override {
if (Fortran::lower::SymbolBox v = localSymbols.lookupOneLevelUpSymbol(sym))
return v;
return {};
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 24f91765cb439b..acb846edf512fb 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -40,6 +40,12 @@ static llvm::cl::opt<bool> treatIndexAsSection(
llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."),
llvm::cl::init(true));
+static llvm::cl::opt<bool> enableDelayedPrivatization(
+ "openmp-enable-delayed-privatization",
+ llvm::cl::desc(
+ "Emit `[first]private` variables as clauses on the MLIR ops."),
+ llvm::cl::init(false));
+
using DeclareTargetCapturePair =
std::pair<mlir::omp::DeclareTargetCaptureClause,
Fortran::semantics::Symbol>;
@@ -147,6 +153,14 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
//===----------------------------------------------------------------------===//
class DataSharingProcessor {
+public:
+ struct DelayedPrivatizationInfo {
+ llvm::SmallVector<mlir::SymbolRefAttr> privatizers;
+ llvm::SmallVector<mlir::Value> hostAddresses;
+ llvm::SmallVector<const Fortran::semantics::Symbol *> hostSymbols;
+ };
+
+private:
bool hasLastPrivateOp;
mlir::OpBuilder::InsertPoint lastPrivIP;
mlir::OpBuilder::InsertPoint insPt;
@@ -161,6 +175,11 @@ class DataSharingProcessor {
const Fortran::parser::OmpClauseList &opClauseList;
Fortran::lower::pft::Evaluation &eval;
+ bool useDelayedPrivatization;
+ Fortran::lower::SymMap *symTable;
+
+ DelayedPrivatizationInfo delayedPrivatizationInfo;
+
bool needBarrier();
void collectSymbols(Fortran::semantics::Symbol::Flag flag);
void collectOmpObjectListSymbol(
@@ -171,10 +190,13 @@ class DataSharingProcessor {
void collectDefaultSymbols();
void privatize();
void defaultPrivatize();
+ void doPrivatize(const Fortran::semantics::Symbol *sym);
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const Fortran::semantics::Symbol *sym);
- void copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym);
+ void
+ copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym,
+ mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr);
void copyLastPrivateSymbol(const Fortran::semantics::Symbol *sym,
mlir::OpBuilder::InsertPoint *lastPrivIP);
void insertDeallocs();
@@ -182,10 +204,14 @@ class DataSharingProcessor {
public:
DataSharingProcessor(Fortran::lower::AbstractConverter &converter,
const Fortran::parser::OmpClauseList &opClauseList,
- Fortran::lower::pft::Evaluation &eval)
+ Fortran::lower::pft::Evaluation &eval,
+ bool useDelayedPrivatization = false,
+ Fortran::lower::SymMap *symTable = nullptr)
: hasLastPrivateOp(false), converter(converter),
firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
- eval(eval) {}
+ eval(eval), useDelayedPrivatization(useDelayedPrivatization),
+ symTable(symTable) {}
+
// Privatisation is split into two steps.
// Step1 performs cloning of all privatisation clauses and copying for
// firstprivates. Step1 is performed at the place where process/processStep1
@@ -204,6 +230,10 @@ class DataSharingProcessor {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}
+
+ const DelayedPrivatizationInfo &getDelayedPrivatizationInfo() const {
+ return delayedPrivatizationInfo;
+ }
};
void DataSharingProcessor::processStep1() {
@@ -250,9 +280,10 @@ void DataSharingProcessor::cloneSymbol(const Fortran::semantics::Symbol *sym) {
}
void DataSharingProcessor::copyFirstPrivateSymbol(
- const Fortran::semantics::Symbol *sym) {
+ const Fortran::semantics::Symbol *sym,
+ mlir::OpBuilder::InsertPoint *copyAssignIP) {
if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate))
- converter.copyHostAssociateVar(*sym);
+ converter.copyHostAssociateVar(*sym, copyAssignIP);
}
void DataSharingProcessor::copyLastPrivateSymbol(
@@ -491,14 +522,10 @@ void DataSharingProcessor::privatize() {
for (const Fortran::semantics::Symbol *sym : privatizedSymbols) {
if (const auto *commonDet =
sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
- for (const auto &mem : commonDet->objects()) {
- cloneSymbol(&*mem);
- copyFirstPrivateSymbol(&*mem);
- }
- } else {
- cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
- }
+ for (const auto &mem : commonDet->objects())
+ doPrivatize(&*mem);
+ } else
+ doPrivatize(sym);
}
}
@@ -522,11 +549,96 @@ void DataSharingProcessor::defaultPrivatize() {
!sym->GetUltimate().has<Fortran::semantics::NamelistDetails>() &&
!symbolsInNestedRegions.contains(sym) &&
!symbolsInParentRegions.contains(sym) &&
- !privatizedSymbols.contains(sym)) {
+ !privatizedSymbols.contains(sym))
+ doPrivatize(sym);
+ }
+}
+
+void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
+ if (!useDelayedPrivatization) {
+ cloneSymbol(sym);
+ copyFirstPrivateSymbol(sym);
+ return;
+ }
+
+ Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+ assert(hsb && "Host symbol box not found");
+
+ mlir::Type symType = hsb.getAddr().getType();
+ mlir::Location symLoc = hsb.getAddr().getLoc();
+ std::string privatizerName = sym->name().ToString() + ".privatizer";
+ bool isFirstPrivate =
+ sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate);
+
+ mlir::omp::PrivateClauseOp privatizerOp = [&]() {
+ auto moduleOp = firOpBuilder.getModule();
+
+ auto uniquePrivatizerName = fir::getTypeAsString(
+ symType, converter.getKindMap(),
+ sym->name().ToString() +
+ (isFirstPrivate ? "_firstprivate" : "_private"));
+
+ if (auto existingPrivatizer =
+ moduleOp.lookupSymbol<mlir::omp::PrivateClauseOp>(
+ uniquePrivatizerName))
+ return existingPrivatizer;
+
+ auto ip = firOpBuilder.saveInsertionPoint();
+ firOpBuilder.setInsertionPoint(&moduleOp.getBodyRegion().front(),
+ moduleOp.getBodyRegion().front().begin());
+ auto result = firOpBuilder.create<mlir::omp::PrivateClauseOp>(
+ symLoc, uniquePrivatizerName, symType,
+ isFirstPrivate ? mlir::omp::DataSharingClauseType ::FirstPrivate
+ : mlir::omp::DataSharingClauseType::Private);
+
+ symTable->pushScope();
+
+ {
+ mlir::Region &allocRegion = result.getAllocRegion();
+ mlir::Block &allocEntryBlock = allocRegion.emplaceBlock();
+ allocEntryBlock.addArgument(symType, symLoc);
+
+ firOpBuilder.setInsertionPointToEnd(&allocEntryBlock);
+ symTable->addSymbol(*sym, allocRegion.getArgument(0));
+ symTable->pushScope();
cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
+ firOpBuilder.create<mlir::omp::YieldOp>(
+ hsb.getAddr().getLoc(),
+ symTable->shallowLookupSymbol(*sym).getAddr());
+ symTable->popScope();
}
- }
+
+ if (isFirstPrivate) {
+ mlir::Region ©Region = result.getCopyRegion();
+ mlir::Block ©EntryBlock = copyRegion.emplaceBlock();
+ // Argument corresponding to the original/host value.
+ copyEntryBlock.addArgument(symType, symLoc);
+ // Argument corresponding to the privatized value.
+ copyEntryBlock.addArgument(symType, symLoc);
+
+ firOpBuilder.setInsertionPointToEnd(©EntryBlock);
+ symTable->addSymbol(*sym, copyRegion.getArgument(0),
+ /*force=*/true);
+ symTable->pushScope();
+ symTable->addSymbol(*sym, copyRegion.getArgument(1));
+ auto ip = firOpBuilder.saveInsertionPoint();
+ copyFirstPrivateSymbol(sym, &ip);
+
+ firOpBuilder.create<mlir::omp::YieldOp>(
+ hsb.getAddr().getLoc(),
+ symTable->shallowLookupSymbol(*sym).getAddr());
+ symTable->popScope();
+ }
+
+ symTable->popScope();
+ firOpBuilder.restoreInsertionPoint(ip);
+ return result;
+ }();
+
+ delayedPrivatizationInfo.privatizers.push_back(
+ mlir::SymbolRefAttr::get(privatizerOp));
+ delayedPrivatizationInfo.hostAddresses.push_back(hsb.getAddr());
+ delayedPrivatizationInfo.hostSymbols.push_back(sym);
}
//===----------------------------------------------------------------------===//
@@ -2585,6 +2697,7 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
static mlir::omp::ParallelOp
genParallelOp(Fortran::lower::AbstractConverter &converter,
+ Fortran::lower::SymMap &symTable,
Fortran::semantics::SemanticsContext &semaCtx,
Fortran::lower::pft::Evaluation &eval, bool genNested,
mlir::Location currentLocation,
@@ -2617,8 +2730,8 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
auto reductionCallback = [&](mlir::Operation *op) {
llvm::SmallVector<mlir::Location> locs(reductionVars.size(),
currentLocation);
- auto block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
- reductionTypes, locs);
+ auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
+ reductionTypes, locs);
for (auto [arg, prv] :
llvm::zip_equal(reductionSymbols, block->getArguments())) {
converter.bindSymbol(*arg, prv);
@@ -2626,13 +2739,78 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
return reductionSymbols;
};
- return genOpWithBody<mlir::omp::ParallelOp>(
+ OpWithBodyGenInfo genInfo =
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
.setGenNested(genNested)
.setOuterCombined(outerCombined)
.setClauses(&clauseList)
.setReductions(&reductionSymbols, &reductionTypes)
- .setGenRegionEntryCb(reductionCallback),
+ .setGenRegionEntryCb(reductionCallback);
+
+ if (!enableDelayedPrivatization) {
+ return genOpWithBody<mlir::omp::ParallelOp>(
+ genInfo,
+ /*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
+ numThreadsClauseOperand, allocateOperands, allocatorOperands,
+ reductionVars,
+ reductionDeclSymbols.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+ reductionDeclSymbols),
+ procBindKindAttr, /*private_vars=*/llvm::SmallVector<mlir::Value>{},
+ /*privatizers=*/nullptr);
+ }
+
+ bool privatize = !outerCombined;
+ DataSharingProcessor dsp(converter, clauseList, eval,
+ /*useDelayedPrivatization=*/true, &symTable);
+
+ if (privatize)
+ dsp.processStep1();
+
+ const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo();
+
+ auto genRegionEntryCB = [&](mlir::Operation *op) {
+ auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);
+
+ llvm::SmallVector<mlir::Location> reductionLocs(reductionVars.size(),
+ currentLocation);
+
+ auto privateVars = parallelOp.getPrivateVars();
+ auto ®ion = parallelOp.getRegion();
+
+ llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes;
+ privateVarTypes.reserve(privateVars.size());
+ llvm::transform(privateVars, std::back_inserter(privateVarTypes),
+ [](mlir::Value v) { return v.getType(); });
+
+ llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs;
+ privateVarLocs.reserve(privateVars.size());
+ llvm::transform(privateVars, std::back_inserter(privateVarLocs),
+ [](mlir::Value v) { return v.getLoc(); });
+
+ converter.getFirOpBuilder().createBlock(®ion, {}, privateVarTypes,
+ privateVarLocs);
+
+ llvm::SmallVector<const Fortran::semantics::Symbol *> allSymbols =
+ reductionSymbols;
+ allSymbols.append(delayedPrivatizationInfo.hostSymbols);
+ for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
+ converter.bindSymbol(*arg, prv);
+ }
+
+ return allSymbols;
+ };
+
+ // TODO Merge with the reduction CB.
+ genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp);
+
+ llvm::SmallVector<mlir::Attribute> privatizers(
+ delayedPrivatizationInfo.privatizers.begin(),
+ delayedPrivatizationInfo.privatizers.end());
+
+ return genOpWithBody<mlir::omp::ParallelOp>(
+ genInfo,
/*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
numThreadsClauseOperand, allocateOperands, allocatorOperands,
reductionVars,
@@ -2640,7 +2818,11 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
? nullptr
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
reductionDeclSymbols),
- procBindKindAttr);
+ procBindKindAttr, delayedPrivatizationInfo.hostAddresses,
+ delayedPrivatizationInfo.privatizers.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+ privatizers));
}
static mlir::omp::SectionOp
@@ -3632,7 +3814,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
.test(ompDirective)) {
validDirective = true;
- genParallelOp(converter, semaCtx, eval, /*genNested=*/false,
+ genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
currentLocation, loopOpClauseList,
/*outerCombined=*/true);
}
@@ -3721,8 +3903,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
currentLocation);
break;
case llvm::omp::Directive::OMPD_parallel:
- genParallelOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
- beginClauseList);
+ genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
+ currentLocation, beginClauseList);
break;
case llvm::omp::Directive::OMPD_single:
genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
@@ -3779,7 +3961,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
.test(directive.v)) {
bool outerCombined =
directive.v != llvm::omp::Directive::OMPD_target_parallel;
- genParallelOp(converter, semaCtx, eval, /*genNested=*/false,
+ genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
currentLocation, beginClauseList, outerCombined);
combinedDirective = true;
}
@@ -3862,7 +4044,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
// Parallel wrapper of PARALLEL SECTIONS construct
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
- genParallelOp(converter, semaCtx, eval,
+ genParallelOp(converter, symTable, semaCtx, eval,
/*genNested=*/false, currentLocation, sectionsClauseList,
/*outerCombined=*/true);
} else {
diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90
new file mode 100644
index 00000000000000..21ffc1d008e313
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90
@@ -0,0 +1,26 @@
+! Test delayed privatization for the `private` clause.
+
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_firstprivate
+ implicit none
+ integer :: var1
+
+!$OMP PARALLEL FIRSTPRIVATE(var1)
+ var1 = 10
+!$OMP END PARALLEL
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: !fir.ref<i32>, %[[PRIV_PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK: %[[ORIG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]] : !fir.ref<i32>
+! CHECK: fir.store %[[ORIG_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[PRIV_PRIV_ARG]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: @_QPdelayed_privatization_firstprivate
+! CHECK: omp.parallel private(@[[VAR1_PRIVATIZER_SYM]] %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: omp.terminator
+
diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90
new file mode 100644
index 00000000000000..91eaa092f11d69
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90
@@ -0,0 +1,38 @@
+! Test delayed privatization for the `private` clause.
+
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_private
+ implicit none
+ integer :: var1
+
+!$OMP PARALLEL PRIVATE(var1)
+ var1 = 10
+!$OMP END PARALLEL
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! CHECK-NEXT: omp.yield(%[[PRIV_ALLOC]] : !fir.ref<i32>)
+
+! CHECK-LABEL: @_QPdelayed_privatization_private
+! CHECK: %[[ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref<i32>) {
+! CHECK: fir.store %{{.*}} to %[[PAR_ARG]] : !fir.ref<i32>
+! CHECK: omp.terminator
+
+! Test that the same privatizer is used if the a variable with the same type and
+! name was previously privatized.
+subroutine delayed_privatization_private_same_type_and_name
+ implicit none
+ integer :: var1
+
+!$OMP PARALLEL PRIVATE(var1)
+ var1 = 10
+!$OMP END PARALLEL
+end subroutine
+
+! CHECK-LABEL: @_QPdelayed_privatization_private_same_type_and_name
+! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref<i32>) {
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90 b/flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90
new file mode 100644
index 00000000000000..d72093c8eb73ef
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90
@@ -0,0 +1,30 @@
+! Test that reductions and delayed privatization work properly togehter. Since
+! both types of clauses add block arguments to the Ope...
[truncated]
|
@llvm/pr-subscribers-flang-fir-hlfir Author: Kareem Ergawy (ergawy) ChangesAdds basic support for emitting delayed privatizers from flang. So far, This is a follow-up to both #81414 & #81452, only the latest commit (with the same title as the PR) is relevant to this PR.Patch is 48.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81833.diff 15 Files Affected:
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 796933a4eb5f68..55bc33e76e5f6e 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -16,6 +16,7 @@
#include "flang/Common/Fortran.h"
#include "flang/Lower/LoweringOptions.h"
#include "flang/Lower/PFTDefs.h"
+#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Semantics/symbol.h"
#include "mlir/IR/Builders.h"
@@ -296,6 +297,9 @@ class AbstractConverter {
return loweringOptions;
}
+ virtual Fortran::lower::SymbolBox
+ lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
private:
/// Options controlling lowering behavior.
const Fortran::lower::LoweringOptions &loweringOptions;
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 76e127207d764e..09557888c52885 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1070,7 +1070,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/// Find the symbol in one level up of symbol map such as for host-association
/// in OpenMP code or return null.
Fortran::lower::SymbolBox
- lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) {
+ lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) override {
if (Fortran::lower::SymbolBox v = localSymbols.lookupOneLevelUpSymbol(sym))
return v;
return {};
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 24f91765cb439b..acb846edf512fb 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -40,6 +40,12 @@ static llvm::cl::opt<bool> treatIndexAsSection(
llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."),
llvm::cl::init(true));
+static llvm::cl::opt<bool> enableDelayedPrivatization(
+ "openmp-enable-delayed-privatization",
+ llvm::cl::desc(
+ "Emit `[first]private` variables as clauses on the MLIR ops."),
+ llvm::cl::init(false));
+
using DeclareTargetCapturePair =
std::pair<mlir::omp::DeclareTargetCaptureClause,
Fortran::semantics::Symbol>;
@@ -147,6 +153,14 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
//===----------------------------------------------------------------------===//
class DataSharingProcessor {
+public:
+ struct DelayedPrivatizationInfo {
+ llvm::SmallVector<mlir::SymbolRefAttr> privatizers;
+ llvm::SmallVector<mlir::Value> hostAddresses;
+ llvm::SmallVector<const Fortran::semantics::Symbol *> hostSymbols;
+ };
+
+private:
bool hasLastPrivateOp;
mlir::OpBuilder::InsertPoint lastPrivIP;
mlir::OpBuilder::InsertPoint insPt;
@@ -161,6 +175,11 @@ class DataSharingProcessor {
const Fortran::parser::OmpClauseList &opClauseList;
Fortran::lower::pft::Evaluation &eval;
+ bool useDelayedPrivatization;
+ Fortran::lower::SymMap *symTable;
+
+ DelayedPrivatizationInfo delayedPrivatizationInfo;
+
bool needBarrier();
void collectSymbols(Fortran::semantics::Symbol::Flag flag);
void collectOmpObjectListSymbol(
@@ -171,10 +190,13 @@ class DataSharingProcessor {
void collectDefaultSymbols();
void privatize();
void defaultPrivatize();
+ void doPrivatize(const Fortran::semantics::Symbol *sym);
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const Fortran::semantics::Symbol *sym);
- void copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym);
+ void
+ copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym,
+ mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr);
void copyLastPrivateSymbol(const Fortran::semantics::Symbol *sym,
mlir::OpBuilder::InsertPoint *lastPrivIP);
void insertDeallocs();
@@ -182,10 +204,14 @@ class DataSharingProcessor {
public:
DataSharingProcessor(Fortran::lower::AbstractConverter &converter,
const Fortran::parser::OmpClauseList &opClauseList,
- Fortran::lower::pft::Evaluation &eval)
+ Fortran::lower::pft::Evaluation &eval,
+ bool useDelayedPrivatization = false,
+ Fortran::lower::SymMap *symTable = nullptr)
: hasLastPrivateOp(false), converter(converter),
firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
- eval(eval) {}
+ eval(eval), useDelayedPrivatization(useDelayedPrivatization),
+ symTable(symTable) {}
+
// Privatisation is split into two steps.
// Step1 performs cloning of all privatisation clauses and copying for
// firstprivates. Step1 is performed at the place where process/processStep1
@@ -204,6 +230,10 @@ class DataSharingProcessor {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}
+
+ const DelayedPrivatizationInfo &getDelayedPrivatizationInfo() const {
+ return delayedPrivatizationInfo;
+ }
};
void DataSharingProcessor::processStep1() {
@@ -250,9 +280,10 @@ void DataSharingProcessor::cloneSymbol(const Fortran::semantics::Symbol *sym) {
}
void DataSharingProcessor::copyFirstPrivateSymbol(
- const Fortran::semantics::Symbol *sym) {
+ const Fortran::semantics::Symbol *sym,
+ mlir::OpBuilder::InsertPoint *copyAssignIP) {
if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate))
- converter.copyHostAssociateVar(*sym);
+ converter.copyHostAssociateVar(*sym, copyAssignIP);
}
void DataSharingProcessor::copyLastPrivateSymbol(
@@ -491,14 +522,10 @@ void DataSharingProcessor::privatize() {
for (const Fortran::semantics::Symbol *sym : privatizedSymbols) {
if (const auto *commonDet =
sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
- for (const auto &mem : commonDet->objects()) {
- cloneSymbol(&*mem);
- copyFirstPrivateSymbol(&*mem);
- }
- } else {
- cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
- }
+ for (const auto &mem : commonDet->objects())
+ doPrivatize(&*mem);
+ } else
+ doPrivatize(sym);
}
}
@@ -522,11 +549,96 @@ void DataSharingProcessor::defaultPrivatize() {
!sym->GetUltimate().has<Fortran::semantics::NamelistDetails>() &&
!symbolsInNestedRegions.contains(sym) &&
!symbolsInParentRegions.contains(sym) &&
- !privatizedSymbols.contains(sym)) {
+ !privatizedSymbols.contains(sym))
+ doPrivatize(sym);
+ }
+}
+
+void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
+ if (!useDelayedPrivatization) {
+ cloneSymbol(sym);
+ copyFirstPrivateSymbol(sym);
+ return;
+ }
+
+ Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+ assert(hsb && "Host symbol box not found");
+
+ mlir::Type symType = hsb.getAddr().getType();
+ mlir::Location symLoc = hsb.getAddr().getLoc();
+ std::string privatizerName = sym->name().ToString() + ".privatizer";
+ bool isFirstPrivate =
+ sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate);
+
+ mlir::omp::PrivateClauseOp privatizerOp = [&]() {
+ auto moduleOp = firOpBuilder.getModule();
+
+ auto uniquePrivatizerName = fir::getTypeAsString(
+ symType, converter.getKindMap(),
+ sym->name().ToString() +
+ (isFirstPrivate ? "_firstprivate" : "_private"));
+
+ if (auto existingPrivatizer =
+ moduleOp.lookupSymbol<mlir::omp::PrivateClauseOp>(
+ uniquePrivatizerName))
+ return existingPrivatizer;
+
+ auto ip = firOpBuilder.saveInsertionPoint();
+ firOpBuilder.setInsertionPoint(&moduleOp.getBodyRegion().front(),
+ moduleOp.getBodyRegion().front().begin());
+ auto result = firOpBuilder.create<mlir::omp::PrivateClauseOp>(
+ symLoc, uniquePrivatizerName, symType,
+ isFirstPrivate ? mlir::omp::DataSharingClauseType ::FirstPrivate
+ : mlir::omp::DataSharingClauseType::Private);
+
+ symTable->pushScope();
+
+ {
+ mlir::Region &allocRegion = result.getAllocRegion();
+ mlir::Block &allocEntryBlock = allocRegion.emplaceBlock();
+ allocEntryBlock.addArgument(symType, symLoc);
+
+ firOpBuilder.setInsertionPointToEnd(&allocEntryBlock);
+ symTable->addSymbol(*sym, allocRegion.getArgument(0));
+ symTable->pushScope();
cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
+ firOpBuilder.create<mlir::omp::YieldOp>(
+ hsb.getAddr().getLoc(),
+ symTable->shallowLookupSymbol(*sym).getAddr());
+ symTable->popScope();
}
- }
+
+ if (isFirstPrivate) {
+ mlir::Region ©Region = result.getCopyRegion();
+ mlir::Block ©EntryBlock = copyRegion.emplaceBlock();
+ // Argument corresponding to the original/host value.
+ copyEntryBlock.addArgument(symType, symLoc);
+ // Argument corresponding to the privatized value.
+ copyEntryBlock.addArgument(symType, symLoc);
+
+ firOpBuilder.setInsertionPointToEnd(©EntryBlock);
+ symTable->addSymbol(*sym, copyRegion.getArgument(0),
+ /*force=*/true);
+ symTable->pushScope();
+ symTable->addSymbol(*sym, copyRegion.getArgument(1));
+ auto ip = firOpBuilder.saveInsertionPoint();
+ copyFirstPrivateSymbol(sym, &ip);
+
+ firOpBuilder.create<mlir::omp::YieldOp>(
+ hsb.getAddr().getLoc(),
+ symTable->shallowLookupSymbol(*sym).getAddr());
+ symTable->popScope();
+ }
+
+ symTable->popScope();
+ firOpBuilder.restoreInsertionPoint(ip);
+ return result;
+ }();
+
+ delayedPrivatizationInfo.privatizers.push_back(
+ mlir::SymbolRefAttr::get(privatizerOp));
+ delayedPrivatizationInfo.hostAddresses.push_back(hsb.getAddr());
+ delayedPrivatizationInfo.hostSymbols.push_back(sym);
}
//===----------------------------------------------------------------------===//
@@ -2585,6 +2697,7 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
static mlir::omp::ParallelOp
genParallelOp(Fortran::lower::AbstractConverter &converter,
+ Fortran::lower::SymMap &symTable,
Fortran::semantics::SemanticsContext &semaCtx,
Fortran::lower::pft::Evaluation &eval, bool genNested,
mlir::Location currentLocation,
@@ -2617,8 +2730,8 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
auto reductionCallback = [&](mlir::Operation *op) {
llvm::SmallVector<mlir::Location> locs(reductionVars.size(),
currentLocation);
- auto block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
- reductionTypes, locs);
+ auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
+ reductionTypes, locs);
for (auto [arg, prv] :
llvm::zip_equal(reductionSymbols, block->getArguments())) {
converter.bindSymbol(*arg, prv);
@@ -2626,13 +2739,78 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
return reductionSymbols;
};
- return genOpWithBody<mlir::omp::ParallelOp>(
+ OpWithBodyGenInfo genInfo =
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
.setGenNested(genNested)
.setOuterCombined(outerCombined)
.setClauses(&clauseList)
.setReductions(&reductionSymbols, &reductionTypes)
- .setGenRegionEntryCb(reductionCallback),
+ .setGenRegionEntryCb(reductionCallback);
+
+ if (!enableDelayedPrivatization) {
+ return genOpWithBody<mlir::omp::ParallelOp>(
+ genInfo,
+ /*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
+ numThreadsClauseOperand, allocateOperands, allocatorOperands,
+ reductionVars,
+ reductionDeclSymbols.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+ reductionDeclSymbols),
+ procBindKindAttr, /*private_vars=*/llvm::SmallVector<mlir::Value>{},
+ /*privatizers=*/nullptr);
+ }
+
+ bool privatize = !outerCombined;
+ DataSharingProcessor dsp(converter, clauseList, eval,
+ /*useDelayedPrivatization=*/true, &symTable);
+
+ if (privatize)
+ dsp.processStep1();
+
+ const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo();
+
+ auto genRegionEntryCB = [&](mlir::Operation *op) {
+ auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);
+
+ llvm::SmallVector<mlir::Location> reductionLocs(reductionVars.size(),
+ currentLocation);
+
+ auto privateVars = parallelOp.getPrivateVars();
+ auto ®ion = parallelOp.getRegion();
+
+ llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes;
+ privateVarTypes.reserve(privateVars.size());
+ llvm::transform(privateVars, std::back_inserter(privateVarTypes),
+ [](mlir::Value v) { return v.getType(); });
+
+ llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs;
+ privateVarLocs.reserve(privateVars.size());
+ llvm::transform(privateVars, std::back_inserter(privateVarLocs),
+ [](mlir::Value v) { return v.getLoc(); });
+
+ converter.getFirOpBuilder().createBlock(®ion, {}, privateVarTypes,
+ privateVarLocs);
+
+ llvm::SmallVector<const Fortran::semantics::Symbol *> allSymbols =
+ reductionSymbols;
+ allSymbols.append(delayedPrivatizationInfo.hostSymbols);
+ for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
+ converter.bindSymbol(*arg, prv);
+ }
+
+ return allSymbols;
+ };
+
+ // TODO Merge with the reduction CB.
+ genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp);
+
+ llvm::SmallVector<mlir::Attribute> privatizers(
+ delayedPrivatizationInfo.privatizers.begin(),
+ delayedPrivatizationInfo.privatizers.end());
+
+ return genOpWithBody<mlir::omp::ParallelOp>(
+ genInfo,
/*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
numThreadsClauseOperand, allocateOperands, allocatorOperands,
reductionVars,
@@ -2640,7 +2818,11 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
? nullptr
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
reductionDeclSymbols),
- procBindKindAttr);
+ procBindKindAttr, delayedPrivatizationInfo.hostAddresses,
+ delayedPrivatizationInfo.privatizers.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+ privatizers));
}
static mlir::omp::SectionOp
@@ -3632,7 +3814,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
.test(ompDirective)) {
validDirective = true;
- genParallelOp(converter, semaCtx, eval, /*genNested=*/false,
+ genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
currentLocation, loopOpClauseList,
/*outerCombined=*/true);
}
@@ -3721,8 +3903,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
currentLocation);
break;
case llvm::omp::Directive::OMPD_parallel:
- genParallelOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
- beginClauseList);
+ genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
+ currentLocation, beginClauseList);
break;
case llvm::omp::Directive::OMPD_single:
genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
@@ -3779,7 +3961,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
.test(directive.v)) {
bool outerCombined =
directive.v != llvm::omp::Directive::OMPD_target_parallel;
- genParallelOp(converter, semaCtx, eval, /*genNested=*/false,
+ genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
currentLocation, beginClauseList, outerCombined);
combinedDirective = true;
}
@@ -3862,7 +4044,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
// Parallel wrapper of PARALLEL SECTIONS construct
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
- genParallelOp(converter, semaCtx, eval,
+ genParallelOp(converter, symTable, semaCtx, eval,
/*genNested=*/false, currentLocation, sectionsClauseList,
/*outerCombined=*/true);
} else {
diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90
new file mode 100644
index 00000000000000..21ffc1d008e313
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90
@@ -0,0 +1,26 @@
+! Test delayed privatization for the `private` clause.
+
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_firstprivate
+ implicit none
+ integer :: var1
+
+!$OMP PARALLEL FIRSTPRIVATE(var1)
+ var1 = 10
+!$OMP END PARALLEL
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: !fir.ref<i32>, %[[PRIV_PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK: %[[ORIG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]] : !fir.ref<i32>
+! CHECK: fir.store %[[ORIG_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[PRIV_PRIV_ARG]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: @_QPdelayed_privatization_firstprivate
+! CHECK: omp.parallel private(@[[VAR1_PRIVATIZER_SYM]] %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: omp.terminator
+
diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90
new file mode 100644
index 00000000000000..91eaa092f11d69
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90
@@ -0,0 +1,38 @@
+! Test delayed privatization for the `private` clause.
+
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_private
+ implicit none
+ integer :: var1
+
+!$OMP PARALLEL PRIVATE(var1)
+ var1 = 10
+!$OMP END PARALLEL
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! CHECK-NEXT: omp.yield(%[[PRIV_ALLOC]] : !fir.ref<i32>)
+
+! CHECK-LABEL: @_QPdelayed_privatization_private
+! CHECK: %[[ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref<i32>) {
+! CHECK: fir.store %{{.*}} to %[[PAR_ARG]] : !fir.ref<i32>
+! CHECK: omp.terminator
+
+! Test that the same privatizer is used if the a variable with the same type and
+! name was previously privatized.
+subroutine delayed_privatization_private_same_type_and_name
+ implicit none
+ integer :: var1
+
+!$OMP PARALLEL PRIVATE(var1)
+ var1 = 10
+!$OMP END PARALLEL
+end subroutine
+
+! CHECK-LABEL: @_QPdelayed_privatization_private_same_type_and_name
+! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref<i32>) {
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90 b/flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90
new file mode 100644
index 00000000000000..d72093c8eb73ef
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90
@@ -0,0 +1,30 @@
+! Test that reductions and delayed privatization work properly togehter. Since
+! both types of clauses add block arguments to the Ope...
[truncated]
|
284b722
to
91ccce2
Compare
91ccce2
to
441e959
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks Kareem, this approach makes sense to me. I only have a couple of small comments.
flang/lib/Lower/OpenMP.cpp
Outdated
llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes; | ||
privateVarTypes.reserve(privateVars.size()); | ||
llvm::transform(privateVars, std::back_inserter(privateVarTypes), | ||
[](mlir::Value v) { return v.getType(); }); | ||
|
||
llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs; | ||
privateVarLocs.reserve(privateVars.size()); | ||
llvm::transform(privateVars, std::back_inserter(privateVarLocs), | ||
[](mlir::Value v) { return v.getLoc(); }); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is fine at the moment, but maybe we should start thinking about how to better deal with matching multiple block arguments with their different uses during creation and also later when they're used. At this point we only have to deal with privatization and reduction-related block arguments at the same time, but other operations also use them for loop induction variables or target data movements.
It can become quite difficult to keep track of the expected ordering and uses of block arguments for each operation if we implement that handling ad-hoc, so maybe there is something we could define at the MLIR operation level to centralize that logic.
1595682
to
476a514
Compare
…code-gen Similar to llvm#81833 but for `hlfir` since we need different handling for the `fir` vs. `hlfir` dialect.
f6b2ba8
to
e9a3e01
Compare
Ping 🔔! Anyone in addition to Sergio can take a look? 🙏 |
d9e8ddf
to
19dee3d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just have a few nits and minor comments to add.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the patch. I have a couple of questions on the driver. Rest looks good.
! that the block arguments are added in the proper order (reductions first and | ||
! then delayed privatization. | ||
|
||
! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this patch accessible through bbc
at the moment? Not through flang-new
? I am just curious because we have been not been testing with bbc
(without testing for flang-new
) for a while now. Is it something to do with --openmp-enable-delayed-privatization
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indeed, it is related to the delayed privatization option. I think I can add a corresponding option to flang-new
but my idea was to get delayed privatization working properly first.
"openmp-enable-delayed-privatization", | ||
llvm::cl::desc( | ||
"Emit `[first]private` variables as clauses on the MLIR ops."), | ||
llvm::cl::init(false)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we eventually plan to make this default?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it is temporary until we get delayed privatization working end-to-end. Since delayed privatization is quite disruptive, it has to be introduced carefully and be tested extensively enough first.
fcd7ab7
to
cbefd31
Compare
Ping 🔔 ! The failure is not related to the PR (see this Slack thread). Please take one more look and let me know if you have other comments or this is good to go! |
cbefd31
to
00d218c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
// For symbols to be privatized in OMP, the symbol is mapped to an | ||
// instance of `SymbolBox::Intrinsic` (i.e. a direct mapping to an MLIR | ||
// SSA value). This MLIR SSA value is the block argument to the | ||
// `omp.private`'s `alloc` block. If this is the case, we return this | ||
// `SymbolBox::Intrinsic` value. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the point that Extended values are not allowed here now?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indeed. I am working on adding more support for other types of value at the moment.
Adds basic support for emitting delayed privatizers from flang. So far, only types of symbols are supported (i.e. scalars), support for more complicated types will be added later. This also makes sure that reductio and delayed privatization work properly together by merging the body-gen callbacks for both in case both clauses are present on the parallel construct.
00d218c
to
dc4b235
Compare
PR llvm#81833 introduced some changes to broke some debug builds. This happened due to an indirectly included file referencing an `operator <<` function which is defined in a `.cpp` file that not linked with `tco` and `fir-opt`.
PR #81833 introduced some changes to broke some debug builds. This happened due to an indirectly included file referencing an `operator <<` function which is defined in a `.cpp` file that not linked with `tco` and `fir-opt`.
Adds basic support for emitting delayed privatizers from flang. So far,
only types of symbols are supported (i.e. scalars), support for more
complicated types will be added later. This also makes sure that
reduction and delayed privatization work properly together by merging the
body-gen callbacks for both in case both clauses are present on the
parallel construct.