[MLIR][OpenMP] Extend omp.private materialization support: firstprivate
#82164
Conversation
@llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-mlir

Author: Kareem Ergawy (ergawy)

Changes

Extends current support for delayed privatization during translation to LLVM IR. This adds support for one-block `firstprivate` `omp.private` ops.

This is a follow-up to #81715; only the commit with the same name as the PR title is relevant.

Full diff: https://github.com/llvm/llvm-project/pull/82164.diff

3 Files Affected:
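For context before the diff, this is the shape of input the new lowering handles, taken verbatim from the `openmp-firstprivate.mlir` test added by this patch: a one-block `firstprivate` privatizer whose `alloc` region allocates the private copy and whose `copy` region initializes it from the original variable.

```mlir
llvm.func @parallel_op_firstprivate(%arg0: !llvm.ptr) {
  omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
    %0 = llvm.load %arg2 : !llvm.ptr -> f32
    omp.terminator
  }
  llvm.return
}

omp.private {type = firstprivate} @x.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
  omp.yield(%0 : !llvm.ptr)
} copy {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
  %0 = llvm.load %arg0 : !llvm.ptr -> f32
  llvm.store %0, %arg1 : f32, !llvm.ptr
  omp.yield(%arg1 : !llvm.ptr)
}
```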
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 78a2ad76a1e3b8..b41c27b1e14ba2 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1000,11 +1000,39 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
+/// Replace the region arguments of the parallel op (which correspond to private
+/// variables) with the actual private variables they correspond to. This
+/// prepares the parallel op so that it matches what is expected by the
+/// OMPIRBuilder. Instead of editing the original op in-place, this function
+/// does the required changes to a cloned version which should then be erased by
+/// the caller.
+static omp::ParallelOp
+prepareOmpParallelForPrivatization(omp::ParallelOp opInst) {
+ mlir::OpBuilder cloneBuilder(opInst);
+ omp::ParallelOp opInstClone =
+ llvm::cast<omp::ParallelOp>(cloneBuilder.clone(*opInst));
+
+ Region &region = opInstClone.getRegion();
+ auto privateVars = opInstClone.getPrivateVars();
+
+ auto privateVarsIt = privateVars.begin();
+ // Reduction arguments precede private arguments, so skip them first.
+ unsigned privateArgBeginIdx = opInstClone.getNumReductionVars();
+ unsigned privateArgEndIdx = privateArgBeginIdx + privateVars.size();
+ for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
+ ++argIdx, ++privateVarsIt)
+ replaceAllUsesInRegionWith(region.getArgument(argIdx), *privateVarsIt,
+ region);
+ return opInstClone;
+}
+
/// Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ omp::ParallelOp opInstClone = prepareOmpParallelForPrivatization(opInst);
+
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
// relying on captured variables.
LogicalResult bodyGenStatus = success();
@@ -1013,12 +1041,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
// Collect reduction declarations
SmallVector<omp::ReductionDeclareOp> reductionDecls;
- collectReductionDecls(opInst, reductionDecls);
+ collectReductionDecls(opInstClone, reductionDecls);
// Allocate reduction vars
SmallVector<llvm::Value *> privateReductionVariables;
DenseMap<Value, llvm::Value *> reductionVariableMap;
- allocReductionVars(opInst, builder, moduleTranslation, allocaIP,
+ allocReductionVars(opInstClone, builder, moduleTranslation, allocaIP,
reductionDecls, privateReductionVariables,
reductionVariableMap);
@@ -1030,7 +1058,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Initialize reduction vars
builder.restoreIP(allocaIP);
- for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
+ for (unsigned i = 0; i < opInstClone.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;
if (failed(inlineConvertOmpRegions(
reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
@@ -1051,18 +1079,19 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// ParallelOp has only one region associated with it.
builder.restoreIP(codeGenIP);
auto regionBlock =
- convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
+ convertOmpOpRegions(opInstClone.getRegion(), "omp.par.region", builder,
moduleTranslation, bodyGenStatus);
// Process the reductions if required.
- if (opInst.getNumReductionVars() > 0) {
+ if (opInstClone.getNumReductionVars() > 0) {
// Collect reduction info
SmallVector<OwningReductionGen> owningReductionGens;
SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
- collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
- owningReductionGens, owningAtomicReductionGens,
- privateReductionVariables, reductionInfos);
+ collectReductionInfo(opInstClone, builder, moduleTranslation,
+ reductionDecls, owningReductionGens,
+ owningAtomicReductionGens, privateReductionVariables,
+ reductionInfos);
// Move to region cont block
builder.SetInsertPoint(regionBlock->getTerminator());
@@ -1075,7 +1104,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
ompBuilder->createReductions(builder.saveIP(), allocaIP,
reductionInfos, false);
if (!contInsertPoint.getBlock()) {
- bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
+ bodyGenStatus = opInstClone->emitOpError()
+ << "failed to convert reductions";
return;
}
@@ -1086,12 +1116,125 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// TODO: Perform appropriate actions according to the data-sharing
// attribute (shared, private, firstprivate, ...) of variables.
- // Currently defaults to shared.
+ // Currently shared and private are supported.
auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
llvm::Value &, llvm::Value &vPtr,
llvm::Value *&replacementValue) -> InsertPointTy {
replacementValue = &vPtr;
+ // If this is a private value, this lambda will return the corresponding
+ // mlir value and its `PrivateClauseOp`. Otherwise, empty values are
+ // returned.
+ auto [privVar, privatizerClone] =
+ [&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
+ if (!opInstClone.getPrivateVars().empty()) {
+ auto privVars = opInstClone.getPrivateVars();
+ auto privatizers = opInstClone.getPrivatizers();
+
+ for (auto [privVar, privatizerAttr] :
+ llvm::zip_equal(privVars, *privatizers)) {
+ // Find the MLIR private variable corresponding to the LLVM value
+ // being privatized.
+ llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar);
+ if (llvmPrivVar != &vPtr)
+ continue;
+
+ SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
+ omp::PrivateClauseOp privatizer =
+ SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
+ opInstClone, privSym);
+
+ // Clone the privatizer in case it is used by more than one parallel
+ // region. The privatizer is processed in-place (see below) before it
+ // gets inlined in the parallel region and therefore processing the
+ // original op is dangerous.
+ return {privVar, privatizer.clone()};
+ }
+ }
+
+ return {mlir::Value(), omp::PrivateClauseOp()};
+ }();
+
+ if (privVar) {
+ Region &allocRegion = privatizerClone.getAllocRegion();
+
+ if (!allocRegion.hasOneBlock()) {
+ privatizerClone.emitOpError(
+ "TODO: multi-block alloc regions are not supported yet.");
+ bodyGenStatus = failure();
+ return codeGenIP;
+ }
+
+ // If this is a `firstprivate` clause, prepare the `omp.private` op by:
+ if (privatizerClone.getDataSharingType() ==
+ omp::DataSharingClauseType::FirstPrivate) {
+ auto oldAllocBackBlock = std::prev(allocRegion.end());
+ omp::YieldOp oldAllockYieldOp =
+ llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());
+
+ Region &copyRegion = privatizerClone.getCopyRegion();
+
+ if (!copyRegion.hasOneBlock()) {
+ privatizerClone.emitOpError(
+ "TODO: multi-block copy regions are not supported yet.");
+ bodyGenStatus = failure();
+ return codeGenIP;
+ }
+
+ mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
+ // 1. Cloning the `copy` region to the end of the `alloc` region.
+ copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion,
+ allocRegion.end());
+
+ auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock);
+ // 2. Merging the last `alloc` block with the first block in the `copy`
+ // region clone.
+ // 3. Re-mapping the first argument of the `copy` region to be the
+ // argument of the `alloc` region and the second argument of the `copy`
+ // region to be the yielded value of the `alloc` region (this is the
+ // private clone of the privatized value).
+ copyCloneBuilder.mergeBlocks(
+ &*newCopyRegionFrontBlock, &*oldAllocBackBlock,
+ {allocRegion.getArgument(0), oldAllockYieldOp.getOperand(0)});
+
+ // 4. The old terminator of the `alloc` region is not needed anymore, so
+ // delete it.
+ oldAllockYieldOp.erase();
+ }
+
+ // Replace the privatizer block argument with the MLIR value being privatized.
+ // This way, the body of the privatizer will be changed from using the
+ // region/block argument to the value being privatized.
+ auto allocRegionArg = allocRegion.getArgument(0);
+ replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
+
+ auto oldIP = builder.saveIP();
+ builder.restoreIP(allocaIP);
+
+ // Temporarily unlink the terminator from its parent since
+ // `inlineConvertOmpRegions` expects the insertion block to **not**
+ // contain a terminator.
+ llvm::Instruction &allocaTerminator = builder.GetInsertBlock()->back();
+ assert(allocaTerminator.isTerminator());
+ allocaTerminator.removeFromParent();
+
+ SmallVector<llvm::Value *, 1> yieldedValues;
+ if (failed(inlineConvertOmpRegions(allocRegion, "omp.privatizer", builder,
+ moduleTranslation, &yieldedValues))) {
+ opInstClone.emitError(
+ "failed to inline `alloc` region of an `omp.private` "
+ "op in the parallel region");
+ bodyGenStatus = failure();
+ } else {
+ assert(yieldedValues.size() == 1);
+ replacementValue = yieldedValues.front();
+ }
+
+ allocaTerminator.insertAfter(&builder.GetInsertBlock()->back());
+ privatizerClone.erase();
+ builder.restoreIP(oldIP);
+ }
+
return codeGenIP;
};
@@ -1100,13 +1243,13 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
auto finiCB = [&](InsertPointTy codeGenIP) {};
llvm::Value *ifCond = nullptr;
- if (auto ifExprVar = opInst.getIfExprVar())
+ if (auto ifExprVar = opInstClone.getIfExprVar())
ifCond = moduleTranslation.lookupValue(ifExprVar);
llvm::Value *numThreads = nullptr;
- if (auto numThreadsVar = opInst.getNumThreadsVar())
+ if (auto numThreadsVar = opInstClone.getNumThreadsVar())
numThreads = moduleTranslation.lookupValue(numThreadsVar);
auto pbKind = llvm::omp::OMP_PROC_BIND_default;
- if (auto bind = opInst.getProcBindVal())
+ if (auto bind = opInstClone.getProcBindVal())
pbKind = getProcBindKind(*bind);
// TODO: Is the Parallel construct cancellable?
bool isCancellable = false;
@@ -1119,6 +1262,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
ifCond, numThreads, pbKind, isCancellable));
+ opInstClone.erase();
return bodyGenStatus;
}
@@ -3009,12 +3153,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::TargetOp) {
return convertOmpTarget(*op, builder, moduleTranslation);
})
- .Case<omp::MapInfoOp, omp::DataBoundsOp>([&](auto op) {
- // No-op, should be handled by relevant owning operations e.g.
- // TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then
- // discarded
- return success();
- })
+ .Case<omp::MapInfoOp, omp::DataBoundsOp, omp::PrivateClauseOp>(
+ [&](auto op) {
+ // No-op, should be handled by relevant owning operations e.g.
+ // TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then
+ // discarded
+ return success();
+ })
.Default([&](Operation *inst) {
return inst->emitError("unsupported OpenMP operation: ")
<< inst->getName();
diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
new file mode 100644
index 00000000000000..54b8dfc85ccc22
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
@@ -0,0 +1,50 @@
+// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
+// `omp.private` ops).
+
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+llvm.func @parallel_op_firstprivate(%arg0: !llvm.ptr) {
+ omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
+ %0 = llvm.load %arg2 : !llvm.ptr -> f32
+ omp.terminator
+ }
+ llvm.return
+}
+
+omp.private {type = firstprivate} @x.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
+ omp.yield(%0 : !llvm.ptr)
+} copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.load %arg0 : !llvm.ptr -> f32
+ llvm.store %0, %arg1 : f32, !llvm.ptr
+ omp.yield(%arg1 : !llvm.ptr)
+}
+
+// CHECK-LABEL: @parallel_op_firstprivate
+// CHECK-SAME: (ptr %[[ORIG:.*]]) {
+// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
+// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
+// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_firstprivate..omp_par, ptr %[[OMP_PAR_ARG]])
+// CHECK: }
+
+// CHECK-LABEL: void @parallel_op_firstprivate..omp_par
+// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
+// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
+
+// Check that the privatizer alloc region was inlined properly.
+// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
+
+// Check that the privatizer copy region was inlined properly.
+
+// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
+// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
+// CHECK-NEXT: br
+
+// Check that the privatized value is used (rather than the original one).
+// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
+// CHECK: }
diff --git a/mlir/test/Target/LLVMIR/openmp-private.mlir b/mlir/test/Target/LLVMIR/openmp-private.mlir
new file mode 100644
index 00000000000000..1ac87852d5300f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-private.mlir
@@ -0,0 +1,91 @@
+// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
+// `omp.private` ops).
+
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+llvm.func @parallel_op_1_private(%arg0: !llvm.ptr) {
+ omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
+ %0 = llvm.load %arg2 : !llvm.ptr -> f32
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: @parallel_op_1_private
+// CHECK-SAME: (ptr %[[ORIG:.*]]) {
+// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
+// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
+// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_1_private..omp_par, ptr %[[OMP_PAR_ARG]])
+// CHECK: }
+
+// CHECK-LABEL: void @parallel_op_1_private..omp_par
+// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
+// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
+
+// Check that the privatizer alloc region was inlined properly.
+// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
+// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
+// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
+// CHECK-NEXT: br
+
+// Check that the privatized value is used (rather than the original one).
+// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
+// CHECK: }
+
+llvm.func @parallel_op_2_privates(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+ omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr, @y.privatizer %arg1 -> %arg3 : !llvm.ptr) {
+ %0 = llvm.load %arg2 : !llvm.ptr -> f32
+ %1 = llvm.load %arg3 : !llvm.ptr -> i32
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: @parallel_op_2_privates
+// CHECK-SAME: (ptr %[[ORIG1:.*]], ptr %[[ORIG2:.*]]) {
+// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr, ptr }, align 8
+// CHECK: %[[ORIG1_GEP:.*]] = getelementptr { ptr, ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
+// CHECK: store ptr %[[ORIG1]], ptr %[[ORIG1_GEP]], align 8
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_2_privates..omp_par, ptr %[[OMP_PAR_ARG]])
+// CHECK: }
+
+// CHECK-LABEL: void @parallel_op_2_privates..omp_par
+// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
+// CHECK: %[[ORIG1_PTR_PTR:.*]] = getelementptr { ptr, ptr }, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %[[ORIG1_PTR:.*]] = load ptr, ptr %[[ORIG1_PTR_PTR]], align 8
+// CHECK: %[[ORIG2_PTR_PTR:.*]] = getelementptr { ptr, ptr }, ptr %[[ARG]], i32 0, i32 1
+// CHECK: %[[ORIG2_PTR:.*]] = load ptr, ptr %[[ORIG2_PTR_PTR]], align 8
+
+// Check that the privatizer alloc region was inlined properly.
+// CHECK: %[[PRIV1_ALLOC:.*]] = alloca float, align 4
+// CHECK: %[[ORIG1_VAL:.*]] = load float, ptr %[[ORIG1_PTR]], align 4
+// CHECK: store float %[[ORIG1_VAL]], ptr %[[PRIV1_ALLOC]], align 4
+// CHECK: %[[PRIV2_ALLOC:.*]] = alloca i32, align 4
+// CHECK: %[[ORIG2_VAL:.*]] = load i32, ptr %[[ORIG2_PTR]], align 4
+// CHECK: store i32 %[[ORIG2_VAL]], ptr %[[PRIV2_ALLOC]], align 4
+// CHECK-NEXT: br
+
+// Check that the privatized value is used (rather than the original one).
+// CHECK: load float, ptr %[[PRIV1_ALLOC]], align 4
+// CHECK: load i32, ptr %[[PRIV2_ALLOC]], align 4
+// CHECK: }
+
+omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
+ %1 = llvm.load %arg0 : !llvm.ptr -> f32
+ llvm.store %1, %0 : f32, !llvm.ptr
+ omp.yield(%0 : !llvm.ptr)
+}
+
+omp.private {type = private} @y.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+ %1 = llvm.load %arg0 : !llvm.ptr -> i32
+ llvm.store %1, %0 : i32, !llvm.ptr
+ omp.yield(%0 : !llvm.ptr)
+}
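Reconstructed from the FileCheck patterns in `openmp-firstprivate.mlir` above, the outlined parallel function ends up looking roughly like the sketch below (value names and the `internal` linkage are illustrative, and the prologue/branching emitted by the OMPIRBuilder is elided):

```llvm
define internal void @parallel_op_firstprivate..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %arg) {
  ; unpack the pointer to the original variable captured by the fork call
  %orig.gep = getelementptr { ptr }, ptr %arg, i32 0, i32 0
  %orig = load ptr, ptr %orig.gep, align 8
  ; inlined `alloc` region: allocate the private copy
  %priv = alloca float, align 4
  ; inlined `copy` region: first-privatize by copying in the original value
  %val = load float, ptr %orig, align 4
  store float %val, ptr %priv, align 4
  ; the parallel region body reads the private copy, not %orig
  %use = load float, ptr %priv, align 4
  ret void
}
```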
Looks good, but I haven't followed the private discussion closely.
The parent PR of this one was merged. So this one is ready for review now.
LGTM. Thanks @ergawy for pushing this through.
@@ -1176,17 +1176,38 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
    }();

    if (privVar) {
      Region &allocRegion = privatizerClone.getAllocRegion();

      // If this is a `firstprivate` clause, prepare the `omp.private` op by:
Not sure whether this approach of moving the copy to the alloca region will be sufficient when we have lastprivate and types with deallocate regions. I guess we will cross that bridge when we get there.
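To make the concern concrete, here is a rough sketch (SSA value names are illustrative, not taken from the patch) of what the `firstprivate` preparation does to the clone of `@x.privatizer` from the tests: the `copy` block is appended to the `alloc` block, the copy's first block argument is remapped to the `alloc` region's argument, its second argument to the value yielded by `alloc`, and the old `alloc` terminator is erased. The merged `alloc` region of the clone then looks roughly like this before it is inlined at the parallel region's alloca insertion point:

```mlir
// In-memory state of the cloned privatizer's `alloc` region after the merge
// (conceptual; this intermediate form is never round-tripped or verified):
^bb0(%orig: !llvm.ptr):
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %priv = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
  // ... start of the inlined `copy` block ...
  %v = llvm.load %orig : !llvm.ptr -> f32
  llvm.store %v, %priv : f32, !llvm.ptr
  omp.yield(%priv : !llvm.ptr)
```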
[MLIR][OpenMP] Extend `omp.private` materialization support: `firstprivate`

Extends current support for delayed privatization during translation to LLVM IR. This adds support for one-block `firstprivate` `omp.private` ops.