Skip to content

[flang][OpenMP] Add support for target ... private #78968

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 58 additions & 23 deletions flang/lib/Lower/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,17 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
//===----------------------------------------------------------------------===//

class DataSharingProcessor {
using SymbolSet = llvm::SetVector<const Fortran::semantics::Symbol *>;

bool hasLastPrivateOp;
mlir::OpBuilder::InsertPoint lastPrivIP;
mlir::OpBuilder::InsertPoint insPt;
mlir::Value loopIV;
// Symbols in private, firstprivate, and/or lastprivate clauses.
llvm::SetVector<const Fortran::semantics::Symbol *> privatizedSymbols;
llvm::SetVector<const Fortran::semantics::Symbol *> defaultSymbols;
llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInNestedRegions;
llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInParentRegions;
SymbolSet privatizedSymbols;
SymbolSet defaultSymbols;
SymbolSet symbolsInNestedRegions;
SymbolSet symbolsInParentRegions;
Fortran::lower::AbstractConverter &converter;
fir::FirOpBuilder &firOpBuilder;
const Fortran::parser::OmpClauseList &opClauseList;
Expand Down Expand Up @@ -182,35 +184,54 @@ class DataSharingProcessor {
: hasLastPrivateOp(false), converter(converter),
firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
eval(eval) {}
// Privatisation is split into two steps.
// Step1 performs cloning of all privatisation clauses and copying for
// firstprivates. Step1 is performed at the place where process/processStep1
// Privatisation is split into 3 steps:
//
// * Step1: collects all symbols that should be privatized.
//
// * Step2: performs cloning of all privatisation clauses and copying for
// firstprivates. Step2 is performed at the place where process/processStep2
// is called. This is usually inside the Operation corresponding to the OpenMP
// construct, for looping constructs this is just before the Operation. The
// split into two steps was performed basically to be able to call
// privatisation for looping constructs before the operation is created since
// the bounds of the MLIR OpenMP operation can be privatised.
// Step2 performs the copying for lastprivates and requires knowledge of the
// MLIR operation to insert the last private update. Step2 adds
// construct, for looping constructs this is just before the Operation.
//
// * Step3: performs the copying for lastprivates and requires knowledge of
// the MLIR operation to insert the last private update. Step3 adds
// dealocation code as well.
//
// The split was performed for the following reasons:
//
// 1. Step1 was split so that the `target` op knows which symbols should not
// be mapped into the target region due to being `private`. The implicit
// mapping happens before the op body is generated so we need to to collect
// the private symbols first and then later in the body actually privatize
// them.
//
// 2. Step2 was split in order to call privatisation for looping constructs
// before the operation is created since the bounds of the MLIR OpenMP
// operation can be privatised.
void processStep1();
void processStep2(mlir::Operation *op, bool isLoop);
void processStep2();
void processStep3(mlir::Operation *op, bool isLoop);

void setLoopIV(mlir::Value iv) {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}

const SymbolSet &getPrivatizedSymbols() const { return privatizedSymbols; }
};

void DataSharingProcessor::processStep1() {
collectSymbolsForPrivatization();
collectDefaultSymbols();
}

void DataSharingProcessor::processStep2() {
privatize();
defaultPrivatize();
insertBarrier();
}

void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) {
insPt = firOpBuilder.saveInsertionPoint();
copyLastPrivatize(op);
firOpBuilder.restoreInsertionPoint(insPt);
Expand Down Expand Up @@ -2306,11 +2327,12 @@ static void createBodyOfOp(
if (!dsp) {
DataSharingProcessor proc(converter, *clauses, eval);
proc.processStep1();
proc.processStep2(op, isLoop);
proc.processStep2();
proc.processStep3(op, isLoop);
} else {
if (isLoop && args.size() > 0)
dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
dsp->processStep2(op, isLoop);
dsp->processStep3(op, isLoop);
}

if (storeOp)
Expand Down Expand Up @@ -2648,7 +2670,9 @@ static void genBodyOfTargetOp(
const llvm::SmallVector<mlir::Type> &mapSymTypes,
const llvm::SmallVector<mlir::Location> &mapSymLocs,
const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
const mlir::Location &currentLocation) {
const mlir::Location &currentLocation,
const Fortran::parser::OmpClauseList &clauseList,
DataSharingProcessor &dsp) {
assert(mapSymTypes.size() == mapSymLocs.size());

fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
Expand All @@ -2657,6 +2681,8 @@ static void genBodyOfTargetOp(
auto *regionBlock =
firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);

dsp.processStep2();

// Clones the `bounds` placing them inside the target region and returns them.
auto cloneBound = [&](mlir::Value bound) {
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
Expand Down Expand Up @@ -2811,8 +2837,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
cp.processNowait(nowaitAttr);
cp.processMap(currentLocation, directive, semanticsContext, stmtCtx,
mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols);
cp.processTODO<Fortran::parser::OmpClause::Private,
Fortran::parser::OmpClause::Depend,
cp.processTODO<Fortran::parser::OmpClause::Depend,
Fortran::parser::OmpClause::Firstprivate,
Fortran::parser::OmpClause::IsDevicePtr,
Fortran::parser::OmpClause::HasDeviceAddr,
Expand All @@ -2823,11 +2848,19 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
Fortran::parser::OmpClause::Defaultmap>(
currentLocation, llvm::omp::Directive::OMPD_target);

DataSharingProcessor dsp(converter, clauseList, eval);
dsp.processStep1();

// 5.8.1 Implicit Data-Mapping Attribute Rules
// The following code follows the implicit data-mapping rules to map all the
// symbols used inside the region that have not been explicitly mapped using
// the map clause.
// symbols used inside the region that do not have explicit data-environment
// attribute clauses (neither data-sharing; e.g. `private`, nor `map`
// clauses).
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) {
if (dsp.getPrivatizedSymbols().contains(&sym)) {
return;
}

if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) {
mlir::Value baseOp = converter.getSymbolAddress(sym);
if (!baseOp)
Expand Down Expand Up @@ -2893,7 +2926,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
nowaitAttr, mapOperands);

genBodyOfTargetOp(converter, eval, genNested, targetOp, mapSymTypes,
mapSymLocs, mapSymbols, currentLocation);
mapSymLocs, mapSymbols, currentLocation, clauseList, dsp);

return targetOp;
}
Expand Down Expand Up @@ -3127,6 +3160,7 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
DataSharingProcessor dsp(converter, loopOpClauseList, eval);
dsp.processStep1();
dsp.processStep2();

Fortran::lower::StatementContext stmtCtx;
mlir::Value scheduleChunkClauseOperand, ifClauseOperand;
Expand Down Expand Up @@ -3179,6 +3213,7 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
DataSharingProcessor dsp(converter, beginClauseList, eval);
dsp.processStep1();
dsp.processStep2();

Fortran::lower::StatementContext stmtCtx;
mlir::Value scheduleChunkClauseOperand;
Expand Down
30 changes: 30 additions & 0 deletions flang/test/Lower/OpenMP/target_private.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
!Test data-sharing attribute clauses for the `target` directive.

!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s

!CHECK-LABEL: func.func @_QPomp_target_private()
subroutine omp_target_private
implicit none
integer :: x(1)

!$omp target private(x)
x(1) = 42
!$omp end target
!CHECK: omp.target {
!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x",
!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"}
!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1>
!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]])
!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} :
!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) ->
!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32
!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index
!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]])
!CHECK-SAME: : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref<i32>
!CHECK-NEXT: omp.terminator
!CHECK-NEXT: }

end subroutine omp_target_private
29 changes: 29 additions & 0 deletions openmp/libomptarget/test/offloading/fortran/target_private.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
! Basic offloading test with a target region
! REQUIRES: flang
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
! UNSUPPORTED: aarch64-unknown-linux-gnu
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
! UNSUPPORTED: x86_64-pc-linux-gnu
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO

! RUN: %libomptarget-compile-fortran-generic
! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic
program target_update
implicit none
integer :: x(1)
integer :: y(1)

x(1) = 42

!$omp target private(x) map(tofrom: y)
x(1) = 84
y(1) = x(1)
!$omp end target

print *, "x =", x(1)
print *, "y =", y(1)

end program target_update
! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}}
! CHECK: x = 42
! CHECK: y = 84