Skip to content

Commit 75291f1

Browse files
committed
[flang][OpenMP] Lower target .. private(..) to omp.private ops
Extends delayed privatization support to `target .. private(..)`. With this PR, `private` is supported for `target` **only** in delayed privatization mode.
1 parent 763b96c commit 75291f1

File tree

5 files changed

+183
-35
lines changed

5 files changed

+183
-35
lines changed

flang/lib/Lower/OpenMP/DataSharingProcessor.cpp

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,13 @@ DataSharingProcessor::DataSharingProcessor(
4848
}
4949

5050
void DataSharingProcessor::processStep1(
51-
mlir::omp::PrivateClauseOps *clauseOps,
52-
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
51+
mlir::omp::PrivateClauseOps *clauseOps) {
5352
collectSymbolsForPrivatization();
5453
collectDefaultSymbols();
5554
collectImplicitSymbols();
5655
collectPreDeterminedSymbols();
5756

58-
privatize(clauseOps, privateSyms);
57+
privatize(clauseOps);
5958

6059
insertBarrier();
6160
}
@@ -415,16 +414,14 @@ void DataSharingProcessor::collectPreDeterminedSymbols() {
415414
preDeterminedSymbols);
416415
}
417416

418-
void DataSharingProcessor::privatize(
419-
mlir::omp::PrivateClauseOps *clauseOps,
420-
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
417+
void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps) {
421418
for (const semantics::Symbol *sym : allPrivatizedSymbols) {
422419
if (const auto *commonDet =
423420
sym->detailsIf<semantics::CommonBlockDetails>()) {
424421
for (const auto &mem : commonDet->objects())
425-
doPrivatize(&*mem, clauseOps, privateSyms);
422+
doPrivatize(&*mem, clauseOps);
426423
} else
427-
doPrivatize(sym, clauseOps, privateSyms);
424+
doPrivatize(sym, clauseOps);
428425
}
429426
}
430427

@@ -441,9 +438,8 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) {
441438
}
442439
}
443440

444-
void DataSharingProcessor::doPrivatize(
445-
const semantics::Symbol *sym, mlir::omp::PrivateClauseOps *clauseOps,
446-
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
441+
void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
442+
mlir::omp::PrivateClauseOps *clauseOps) {
447443
if (!useDelayedPrivatization) {
448444
cloneSymbol(sym);
449445
copyFirstPrivateSymbol(sym);
@@ -548,9 +544,6 @@ void DataSharingProcessor::doPrivatize(
548544
clauseOps->privateVars.push_back(hsb.getAddr());
549545
}
550546

551-
if (privateSyms)
552-
privateSyms->push_back(sym);
553-
554547
symToPrivatizer[sym] = privatizerOp;
555548
}
556549

flang/lib/Lower/OpenMP/DataSharingProcessor.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -103,18 +103,15 @@ class DataSharingProcessor {
103103
void collectDefaultSymbols();
104104
void collectImplicitSymbols();
105105
void collectPreDeterminedSymbols();
106-
void privatize(mlir::omp::PrivateClauseOps *clauseOps,
107-
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
106+
void privatize(mlir::omp::PrivateClauseOps *clauseOps);
108107
void defaultPrivatize(
109108
mlir::omp::PrivateClauseOps *clauseOps,
110109
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
111110
void implicitPrivatize(
112111
mlir::omp::PrivateClauseOps *clauseOps,
113112
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
114-
void
115-
doPrivatize(const semantics::Symbol *sym,
116-
mlir::omp::PrivateClauseOps *clauseOps,
117-
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
113+
void doPrivatize(const semantics::Symbol *sym,
114+
mlir::omp::PrivateClauseOps *clauseOps);
118115
void copyLastPrivatize(mlir::Operation *op);
119116
void insertLastPrivateCompare(mlir::Operation *op);
120117
void cloneSymbol(const semantics::Symbol *sym);
@@ -145,15 +142,18 @@ class DataSharingProcessor {
145142
// Step2 performs the copying for lastprivates and requires knowledge of the
146143
// MLIR operation to insert the last private update. Step2 adds
147144
deallocation code as well.
148-
void processStep1(
149-
mlir::omp::PrivateClauseOps *clauseOps = nullptr,
150-
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms = nullptr);
145+
void processStep1(mlir::omp::PrivateClauseOps *clauseOps = nullptr);
151146
void processStep2(mlir::Operation *op, bool isLoop);
152147

153148
void setLoopIV(mlir::Value iv) {
154149
assert(!loopIV && "Loop iteration variable already set");
155150
loopIV = iv;
156151
}
152+
153+
const llvm::SetVector<const semantics::Symbol *> &
154+
getAllSymbolsToPrivatize() const {
155+
return allPrivatizedSymbols;
156+
}
157157
};
158158

159159
} // namespace omp

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -758,15 +758,33 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
758758
llvm::ArrayRef<const semantics::Symbol *> mapSyms,
759759
llvm::ArrayRef<mlir::Location> mapSymLocs,
760760
llvm::ArrayRef<mlir::Type> mapSymTypes,
761+
DataSharingProcessor &dsp,
761762
const mlir::Location &currentLocation,
762763
const ConstructQueue &queue, ConstructQueue::iterator item) {
763764
assert(mapSymTypes.size() == mapSymLocs.size());
764765

765766
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
766767
mlir::Region &region = targetOp.getRegion();
767-
768-
auto *regionBlock =
769-
firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
768+
mlir::OperandRange privateVars = targetOp.getPrivateVars();
769+
770+
llvm::SmallVector<mlir::Type> allRegionArgTypes;
771+
allRegionArgTypes.reserve(mapSymTypes.size() +
772+
targetOp.getPrivateVars().size());
773+
llvm::transform(mapSymTypes, std::back_inserter(allRegionArgTypes),
774+
[](mlir::Type t) { return t; });
775+
llvm::transform(privateVars, std::back_inserter(allRegionArgTypes),
776+
[](mlir::Value v) { return v.getType(); });
777+
778+
llvm::SmallVector<mlir::Location> allRegionArgLocs;
779+
allRegionArgTypes.reserve(mapSymTypes.size() +
780+
targetOp.getPrivateVars().size());
781+
llvm::transform(mapSymLocs, std::back_inserter(allRegionArgLocs),
782+
[](mlir::Location l) { return l; });
783+
llvm::transform(privateVars, std::back_inserter(allRegionArgLocs),
784+
[](mlir::Value v) { return v.getLoc(); });
785+
786+
auto *regionBlock = firOpBuilder.createBlock(&region, {}, allRegionArgTypes,
787+
allRegionArgLocs);
770788

771789
// Clones the `bounds` placing them inside the target region and returns them.
772790
auto cloneBound = [&](mlir::Value bound) {
@@ -830,6 +848,20 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
830848
});
831849
}
832850

851+
for (auto [argIndex, argSymbol] :
852+
llvm::enumerate(dsp.getAllSymbolsToPrivatize())) {
853+
argIndex = mapSyms.size() + argIndex;
854+
855+
const mlir::BlockArgument &arg = region.getArgument(argIndex);
856+
converter.bindSymbol(*argSymbol,
857+
hlfir::translateToExtendedValue(
858+
currentLocation, firOpBuilder, hlfir::Entity{arg},
859+
/*contiguousHint=*/
860+
evaluate::IsSimplyContiguous(
861+
*argSymbol, converter.getFoldingContext()))
862+
.first);
863+
}
864+
833865
// Check if cloning the bounds introduced any dependency on the outer region.
834866
// If so, then either clone them as well if they are MemoryEffectFree, or else
835867
// copy them to a new temporary and add them to the map and block_argument
@@ -907,6 +939,8 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
907939
} else {
908940
genNestedEvaluations(converter, eval);
909941
}
942+
943+
dsp.processStep2(targetOp, /*isLoop=*/false);
910944
}
911945

912946
template <typename OpTy, typename... Args>
@@ -1048,15 +1082,18 @@ static void genTargetClauses(
10481082
devicePtrSyms);
10491083
cp.processMap(loc, stmtCtx, clauseOps, &mapSyms, &mapLocs, &mapTypes);
10501084
cp.processThreadLimit(stmtCtx, clauseOps);
1051-
// TODO Support delayed privatization.
10521085

10531086
if (processHostOnlyClauses)
10541087
cp.processNowait(clauseOps);
10551088

10561089
cp.processTODO<clause::Allocate, clause::Defaultmap, clause::Firstprivate,
1057-
clause::InReduction, clause::Private, clause::Reduction,
1090+
clause::InReduction, clause::Reduction,
10581091
clause::UsesAllocators>(loc,
10591092
llvm::omp::Directive::OMPD_target);
1093+
1094+
// `target private(..)` is only supported in delayed privatization mode.
1095+
if (!enableDelayedPrivatization)
1096+
cp.processTODO<clause::Private>(loc, llvm::omp::Directive::OMPD_target);
10601097
}
10611098

10621099
static void genTargetDataClauses(
@@ -1289,7 +1326,6 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
12891326
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
12901327
lower::StatementContext stmtCtx;
12911328
mlir::omp::ParallelClauseOps clauseOps;
1292-
llvm::SmallVector<const semantics::Symbol *> privateSyms;
12931329
llvm::SmallVector<mlir::Type> reductionTypes;
12941330
llvm::SmallVector<const semantics::Symbol *> reductionSyms;
12951331
genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
@@ -1319,7 +1355,7 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
13191355
/*useDelayedPrivatization=*/true, &symTable);
13201356

13211357
if (privatize)
1322-
dsp.processStep1(&clauseOps, &privateSyms);
1358+
dsp.processStep1(&clauseOps);
13231359

13241360
auto genRegionEntryCB = [&](mlir::Operation *op) {
13251361
auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);
@@ -1344,9 +1380,10 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
13441380
privateVarLocs);
13451381

13461382
llvm::SmallVector<const semantics::Symbol *> allSymbols = reductionSyms;
1347-
allSymbols.append(privateSyms);
1383+
allSymbols.append(dsp.getAllSymbolsToPrivatize().begin(),
1384+
dsp.getAllSymbolsToPrivatize().end());
1385+
13481386
for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
1349-
fir::ExtendedValue hostExV = converter.getSymbolExtendedValue(*arg);
13501387
converter.bindSymbol(*arg, hlfir::translateToExtendedValue(
13511388
loc, firOpBuilder, hlfir::Entity{prv},
13521389
/*contiguousHint=*/
@@ -1541,11 +1578,22 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
15411578
deviceAddrLocs, deviceAddrTypes, devicePtrSyms,
15421579
devicePtrLocs, devicePtrTypes);
15431580

1581+
llvm::SmallVector<const semantics::Symbol *> privateSyms;
1582+
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
1583+
/*shouldCollectPreDeterminedSymbols=*/
1584+
lower::omp::isLastItemInQueue(item, queue),
1585+
/*useDelayedPrivatization=*/true, &symTable);
1586+
dsp.processStep1(&clauseOps);
1587+
15441588
// 5.8.1 Implicit Data-Mapping Attribute Rules
15451589
// The following code follows the implicit data-mapping rules to map all the
1546-
// symbols used inside the region that have not been explicitly mapped using
1547-
// the map clause.
1590+
// symbols used inside the region that do not have explicit data-environment
1591+
// attribute clauses (neither data-sharing; e.g. `private`, nor `map`
1592+
// clauses).
15481593
auto captureImplicitMap = [&](const semantics::Symbol &sym) {
1594+
if (dsp.getAllSymbolsToPrivatize().contains(&sym))
1595+
return;
1596+
15491597
if (llvm::find(mapSyms, &sym) == mapSyms.end()) {
15501598
mlir::Value baseOp = converter.getSymbolAddress(sym);
15511599
if (!baseOp)
@@ -1632,7 +1680,7 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
16321680

16331681
auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
16341682
genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms,
1635-
mapLocs, mapTypes, loc, queue, item);
1683+
mapLocs, mapTypes, dsp, loc, queue, item);
16361684
return targetOp;
16371685
}
16381686

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
2+
! RUN: -o - %s 2>&1 | FileCheck %s
3+
! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \
4+
! RUN: | FileCheck %s
5+
6+
subroutine target_allocatable
7+
implicit none
8+
integer, allocatable :: alloc_var
9+
10+
!$omp target private(alloc_var)
11+
alloc_var = 10
12+
!$omp end target
13+
end subroutine target_allocatable
14+
15+
! CHECK-LABEL: omp.private {type = private}
16+
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM:.*]] :
17+
! CHECK-SAME: [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
18+
! CHECK: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
19+
! CHECK: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "alloc_var", {{.*}}}
20+
21+
! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
22+
! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
23+
! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
24+
! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
25+
! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
26+
27+
! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
28+
! CHECK: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, {{.*}}}
29+
! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
30+
! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
31+
! CHECK-NEXT: } else {
32+
! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
33+
! CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
34+
! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
35+
! CHECK-NEXT: }
36+
37+
! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
38+
! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
39+
40+
! CHECK-NEXT: } dealloc {
41+
! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
42+
43+
! CHECK-NEXT: %[[PRIV_VAL:.*]] = fir.load %[[PRIV_ARG]]
44+
! CHECK-NEXT: %[[PRIV_ADDR:.*]] = fir.box_addr %[[PRIV_VAL]]
45+
! CHECK-NEXT: %[[PRIV_ADDR_I64:.*]] = fir.convert %[[PRIV_ADDR]]
46+
! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
47+
! CHECK-NEXT: %[[PRIV_NULL_COND:.*]] = arith.cmpi ne, %[[PRIV_ADDR_I64]], %[[C0]] : i64
48+
49+
! CHECK-NEXT: fir.if %[[PRIV_NULL_COND]] {
50+
! CHECK: %[[PRIV_VAL_2:.*]] = fir.load %[[PRIV_ARG]]
51+
! CHECK-NEXT: %[[PRIV_ADDR_2:.*]] = fir.box_addr %[[PRIV_VAL_2]]
52+
! CHECK-NEXT: fir.freemem %[[PRIV_ADDR_2]]
53+
! CHECK-NEXT: %[[ZEROS:.*]] = fir.zero_bits
54+
! CHECK-NEXT: %[[ZEROS_BOX:.*]] = fir.embox %[[ZEROS]]
55+
! CHECK-NEXT: fir.store %[[ZEROS_BOX]] to %[[PRIV_ARG]]
56+
! CHECK-NEXT: }
57+
58+
! CHECK-NEXT: omp.yield
59+
! CHECK-NEXT: }
60+
61+
62+
! CHECK-LABEL: func.func @_QPtarget_allocatable() {
63+
64+
! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>>
65+
! CHECK-SAME: {bindc_name = "alloc_var", {{.*}}}
66+
! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]]
67+
68+
! CHECK: omp.target private(
69+
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} : [[TYPE]]) {
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
2+
! RUN: -o - %s 2>&1 | FileCheck %s
3+
! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \
4+
! RUN: | FileCheck %s
5+
6+
subroutine target_simple
7+
implicit none
8+
integer :: simple_var
9+
10+
!$omp target private(simple_var)
11+
simple_var = 10
12+
!$omp end target
13+
end subroutine target_simple
14+
15+
! CHECK-LABEL: omp.private {type = private}
16+
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
17+
! CHECK: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<i32>):
18+
! CHECK: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "simple_var", {{.*}}}
19+
! CHECK: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
20+
! CHECK: omp.yield(%[[PRIV_DECL]]#0 : !fir.ref<i32>)
21+
! CHECK: }
22+
23+
! CHECK-LABEL: func.func @_QPtarget_simple() {
24+
! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca i32 {bindc_name = "simple_var", {{.*}}}
25+
! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]]
26+
27+
! CHECK: omp.target private(
28+
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} : !fir.ref<i32>) {
29+
! CHECK: ^bb0(%[[REG_ARG:.*]]: !fir.ref<i32>):
30+
! CHECK: %[[REG_DECL:.*]]:2 = hlfir.declare %[[REG_ARG]]
31+
! CHECK: %[[C10:.*]] = arith.constant 10
32+
! CHECK: hlfir.assign %[[C10]] to %[[REG_DECL]]#0
33+
! CHECK: omp.terminator
34+
! CHECK: }
35+
36+
! CHECK: return
37+
! CHECK: }
38+

0 commit comments

Comments
 (0)