@@ -40,6 +40,12 @@ static llvm::cl::opt<bool> treatIndexAsSection(
40
40
llvm::cl::desc (" In the OpenMP data clauses treat `a(N)` as `a(N:N)`." ),
41
41
llvm::cl::init(true ));
42
42
43
+ static llvm::cl::opt<bool > enableDelayedPrivatization (
44
+ " openmp-enable-delayed-privatization" ,
45
+ llvm::cl::desc (
46
+ " Emit `[first]private` variables as clauses on the MLIR ops." ),
47
+ llvm::cl::init(false ));
48
+
43
49
using DeclareTargetCapturePair =
44
50
std::pair<mlir::omp::DeclareTargetCaptureClause,
45
51
Fortran::semantics::Symbol>;
@@ -147,6 +153,14 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
147
153
// ===----------------------------------------------------------------------===//
148
154
149
155
class DataSharingProcessor {
156
+ public:
157
+ struct DelayedPrivatizationInfo {
158
+ llvm::SmallVector<mlir::SymbolRefAttr> privatizers;
159
+ llvm::SmallVector<mlir::Value> hostAddresses;
160
+ llvm::SmallVector<const Fortran::semantics::Symbol *> hostSymbols;
161
+ };
162
+
163
+ private:
150
164
bool hasLastPrivateOp;
151
165
mlir::OpBuilder::InsertPoint lastPrivIP;
152
166
mlir::OpBuilder::InsertPoint insPt;
@@ -161,6 +175,11 @@ class DataSharingProcessor {
161
175
const Fortran::parser::OmpClauseList &opClauseList;
162
176
Fortran::lower::pft::Evaluation &eval;
163
177
178
+ bool useDelayedPrivatization;
179
+ Fortran::lower::SymMap *symTable;
180
+
181
+ DelayedPrivatizationInfo delayedPrivatizationInfo;
182
+
164
183
bool needBarrier ();
165
184
void collectSymbols (Fortran::semantics::Symbol::Flag flag);
166
185
void collectOmpObjectListSymbol (
@@ -171,21 +190,28 @@ class DataSharingProcessor {
171
190
void collectDefaultSymbols ();
172
191
void privatize ();
173
192
void defaultPrivatize ();
193
+ void doPrivatize (const Fortran::semantics::Symbol *sym);
174
194
void copyLastPrivatize (mlir::Operation *op);
175
195
void insertLastPrivateCompare (mlir::Operation *op);
176
196
void cloneSymbol (const Fortran::semantics::Symbol *sym);
177
- void copyFirstPrivateSymbol (const Fortran::semantics::Symbol *sym);
197
+ void
198
+ copyFirstPrivateSymbol (const Fortran::semantics::Symbol *sym,
199
+ mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr );
178
200
void copyLastPrivateSymbol (const Fortran::semantics::Symbol *sym,
179
201
mlir::OpBuilder::InsertPoint *lastPrivIP);
180
202
void insertDeallocs ();
181
203
182
204
public:
183
205
DataSharingProcessor (Fortran::lower::AbstractConverter &converter,
184
206
const Fortran::parser::OmpClauseList &opClauseList,
185
- Fortran::lower::pft::Evaluation &eval)
207
+ Fortran::lower::pft::Evaluation &eval,
208
+ bool useDelayedPrivatization = false ,
209
+ Fortran::lower::SymMap *symTable = nullptr )
186
210
: hasLastPrivateOp(false ), converter(converter),
187
211
firOpBuilder (converter.getFirOpBuilder()), opClauseList(opClauseList),
188
- eval(eval) {}
212
+ eval(eval), useDelayedPrivatization(useDelayedPrivatization),
213
+ symTable(symTable) {}
214
+
189
215
// Privatisation is split into two steps.
190
216
// Step1 performs cloning of all privatisation clauses and copying for
191
217
// firstprivates. Step1 is performed at the place where process/processStep1
@@ -204,6 +230,10 @@ class DataSharingProcessor {
204
230
assert (!loopIV && " Loop iteration variable already set" );
205
231
loopIV = iv;
206
232
}
233
+
234
+ const DelayedPrivatizationInfo &getDelayedPrivatizationInfo () const {
235
+ return delayedPrivatizationInfo;
236
+ }
207
237
};
208
238
209
239
void DataSharingProcessor::processStep1 () {
@@ -250,9 +280,10 @@ void DataSharingProcessor::cloneSymbol(const Fortran::semantics::Symbol *sym) {
250
280
}
251
281
252
282
void DataSharingProcessor::copyFirstPrivateSymbol (
253
- const Fortran::semantics::Symbol *sym) {
283
+ const Fortran::semantics::Symbol *sym,
284
+ mlir::OpBuilder::InsertPoint *copyAssignIP) {
254
285
if (sym->test (Fortran::semantics::Symbol::Flag::OmpFirstPrivate))
255
- converter.copyHostAssociateVar (*sym);
286
+ converter.copyHostAssociateVar (*sym, copyAssignIP );
256
287
}
257
288
258
289
void DataSharingProcessor::copyLastPrivateSymbol (
@@ -491,14 +522,10 @@ void DataSharingProcessor::privatize() {
491
522
for (const Fortran::semantics::Symbol *sym : privatizedSymbols) {
492
523
if (const auto *commonDet =
493
524
sym->detailsIf <Fortran::semantics::CommonBlockDetails>()) {
494
- for (const auto &mem : commonDet->objects ()) {
495
- cloneSymbol (&*mem);
496
- copyFirstPrivateSymbol (&*mem);
497
- }
498
- } else {
499
- cloneSymbol (sym);
500
- copyFirstPrivateSymbol (sym);
501
- }
525
+ for (const auto &mem : commonDet->objects ())
526
+ doPrivatize (&*mem);
527
+ } else
528
+ doPrivatize (sym);
502
529
}
503
530
}
504
531
@@ -522,11 +549,96 @@ void DataSharingProcessor::defaultPrivatize() {
522
549
!sym->GetUltimate ().has <Fortran::semantics::NamelistDetails>() &&
523
550
!symbolsInNestedRegions.contains (sym) &&
524
551
!symbolsInParentRegions.contains (sym) &&
525
- !privatizedSymbols.contains (sym)) {
552
+ !privatizedSymbols.contains (sym))
553
+ doPrivatize (sym);
554
+ }
555
+ }
556
+
557
+ void DataSharingProcessor::doPrivatize (const Fortran::semantics::Symbol *sym) {
558
+ if (!useDelayedPrivatization) {
559
+ cloneSymbol (sym);
560
+ copyFirstPrivateSymbol (sym);
561
+ return ;
562
+ }
563
+
564
+ Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol (*sym);
565
+ assert (hsb && " Host symbol box not found" );
566
+
567
+ mlir::Type symType = hsb.getAddr ().getType ();
568
+ mlir::Location symLoc = hsb.getAddr ().getLoc ();
569
+ std::string privatizerName = sym->name ().ToString () + " .privatizer" ;
570
+ bool isFirstPrivate =
571
+ sym->test (Fortran::semantics::Symbol::Flag::OmpFirstPrivate);
572
+
573
+ mlir::omp::PrivateClauseOp privatizerOp = [&]() {
574
+ auto moduleOp = firOpBuilder.getModule ();
575
+
576
+ auto uniquePrivatizerName = fir::getTypeAsString (
577
+ symType, converter.getKindMap (),
578
+ sym->name ().ToString () +
579
+ (isFirstPrivate ? " _firstprivate" : " _private" ));
580
+
581
+ if (auto existingPrivatizer =
582
+ moduleOp.lookupSymbol <mlir::omp::PrivateClauseOp>(
583
+ uniquePrivatizerName))
584
+ return existingPrivatizer;
585
+
586
+ auto ip = firOpBuilder.saveInsertionPoint ();
587
+ firOpBuilder.setInsertionPoint (&moduleOp.getBodyRegion ().front (),
588
+ moduleOp.getBodyRegion ().front ().begin ());
589
+ auto result = firOpBuilder.create <mlir::omp::PrivateClauseOp>(
590
+ symLoc, uniquePrivatizerName, symType,
591
+ isFirstPrivate ? mlir::omp::DataSharingClauseType ::FirstPrivate
592
+ : mlir::omp::DataSharingClauseType::Private);
593
+
594
+ symTable->pushScope ();
595
+
596
+ // Populate the `alloc` region.
597
+ {
598
+ mlir::Region &allocRegion = result.getAllocRegion ();
599
+ mlir::Block *allocEntryBlock = firOpBuilder.createBlock (
600
+ &allocRegion, /* insertPt=*/ {}, symType, symLoc);
601
+
602
+ firOpBuilder.setInsertionPointToEnd (allocEntryBlock);
603
+ symTable->addSymbol (*sym, allocRegion.getArgument (0 ));
604
+ symTable->pushScope ();
526
605
cloneSymbol (sym);
527
- copyFirstPrivateSymbol (sym);
606
+ firOpBuilder.create <mlir::omp::YieldOp>(
607
+ hsb.getAddr ().getLoc (),
608
+ symTable->shallowLookupSymbol (*sym).getAddr ());
609
+ symTable->popScope ();
528
610
}
529
- }
611
+
612
+ // Populate the `copy` region if this is a `firstprivate`.
613
+ if (isFirstPrivate) {
614
+ mlir::Region &copyRegion = result.getCopyRegion ();
615
+ // First block argument corresponding to the original/host value while
616
+ // second block argument corresponding to the privatized value.
617
+ mlir::Block *copyEntryBlock = firOpBuilder.createBlock (
618
+ &copyRegion, /* insertPt=*/ {}, {symType, symType}, {symLoc, symLoc});
619
+ firOpBuilder.setInsertionPointToEnd (copyEntryBlock);
620
+ symTable->addSymbol (*sym, copyRegion.getArgument (0 ),
621
+ /* force=*/ true );
622
+ symTable->pushScope ();
623
+ symTable->addSymbol (*sym, copyRegion.getArgument (1 ));
624
+ auto ip = firOpBuilder.saveInsertionPoint ();
625
+ copyFirstPrivateSymbol (sym, &ip);
626
+
627
+ firOpBuilder.create <mlir::omp::YieldOp>(
628
+ hsb.getAddr ().getLoc (),
629
+ symTable->shallowLookupSymbol (*sym).getAddr ());
630
+ symTable->popScope ();
631
+ }
632
+
633
+ symTable->popScope ();
634
+ firOpBuilder.restoreInsertionPoint (ip);
635
+ return result;
636
+ }();
637
+
638
+ delayedPrivatizationInfo.privatizers .push_back (
639
+ mlir::SymbolRefAttr::get (privatizerOp));
640
+ delayedPrivatizationInfo.hostAddresses .push_back (hsb.getAddr ());
641
+ delayedPrivatizationInfo.hostSymbols .push_back (sym);
530
642
}
531
643
532
644
// ===----------------------------------------------------------------------===//
@@ -2585,6 +2697,7 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
2585
2697
2586
2698
static mlir::omp::ParallelOp
2587
2699
genParallelOp (Fortran::lower::AbstractConverter &converter,
2700
+ Fortran::lower::SymMap &symTable,
2588
2701
Fortran::semantics::SemanticsContext &semaCtx,
2589
2702
Fortran::lower::pft::Evaluation &eval, bool genNested,
2590
2703
mlir::Location currentLocation,
@@ -2617,31 +2730,99 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
2617
2730
auto reductionCallback = [&](mlir::Operation *op) {
2618
2731
llvm::SmallVector<mlir::Location> locs (reductionVars.size (),
2619
2732
currentLocation);
2620
- auto block = converter.getFirOpBuilder ().createBlock (&op->getRegion (0 ), {},
2621
- reductionTypes, locs);
2733
+ auto * block = converter.getFirOpBuilder ().createBlock (&op->getRegion (0 ), {},
2734
+ reductionTypes, locs);
2622
2735
for (auto [arg, prv] :
2623
2736
llvm::zip_equal (reductionSymbols, block->getArguments ())) {
2624
2737
converter.bindSymbol (*arg, prv);
2625
2738
}
2626
2739
return reductionSymbols;
2627
2740
};
2628
2741
2629
- return genOpWithBody<mlir::omp::ParallelOp>(
2742
+ OpWithBodyGenInfo genInfo =
2630
2743
OpWithBodyGenInfo (converter, semaCtx, currentLocation, eval)
2631
2744
.setGenNested (genNested)
2632
2745
.setOuterCombined (outerCombined)
2633
2746
.setClauses (&clauseList)
2634
2747
.setReductions (&reductionSymbols, &reductionTypes)
2635
- .setGenRegionEntryCb (reductionCallback),
2748
+ .setGenRegionEntryCb (reductionCallback);
2749
+
2750
+ if (!enableDelayedPrivatization) {
2751
+ return genOpWithBody<mlir::omp::ParallelOp>(
2752
+ genInfo,
2753
+ /* resultTypes=*/ mlir::TypeRange (), ifClauseOperand,
2754
+ numThreadsClauseOperand, allocateOperands, allocatorOperands,
2755
+ reductionVars,
2756
+ reductionDeclSymbols.empty ()
2757
+ ? nullptr
2758
+ : mlir::ArrayAttr::get (converter.getFirOpBuilder ().getContext (),
2759
+ reductionDeclSymbols),
2760
+ procBindKindAttr, /* private_vars=*/ llvm::SmallVector<mlir::Value>{},
2761
+ /* privatizers=*/ nullptr );
2762
+ }
2763
+
2764
+ bool privatize = !outerCombined;
2765
+ DataSharingProcessor dsp (converter, clauseList, eval,
2766
+ /* useDelayedPrivatization=*/ true , &symTable);
2767
+
2768
+ if (privatize)
2769
+ dsp.processStep1 ();
2770
+
2771
+ const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo ();
2772
+
2773
+ auto genRegionEntryCB = [&](mlir::Operation *op) {
2774
+ auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);
2775
+
2776
+ llvm::SmallVector<mlir::Location> reductionLocs (reductionVars.size (),
2777
+ currentLocation);
2778
+
2779
+ auto privateVars = parallelOp.getPrivateVars ();
2780
+ auto &region = parallelOp.getRegion ();
2781
+
2782
+ llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes;
2783
+ privateVarTypes.reserve (privateVars.size ());
2784
+ llvm::transform (privateVars, std::back_inserter (privateVarTypes),
2785
+ [](mlir::Value v) { return v.getType (); });
2786
+
2787
+ llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs;
2788
+ privateVarLocs.reserve (privateVars.size ());
2789
+ llvm::transform (privateVars, std::back_inserter (privateVarLocs),
2790
+ [](mlir::Value v) { return v.getLoc (); });
2791
+
2792
+ converter.getFirOpBuilder ().createBlock (&region, /* insertPt=*/ {},
2793
+ privateVarTypes, privateVarLocs);
2794
+
2795
+ llvm::SmallVector<const Fortran::semantics::Symbol *> allSymbols =
2796
+ reductionSymbols;
2797
+ allSymbols.append (delayedPrivatizationInfo.hostSymbols );
2798
+ for (auto [arg, prv] : llvm::zip_equal (allSymbols, region.getArguments ())) {
2799
+ converter.bindSymbol (*arg, prv);
2800
+ }
2801
+
2802
+ return allSymbols;
2803
+ };
2804
+
2805
+ // TODO Merge with the reduction CB.
2806
+ genInfo.setGenRegionEntryCb (genRegionEntryCB).setDataSharingProcessor (&dsp);
2807
+
2808
+ llvm::SmallVector<mlir::Attribute> privatizers (
2809
+ delayedPrivatizationInfo.privatizers .begin (),
2810
+ delayedPrivatizationInfo.privatizers .end ());
2811
+
2812
+ return genOpWithBody<mlir::omp::ParallelOp>(
2813
+ genInfo,
2636
2814
/* resultTypes=*/ mlir::TypeRange (), ifClauseOperand,
2637
2815
numThreadsClauseOperand, allocateOperands, allocatorOperands,
2638
2816
reductionVars,
2639
2817
reductionDeclSymbols.empty ()
2640
2818
? nullptr
2641
2819
: mlir::ArrayAttr::get (converter.getFirOpBuilder ().getContext (),
2642
2820
reductionDeclSymbols),
2643
- procBindKindAttr, /* private_vars=*/ llvm::SmallVector<mlir::Value>{},
2644
- /* privatizers=*/ nullptr );
2821
+ procBindKindAttr, delayedPrivatizationInfo.hostAddresses ,
2822
+ delayedPrivatizationInfo.privatizers .empty ()
2823
+ ? nullptr
2824
+ : mlir::ArrayAttr::get (converter.getFirOpBuilder ().getContext (),
2825
+ privatizers));
2645
2826
}
2646
2827
2647
2828
static mlir::omp::SectionOp
@@ -3633,7 +3814,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
3633
3814
if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
3634
3815
.test (ompDirective)) {
3635
3816
validDirective = true ;
3636
- genParallelOp (converter, semaCtx, eval, /* genNested=*/ false ,
3817
+ genParallelOp (converter, symTable, semaCtx, eval, /* genNested=*/ false ,
3637
3818
currentLocation, loopOpClauseList,
3638
3819
/* outerCombined=*/ true );
3639
3820
}
@@ -3722,8 +3903,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3722
3903
currentLocation);
3723
3904
break ;
3724
3905
case llvm::omp::Directive::OMPD_parallel:
3725
- genParallelOp (converter, semaCtx, eval, /* genNested=*/ true , currentLocation ,
3726
- beginClauseList);
3906
+ genParallelOp (converter, symTable, semaCtx, eval, /* genNested=*/ true ,
3907
+ currentLocation, beginClauseList);
3727
3908
break ;
3728
3909
case llvm::omp::Directive::OMPD_single:
3729
3910
genSingleOp (converter, semaCtx, eval, /* genNested=*/ true , currentLocation,
@@ -3780,7 +3961,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3780
3961
.test (directive.v )) {
3781
3962
bool outerCombined =
3782
3963
directive.v != llvm::omp::Directive::OMPD_target_parallel;
3783
- genParallelOp (converter, semaCtx, eval, /* genNested=*/ false ,
3964
+ genParallelOp (converter, symTable, semaCtx, eval, /* genNested=*/ false ,
3784
3965
currentLocation, beginClauseList, outerCombined);
3785
3966
combinedDirective = true ;
3786
3967
}
@@ -3863,7 +4044,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3863
4044
3864
4045
// Parallel wrapper of PARALLEL SECTIONS construct
3865
4046
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
3866
- genParallelOp (converter, semaCtx, eval,
4047
+ genParallelOp (converter, symTable, semaCtx, eval,
3867
4048
/* genNested=*/ false , currentLocation, sectionsClauseList,
3868
4049
/* outerCombined=*/ true );
3869
4050
} else {
0 commit comments