Skip to content

Commit 7dcea4c

Browse files
committed
[flang][Lower][OpenMP] try to avoid using init mold argument
Unfortunately we still have a lot of cases like !fir.box<!fir.array<10xi32>> where we read dimensions from the mold in case there are non-default lower bounds stored inside the box. I will address this in the next patch.
1 parent 4daf307 commit 7dcea4c

File tree

2 files changed

+32
-12
lines changed

2 files changed

+32
-12
lines changed

flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp

+32-11
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ class PopulateInitAndCleanupRegionsHelper {
277277
/// allocatedPrivVarArg: The allocation for the private
278278
/// variable.
279279
/// moldArg: The original variable.
280-
/// loadedMoldArg: The original variable, loaded.
280+
/// loadedMoldArg: The original variable, loaded. Access via
281+
/// getLoadedMoldArg().
281282
mlir::Value scalarInitValue, allocatedPrivVarArg, moldArg, loadedMoldArg;
282283

283284
/// The first block in the init region.
@@ -321,6 +322,14 @@ class PopulateInitAndCleanupRegionsHelper {
321322
void initAndCleanupUnboxedDerivedType(bool needsInitialization);
322323

323324
fir::IfOp handleNullAllocatable();
325+
326+
// Do this lazily so that we don't load it when it is not used.
327+
inline mlir::Value getLoadedMoldArg() {
328+
if (loadedMoldArg)
329+
return loadedMoldArg;
330+
loadedMoldArg = builder.loadIfRef(loc, moldArg);
331+
return loadedMoldArg;
332+
}
324333
};
325334

326335
} // namespace
@@ -333,7 +342,7 @@ void PopulateInitAndCleanupRegionsHelper::initBoxedPrivatePointer(
333342
// we need a shape with the right rank so that the embox op is lowered
334343
// to an llvm struct of the right type. This returns nullptr if the types
335344
// aren't right.
336-
mlir::Value shape = generateZeroShapeForRank(builder, loc, loadedMoldArg);
345+
mlir::Value shape = generateZeroShapeForRank(builder, loc, moldArg);
337346
// Just incase, do initialize the box with a null value
338347
mlir::Value null = builder.createNullConstant(loc, boxTy.getEleTy());
339348
mlir::Value nullBox;
@@ -355,7 +364,7 @@ void PopulateInitAndCleanupRegionsHelper::initBoxedPrivatePointer(
355364
/// }
356365
/// omp.yield %box_alloca
357366
fir::IfOp PopulateInitAndCleanupRegionsHelper::handleNullAllocatable() {
358-
mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, loadedMoldArg);
367+
mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, getLoadedMoldArg());
359368
mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr);
360369
fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated,
361370
/*withElseRegion=*/true);
@@ -391,7 +400,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedScalar(
391400
loc, valType, valAlloc, /*shape=*/mlir::Value{},
392401
/*slice=*/mlir::Value{}, lenParams);
393402
initializeIfDerivedTypeBox(
394-
builder, loc, box, loadedMoldArg, needsInitialization,
403+
builder, loc, box, getLoadedMoldArg(), needsInitialization,
395404
/*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivate);
396405
fir::StoreOp lastOp =
397406
builder.create<fir::StoreOp>(loc, box, allocatedPrivVarArg);
@@ -410,7 +419,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
410419
fir::BaseBoxType boxTy, bool needsInitialization) {
411420
bool isAllocatableOrPointer =
412421
mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy());
413-
getLengthParameters(builder, loc, loadedMoldArg, lenParams);
422+
getLengthParameters(builder, loc, getLoadedMoldArg(), lenParams);
414423

415424
fir::IfOp ifUnallocated{nullptr};
416425
if (isAllocatableOrPointer) {
@@ -419,7 +428,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
419428
}
420429

421430
// Create the private copy from the initial fir.box:
422-
hlfir::Entity source = hlfir::Entity{loadedMoldArg};
431+
hlfir::Entity source = hlfir::Entity{getLoadedMoldArg()};
423432

424433
// Special case for (possibly allocatable) arrays of polymorphic types
425434
// e.g. !fir.class<!fir.heap<!fir.array<?x!fir.type<>>>>
@@ -463,8 +472,9 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
463472
// Put the temporary inside of a box:
464473
// hlfir::genVariableBox doesn't handle non-default lower bounds
465474
mlir::Value box;
466-
fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedMoldArg);
467-
mlir::Type boxType = loadedMoldArg.getType();
475+
fir::ShapeShiftOp shapeShift =
476+
getShapeShift(builder, loc, getLoadedMoldArg());
477+
mlir::Type boxType = getLoadedMoldArg().getType();
468478
if (mlir::isa<fir::BaseBoxType>(temp.getType()))
469479
// the box created by the declare form createTempFromMold is missing
470480
// lower bounds info
@@ -480,7 +490,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
480490
builder.create<hlfir::AssignOp>(loc, scalarInitValue, box);
481491

482492
initializeIfDerivedTypeBox(
483-
builder, loc, box, loadedMoldArg, needsInitialization,
493+
builder, loc, box, getLoadedMoldArg(), needsInitialization,
484494
/*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivate);
485495

486496
builder.create<fir::StoreOp>(loc, box, allocatedPrivVarArg);
@@ -553,8 +563,7 @@ void PopulateInitAndCleanupRegionsHelper::populateByRefInitAndCleanupRegions() {
553563
builder.setInsertionPointToEnd(initBlock);
554564

555565
// TODO: don't do this unless it is needed
556-
loadedMoldArg = builder.loadIfRef(loc, moldArg);
557-
getLengthParameters(builder, loc, loadedMoldArg, lenParams);
566+
getLengthParameters(builder, loc, getLoadedMoldArg(), lenParams);
558567

559568
if (isPrivatization(kind) &&
560569
mlir::isa<fir::PointerType>(boxTy.getEleTy())) {
@@ -604,4 +613,16 @@ void Fortran::lower::omp::populateByRefInitAndCleanupRegions(
604613
converter, loc, argType, scalarInitValue, allocatedPrivVarArg, moldArg,
605614
initBlock, cleanupRegion, kind, sym);
606615
helper.populateByRefInitAndCleanupRegions();
616+
617+
// Often we load moldArg to check something (e.g. length parameters, shape)
618+
// but then those answers can be gotten statically without accessing the
619+
// runtime value and so the only remaining use is a dead load. These loads can
620+
// force us to insert additional barriers and so should be avoided where
621+
// possible.
622+
if (moldArg.hasOneUse()) {
623+
mlir::Operation *user = *moldArg.getUsers().begin();
624+
if (auto load = mlir::dyn_cast<fir::LoadOp>(user))
625+
if (load.use_empty())
626+
load.erase();
627+
}
607628
}

flang/test/Lower/OpenMP/delayed-privatization-pointer.f90

-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ subroutine delayed_privatization_lenparams(length)
4242
! CHECK-LABEL: omp.private {type = firstprivate}
4343
! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.box<!fir.ptr<i32>>]] init {
4444
! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<[[TYPE]]>, %[[PRIV_ALLOC:.*]]: !fir.ref<[[TYPE]]>):
45-
! CHECK-NEXT: %[[ARG:.*]] = fir.load %[[PRIV_ARG]]
4645
! CHECK-NEXT: %[[NULL:.*]] = fir.zero_bits !fir.ptr<i32>
4746
! CHECK-NEXT: %[[INIT:.*]] = fir.embox %[[NULL]] : (!fir.ptr<i32>) -> !fir.box<!fir.ptr<i32>>
4847
! CHECK-NEXT: fir.store %[[INIT]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.ptr<i32>>>

0 commit comments

Comments
 (0)