Skip to content

Commit a871124

Browse files
authored
[flang][hlfir] optimize hlfir.eval_in_mem bufferization (#118069)
This patch extends the optimized bufferization pass to handle the new hlfir.eval_in_mem operation: the evaluation contained in its body is moved to operate directly on the LHS when it can be proven that the LHS is not accessed inside the region (and that the LHS is contiguous). This will enable the array function call optimization once lowering is changed to produce an hlfir.eval_in_mem in the next patch.
1 parent 5b0f4f2 commit a871124

File tree

4 files changed

+208
-1
lines changed

4 files changed

+208
-1
lines changed

flang/include/flang/Optimizer/Analysis/AliasAnalysis.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,12 @@ struct AliasAnalysis {
198198
/// Return the modify-reference behavior of `op` on `location`.
199199
mlir::ModRefResult getModRef(mlir::Operation *op, mlir::Value location);
200200

201+
/// Return the modify-reference behavior of operations inside `region` on
202+
/// `location`. Contrary to getModRef(operation, location), this will visit
203+
/// nested regions recursively according to the HasRecursiveMemoryEffects
204+
/// trait.
205+
mlir::ModRefResult getModRef(mlir::Region &region, mlir::Value location);
206+
201207
/// Return the memory source of a value.
202208
/// If getLastInstantiationPoint is true, the search for the source
203209
/// will stop at [hl]fir.declare if it represents a dummy

flang/lib/Optimizer/Analysis/AliasAnalysis.cpp

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,13 @@ bool AliasAnalysis::Source::isDummyArgument() const {
9191
return false;
9292
}
9393

94+
/// Return true if `v` is the memory value (as returned by getMemory()) of the
/// hlfir.eval_in_mem operation that owns the region `v` belongs to.
/// Return false for any other value, including values that are not nested
/// inside an operation at all.
static bool isEvaluateInMemoryBlockArg(mlir::Value v) {
  // A value owned by a block that is not attached to a region has no parent
  // region; it cannot belong to an hlfir.eval_in_mem, and the pointer must
  // not be dereferenced.
  mlir::Region *parentRegion = v.getParentRegion();
  if (!parentRegion)
    return false;
  // The parent operation may also be missing (detached region), hence the
  // null-tolerant cast.
  if (auto evalInMem = llvm::dyn_cast_or_null<hlfir::EvaluateInMemoryOp>(
          parentRegion->getParentOp()))
    return evalInMem.getMemory() == v;
  return false;
}
100+
94101
bool AliasAnalysis::Source::isData() const { return origin.isData; }
95102
bool AliasAnalysis::Source::isBoxData() const {
96103
return mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(valueType)) &&
@@ -457,6 +464,33 @@ ModRefResult AliasAnalysis::getModRef(Operation *op, Value location) {
457464
return result;
458465
}
459466

467+
/// Compute the combined modify-reference behavior on `location` of all the
/// operations held by `region`. Operations carrying the
/// HasRecursiveMemoryEffects trait have their nested regions visited
/// recursively. The walk stops as soon as the accumulated result is both Mod
/// and Ref since merging more results cannot change it.
ModRefResult AliasAnalysis::getModRef(mlir::Region &region,
                                      mlir::Value location) {
  ModRefResult accumulated = ModRefResult::getNoModRef();
  // Merge `partial` into the accumulated result and tell whether the
  // accumulated result is now saturated (Mod and Ref).
  auto mergeAndCheckSaturated = [&accumulated](ModRefResult partial) {
    accumulated = accumulated.merge(partial);
    return accumulated.isModAndRef();
  };

  for (mlir::Operation &nestedOp : region.getOps()) {
    const bool hasRecursiveEffects =
        nestedOp.hasTrait<mlir::OpTrait::HasRecursiveMemoryEffects>();
    if (hasRecursiveEffects)
      for (mlir::Region &nestedRegion : nestedOp.getRegions())
        // Fast return if already mod and ref.
        if (mergeAndCheckSaturated(getModRef(nestedRegion, location)))
          return accumulated;

    // In MLIR, RecursiveMemoryEffects can be combined with
    // MemoryEffectOpInterface to describe extra effects on top of the
    // effects of the nested operations. However, the presence of
    // RecursiveMemoryEffects and the absence of MemoryEffectOpInterface
    // implies the operation has no other memory effects than the ones of its
    // nested operations, so there is nothing more to query for it.
    if (hasRecursiveEffects &&
        !mlir::isa<mlir::MemoryEffectOpInterface>(nestedOp))
      continue;

    if (mergeAndCheckSaturated(getModRef(&nestedOp, location)))
      return accumulated;
  }
  return accumulated;
}
493+
460494
AliasAnalysis::Source::Attributes
461495
getAttrsFromVariable(fir::FortranVariableOpInterface var) {
462496
AliasAnalysis::Source::Attributes attrs;
@@ -698,7 +732,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
698732
breakFromLoop = true;
699733
});
700734
}
701-
if (!defOp && type == SourceKind::Unknown)
735+
if (!defOp && type == SourceKind::Unknown) {
702736
// Check if the memory source is coming through a dummy argument.
703737
if (isDummyArgument(v)) {
704738
type = SourceKind::Argument;
@@ -708,7 +742,12 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
708742

709743
if (isPointerReference(ty))
710744
attributes.set(Attribute::Pointer);
745+
} else if (isEvaluateInMemoryBlockArg(v)) {
746+
// The hlfir.eval_in_mem block argument is allocated by the operation.
747+
type = SourceKind::Allocate;
748+
ty = v.getType();
711749
}
750+
}
712751

713752
if (type == SourceKind::Global) {
714753
return {{global, instantiationPoint, followingData},

flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,100 @@ class ReductionMaskConversion : public mlir::OpRewritePattern<Op> {
11081108
}
11091109
};
11101110

1111+
class EvaluateIntoMemoryAssignBufferization
1112+
: public mlir::OpRewritePattern<hlfir::EvaluateInMemoryOp> {
1113+
1114+
public:
1115+
using mlir::OpRewritePattern<hlfir::EvaluateInMemoryOp>::OpRewritePattern;
1116+
1117+
llvm::LogicalResult
1118+
matchAndRewrite(hlfir::EvaluateInMemoryOp,
1119+
mlir::PatternRewriter &rewriter) const override;
1120+
};
1121+
1122+
/// Try to evaluate the body of `evalInMem` directly into the LHS of the
/// hlfir.assign that consumes its result, so that no temporary storage is
/// needed for the result.
///
/// The rewrite is only done when:
///  - the result of `evalInMem` is used by exactly one hlfir.assign and one
///    hlfir.destroy, and by nothing else;
///  - the destroy does not have to finalize the expression and the assignment
///    is not an allocatable assignment;
///  - the LHS is simply contiguous and is not a character entity;
///  - alias analysis proves that the region neither reads nor writes the LHS,
///    nor any value affected between `evalInMem` and the assignment.
///
/// On success, the assign, the destroy and `evalInMem` are erased. Every
/// failure path returns before the rewriter is used.
static llvm::LogicalResult
tryUsingAssignLhsDirectly(hlfir::EvaluateInMemoryOp evalInMem,
                          mlir::PatternRewriter &rewriter) {
  mlir::Location loc = evalInMem.getLoc();
  hlfir::DestroyOp destroy;
  hlfir::AssignOp assign;
  // `evalInMem` is erased at the end of the rewrite, together with the assign
  // and the destroy found here. Any other user (or a second assign/destroy)
  // would be left with a dangling operand, so it must prevent the rewrite.
  for (mlir::Operation *user : evalInMem->getUsers()) {
    const bool isExpectedUser =
        mlir::TypeSwitch<mlir::Operation *, bool>(user)
            .Case([&](hlfir::AssignOp op) {
              if (assign)
                return false;
              assign = op;
              return true;
            })
            .Case([&](hlfir::DestroyOp op) {
              if (destroy)
                return false;
              destroy = op;
              return true;
            })
            .Default([](mlir::Operation *) { return false; });
    if (!isExpectedUser)
      return mlir::failure();
  }
  if (!assign || !destroy || destroy.mustFinalizeExpr() ||
      assign.isAllocatableAssignment())
    return mlir::failure();

  hlfir::Entity lhs{assign.getLhs()};
  // EvaluateInMemoryOp memory is contiguous, so in general, it can only be
  // replaced by the LHS if the LHS is contiguous.
  if (!lhs.isSimplyContiguous())
    return mlir::failure();
  // Character assignment may involve truncation/padding, so the LHS
  // cannot be used to evaluate RHS in place without proving the LHS and
  // RHS lengths are the same.
  if (lhs.isCharacter())
    return mlir::failure();
  fir::AliasAnalysis aliasAnalysis;
  // The region must not read or write the LHS.
  // Note that getModRef is used instead of mlir::MemoryEffects because
  // EvaluateInMemoryOp is typically expected to hold fir.calls and that
  // Fortran calls cannot be modeled in a useful way with mlir::MemoryEffects:
  // it is hard/impossible to list all the read/written SSA values in a call,
  // but it is often possible to tell that an SSA value cannot be accessed,
  // hence getModRef is needed here and below. Also note that getModRef uses
  // mlir::MemoryEffects for operations that do not have special handling in
  // getModRef.
  if (aliasAnalysis.getModRef(evalInMem.getBody(), lhs).isModOrRef())
    return mlir::failure();
  // Any variables affected between the hlfir.eval_in_mem and the assignment
  // must not be read or written inside the region since it will be moved at
  // the assignment insertion point.
  auto effects = getEffectsBetween(evalInMem->getNextNode(), assign);
  if (!effects) {
    LLVM_DEBUG(
        llvm::dbgs()
        << "operation with unknown effects between eval_in_mem and assign\n");
    return mlir::failure();
  }
  for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
    mlir::Value affected = effect.getValue();
    // An effect without an associated value cannot be checked against the
    // region, so it conservatively prevents the rewrite.
    if (!affected ||
        aliasAnalysis.getModRef(evalInMem.getBody(), affected).isModOrRef())
      return mlir::failure();
  }

  // Evaluate the region at the assignment position, directly in the LHS
  // storage, then drop the operations that are no longer needed.
  rewriter.setInsertionPoint(assign);
  fir::FirOpBuilder builder(rewriter, evalInMem.getOperation());
  mlir::Value rawLhs = hlfir::genVariableRawAddress(loc, builder, lhs);
  hlfir::computeEvaluateOpIn(loc, builder, evalInMem, rawLhs);
  rewriter.eraseOp(assign);
  rewriter.eraseOp(destroy);
  rewriter.eraseOp(evalInMem);
  return mlir::success();
}
1187+
1188+
/// Rewrite `evalInMem`, first by trying to evaluate it directly in the LHS of
/// the assignment using it, and otherwise by evaluating it in a new temporary
/// wrapped in an hlfir.as_expr. This pattern always succeeds.
llvm::LogicalResult EvaluateIntoMemoryAssignBufferization::matchAndRewrite(
    hlfir::EvaluateInMemoryOp evalInMem,
    mlir::PatternRewriter &rewriter) const {
  if (mlir::failed(tryUsingAssignLhsDirectly(evalInMem, rewriter))) {
    // Rewrite to temp + as_expr here so that the assign + as_expr pattern can
    // kick-in for simple types and at least implement the assignment inline
    // instead of call Assign runtime.
    fir::FirOpBuilder firBuilder(rewriter, evalInMem.getOperation());
    mlir::Location opLoc = evalInMem.getLoc();
    auto [tempStorage, tempIsOnHeap] = hlfir::computeEvaluateOpInNewTemp(
        opLoc, firBuilder, evalInMem, evalInMem.getShape(),
        evalInMem.getTypeparams());
    // The temporary only has to be freed if it was heap allocated.
    mlir::Value mustFree = firBuilder.createBool(opLoc, tempIsOnHeap);
    rewriter.replaceOpWithNewOp<hlfir::AsExprOp>(evalInMem, tempStorage,
                                                 mustFree);
  }
  return mlir::success();
}
1204+
11111205
class OptimizedBufferizationPass
11121206
: public hlfir::impl::OptimizedBufferizationBase<
11131207
OptimizedBufferizationPass> {
@@ -1130,6 +1224,7 @@ class OptimizedBufferizationPass
11301224
patterns.insert<ElementalAssignBufferization>(context);
11311225
patterns.insert<BroadcastAssignBufferization>(context);
11321226
patterns.insert<VariableAssignBufferization>(context);
1227+
patterns.insert<EvaluateIntoMemoryAssignBufferization>(context);
11331228
patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
11341229
patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);
11351230
patterns.insert<ReductionConversion<hlfir::AllOp>>(context);
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// RUN: fir-opt --opt-bufferization %s | FileCheck %s
2+
3+
// Fortran F2023 15.5.2.14 point 4. ensures that _QPfoo cannot access _QFtestEx
4+
// and the temporary storage for the result can be avoided.
5+
func.func @_QPtest(%arg0: !fir.ref<!fir.array<10xf32>> {fir.bindc_name = "x"}) {
6+
%c10 = arith.constant 10 : index
7+
%0 = fir.dummy_scope : !fir.dscope
8+
%1 = fir.shape %c10 : (index) -> !fir.shape<1>
9+
%2:2 = hlfir.declare %arg0(%1) dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
10+
%3 = hlfir.eval_in_mem shape %1 : (!fir.shape<1>) -> !hlfir.expr<10xf32> {
11+
^bb0(%arg1: !fir.ref<!fir.array<10xf32>>):
12+
%4 = fir.call @_QPfoo() fastmath<contract> : () -> !fir.array<10xf32>
13+
fir.save_result %4 to %arg1(%1) : !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>, !fir.shape<1>
14+
}
15+
hlfir.assign %3 to %2#0 : !hlfir.expr<10xf32>, !fir.ref<!fir.array<10xf32>>
16+
hlfir.destroy %3 : !hlfir.expr<10xf32>
17+
return
18+
}
19+
func.func private @_QPfoo() -> !fir.array<10xf32>
20+
21+
// CHECK-LABEL: func.func @_QPtest(
22+
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<10xf32>> {fir.bindc_name = "x"}) {
23+
// CHECK: %[[VAL_1:.*]] = arith.constant 10 : index
24+
// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
25+
// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
26+
// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) dummy_scope %[[VAL_2]] {uniq_name = "_QFtestEx"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
27+
// CHECK: %[[VAL_5:.*]] = fir.call @_QPfoo() fastmath<contract> : () -> !fir.array<10xf32>
28+
// CHECK: fir.save_result %[[VAL_5]] to %[[VAL_4]]#1(%[[VAL_3]]) : !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>, !fir.shape<1>
29+
// CHECK: return
30+
// CHECK: }
31+
32+
33+
// Temporary storage cannot be avoided in this case since
34+
// _QFnegative_test_is_targetEx has the TARGET attribute.
35+
func.func @_QPnegative_test_is_target(%arg0: !fir.ref<!fir.array<10xf32>> {fir.bindc_name = "x", fir.target}) {
36+
%c10 = arith.constant 10 : index
37+
%0 = fir.dummy_scope : !fir.dscope
38+
%1 = fir.shape %c10 : (index) -> !fir.shape<1>
39+
%2:2 = hlfir.declare %arg0(%1) dummy_scope %0 {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFnegative_test_is_targetEx"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
40+
%3 = hlfir.eval_in_mem shape %1 : (!fir.shape<1>) -> !hlfir.expr<10xf32> {
41+
^bb0(%arg1: !fir.ref<!fir.array<10xf32>>):
42+
%4 = fir.call @_QPfoo() fastmath<contract> : () -> !fir.array<10xf32>
43+
fir.save_result %4 to %arg1(%1) : !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>, !fir.shape<1>
44+
}
45+
hlfir.assign %3 to %2#0 : !hlfir.expr<10xf32>, !fir.ref<!fir.array<10xf32>>
46+
hlfir.destroy %3 : !hlfir.expr<10xf32>
47+
return
48+
}
49+
// CHECK-LABEL: func.func @_QPnegative_test_is_target(
50+
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<10xf32>> {fir.bindc_name = "x", fir.target}) {
51+
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
52+
// CHECK: %[[VAL_2:.*]] = arith.constant false
53+
// CHECK: %[[VAL_3:.*]] = arith.constant 10 : index
54+
// CHECK: %[[VAL_4:.*]] = fir.alloca !fir.array<10xf32>
55+
// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]]{{.*}}
56+
// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_4]]{{.*}}
57+
// CHECK: %[[VAL_9:.*]] = fir.call @_QPfoo() fastmath<contract> : () -> !fir.array<10xf32>
58+
// CHECK: fir.save_result %[[VAL_9]] to %[[VAL_8]]#1{{.*}}
59+
// CHECK: %[[VAL_10:.*]] = hlfir.as_expr %[[VAL_8]]#0 move %[[VAL_2]] : (!fir.ref<!fir.array<10xf32>>, i1) -> !hlfir.expr<10xf32>
60+
// CHECK: fir.do_loop %[[VAL_11:.*]] = %[[VAL_1]] to %[[VAL_3]] step %[[VAL_1]] unordered {
61+
// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_10]], %[[VAL_11]] : (!hlfir.expr<10xf32>, index) -> f32
62+
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_11]]) : (!fir.ref<!fir.array<10xf32>>, index) -> !fir.ref<f32>
63+
// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] : f32, !fir.ref<f32>
64+
// CHECK: }
65+
// CHECK: hlfir.destroy %[[VAL_10]] : !hlfir.expr<10xf32>
66+
// CHECK: return
67+
// CHECK: }

0 commit comments

Comments
 (0)