Skip to content

Commit

Permalink
[flang] Extract hlfir.assign inlining from opt-bufferization. (#121544)
Browse files Browse the repository at this point in the history
Optimized bufferization can transform hlfir.assign into a loop
nest doing element-by-element assignment, but it avoids
doing so when the RHS is an hlfir.expr. This is done to let
the ElementalAssignBufferization pattern try to do a better job.

This patch moves the hlfir.assign inlining after opt-bufferization,
and enables it for hlfir.expr RHS.

The hlfir.expr RHS cases are present in tonto, and this patch
results in some nice improvements. Note that those cases
are handled by other compilers also using array temporaries,
so this patch seems to just get rid of the Assign runtime
overhead/inefficiency.
  • Loading branch information
vzakhari authored Jan 3, 2025
1 parent b9482ce commit 3c700d1
Show file tree
Hide file tree
Showing 12 changed files with 228 additions and 178 deletions.
4 changes: 4 additions & 0 deletions flang/include/flang/Optimizer/HLFIR/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,8 @@ def InlineElementals : Pass<"inline-elementals"> {
let summary = "Inline chained hlfir.elemental operations";
}

// Declares the InlineHLFIRAssign pass with the pass argument
// "inline-hlfir-assign"; per its summary, it inlines hlfir.assign operations.
def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
let summary = "Inline hlfir.assign operations";
}

#endif //FORTRAN_DIALECT_HLFIR_PASSES
1 change: 1 addition & 0 deletions flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ add_flang_library(HLFIRTransforms
BufferizeHLFIR.cpp
ConvertToFIR.cpp
InlineElementals.cpp
InlineHLFIRAssign.cpp
LowerHLFIRIntrinsics.cpp
LowerHLFIROrderedAssignments.cpp
ScheduleOrderedAssignments.cpp
Expand Down
152 changes: 152 additions & 0 deletions flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
//===- InlineHLFIRAssign.cpp - Inline hlfir.assign ops --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Transform hlfir.assign array operations into loop nests performing
// element-by-element assignments. Inlining is currently always done for
// trivial data types, though we may add performance/code-size heuristics
// in the future.
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Analysis/AliasAnalysis.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

namespace hlfir {
#define GEN_PASS_DEF_INLINEHLFIRASSIGN
#include "flang/Optimizer/HLFIR/Passes.h.inc"
} // namespace hlfir

#define DEBUG_TYPE "inline-hlfir-assign"

namespace {
/// Rewrite pattern replacing an array-to-array hlfir.assign with an explicit
/// loop nest that assigns one element per iteration. For example:
///   hlfir.assign %4 to %5 : !fir.ref<!fir.array<3x3xf32>>,
///                           !fir.ref<!fir.array<3x3xf32>>
/// is rewritten as:
///   fir.do_loop %arg1 = %c1 to %c3 step %c1 unordered {
///     fir.do_loop %arg2 = %c1 to %c3 step %c1 unordered {
///       %6 = hlfir.designate %4 (%arg2, %arg1)  :
///           (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
///       %7 = fir.load %6 : !fir.ref<f32>
///       %8 = hlfir.designate %5 (%arg2, %arg1)  :
///           (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
///       hlfir.assign %7 to %8 : f32, !fir.ref<f32>
///     }
///   }
///
/// The rewrite is valid only when LHS and RHS do not alias. An RHS that is
/// an array expression (hlfir.expr) never aliases the LHS, so the alias
/// query is made only for other kinds of RHS.
/// Runtime checking for non-conforming LHS/RHS shapes is not supported yet.
class InlineHLFIRAssignConversion
    : public mlir::OpRewritePattern<hlfir::AssignOp> {
public:
  using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;

  llvm::LogicalResult
  matchAndRewrite(hlfir::AssignOp assign,
                  mlir::PatternRewriter &rewriter) const override {
    // Every bail-out below reports its reason against the same operation.
    auto reject = [&](const char *reason) {
      return rewriter.notifyMatchFailure(assign, reason);
    };

    if (assign.isAllocatableAssignment())
      return reject("AssignOp may imply allocation");

    // Both sides must be arrays of the same trivial element type.
    hlfir::Entity source{assign.getRhs()};
    if (!source.isArray())
      return reject("AssignOp's RHS is not an array");

    mlir::Type sourceEleTy = source.getFortranElementType();
    if (!fir::isa_trivial(sourceEleTy))
      return reject("AssignOp's RHS data type is not trivial");

    hlfir::Entity target{assign.getLhs()};
    if (!target.isArray())
      return reject("AssignOp's LHS is not an array");

    mlir::Type targetEleTy = target.getFortranElementType();
    if (!fir::isa_trivial(targetEleTy))
      return reject("AssignOp's LHS data type is not trivial");

    if (targetEleTy != sourceEleTy)
      return reject("RHS/LHS element types mismatch");

    const bool sourceIsExpr = mlir::isa<hlfir::ExprType>(source.getType());
    if (!sourceIsExpr) {
      // A non-hlfir.expr RHS has to be proven not to alias the LHS.
      // TODO: if they may alias, we can insert hlfir.as_expr for RHS,
      // and proceed with the inlining.
      fir::AliasAnalysis aliasAnalysis;
      mlir::AliasResult aliasRes = aliasAnalysis.alias(target, source);
      // TODO: use areIdenticalOrDisjointSlices() from
      // OptimizedBufferization.cpp to check if we can still do the expansion.
      if (!aliasRes.isNo()) {
        LLVM_DEBUG(llvm::dbgs() << "InlineHLFIRAssign:\n"
                                << "\tLHS: " << target << "\n"
                                << "\tRHS: " << source << "\n"
                                << "\tALIAS: " << aliasRes << "\n");
        return reject("RHS/LHS may alias");
      }
    }

    // Emit the replacement code right before the original assignment.
    mlir::Location loc = assign->getLoc();
    fir::FirOpBuilder builder(rewriter, assign.getOperation());
    builder.setInsertionPoint(assign);
    hlfir::Entity sourceArray =
        hlfir::derefPointersAndAllocatables(loc, builder, source);
    hlfir::Entity targetArray =
        hlfir::derefPointersAndAllocatables(loc, builder, target);

    // The iteration space of the loop nest is taken from the LHS shape.
    mlir::Value targetShape = hlfir::genShape(loc, builder, targetArray);
    llvm::SmallVector<mlir::Value> extents =
        hlfir::getIndexExtents(loc, builder, targetShape);
    hlfir::LoopNest nest =
        hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
                           flangomp::shouldUseWorkshareLowering(assign));

    // Loop body: load the RHS element and assign it to the LHS element.
    builder.setInsertionPointToStart(nest.body);
    hlfir::Entity sourceElement =
        hlfir::getElementAt(loc, builder, sourceArray, nest.oneBasedIndices);
    sourceElement = hlfir::loadTrivialScalar(loc, builder, sourceElement);
    hlfir::Entity targetElement =
        hlfir::getElementAt(loc, builder, targetArray, nest.oneBasedIndices);
    builder.create<hlfir::AssignOp>(loc, sourceElement, targetElement);

    rewriter.eraseOp(assign);
    return mlir::success();
  }
};

/// Pass that applies InlineHLFIRAssignConversion greedily to the operation
/// it runs on, and signals pass failure if the greedy driver fails.
class InlineHLFIRAssignPass
    : public hlfir::impl::InlineHLFIRAssignBase<InlineHLFIRAssignPass> {
public:
  void runOnOperation() override {
    mlir::MLIRContext *ctx = &getContext();

    mlir::RewritePatternSet assignPatterns(ctx);
    assignPatterns.insert<InlineHLFIRAssignConversion>(ctx);

    // Region simplification is disabled so that the pattern driver
    // does not merge blocks.
    mlir::GreedyRewriteConfig driverConfig;
    driverConfig.enableRegionSimplification =
        mlir::GreedySimplifyRegionLevel::Disabled;

    if (mlir::succeeded(mlir::applyPatternsGreedily(
            getOperation(), std::move(assignPatterns), driverConfig)))
      return;

    mlir::emitError(getOperation()->getLoc(),
                    "failure in hlfir.assign inlining");
    signalPassFailure();
  }
};
} // namespace
109 changes: 3 additions & 106 deletions flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -772,108 +772,6 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
return mlir::success();
}

/// Rewrite pattern turning an array-to-array hlfir.assign into a loop nest
/// that assigns the elements one by one. For example:
///   hlfir.assign %4 to %5 : !fir.ref<!fir.array<3x3xf32>>,
///                           !fir.ref<!fir.array<3x3xf32>>
/// is rewritten as:
///   fir.do_loop %arg1 = %c1 to %c3 step %c1 unordered {
///     fir.do_loop %arg2 = %c1 to %c3 step %c1 unordered {
///       %6 = hlfir.designate %4 (%arg2, %arg1)  :
///           (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
///       %7 = fir.load %6 : !fir.ref<f32>
///       %8 = hlfir.designate %5 (%arg2, %arg1)  :
///           (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
///       hlfir.assign %7 to %8 : f32, !fir.ref<f32>
///     }
///   }
///
/// The rewrite is valid only when LHS and RHS do not alias.
/// Runtime checking for non-conforming LHS/RHS shapes is not supported yet.
class VariableAssignBufferization
    : public mlir::OpRewritePattern<hlfir::AssignOp> {
public:
  using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;

  llvm::LogicalResult
  matchAndRewrite(hlfir::AssignOp assign,
                  mlir::PatternRewriter &rewriter) const override;
};

/// Try to expand \p assign into a loop nest of element-by-element
/// assignments.
///
/// The match fails (leaving the IR untouched) when the assignment may imply
/// allocation, when the RHS is an hlfir.expr produced by hlfir.elemental,
/// when either side is not an array of a trivial element type, when the
/// element types differ, or when LHS and RHS cannot be proven not to alias.
/// On success, the loop nest is created right before \p assign, which is
/// then erased.
llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
    hlfir::AssignOp assign, mlir::PatternRewriter &rewriter) const {
  if (assign.isAllocatableAssignment())
    return rewriter.notifyMatchFailure(assign, "AssignOp may imply allocation");

  hlfir::Entity rhs{assign.getRhs()};

  // To avoid conflicts with ElementalAssignBufferization pattern, we avoid
  // matching RHS when it is an `ExprType` defined by an `ElementalOp`; which is
  // among the main criteria matched by ElementalAssignBufferization.
  // The templated getDefiningOp<>() is used because the RHS may be a block
  // argument with no defining operation; it yields a null op in that case
  // instead of tripping the null-pointer assertion in mlir::isa<>.
  if (mlir::isa<hlfir::ExprType>(rhs.getType()) &&
      rhs.getDefiningOp<hlfir::ElementalOp>())
    return rewriter.notifyMatchFailure(
        assign, "RHS is an ExprType defined by ElementalOp");

  if (!rhs.isArray())
    return rewriter.notifyMatchFailure(assign,
                                       "AssignOp's RHS is not an array");

  mlir::Type rhsEleTy = rhs.getFortranElementType();
  if (!fir::isa_trivial(rhsEleTy))
    return rewriter.notifyMatchFailure(
        assign, "AssignOp's RHS data type is not trivial");

  hlfir::Entity lhs{assign.getLhs()};
  if (!lhs.isArray())
    return rewriter.notifyMatchFailure(assign,
                                       "AssignOp's LHS is not an array");

  mlir::Type lhsEleTy = lhs.getFortranElementType();
  if (!fir::isa_trivial(lhsEleTy))
    return rewriter.notifyMatchFailure(
        assign, "AssignOp's LHS data type is not trivial");

  if (lhsEleTy != rhsEleTy)
    return rewriter.notifyMatchFailure(assign,
                                       "RHS/LHS element types mismatch");

  // The element-wise loop is only equivalent to the array assignment when
  // the two sides are known not to alias.
  fir::AliasAnalysis aliasAnalysis;
  mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs);
  // TODO: use areIdenticalOrDisjointSlices() to check if
  // we can still do the expansion.
  if (!aliasRes.isNo()) {
    LLVM_DEBUG(llvm::dbgs() << "VariableAssignBufferization:\n"
                            << "\tLHS: " << lhs << "\n"
                            << "\tRHS: " << rhs << "\n"
                            << "\tALIAS: " << aliasRes << "\n");
    return rewriter.notifyMatchFailure(assign, "RHS/LHS may alias");
  }

  // Emit the replacement code right before the original assignment.
  mlir::Location loc = assign->getLoc();
  fir::FirOpBuilder builder(rewriter, assign.getOperation());
  builder.setInsertionPoint(assign);
  rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
  lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);

  // The iteration space of the loop nest is taken from the LHS shape.
  mlir::Value shape = hlfir::genShape(loc, builder, lhs);
  llvm::SmallVector<mlir::Value> extents =
      hlfir::getIndexExtents(loc, builder, shape);
  hlfir::LoopNest loopNest =
      hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
                         flangomp::shouldUseWorkshareLowering(assign));

  // Loop body: load the RHS element and assign it to the LHS element.
  builder.setInsertionPointToStart(loopNest.body);
  auto rhsArrayElement =
      hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
  rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
  auto lhsArrayElement =
      hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
  builder.create<hlfir::AssignOp>(loc, rhsArrayElement, lhsArrayElement);
  rewriter.eraseOp(assign);
  return mlir::success();
}

using GenBodyFn =
std::function<mlir::Value(fir::FirOpBuilder &, mlir::Location, mlir::Value,
const llvm::SmallVectorImpl<mlir::Value> &)>;
Expand Down Expand Up @@ -1280,9 +1178,9 @@ class ReductionMaskConversion : public mlir::OpRewritePattern<Op> {
loc, resultArr, builder.createBool(loc, false));

// Check all the users - the destroy is no longer required, and any assign
// can use resultArr directly so that VariableAssignBufferization in this
// pass can optimize the results. Other operations are replaces with an
// AsExpr for the temporary resultArr.
// can use resultArr directly so that InlineHLFIRAssign pass
// can optimize the results. Other operations are replaced with an AsExpr
// for the temporary resultArr.
llvm::SmallVector<hlfir::DestroyOp> destroys;
llvm::SmallVector<hlfir::AssignOp> assigns;
for (auto user : mloc->getUsers()) {
Expand Down Expand Up @@ -1430,7 +1328,6 @@ class OptimizedBufferizationPass
// This requires small code reordering in ElementalAssignBufferization.
patterns.insert<ElementalAssignBufferization>(context);
patterns.insert<BroadcastAssignBufferization>(context);
patterns.insert<VariableAssignBufferization>(context);
patterns.insert<EvaluateIntoMemoryAssignBufferization>(context);
patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);
Expand Down
2 changes: 2 additions & 0 deletions flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
pm.addPass(mlir::createCSEPass());
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createOptimizedBufferization);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);
}
pm.addPass(hlfir::createLowerHLFIROrderedAssignments());
pm.addPass(hlfir::createLowerHLFIRIntrinsics());
Expand Down
4 changes: 4 additions & 0 deletions flang/test/Driver/mlir-pass-pipeline.f90
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,16 @@
! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
! O2-NEXT: 'fir.global' Pipeline
! O2-NEXT: OptimizedBufferization
! O2-NEXT: InlineHLFIRAssign
! O2-NEXT: 'func.func' Pipeline
! O2-NEXT: OptimizedBufferization
! O2-NEXT: InlineHLFIRAssign
! O2-NEXT: 'omp.declare_reduction' Pipeline
! O2-NEXT: OptimizedBufferization
! O2-NEXT: InlineHLFIRAssign
! O2-NEXT: 'omp.private' Pipeline
! O2-NEXT: OptimizedBufferization
! O2-NEXT: InlineHLFIRAssign
! ALL: LowerHLFIROrderedAssignments
! ALL-NEXT: LowerHLFIRIntrinsics
! ALL-NEXT: BufferizeHLFIR
Expand Down
4 changes: 4 additions & 0 deletions flang/test/Fir/basic-program.fir
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,16 @@ func.func @_QQmain() {
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
// PASSES-NEXT: 'fir.global' Pipeline
// PASSES-NEXT: OptimizedBufferization
// PASSES-NEXT: InlineHLFIRAssign
// PASSES-NEXT: 'func.func' Pipeline
// PASSES-NEXT: OptimizedBufferization
// PASSES-NEXT: InlineHLFIRAssign
// PASSES-NEXT: 'omp.declare_reduction' Pipeline
// PASSES-NEXT: OptimizedBufferization
// PASSES-NEXT: InlineHLFIRAssign
// PASSES-NEXT: 'omp.private' Pipeline
// PASSES-NEXT: OptimizedBufferization
// PASSES-NEXT: InlineHLFIRAssign
// PASSES-NEXT: LowerHLFIROrderedAssignments
// PASSES-NEXT: LowerHLFIRIntrinsics
// PASSES-NEXT: BufferizeHLFIR
Expand Down
Loading

0 comments on commit 3c700d1

Please sign in to comment.