Skip to content

[WIP][PoC][flang] Re-use OpenMP data environment clauses for locality spec #128148

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions do_loop_with_local_and_local_init.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
! For testing try: `flang -fc1 -emit-hlfir -mmlir --openmp-enable-delayed-privatization-staging=true do_loop_with_local_and_local_init.f90 -o test.mlir`

! TODO Will be added as proper test later.
! Minimal DO CONCURRENT loop that carries both a LOCAL and a LOCAL_INIT
! locality specifier; each iteration writes to exactly one of the two
! locality-spec variables.
subroutine omploop
implicit none
integer :: i, local_var, local_init_var

! local_var is named in LOCAL(...), local_init_var in LOCAL_INIT(...).
do concurrent (i=1:10) local(local_var) local_init(local_init_var)
! Iterations with i < 5 assign the LOCAL variable ...
if (i < 5) then
local_var = 42
! ... all other iterations assign the LOCAL_INIT variable.
else
local_init_var = 84
end if
end do
end subroutine
49 changes: 49 additions & 0 deletions do_loop_with_local_and_local_init.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// For testing:
// 1. parsing/printing (roundtripping): `fir-opt do_loop_with_local_and_local_init.mlir -o roundtrip.mlir`
// 2. Lowering locality specs during CFG: `fir-opt --cfg-conversion do_loop_with_local_and_local_init.mlir -o after_cfg_lowering.mlir`

// TODO I will add both of the above steps as proper tests when the PoC is complete.
module attributes {dlti.dl_spec = #dlti.dl_spec<i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 21.0.0 (/home/kaergawy/git/aomp20.0/llvm-project/flang c8cf5a644886bb8dd3ad19be6e3b916ffcbd222c)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {

omp.private {type = private} @local_privatizer : i32

// Privatizer of type `firstprivate` for an i32 variable. Its `copy` region
// receives two !fir.ref<i32> block arguments: it loads the value behind the
// first one, stores it through the second one and yields the second one.
omp.private {type = firstprivate} @local_init_privatizer : i32 copy {
^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
%0 = fir.load %arg0 : !fir.ref<i32>
fir.store %0 to %arg1 : !fir.ref<i32>
omp.yield(%arg1 : !fir.ref<i32>)
}

// Function holding an unordered `fir.do_loop` (bounds 1..10, step 1) whose
// `private(...)` clause references @local_privatizer and
// @local_init_privatizer.
func.func @_QPomploop() {
%0 = fir.alloca i32 {bindc_name = "i"}
%1:2 = hlfir.declare %0 {uniq_name = "_QFomploopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomploopEi"}
%3:2 = hlfir.declare %2 {uniq_name = "_QFomploopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%4 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFomploopElocal_init_var"}
%5:2 = hlfir.declare %4 {uniq_name = "_QFomploopElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%6 = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFomploopElocal_var"}
%7:2 = hlfir.declare %6 {uniq_name = "_QFomploopElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
// Loop bounds (1 and 10) converted to `index`, plus an `index` step of 1.
%c1_i32 = arith.constant 1 : i32
%8 = fir.convert %c1_i32 : (i32) -> index
%c10_i32 = arith.constant 10 : i32
%9 = fir.convert %c10_i32 : (i32) -> index
%c1 = arith.constant 1 : index
// %arg0 is the induction variable; %arg1 and %arg2 are the region arguments
// that the private clause maps from %7#0 (local_var) and %5#0
// (local_init_var) respectively.
fir.do_loop %arg0 = %8 to %9 step %c1 unordered private(@local_privatizer %7#0 -> %arg1, @local_init_privatizer %5#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>) {
// Store the current induction value into the first `i` declaration (%1).
%10 = fir.convert %arg0 : (index) -> i32
fir.store %10 to %1#1 : !fir.ref<i32>
// Declare the two variables again on top of the private region arguments;
// the assignments below go through these declarations.
%12:2 = hlfir.declare %arg1 {uniq_name = "_QFomploopElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%14:2 = hlfir.declare %arg2 {uniq_name = "_QFomploopElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
// i < 5 (signed compare): assign 42 via %12, otherwise assign 84 via %14.
%16 = fir.load %1#0 : !fir.ref<i32>
%c5_i32 = arith.constant 5 : i32
%17 = arith.cmpi slt, %16, %c5_i32 : i32
fir.if %17 {
%c42_i32 = arith.constant 42 : i32
hlfir.assign %c42_i32 to %12#0 : i32, !fir.ref<i32>
} else {
%c84_i32 = arith.constant 84 : i32
hlfir.assign %c84_i32 to %14#0 : i32, !fir.ref<i32>
}
}
return
}
}
3 changes: 3 additions & 0 deletions flang/include/flang/Lower/AbstractConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,9 @@ class AbstractConverter {
virtual Fortran::lower::SymbolBox
lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;

/// Look up \p sym in the inner-most level of the symbol map only.
/// Implementations return an empty SymbolBox when \p sym is not bound at
/// that level.
virtual Fortran::lower::SymbolBox
shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0;

/// Return the mlir::SymbolTable associated to the ModuleOp.
/// Look-ups are faster using it than using module.lookup<>,
/// but the module op should be queried in case of failure
Expand Down
4 changes: 2 additions & 2 deletions flang/include/flang/Optimizer/Dialect/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ mlir_tablegen(FIRAttr.cpp.inc -gen-attrdef-defs)
set(LLVM_TARGET_DEFINITIONS FIROps.td)
mlir_tablegen(FIROps.h.inc -gen-op-decls)
mlir_tablegen(FIROps.cpp.inc -gen-op-defs)
mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls)
mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs)
mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls -typedefs-dialect=fir)
mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs -typedefs-dialect=fir)
add_public_tablegen_target(FIROpsIncGen)

set(LLVM_TARGET_DEFINITIONS FortranVariableInterface.td)
Expand Down
37 changes: 29 additions & 8 deletions flang/include/flang/Optimizer/Dialect/FIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

include "mlir/Dialect/Arith/IR/ArithBase.td"
include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
include "mlir/Dialect/OpenMP/OpenMPClauses.td"
include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRDialect.td"
Expand Down Expand Up @@ -2226,7 +2227,7 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
let hasVerifier = 1;
let hasCustomAssemblyFormat = 1;

let arguments = (ins
defvar opArgs = (ins
Index:$lowerBound,
Index:$upperBound,
Index:$step,
Expand All @@ -2237,6 +2238,8 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
OptionalAttr<ArrayAttr>:$reduceAttrs,
OptionalAttr<LoopAnnotationAttr>:$loopAnnotation
);

let arguments = !con(opArgs, OpenMP_PrivateClause.arguments);
let results = (outs Variadic<AnyType>:$results);
let regions = (region SizedRegion<1>:$region);

Expand All @@ -2248,24 +2251,38 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
CArg<"mlir::ValueRange", "std::nullopt">:$iterArgs,
CArg<"mlir::ValueRange", "std::nullopt">:$reduceOperands,
CArg<"llvm::ArrayRef<mlir::Attribute>", "{}">:$reduceAttrs,
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes,
CArg<"mlir::ValueRange", "std::nullopt">:$private_vars,
CArg<"mlir::ArrayRef<mlir::Attribute>", "{}">:$private_syms
)>
];

let extraClassDeclaration = [{
mlir::Value getInductionVar() { return getBody()->getArgument(0); }
defvar opExtraClassDeclaration = [{
mlir::OpBuilder getBodyBuilder() {
return mlir::OpBuilder(getBody(), std::prev(getBody()->end()));
}

/// Region argument accessors.
mlir::Value getInductionVar() { return getBody()->getArgument(0); }
mlir::Block::BlockArgListType getRegionIterArgs() {
return getBody()->getArguments().drop_front();
// 1 for skipping the induction variable.
return getBody()->getArguments().slice(1, getNumIterOperands());
}
mlir::Block::BlockArgListType getRegionPrivateArgs() {
return getBody()->getArguments().slice(1 + getNumIterOperands(),
numPrivateBlockArgs());
}

/// Operation operand accessors.
mlir::Operation::operand_range getIterOperands() {
return getOperands()
.drop_front(getNumControlOperands() + getNumReduceOperands());
.slice(getNumControlOperands() + getNumReduceOperands(),
getNumIterOperands());
}
llvm::MutableArrayRef<mlir::OpOperand> getInitsMutable() {
return getOperation()->getOpOperands()
.drop_front(getNumControlOperands() + getNumReduceOperands());
.slice(getNumControlOperands() + getNumReduceOperands(),
getNumIterOperands());
}

void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); }
Expand All @@ -2274,7 +2291,7 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,

/// Number of region arguments for loop-carried values
unsigned getNumRegionIterArgs() {
return getBody()->getNumArguments() - 1;
return getNumIterOperands();
}
/// Number of operands controlling the loop: lb, ub, step
unsigned getNumControlOperands() { return 3; }
Expand Down Expand Up @@ -2313,6 +2330,10 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
unsigned resultNum);
mlir::Value blockArgToSourceOp(unsigned blockArgNum);
}];

let extraClassDeclaration =
!strconcat(opExtraClassDeclaration, "\n",
OpenMP_PrivateClause.extraClassDeclaration);
}

def fir_IfOp : region_Op<"if", [DeclareOpInterfaceMethods<RegionBranchOpInterface, [
Expand Down
55 changes: 45 additions & 10 deletions flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

#include "flang/Lower/Bridge.h"

#include "OpenMP/DataSharingProcessor.h"
#include "OpenMP/Utils.h"
#include "flang/Lower/Allocatable.h"
#include "flang/Lower/CallInterface.h"
#include "flang/Lower/Coarray.h"
Expand Down Expand Up @@ -1136,6 +1138,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
return name;
}

/// Look up \p sym in the inner-most level of the local symbol map only.
/// Yields the binding found there, or an empty SymbolBox when there is none.
Fortran::lower::SymbolBox
shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override {
  Fortran::lower::SymbolBox found = localSymbols.shallowLookupSymbol(sym);
  if (!found)
    return {};
  return found;
}

private:
FirConverter() = delete;
FirConverter(const FirConverter &) = delete;
Expand Down Expand Up @@ -1210,14 +1220,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
return {};
}

/// Find the symbol in the inner-most level of the local map or return null
/// (an empty SymbolBox) when it is not bound at that level.
Fortran::lower::SymbolBox
shallowLookupSymbol(const Fortran::semantics::Symbol &sym) {
if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
return v;
return {};
}

/// Find the symbol in one level up of symbol map such as for host-association
/// in OpenMP code or return null.
Fortran::lower::SymbolBox
Expand Down Expand Up @@ -2017,12 +2019,29 @@ class FirConverter : public Fortran::lower::AbstractConverter {

/// Create DO CONCURRENT construct symbol bindings and generate LOCAL_INIT
/// assignments.
void handleLocalitySpecs(const IncrementLoopInfo &info) {
void handleLocalitySpecs(IncrementLoopInfo &info) {
Fortran::semantics::SemanticsContext &semanticsContext =
bridge.getSemanticsContext();
for (const Fortran::semantics::Symbol *sym : info.localSymList)
Fortran::lower::omp::DataSharingProcessor dsp(
*this, semanticsContext, getEval(),
/*useDelayedPrivatization=*/true, localSymbols);
mlir::omp::PrivateClauseOps privateClauseOps;

for (const Fortran::semantics::Symbol *sym : info.localSymList) {
if (enableDelayedPrivatizationStaging) {
dsp.doPrivatize(sym, &privateClauseOps);
continue;
}

createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false);
}

for (const Fortran::semantics::Symbol *sym : info.localInitSymList) {
if (enableDelayedPrivatizationStaging) {
dsp.doPrivatize(sym, &privateClauseOps);
continue;
}

createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true);
const auto *hostDetails =
sym->detailsIf<Fortran::semantics::HostAssocDetails>();
Expand All @@ -2036,11 +2055,27 @@ class FirConverter : public Fortran::lower::AbstractConverter {
assign.u = Fortran::evaluate::Assignment::BoundsSpec{};
genAssignment(assign);
}

for (const Fortran::semantics::Symbol *sym : info.sharedSymList) {
const auto *hostDetails =
sym->detailsIf<Fortran::semantics::HostAssocDetails>();
copySymbolBinding(hostDetails->symbol(), *sym);
}

info.doLoop.getPrivateVarsMutable().assign(privateClauseOps.privateVars);
info.doLoop.setPrivateSymsAttr(
builder->getArrayAttr(privateClauseOps.privateSyms));

for (auto [sym, privateVar] : llvm::zip_equal(
dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) {
auto arg = info.doLoop.getRegion().begin()->addArgument(
privateVar.getType(), info.doLoop.getLoc());
bindSymbol(*sym, hlfir::translateToExtendedValue(
privateVar.getLoc(), *builder, hlfir::Entity{arg},
/*contiguousHint=*/true)
.first);
}

// Note that allocatable, types with ultimate components, and type
// requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130),
// so no clean-up needs to be generated for these entities.
Expand Down
27 changes: 22 additions & 5 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ DataSharingProcessor::DataSharingProcessor(
});
}

/// Delegating constructor: forwards to the main constructor, passing `{}` for
/// the argument this overload omits (presumably the clause list -- confirm
/// against the primary constructor) and disabling collection of
/// pre-determined symbols.
DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
bool useDelayedPrivatization,
lower::SymMap &symTable)
: DataSharingProcessor(converter, semaCtx, {}, eval,
/*shouldCollectPreDeterminedSymbols=*/false,
useDelayedPrivatization, symTable) {}

void DataSharingProcessor::processStep1(
mlir::omp::PrivateClauseOps *clauseOps) {
collectSymbolsForPrivatization();
Expand Down Expand Up @@ -504,22 +513,28 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) {
}
}

void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
void DataSharingProcessor::doPrivatize(const semantics::Symbol *symToPrivatize,
mlir::omp::PrivateClauseOps *clauseOps) {
if (!useDelayedPrivatization) {
cloneSymbol(sym);
copyFirstPrivateSymbol(sym);
cloneSymbol(symToPrivatize);
copyFirstPrivateSymbol(symToPrivatize);
return;
}

lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
const semantics::Symbol *sym = symToPrivatize->HasLocalLocality()
? &symToPrivatize->GetUltimate()
: symToPrivatize;
lower::SymbolBox hsb = symToPrivatize->HasLocalLocality()
? converter.shallowLookupSymbol(*sym)
: converter.lookupOneLevelUpSymbol(*sym);
assert(hsb && "Host symbol box not found");
hlfir::Entity entity{hsb.getAddr()};
bool cannotHaveNonDefaultLowerBounds = !entity.mayHaveNonDefaultLowerBounds();

mlir::Location symLoc = hsb.getAddr().getLoc();
std::string privatizerName = sym->name().ToString() + ".privatizer";
bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
sym->test(semantics::Symbol::Flag::LocalityLocalInit);

mlir::Value privVal = hsb.getAddr();
mlir::Type allocType = privVal.getType();
Expand Down Expand Up @@ -645,6 +660,8 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
}

symToPrivatizer[sym] = privatizerOp;
if (symToPrivatize->HasLocalLocality())
allPrivatizedSymbols.insert(symToPrivatize);
}

} // namespace omp
Expand Down
10 changes: 8 additions & 2 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,6 @@ class DataSharingProcessor {
void collectImplicitSymbols();
void collectPreDeterminedSymbols();
void privatize(mlir::omp::PrivateClauseOps *clauseOps);
void doPrivatize(const semantics::Symbol *sym,
mlir::omp::PrivateClauseOps *clauseOps);
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const semantics::Symbol *sym);
Expand All @@ -125,6 +123,11 @@ class DataSharingProcessor {
bool shouldCollectPreDeterminedSymbols,
bool useDelayedPrivatization, lower::SymMap &symTable);

// Overload that takes neither a clause list nor the
// shouldCollectPreDeterminedSymbols flag. Its definition forwards to the
// constructor above with `{}` and `false` in those positions.
DataSharingProcessor(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
bool useDelayedPrivatization, lower::SymMap &symTable);

// Privatisation is split into two steps.
// Step1 performs cloning of all privatisation clauses and copying for
// firstprivates. Step1 is performed at the place where process/processStep1
Expand All @@ -151,6 +154,9 @@ class DataSharingProcessor {
? allPrivatizedSymbols.getArrayRef()
: llvm::ArrayRef<const semantics::Symbol *>();
}

// Privatizes the single symbol `sym`. When delayed privatization is not in
// use, the definition clones the symbol and performs the firstprivate copy
// right away, then returns.
// NOTE(review): how `clauseOps` is populated in the delayed path is not
// visible from this declaration -- confirm against the definition.
void doPrivatize(const semantics::Symbol *sym,
mlir::omp::PrivateClauseOps *clauseOps);
};

} // namespace omp
Expand Down
Loading
Loading