Skip to content

Commit

Permalink
[flang][cuda] Pass the pinned variable in allocate calls (#125310)
Browse files Browse the repository at this point in the history
  • Loading branch information
clementval authored Feb 3, 2025
1 parent 2c030a1 commit f1b075d
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 55 deletions.
7 changes: 7 additions & 0 deletions flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,13 @@ constexpr TypeBuilderFunc getModel<bool &>() {
};
}
template <>
constexpr TypeBuilderFunc getModel<bool *>() {
  return [](mlir::MLIRContext *context) -> mlir::Type {
    // A `bool *` argument is modeled as a FIR reference to whatever type the
    // `bool` model produces.
    mlir::Type pointeeTy{getModel<bool>()(context)};
    return fir::ReferenceType::get(pointeeTy);
  };
}
template <>
constexpr TypeBuilderFunc getModel<unsigned short>() {
return [](mlir::MLIRContext *context) -> mlir::Type {
return mlir::IntegerType::get(
Expand Down
22 changes: 12 additions & 10 deletions flang/include/flang/Runtime/CUDA/allocatable.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,30 @@ extern "C" {

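/// Perform allocation of the descriptor. If pinned is non-null, it is set to
/// true when the allocation returns StatOk and to false otherwise.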
int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t stream = -1,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);

/// Perform allocation of the descriptor with synchronization of it when
/// necessary. The pinned pointer is forwarded to CUFAllocatableAllocate.
int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t stream = -1,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);

/// Perform allocation of the descriptor without synchronization. Assign data
/// from source when the allocation returns StatOk. The pinned pointer is
/// forwarded to CUFAllocatableAllocate.
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
const Descriptor &source, int64_t stream = -1, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);

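/// Perform allocation of the descriptor with synchronization of it when
/// necessary. Assign data from source when the allocation returns StatOk. The
/// pinned pointer is forwarded to CUFAllocatableAllocateSync.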
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
const Descriptor &source, int64_t stream = -1, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);

/// Perform deallocation of the descriptor with synchronization of it when
/// necessary.
Expand Down
22 changes: 12 additions & 10 deletions flang/include/flang/Runtime/CUDA/pointer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,30 @@ extern "C" {

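/// Perform allocation of the descriptor. If pinned is non-null, it is set to
/// true when the allocation returns StatOk and to false otherwise.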
int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);

/// Perform allocation of the descriptor with synchronization of it when
/// necessary. The pinned pointer is forwarded to CUFPointerAllocate.
int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t stream = -1,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);

/// Perform allocation of the descriptor without synchronization. Assign data
/// from source when the allocation returns StatOk. The pinned pointer is
/// forwarded to CUFPointerAllocate.
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
const Descriptor &source, int64_t stream = -1, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);

/// Perform allocation of the descriptor with synchronization of it when
/// necessary. Assign data from source when the allocation returns StatOk. The
/// pinned pointer is forwarded to CUFPointerAllocateSync.
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
const Descriptor &source, int64_t stream = -1, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);

} // extern "C"

Expand Down
23 changes: 12 additions & 11 deletions flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
mlir::Value sourceLine;
if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>)
sourceLine = fir::factory::locationToLineNo(
builder, loc, op.getSource() ? fTy.getInput(6) : fTy.getInput(5));
builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6));
else
sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4));

Expand All @@ -119,22 +119,28 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
}
llvm::SmallVector<mlir::Value> args;
if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) {
mlir::Value pinned =
op.getPinned()
? op.getPinned()
: builder.createNullConstant(
loc, fir::ReferenceType::get(
mlir::IntegerType::get(op.getContext(), 1)));
if (op.getSource()) {
mlir::Value stream =
op.getStream()
? op.getStream()
: builder.createIntegerConstant(loc, fTy.getInput(2), -1);
args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
op.getSource(), stream, hasStat,
errmsg, sourceFile, sourceLine);
args = fir::runtime::createArguments(
builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned,
hasStat, errmsg, sourceFile, sourceLine);
} else {
mlir::Value stream =
op.getStream()
? op.getStream()
: builder.createIntegerConstant(loc, fTy.getInput(1), -1);
args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
stream, hasStat, errmsg, sourceFile,
sourceLine);
stream, pinned, hasStat, errmsg,
sourceFile, sourceLine);
}
} else {
args =
Expand All @@ -153,11 +159,6 @@ struct CUFAllocateOpConversion
mlir::LogicalResult
matchAndRewrite(cuf::AllocateOp op,
mlir::PatternRewriter &rewriter) const override {
// TODO: Pinned is a reference to a logical value that can be set to true
// when pinned allocation succeed. This will require a new entry point.
if (op.getPinned())
return mlir::failure();

auto mod = op->getParentOfType<mlir::ModuleOp>();
fir::FirOpBuilder builder(rewriter, mod);
mlir::Location loc = op.getLoc();
Expand Down
23 changes: 14 additions & 9 deletions flang/runtime/CUDA/allocatable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ extern "C" {
RT_EXT_API_GROUP_BEGIN

int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
int sourceLine) {
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
#ifndef RT_DEVICE_COMPILATION
// Descriptor synchronization is only done when the allocation is done
// from the host.
Expand All @@ -41,8 +41,8 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
}

int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
int sourceLine) {
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
if (desc.HasAddendum()) {
Terminator terminator{sourceFile, sourceLine};
// TODO: This requires a bit more work to set the correct type descriptor
Expand All @@ -53,14 +53,19 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
// Perform the standard allocation.
int stat{RTNAME(AllocatableAllocate)(
desc, hasStat, errMsg, sourceFile, sourceLine)};
if (pinned) {
// Set pinned according to stat. More infrastructure is needed to set it
// closer to the actual allocation call.
*pinned = (stat == StatOk);
}
return stat;
}

int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
const Descriptor &source, int64_t stream, bool hasStat,
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
alloc, stream, hasStat, errMsg, sourceFile, sourceLine)};
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
Expand All @@ -70,10 +75,10 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
}

int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
const Descriptor &source, int64_t stream, bool hasStat,
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocateSync)(
alloc, stream, hasStat, errMsg, sourceFile, sourceLine)};
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
Expand Down
24 changes: 15 additions & 9 deletions flang/runtime/CUDA/pointer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN

int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned,
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
int sourceLine) {
if (desc.HasAddendum()) {
Terminator terminator{sourceFile, sourceLine};
// TODO: This requires a bit more work to set the correct type descriptor
Expand All @@ -33,14 +34,19 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
// Perform the standard allocation.
int stat{
RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
if (pinned) {
// Set pinned according to stat. More infrastructure is needed to set it
// closer to the actual allocation call.
*pinned = (stat == StatOk);
}
return stat;
}

int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
int sourceLine) {
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocate)(
desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
#ifndef RT_DEVICE_COMPILATION
// Descriptor synchronization is only done when the allocation is done
// from the host.
Expand All @@ -55,10 +61,10 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
}

int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
const Descriptor &source, int64_t stream, bool hasStat,
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocate)(
pointer, stream, hasStat, errMsg, sourceFile, sourceLine)};
pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
Expand All @@ -68,10 +74,10 @@ int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
}

int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
const Descriptor &source, int64_t stream, bool hasStat,
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocateSync)(
pointer, stream, hasStat, errMsg, sourceFile, sourceLine)};
pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
Expand Down
27 changes: 21 additions & 6 deletions flang/test/Fir/CUDA/cuda-allocate.fir
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func.func @_QPsub1() {
// CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
// CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
Expand Down Expand Up @@ -47,7 +47,7 @@ func.func @_QPsub3() {
// CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ADDR]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)

// CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

// CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
Expand Down Expand Up @@ -87,7 +87,7 @@ func.func @_QPsub5() {
}

// CHECK-LABEL: func.func @_QPsub5()
// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: fir.call @_FortranAAllocatableDeallocate({{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32


Expand Down Expand Up @@ -118,7 +118,7 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} {
// CHECK: %[[B:.*]]:2 = hlfir.declare %[[B_ADDR]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMdataEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
// CHECK: _FortranAAllocatableSetBounds
// CHECK: %[[B_BOX:.*]] = fir.convert %[[B]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32


func.func @_QPallocate_source() {
Expand All @@ -142,7 +142,7 @@ func.func @_QPallocate_source() {
// CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
// CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[SOURCE_CONV:.*]] = fir.convert %[[SOURCE]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.box<none>
// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32


fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?x?xf32>>> {
Expand Down Expand Up @@ -179,7 +179,7 @@ func.func @_QQallocate_stream() {
// CHECK: %[[STREAM_ALLOCA:.*]] = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"}
// CHECK: %[[STREAM:.*]] = fir.declare %[[STREAM_ALLOCA]] {uniq_name = "_QFEstream1"} : (!fir.ref<i64>) -> !fir.ref<i64>
// CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref<i64>
// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32


func.func @_QPp_alloc() {
Expand Down Expand Up @@ -255,4 +255,19 @@ func.func @_QMmod1Ppointer_source_global() {
// CHECK-LABEL: func.func @_QMmod1Ppointer_source_global()
// CHECK: fir.call @_FortranACUFPointerAllocateSourceSync

// Test input: cuf.allocate on an allocatable declared with the pinned data
// attribute, carrying a `pinned(...)` operand that references a
// !fir.logical<4> flag. The expectations that follow verify that this flag
// reference is converted to !fir.ref<i1> and passed to the runtime call.
func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} {
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
%4 = fir.declare %0 {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
%13 = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"}
%14 = fir.declare %13 {uniq_name = "_QFEpinnedflag"} : (!fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>>
%18 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> pinned(%14 : !fir.ref<!fir.logical<4>>) {data_attr = #cuf.cuda<pinned>, hasStat} -> i32
return
}

// CHECK-LABEL: func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} {
// CHECK: %[[PINNED:.*]] = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"}
// CHECK: %[[DECL_PINNED:.*]] = fir.declare %[[PINNED]] {uniq_name = "_QFEpinnedflag"} : (!fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>>
// CHECK: %[[CONV_PINNED:.*]] = fir.convert %[[DECL_PINNED]] : (!fir.ref<!fir.logical<4>>) -> !fir.ref<i1>
// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

} // end of module

0 comments on commit f1b075d

Please sign in to comment.