From f1b075df2e8510b6ebed405a857f6941913ddf9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Sun, 2 Feb 2025 18:05:59 -0800 Subject: [PATCH] [flang][cuda] Pass the pinned variable in allocate calls (#125310) --- .../Optimizer/Builder/Runtime/RTBuilder.h | 7 +++++ .../include/flang/Runtime/CUDA/allocatable.h | 22 ++++++++------- flang/include/flang/Runtime/CUDA/pointer.h | 22 ++++++++------- .../Optimizer/Transforms/CUFOpConversion.cpp | 23 ++++++++-------- flang/runtime/CUDA/allocatable.cpp | 23 +++++++++------- flang/runtime/CUDA/pointer.cpp | 24 ++++++++++------- flang/test/Fir/CUDA/cuda-allocate.fir | 27 ++++++++++++++----- 7 files changed, 93 insertions(+), 55 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h index eaa1de76154d933..116b927a86a7a9a 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h @@ -401,6 +401,13 @@ constexpr TypeBuilderFunc getModel() { }; } template <> +constexpr TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + TypeBuilderFunc f{getModel()}; + return fir::ReferenceType::get(f(context)); + }; +} +template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get( diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h index 0a96f73b6be44bb..822f2d4a2b297db 100644 --- a/flang/include/flang/Runtime/CUDA/allocatable.h +++ b/flang/include/flang/Runtime/CUDA/allocatable.h @@ -18,28 +18,30 @@ extern "C" { /// Perform allocation of the descriptor. 
int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t stream = -1, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + bool *pinned = nullptr, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t stream = -1, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + bool *pinned = nullptr, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); /// Perform allocation of the descriptor without synchronization. Assign data /// from source. int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, - const Descriptor &source, int64_t stream = -1, bool hasStat = false, - const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, - int sourceLine = 0); + const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr, + bool hasStat = false, const Descriptor *errMsg = nullptr, + const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. Assign data from source. int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, - const Descriptor &source, int64_t stream = -1, bool hasStat = false, - const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, - int sourceLine = 0); + const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr, + bool hasStat = false, const Descriptor *errMsg = nullptr, + const char *sourceFile = nullptr, int sourceLine = 0); /// Perform deallocation of the descriptor with synchronization of it when /// necessary. 
diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h index 78c7a1a92b7ea95..7fbd8f8e061f204 100644 --- a/flang/include/flang/Runtime/CUDA/pointer.h +++ b/flang/include/flang/Runtime/CUDA/pointer.h @@ -18,28 +18,30 @@ extern "C" { /// Perform allocation of the descriptor. int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + bool *pinned = nullptr, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t stream = -1, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + bool *pinned = nullptr, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); /// Perform allocation of the descriptor without synchronization. Assign data /// from source. int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer, - const Descriptor &source, int64_t stream = -1, bool hasStat = false, - const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, - int sourceLine = 0); + const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr, + bool hasStat = false, const Descriptor *errMsg = nullptr, + const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. Assign data from source. 
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer, - const Descriptor &source, int64_t stream = -1, bool hasStat = false, - const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, - int sourceLine = 0); + const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr, + bool hasStat = false, const Descriptor *errMsg = nullptr, + const char *sourceFile = nullptr, int sourceLine = 0); } // extern "C" diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 77aa11f0603f697..549498f5585a68e 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -103,7 +103,7 @@ static mlir::LogicalResult convertOpToCall(OpTy op, mlir::Value sourceLine; if constexpr (std::is_same_v) sourceLine = fir::factory::locationToLineNo( - builder, loc, op.getSource() ? fTy.getInput(6) : fTy.getInput(5)); + builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6)); else sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); @@ -119,22 +119,28 @@ static mlir::LogicalResult convertOpToCall(OpTy op, } llvm::SmallVector args; if constexpr (std::is_same_v) { + mlir::Value pinned = + op.getPinned() + ? op.getPinned() + : builder.createNullConstant( + loc, fir::ReferenceType::get( + mlir::IntegerType::get(op.getContext(), 1))); if (op.getSource()) { mlir::Value stream = op.getStream() ? op.getStream() : builder.createIntegerConstant(loc, fTy.getInput(2), -1); - args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(), - op.getSource(), stream, hasStat, - errmsg, sourceFile, sourceLine); + args = fir::runtime::createArguments( + builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned, + hasStat, errmsg, sourceFile, sourceLine); } else { mlir::Value stream = op.getStream() ? 
op.getStream() : builder.createIntegerConstant(loc, fTy.getInput(1), -1); args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(), - stream, hasStat, errmsg, sourceFile, - sourceLine); + stream, pinned, hasStat, errmsg, + sourceFile, sourceLine); } } else { args = @@ -153,11 +159,6 @@ struct CUFAllocateOpConversion mlir::LogicalResult matchAndRewrite(cuf::AllocateOp op, mlir::PatternRewriter &rewriter) const override { - // TODO: Pinned is a reference to a logical value that can be set to true - // when pinned allocation succeed. This will require a new entry point. - if (op.getPinned()) - return mlir::failure(); - auto mod = op->getParentOfType(); fir::FirOpBuilder builder(rewriter, mod); mlir::Location loc = op.getLoc(); diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp index 9be54e8906903d2..6df3b06793b3e6b 100644 --- a/flang/runtime/CUDA/allocatable.cpp +++ b/flang/runtime/CUDA/allocatable.cpp @@ -23,10 +23,10 @@ extern "C" { RT_EXT_API_GROUP_BEGIN int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream, - bool hasStat, const Descriptor *errMsg, const char *sourceFile, - int sourceLine) { + bool *pinned, bool hasStat, const Descriptor *errMsg, + const char *sourceFile, int sourceLine) { int stat{RTNAME(CUFAllocatableAllocate)( - desc, stream, hasStat, errMsg, sourceFile, sourceLine)}; + desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; #ifndef RT_DEVICE_COMPILATION // Descriptor synchronization is only done when the allocation is done // from the host. 
@@ -41,8 +41,8 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream, } int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream, - bool hasStat, const Descriptor *errMsg, const char *sourceFile, - int sourceLine) { + bool *pinned, bool hasStat, const Descriptor *errMsg, + const char *sourceFile, int sourceLine) { if (desc.HasAddendum()) { Terminator terminator{sourceFile, sourceLine}; // TODO: This require a bit more work to set the correct type descriptor @@ -53,14 +53,19 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream, // Perform the standard allocation. int stat{RTNAME(AllocatableAllocate)( desc, hasStat, errMsg, sourceFile, sourceLine)}; + if (pinned) { + // Set pinned according to stat. More infrastructure is needed to set it + // closer to the actual allocation call. + *pinned = (stat == StatOk); + } return stat; } int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, - const Descriptor &source, int64_t stream, bool hasStat, + const Descriptor &source, int64_t stream, bool *pinned, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { int stat{RTNAME(CUFAllocatableAllocate)( - alloc, stream, hasStat, errMsg, sourceFile, sourceLine)}; + alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; Fortran::runtime::DoFromSourceAssign( @@ -70,10 +75,10 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, } int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, - const Descriptor &source, int64_t stream, bool hasStat, + const Descriptor &source, int64_t stream, bool *pinned, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { int stat{RTNAME(CUFAllocatableAllocateSync)( - alloc, stream, hasStat, errMsg, sourceFile, sourceLine)}; + alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, 
sourceLine}; Fortran::runtime::DoFromSourceAssign( diff --git a/flang/runtime/CUDA/pointer.cpp b/flang/runtime/CUDA/pointer.cpp index 3252410bd8d2c2c..d3ebe97b4e4accb 100644 --- a/flang/runtime/CUDA/pointer.cpp +++ b/flang/runtime/CUDA/pointer.cpp @@ -21,8 +21,9 @@ namespace Fortran::runtime::cuda { extern "C" { RT_EXT_API_GROUP_BEGIN -int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat, - const Descriptor *errMsg, const char *sourceFile, int sourceLine) { +int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned, + bool hasStat, const Descriptor *errMsg, const char *sourceFile, + int sourceLine) { if (desc.HasAddendum()) { Terminator terminator{sourceFile, sourceLine}; // TODO: This require a bit more work to set the correct type descriptor @@ -33,14 +34,19 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat, // Perform the standard allocation. int stat{ RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)}; + if (pinned) { + // Set pinned according to stat. More infrastructure is needed to set it + // closer to the actual allocation call. + *pinned = (stat == StatOk); + } return stat; } int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream, - bool hasStat, const Descriptor *errMsg, const char *sourceFile, - int sourceLine) { + bool *pinned, bool hasStat, const Descriptor *errMsg, + const char *sourceFile, int sourceLine) { int stat{RTNAME(CUFPointerAllocate)( - desc, stream, hasStat, errMsg, sourceFile, sourceLine)}; + desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; #ifndef RT_DEVICE_COMPILATION // Descriptor synchronization is only done when the allocation is done // from the host. 
@@ -55,10 +61,10 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream, } int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer, - const Descriptor &source, int64_t stream, bool hasStat, + const Descriptor &source, int64_t stream, bool *pinned, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { int stat{RTNAME(CUFPointerAllocate)( - pointer, stream, hasStat, errMsg, sourceFile, sourceLine)}; + pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; Fortran::runtime::DoFromSourceAssign( @@ -68,10 +74,10 @@ int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer, } int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer, - const Descriptor &source, int64_t stream, bool hasStat, + const Descriptor &source, int64_t stream, bool *pinned, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { int stat{RTNAME(CUFPointerAllocateSync)( - pointer, stream, hasStat, errMsg, sourceFile, sourceLine)}; + pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; Fortran::runtime::DoFromSourceAssign( diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index 08573110821cc2e..095ad92d5deb508 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -19,7 +19,7 @@ func.func @_QPsub1() { // CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref>) -> !fir.ref>>> // CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, 
i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -47,7 +47,7 @@ func.func @_QPsub3() { // CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ADDR]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMmod1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -87,7 +87,7 @@ func.func @_QPsub5() { } // CHECK-LABEL: func.func @_QPsub5() -// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: fir.call @_FortranAAllocatableDeallocate({{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -118,7 +118,7 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} { // CHECK: %[[B:.*]]:2 = hlfir.declare %[[B_ADDR]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = 
"_QMdataEb"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: _FortranAAllocatableSetBounds // CHECK: %[[B_BOX:.*]] = fir.convert %[[B]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 func.func @_QPallocate_source() { @@ -142,7 +142,7 @@ func.func @_QPallocate_source() { // CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref>>> // CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[SOURCE_CONV:.*]] = fir.convert %[[SOURCE]] : (!fir.box>>) -> !fir.box -// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda} : !fir.box>> { @@ -179,7 +179,7 @@ func.func @_QQallocate_stream() { // CHECK: %[[STREAM_ALLOCA:.*]] = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"} // CHECK: %[[STREAM:.*]] = fir.declare %[[STREAM_ALLOCA]] {uniq_name = "_QFEstream1"} : (!fir.ref) -> !fir.ref // CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref -// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, 
i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 func.func @_QPp_alloc() { @@ -255,4 +255,19 @@ func.func @_QMmod1Ppointer_source_global() { // CHECK-LABEL: func.func @_QMmod1Ppointer_source_global() // CHECK: fir.call @_FortranACUFPointerAllocateSourceSync +func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} { + %0 = cuf.alloc !fir.box>> {bindc_name = "a", data_attr = #cuf.cuda, uniq_name = "_QFEa"} -> !fir.ref>>> + %4 = fir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFEa"} : (!fir.ref>>>) -> !fir.ref>>> + %13 = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"} + %14 = fir.declare %13 {uniq_name = "_QFEpinnedflag"} : (!fir.ref>) -> !fir.ref> + %18 = cuf.allocate %4 : !fir.ref>>> pinned(%14 : !fir.ref>) {data_attr = #cuf.cuda, hasStat} -> i32 + return +} + +// CHECK-LABEL: func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} { +// CHECK: %[[PINNED:.*]] = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"} +// CHECK: %[[DECL_PINNED:.*]] = fir.declare %[[PINNED]] {uniq_name = "_QFEpinnedflag"} : (!fir.ref>) -> !fir.ref> +// CHECK: %[[CONV_PINNED:.*]] = fir.convert %[[DECL_PINNED]] : (!fir.ref>) -> !fir.ref +// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 + } // end of module