-
Notifications
You must be signed in to change notification settings - Fork 13.6k
Revert "[flang][cuda] Specialize entry point for scalar to desc data transfer" #116458
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Revert "[flang][cuda] Specialize entry point for scalar to desc data transfer". This reverts commit 43cb424.
@llvm/pr-subscribers-flang-runtime Author: Valentin Clement (バレンタイン クレメン) (clementval) — Changes: Reverts llvm/llvm-project#116457 — Full diff: https://github.com/llvm/llvm-project/pull/116458.diff — 6 Files Affected:
diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h
index 2bb083b0dd75cb..713bdf536aaf90 100644
--- a/flang/include/flang/Runtime/CUDA/memory.h
+++ b/flang/include/flang/Runtime/CUDA/memory.h
@@ -44,10 +44,6 @@ void RTDECL(CUFDataTransferPtrDesc)(void *dst, Descriptor *src,
void RTDECL(CUFDataTransferDescDesc)(Descriptor *dst, Descriptor *src,
unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
-/// Data transfer from a scalar descriptor to a descriptor.
-void RTDECL(CUFDataTransferCstDesc)(Descriptor *dst, Descriptor *src,
- unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
-
/// Data transfer from a descriptor to a descriptor.
void RTDECL(CUFDataTransferDescDescNoRealloc)(Descriptor *dst, Descriptor *src,
unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 9de20f0f0d45e1..ec7f67dff763b4 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -563,9 +563,8 @@ struct CUFDataTransferOpConversion
// until we have more infrastructure.
mlir::Value src = emboxSrc(rewriter, op, symtab);
mlir::Value dst = emboxDst(rewriter, op, symtab);
- mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>(
- loc, builder);
+ mlir::func::FuncOp func = fir::runtime::getRuntimeFunc<mkRTKey(
+ CUFDataTransferDescDescNoRealloc)>(loc, builder);
auto fTy = func.getFunctionType();
mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
mlir::Value sourceLine =
@@ -649,9 +648,6 @@ struct CUFDataTransferOpConversion
mlir::Value src = op.getSrc();
if (!mlir::isa<fir::BaseBoxType>(srcTy)) {
src = emboxSrc(rewriter, op, symtab);
- if (fir::isa_trivial(srcTy))
- func = fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>(
- loc, builder);
}
auto materializeBoxIfNeeded = [&](mlir::Value val) -> mlir::Value {
if (mlir::isa<fir::EmboxOp>(val.getDefiningOp())) {
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index 68963c4d7738ac..7b40b837e7666e 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "flang/Runtime/CUDA/memory.h"
-#include "../assign-impl.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/CUDA/descriptor.h"
@@ -121,24 +120,6 @@ void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
*dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct);
}
-void RTDECL(CUFDataTransferCstDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
- unsigned mode, const char *sourceFile, int sourceLine) {
- MemmoveFct memmoveFct;
- Terminator terminator{sourceFile, sourceLine};
- if (mode == kHostToDevice) {
- memmoveFct = &MemmoveHostToDevice;
- } else if (mode == kDeviceToHost) {
- memmoveFct = &MemmoveDeviceToHost;
- } else if (mode == kDeviceToDevice) {
- memmoveFct = &MemmoveDeviceToDevice;
- } else {
- terminator.Crash("host to host copy not supported");
- }
-
- Fortran::runtime::DoFromSourceAssign(
- *dstDesc, *srcDesc, terminator, memmoveFct);
-}
-
void RTDECL(CUFDataTransferDescDescNoRealloc)(Descriptor *dstDesc,
Descriptor *srcDesc, unsigned mode, const char *sourceFile,
int sourceLine) {
diff --git a/flang/runtime/assign-impl.h b/flang/runtime/assign-impl.h
index 5db0bc81510bff..f07a501d1d1263 100644
--- a/flang/runtime/assign-impl.h
+++ b/flang/runtime/assign-impl.h
@@ -9,29 +9,16 @@
#ifndef FORTRAN_RUNTIME_ASSIGN_IMPL_H_
#define FORTRAN_RUNTIME_ASSIGN_IMPL_H_
-#include "flang/Runtime/freestanding-tools.h"
-
namespace Fortran::runtime {
class Descriptor;
class Terminator;
-using MemmoveFct = void *(*)(void *, const void *, std::size_t);
-
// Assign one object to another via allocate statement from source specifier.
// Note that if allocate object and source expression have the same rank, the
// value of the allocate object becomes the value provided; otherwise the value
// of each element of allocate object becomes the value provided (9.7.1.2(7)).
-#ifdef RT_DEVICE_COMPILATION
-static RT_API_ATTRS void *MemmoveWrapper(
- void *dest, const void *src, std::size_t count) {
- return Fortran::runtime::memmove(dest, src, count);
-}
-RT_API_ATTRS void DoFromSourceAssign(Descriptor &, const Descriptor &,
- Terminator &, MemmoveFct memmoveFct = &MemmoveWrapper);
-#else
-RT_API_ATTRS void DoFromSourceAssign(Descriptor &, const Descriptor &,
- Terminator &, MemmoveFct memmoveFct = &Fortran::runtime::memmove);
-#endif
+RT_API_ATTRS void DoFromSourceAssign(
+ Descriptor &, const Descriptor &, Terminator &);
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_ASSIGN_IMPL_H_
diff --git a/flang/runtime/assign.cpp b/flang/runtime/assign.cpp
index 8f0efaa376c198..83c0b9c70ed0d1 100644
--- a/flang/runtime/assign.cpp
+++ b/flang/runtime/assign.cpp
@@ -509,8 +509,8 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
RT_OFFLOAD_API_GROUP_BEGIN
-RT_API_ATTRS void DoFromSourceAssign(Descriptor &alloc,
- const Descriptor &source, Terminator &terminator, MemmoveFct memmoveFct) {
+RT_API_ATTRS void DoFromSourceAssign(
+ Descriptor &alloc, const Descriptor &source, Terminator &terminator) {
if (alloc.rank() > 0 && source.rank() == 0) {
// The value of each element of allocate object becomes the value of source.
DescriptorAddendum *allocAddendum{alloc.Addendum()};
@@ -523,17 +523,17 @@ RT_API_ATTRS void DoFromSourceAssign(Descriptor &alloc,
alloc.IncrementSubscripts(allocAt)) {
Descriptor allocElement{*Descriptor::Create(*allocDerived,
reinterpret_cast<void *>(alloc.Element<char>(allocAt)), 0)};
- Assign(allocElement, source, terminator, NoAssignFlags, memmoveFct);
+ Assign(allocElement, source, terminator, NoAssignFlags);
}
} else { // intrinsic type
for (std::size_t n{alloc.Elements()}; n-- > 0;
alloc.IncrementSubscripts(allocAt)) {
- memmoveFct(alloc.Element<char>(allocAt), source.raw().base_addr,
- alloc.ElementBytes());
+ Fortran::runtime::memmove(alloc.Element<char>(allocAt),
+ source.raw().base_addr, alloc.ElementBytes());
}
}
} else {
- Assign(alloc, source, terminator, NoAssignFlags, memmoveFct);
+ Assign(alloc, source, terminator, NoAssignFlags);
}
}
diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir
index 1ee44f3c6d97c9..3209197e118d19 100644
--- a/flang/test/Fir/CUDA/cuda-data-transfer.fir
+++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir
@@ -38,7 +38,7 @@ func.func @_QPsub2() {
// CHECK: fir.store %[[EMBOX]] to %[[TEMP_BOX]] : !fir.ref<!fir.box<i32>>
// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[TEMP_CONV:.*]] = fir.convert %[[TEMP_BOX]] : (!fir.ref<!fir.box<i32>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%[[ADEV_BOX]], %[[TEMP_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
+// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[ADEV_BOX]], %[[TEMP_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
func.func @_QPsub3() {
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "adev", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub3Eadev"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
@@ -58,7 +58,7 @@ func.func @_QPsub3() {
// CHECK: fir.store %[[EMBOX]] to %[[TEMP_BOX]] : !fir.ref<!fir.box<i32>>
// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[V_CONV:.*]] = fir.convert %[[TEMP_BOX]] : (!fir.ref<!fir.box<i32>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%[[ADEV_BOX]], %[[V_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
+// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[ADEV_BOX]], %[[V_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
func.func @_QPsub4() {
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "adev", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Eadev"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
@@ -297,7 +297,7 @@ func.func @_QPscalar_to_array() {
}
// CHECK-LABEL: func.func @_QPscalar_to_array()
-// CHECK: _FortranACUFDataTransferCstDesc
+// CHECK: _FortranACUFDataTransferDescDescNoRealloc
func.func @_QPtest_type() {
%0 = cuf.alloc !fir.type<_QMbarTcmplx{id:i32,c:complex<f32>}> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_typeEa"} -> !fir.ref<!fir.type<_QMbarTcmplx{id:i32,c:complex<f32>}>>
@@ -344,7 +344,7 @@ func.func @_QPshape_shift() {
}
// CHECK-LABEL: func.func @_QPshape_shift()
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc
+// CHECK: fir.call @_FortranACUFDataTransferDescDescNoRealloc
func.func @_QPshape_shift2() {
%c11 = arith.constant 11 : index
@@ -383,7 +383,7 @@ func.func @_QPdevice_addr_conv() {
// CHECK: %[[DEV_ADDR:.*]] = fir.call @_FortranACUFGetDeviceAddress(%{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
// CHECK: %[[DEV_ADDR_CONV:.*]] = fir.convert %[[DEV_ADDR]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.array<4xf32>>
// CHECK: fir.embox %[[DEV_ADDR_CONV]](%{{.*}}) : (!fir.ref<!fir.array<4xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc
+// CHECK: fir.call @_FortranACUFDataTransferDescDescNoRealloc
func.func @_QQchar_transfer() attributes {fir.bindc_name = "char_transfer"} {
%c1 = arith.constant 1 : index
@@ -464,6 +464,6 @@ func.func @_QPlogical_cst() {
// CHECK: %[[EMBOX:.*]] = fir.embox %[[CONST]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref<!fir.box<!fir.logical<4>>>
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<4>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
+// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
} // end of module
|
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) — Changes: Reverts llvm/llvm-project#116457 — Full diff: https://github.com/llvm/llvm-project/pull/116458.diff — 6 Files Affected:
diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h
index 2bb083b0dd75cb..713bdf536aaf90 100644
--- a/flang/include/flang/Runtime/CUDA/memory.h
+++ b/flang/include/flang/Runtime/CUDA/memory.h
@@ -44,10 +44,6 @@ void RTDECL(CUFDataTransferPtrDesc)(void *dst, Descriptor *src,
void RTDECL(CUFDataTransferDescDesc)(Descriptor *dst, Descriptor *src,
unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
-/// Data transfer from a scalar descriptor to a descriptor.
-void RTDECL(CUFDataTransferCstDesc)(Descriptor *dst, Descriptor *src,
- unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
-
/// Data transfer from a descriptor to a descriptor.
void RTDECL(CUFDataTransferDescDescNoRealloc)(Descriptor *dst, Descriptor *src,
unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 9de20f0f0d45e1..ec7f67dff763b4 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -563,9 +563,8 @@ struct CUFDataTransferOpConversion
// until we have more infrastructure.
mlir::Value src = emboxSrc(rewriter, op, symtab);
mlir::Value dst = emboxDst(rewriter, op, symtab);
- mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>(
- loc, builder);
+ mlir::func::FuncOp func = fir::runtime::getRuntimeFunc<mkRTKey(
+ CUFDataTransferDescDescNoRealloc)>(loc, builder);
auto fTy = func.getFunctionType();
mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
mlir::Value sourceLine =
@@ -649,9 +648,6 @@ struct CUFDataTransferOpConversion
mlir::Value src = op.getSrc();
if (!mlir::isa<fir::BaseBoxType>(srcTy)) {
src = emboxSrc(rewriter, op, symtab);
- if (fir::isa_trivial(srcTy))
- func = fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>(
- loc, builder);
}
auto materializeBoxIfNeeded = [&](mlir::Value val) -> mlir::Value {
if (mlir::isa<fir::EmboxOp>(val.getDefiningOp())) {
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index 68963c4d7738ac..7b40b837e7666e 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "flang/Runtime/CUDA/memory.h"
-#include "../assign-impl.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/CUDA/descriptor.h"
@@ -121,24 +120,6 @@ void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
*dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct);
}
-void RTDECL(CUFDataTransferCstDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
- unsigned mode, const char *sourceFile, int sourceLine) {
- MemmoveFct memmoveFct;
- Terminator terminator{sourceFile, sourceLine};
- if (mode == kHostToDevice) {
- memmoveFct = &MemmoveHostToDevice;
- } else if (mode == kDeviceToHost) {
- memmoveFct = &MemmoveDeviceToHost;
- } else if (mode == kDeviceToDevice) {
- memmoveFct = &MemmoveDeviceToDevice;
- } else {
- terminator.Crash("host to host copy not supported");
- }
-
- Fortran::runtime::DoFromSourceAssign(
- *dstDesc, *srcDesc, terminator, memmoveFct);
-}
-
void RTDECL(CUFDataTransferDescDescNoRealloc)(Descriptor *dstDesc,
Descriptor *srcDesc, unsigned mode, const char *sourceFile,
int sourceLine) {
diff --git a/flang/runtime/assign-impl.h b/flang/runtime/assign-impl.h
index 5db0bc81510bff..f07a501d1d1263 100644
--- a/flang/runtime/assign-impl.h
+++ b/flang/runtime/assign-impl.h
@@ -9,29 +9,16 @@
#ifndef FORTRAN_RUNTIME_ASSIGN_IMPL_H_
#define FORTRAN_RUNTIME_ASSIGN_IMPL_H_
-#include "flang/Runtime/freestanding-tools.h"
-
namespace Fortran::runtime {
class Descriptor;
class Terminator;
-using MemmoveFct = void *(*)(void *, const void *, std::size_t);
-
// Assign one object to another via allocate statement from source specifier.
// Note that if allocate object and source expression have the same rank, the
// value of the allocate object becomes the value provided; otherwise the value
// of each element of allocate object becomes the value provided (9.7.1.2(7)).
-#ifdef RT_DEVICE_COMPILATION
-static RT_API_ATTRS void *MemmoveWrapper(
- void *dest, const void *src, std::size_t count) {
- return Fortran::runtime::memmove(dest, src, count);
-}
-RT_API_ATTRS void DoFromSourceAssign(Descriptor &, const Descriptor &,
- Terminator &, MemmoveFct memmoveFct = &MemmoveWrapper);
-#else
-RT_API_ATTRS void DoFromSourceAssign(Descriptor &, const Descriptor &,
- Terminator &, MemmoveFct memmoveFct = &Fortran::runtime::memmove);
-#endif
+RT_API_ATTRS void DoFromSourceAssign(
+ Descriptor &, const Descriptor &, Terminator &);
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_ASSIGN_IMPL_H_
diff --git a/flang/runtime/assign.cpp b/flang/runtime/assign.cpp
index 8f0efaa376c198..83c0b9c70ed0d1 100644
--- a/flang/runtime/assign.cpp
+++ b/flang/runtime/assign.cpp
@@ -509,8 +509,8 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
RT_OFFLOAD_API_GROUP_BEGIN
-RT_API_ATTRS void DoFromSourceAssign(Descriptor &alloc,
- const Descriptor &source, Terminator &terminator, MemmoveFct memmoveFct) {
+RT_API_ATTRS void DoFromSourceAssign(
+ Descriptor &alloc, const Descriptor &source, Terminator &terminator) {
if (alloc.rank() > 0 && source.rank() == 0) {
// The value of each element of allocate object becomes the value of source.
DescriptorAddendum *allocAddendum{alloc.Addendum()};
@@ -523,17 +523,17 @@ RT_API_ATTRS void DoFromSourceAssign(Descriptor &alloc,
alloc.IncrementSubscripts(allocAt)) {
Descriptor allocElement{*Descriptor::Create(*allocDerived,
reinterpret_cast<void *>(alloc.Element<char>(allocAt)), 0)};
- Assign(allocElement, source, terminator, NoAssignFlags, memmoveFct);
+ Assign(allocElement, source, terminator, NoAssignFlags);
}
} else { // intrinsic type
for (std::size_t n{alloc.Elements()}; n-- > 0;
alloc.IncrementSubscripts(allocAt)) {
- memmoveFct(alloc.Element<char>(allocAt), source.raw().base_addr,
- alloc.ElementBytes());
+ Fortran::runtime::memmove(alloc.Element<char>(allocAt),
+ source.raw().base_addr, alloc.ElementBytes());
}
}
} else {
- Assign(alloc, source, terminator, NoAssignFlags, memmoveFct);
+ Assign(alloc, source, terminator, NoAssignFlags);
}
}
diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir
index 1ee44f3c6d97c9..3209197e118d19 100644
--- a/flang/test/Fir/CUDA/cuda-data-transfer.fir
+++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir
@@ -38,7 +38,7 @@ func.func @_QPsub2() {
// CHECK: fir.store %[[EMBOX]] to %[[TEMP_BOX]] : !fir.ref<!fir.box<i32>>
// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[TEMP_CONV:.*]] = fir.convert %[[TEMP_BOX]] : (!fir.ref<!fir.box<i32>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%[[ADEV_BOX]], %[[TEMP_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
+// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[ADEV_BOX]], %[[TEMP_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
func.func @_QPsub3() {
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "adev", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub3Eadev"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
@@ -58,7 +58,7 @@ func.func @_QPsub3() {
// CHECK: fir.store %[[EMBOX]] to %[[TEMP_BOX]] : !fir.ref<!fir.box<i32>>
// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[V_CONV:.*]] = fir.convert %[[TEMP_BOX]] : (!fir.ref<!fir.box<i32>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%[[ADEV_BOX]], %[[V_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
+// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[ADEV_BOX]], %[[V_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
func.func @_QPsub4() {
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "adev", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Eadev"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
@@ -297,7 +297,7 @@ func.func @_QPscalar_to_array() {
}
// CHECK-LABEL: func.func @_QPscalar_to_array()
-// CHECK: _FortranACUFDataTransferCstDesc
+// CHECK: _FortranACUFDataTransferDescDescNoRealloc
func.func @_QPtest_type() {
%0 = cuf.alloc !fir.type<_QMbarTcmplx{id:i32,c:complex<f32>}> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_typeEa"} -> !fir.ref<!fir.type<_QMbarTcmplx{id:i32,c:complex<f32>}>>
@@ -344,7 +344,7 @@ func.func @_QPshape_shift() {
}
// CHECK-LABEL: func.func @_QPshape_shift()
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc
+// CHECK: fir.call @_FortranACUFDataTransferDescDescNoRealloc
func.func @_QPshape_shift2() {
%c11 = arith.constant 11 : index
@@ -383,7 +383,7 @@ func.func @_QPdevice_addr_conv() {
// CHECK: %[[DEV_ADDR:.*]] = fir.call @_FortranACUFGetDeviceAddress(%{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
// CHECK: %[[DEV_ADDR_CONV:.*]] = fir.convert %[[DEV_ADDR]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.array<4xf32>>
// CHECK: fir.embox %[[DEV_ADDR_CONV]](%{{.*}}) : (!fir.ref<!fir.array<4xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc
+// CHECK: fir.call @_FortranACUFDataTransferDescDescNoRealloc
func.func @_QQchar_transfer() attributes {fir.bindc_name = "char_transfer"} {
%c1 = arith.constant 1 : index
@@ -464,6 +464,6 @@ func.func @_QPlogical_cst() {
// CHECK: %[[EMBOX:.*]] = fir.embox %[[CONST]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref<!fir.box<!fir.logical<4>>>
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<4>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
+// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> none
} // end of module
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Reverts #116457