[flang][cuda] Handle pointer allocation with source #124070

clementval · 2025-01-23T05:45:40Z

No description provided.

llvmbot · 2025-01-23T05:46:11Z

@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタインクレメン) (clementval)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/124070.diff

4 Files Affected:

(modified) flang/include/flang/Runtime/CUDA/pointer.h (+7)
(modified) flang/lib/Optimizer/Transforms/CUFOpConversion.cpp (+5-4)
(modified) flang/runtime/CUDA/pointer.cpp (+15)
(modified) flang/test/Fir/CUDA/cuda-allocate.fir (+18)

diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
index db5242696303f5..2197d85f4b93e5 100644
--- a/flang/include/flang/Runtime/CUDA/pointer.h
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -21,6 +21,13 @@ int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
     bool hasStat = false, const Descriptor *errMsg = nullptr,
     const char *sourceFile = nullptr, int sourceLine = 0);
 
+/// Perform allocation of the pointer descriptor without synchronization, then
+/// assign data from source. Returns the stat of the allocation.
+int RTDECL(CUFPointerAllocateSource)(Descriptor &pointer,
+    const Descriptor &source, int64_t stream = -1, bool hasStat = false,
+    const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+    int sourceLine = 0);
+
 } // extern "C"
 
 } // namespace Fortran::runtime::cuda
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 23248f6d12622a..b0d6b0f0993a61 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -189,11 +189,12 @@ struct CUFAllocateOpConversion
 
     mlir::func::FuncOp func;
     if (op.getSource()) {
-      if (isPointer)
-        TODO(loc, "pointer allocation with source");
       func =
-          fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocateSource)>(
-              loc, builder);
+          isPointer
+              ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSource)>(
+                    loc, builder)
+              : fir::runtime::getRuntimeFunc<mkRTKey(
+                    CUFAllocatableAllocateSource)>(loc, builder);
     } else {
       func =
           isPointer
diff --git a/flang/runtime/CUDA/pointer.cpp b/flang/runtime/CUDA/pointer.cpp
index 0c5d3a5a6297d8..35f373b0a56c37 100644
--- a/flang/runtime/CUDA/pointer.cpp
+++ b/flang/runtime/CUDA/pointer.cpp
@@ -7,8 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Runtime/CUDA/pointer.h"
+#include "../assign-impl.h"
 #include "../stat.h"
 #include "../terminator.h"
+#include "flang/Runtime/CUDA/memmove-function.h"
 #include "flang/Runtime/pointer.h"
 
 #include "cuda_runtime.h"
@@ -33,6 +35,19 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
   return stat;
 }
 
+int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
+    const Descriptor &source, int64_t stream, bool hasStat,
+    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+  int stat{RTNAME(CUFPointerAllocate)( // allocate via the non-source entry
+      pointer, stream, hasStat, errMsg, sourceFile, sourceLine)};
+  if (stat == StatOk) { // assign from source only if allocation succeeded
+    Terminator terminator{sourceFile, sourceLine};
+    Fortran::runtime::DoFromSourceAssign(
+        pointer, source, terminator, &MemmoveHostToDevice);
+  }
+  return stat; // stat produced by the allocation call above
+}
+
 RT_EXT_API_GROUP_END
 
 } // extern "C"
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index 2ac9498d355414..804bb8636685d1 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -192,4 +192,22 @@ func.func @_QPp_alloc() {
 // CHECK-LABEL: func.func @_QPp_alloc()
 // CHECK: fir.call @_FortranACUFPointerAllocate
 
+func.func @_QPpointer_source() { // allocate device pointer a_d with source a
+  %c0_i64 = arith.constant 0 : i64
+  %c1_i32 = arith.constant 1 : i32
+  %c0_i32 = arith.constant 0 : i32
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xf32>>> {bindc_name = "a", uniq_name = "_QFpointer_sourceEa"}
+  %4 = fir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFpointer_sourceEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %5 = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xf32>>> {bindc_name = "a_d", data_attr = #cuf.cuda<device>, uniq_name = "_QFpointer_sourceEa_d"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %7 = fir.declare %5 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointer_sourceEa_d"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %8 = fir.load %4 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %22 = cuf.allocate %7 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>> source(%8 : !fir.box<!fir.heap<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32 // op under test: pointer-attributed box plus a source operand
+  return
+}
+
+// CHECK-LABEL: func.func @_QPpointer_source()
+// CHECK: _FortranACUFPointerAllocateSource
+
 } // end of module

llvmbot · 2025-01-23T05:46:12Z

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタインクレメン) (clementval)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/124070.diff

4 Files Affected:

(modified) flang/include/flang/Runtime/CUDA/pointer.h (+7)
(modified) flang/lib/Optimizer/Transforms/CUFOpConversion.cpp (+5-4)
(modified) flang/runtime/CUDA/pointer.cpp (+15)
(modified) flang/test/Fir/CUDA/cuda-allocate.fir (+18)

diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
index db5242696303f5..2197d85f4b93e5 100644
--- a/flang/include/flang/Runtime/CUDA/pointer.h
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -21,6 +21,13 @@ int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
     bool hasStat = false, const Descriptor *errMsg = nullptr,
     const char *sourceFile = nullptr, int sourceLine = 0);
 
+/// Perform allocation of the pointer descriptor without synchronization, then
+/// assign data from source. Returns the stat of the allocation.
+int RTDECL(CUFPointerAllocateSource)(Descriptor &pointer,
+    const Descriptor &source, int64_t stream = -1, bool hasStat = false,
+    const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+    int sourceLine = 0);
+
 } // extern "C"
 
 } // namespace Fortran::runtime::cuda
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 23248f6d12622a..b0d6b0f0993a61 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -189,11 +189,12 @@ struct CUFAllocateOpConversion
 
     mlir::func::FuncOp func;
     if (op.getSource()) {
-      if (isPointer)
-        TODO(loc, "pointer allocation with source");
       func =
-          fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocateSource)>(
-              loc, builder);
+          isPointer
+              ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSource)>(
+                    loc, builder)
+              : fir::runtime::getRuntimeFunc<mkRTKey(
+                    CUFAllocatableAllocateSource)>(loc, builder);
     } else {
       func =
           isPointer
diff --git a/flang/runtime/CUDA/pointer.cpp b/flang/runtime/CUDA/pointer.cpp
index 0c5d3a5a6297d8..35f373b0a56c37 100644
--- a/flang/runtime/CUDA/pointer.cpp
+++ b/flang/runtime/CUDA/pointer.cpp
@@ -7,8 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Runtime/CUDA/pointer.h"
+#include "../assign-impl.h"
 #include "../stat.h"
 #include "../terminator.h"
+#include "flang/Runtime/CUDA/memmove-function.h"
 #include "flang/Runtime/pointer.h"
 
 #include "cuda_runtime.h"
@@ -33,6 +35,19 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
   return stat;
 }
 
+int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
+    const Descriptor &source, int64_t stream, bool hasStat,
+    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+  int stat{RTNAME(CUFPointerAllocate)( // allocate via the non-source entry
+      pointer, stream, hasStat, errMsg, sourceFile, sourceLine)};
+  if (stat == StatOk) { // assign from source only if allocation succeeded
+    Terminator terminator{sourceFile, sourceLine};
+    Fortran::runtime::DoFromSourceAssign(
+        pointer, source, terminator, &MemmoveHostToDevice);
+  }
+  return stat; // stat produced by the allocation call above
+}
+
 RT_EXT_API_GROUP_END
 
 } // extern "C"
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index 2ac9498d355414..804bb8636685d1 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -192,4 +192,22 @@ func.func @_QPp_alloc() {
 // CHECK-LABEL: func.func @_QPp_alloc()
 // CHECK: fir.call @_FortranACUFPointerAllocate
 
+func.func @_QPpointer_source() { // allocate device pointer a_d with source a
+  %c0_i64 = arith.constant 0 : i64
+  %c1_i32 = arith.constant 1 : i32
+  %c0_i32 = arith.constant 0 : i32
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xf32>>> {bindc_name = "a", uniq_name = "_QFpointer_sourceEa"}
+  %4 = fir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFpointer_sourceEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %5 = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xf32>>> {bindc_name = "a_d", data_attr = #cuf.cuda<device>, uniq_name = "_QFpointer_sourceEa_d"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %7 = fir.declare %5 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointer_sourceEa_d"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %8 = fir.load %4 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %22 = cuf.allocate %7 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>> source(%8 : !fir.box<!fir.heap<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32 // op under test: pointer-attributed box plus a source operand
+  return
+}
+
+// CHECK-LABEL: func.func @_QPpointer_source()
+// CHECK: _FortranACUFPointerAllocateSource
+
 } // end of module

[flang][cuda] Handle pointer allocation with source

9587eb5

clementval requested review from wangzpgi and Renaud-K January 23, 2025 05:45

llvmbot added the flang:runtime, flang (Flang issues not falling into any other category), and flang:fir-hlfir labels Jan 23, 2025

wangzpgi approved these changes Jan 23, 2025

View reviewed changes

clementval merged commit 8c138be into llvm:main Jan 23, 2025
12 checks passed

clementval deleted the cuf_pointer_allocate_source branch January 23, 2025 17:24

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[flang][cuda] Handle pointer allocation with source #124070

[flang][cuda] Handle pointer allocation with source #124070

clementval commented Jan 23, 2025

llvmbot commented Jan 23, 2025

llvmbot commented Jan 23, 2025

[flang][cuda] Handle pointer allocation with source #124070

[flang][cuda] Handle pointer allocation with source #124070

Conversation

clementval commented Jan 23, 2025

llvmbot commented Jan 23, 2025

llvmbot commented Jan 23, 2025