Skip to content

Commit e9866d5

Browse files
authored
[flang][cuda] Fix GPULaunchKernelConversion to generate correct kernel launch parameters (#119431)
For the call to `_FortranACUFLaunchKernel`, we store the pointer to a member of a temporary structure in a parameter array. However, when we obtain an element pointer from the parameter array, its address is calculated using the structure type as the element type instead of the pointer type. This PR properly treats the parameter array as an array of pointers.

Example (IR generated before this change):

```mlir
%30 = llvm.load %29 : !llvm.ptr -> i32
%31 = llvm.mlir.constant(1 : i32) : i32
%32 = llvm.alloca %31 x !llvm.struct<(i64, i64, i32, ptr)> : (i32) -> !llvm.ptr
%33 = llvm.mlir.constant(4 : i32) : i32
%34 = llvm.alloca %33 x !llvm.ptr : (i32) -> !llvm.ptr
%35 = llvm.mlir.constant(0 : i32) : i32
%36 = llvm.getelementptr %32[%35] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.struct<(i64, i64, i32, ptr)>
llvm.store %8, %36 : i64, !llvm.ptr
%37 = llvm.getelementptr %34[%35] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.struct<(i64, i64, i32, ptr)>
llvm.store %36, %37 : !llvm.ptr, !llvm.ptr
...
llvm.call @_FortranACUFLaunchKernel(%47, %8, %8, %8, %2, %8, %8, %7, %34, %48) : (!llvm.ptr, i64, i64, i64, i64, i64, i64, i32, !llvm.ptr, !llvm.ptr) -> ()
```

In this example, with this change, `%37 = llvm.getelementptr %34[%35] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.struct<(i64, i64, i32, ptr)>` becomes `%37 = llvm.getelementptr %34[%35] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.ptr`.
1 parent ffb19f4 commit e9866d5

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ static mlir::Value createKernelArgArray(mlir::Location loc,
5858
loc, ptrTy, structTy, argStruct, mlir::ArrayRef<mlir::Value>({indice}));
5959
rewriter.create<LLVM::StoreOp>(loc, arg, structMember);
6060
mlir::Value arrayMember = rewriter.create<LLVM::GEPOp>(
61-
loc, ptrTy, structTy, argArray, mlir::ArrayRef<mlir::Value>({indice}));
61+
loc, ptrTy, ptrTy, argArray, mlir::ArrayRef<mlir::Value>({indice}));
6262
rewriter.create<LLVM::StoreOp>(loc, structMember, arrayMember);
6363
}
6464
return argArray;

flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,16 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : ve
9999
}
100100

101101
// CHECK-LABEL: _QMmod1Phost_sub
102-
102+
// CHECK: %[[STRUCT:.*]] = llvm.alloca %{{.*}} x !llvm.struct<(ptr)> : (i32) -> !llvm.ptr
103+
// CHECK: %[[PARAMS:.*]] = llvm.alloca %{{.*}} x !llvm.ptr : (i32) -> !llvm.ptr
104+
// CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : i32
105+
// CHECK: %[[STRUCT_PTR:.*]] = llvm.getelementptr %[[STRUCT]][%[[ZERO]]] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.struct<(ptr)>
106+
// CHECK: llvm.store %{{.*}}, %[[STRUCT_PTR]] : !llvm.ptr, !llvm.ptr
107+
// CHECK: %[[PARAM_PTR:.*]] = llvm.getelementptr %[[PARAMS]][%[[ZERO]]] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.ptr
108+
// CHECK: llvm.store %[[STRUCT_PTR]], %[[PARAM_PTR]] : !llvm.ptr, !llvm.ptr
103109
// CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1 : !llvm.ptr
104-
// CHECK: llvm.call @_FortranACUFLaunchKernel(%[[KERNEL_PTR]], {{.*}})
110+
// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
111+
// CHECK: llvm.call @_FortranACUFLaunchKernel(%[[KERNEL_PTR]], {{.*}}, %[[PARAMS]], %[[NULL]])
105112

106113
// -----
107114

0 commit comments

Comments
 (0)