|
9 | 9 | // test pass doesn't set up the GPU address space conversions.
|
10 | 10 |
|
11 | 11 | #gpu_global_addrspace = 1
|
| 12 | +#gpu_lds_addrspace = 3 |
12 | 13 |
|
13 | 14 | // CHECK-LABEL: func @fat_raw_buffer_cast
|
14 | 15 | func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
|
@@ -461,3 +462,25 @@ func.func @sched_barrier() {
|
461 | 462 | amdgpu.sched_barrier allow = <valu|all_vmem>
|
462 | 463 | func.return
|
463 | 464 | }
|
| 465 | + |
// Exercises the lowering of `amdgpu.global_load` with f32 elements. The
// directives below expect both memref operands to be turned into LLVM memref
// descriptors, one `llvm.getelementptr` per operand, and a single
// `rocdl.global.load.lds` fed by those pointers and three i32 constants.
// CHECK-LABEL: func @global_load_to_rocdl_f32
// CHECK-SAME: (%[[ARG0:.*]]: memref<128x72xf32, 1>)
func.func @global_load_to_rocdl_f32(%global : memref<128x72xf32, #gpu_global_addrspace>) {
  %c0 = arith.constant 0 : i32
  %c12 = arith.constant 12 : i32
  %c32 = arith.constant 32 : i32
  %alloc = memref.alloc() : memref<64x64xf32, #gpu_lds_addrspace>
  // The argument is matched through the ARG0 capture from the signature line
  // instead of the printer-assigned SSA name, so the test does not depend on
  // how the printer happens to name block arguments.
  // GFX942: %[[GLOBAL_DESC:.*]] = builtin.unrealized_conversion_cast %[[ARG0]] : memref<128x72xf32, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64, array<2 x i64>, array<2 x i64>)>
  // GFX942: %[[ALLOC:.*]] = memref.alloc() : memref<64x64xf32, 3>
  // GFX942: %[[LDS_DESC:.*]] = builtin.unrealized_conversion_cast %[[ALLOC]] : memref<64x64xf32, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
  // Field 1 of each descriptor is extracted and used as the GEP base.
  // GFX942: %[[GLOBAL_BASE:.*]] = llvm.extractvalue %[[GLOBAL_DESC]][1] : !llvm.struct<(ptr<1>, ptr<1>, i64, array<2 x i64>, array<2 x i64>)>
  // GFX942: %[[LDS_BASE:.*]] = llvm.extractvalue %[[LDS_DESC]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
  // NOTE(review): the two offsets are only captured here; how they are
  // computed from the indices is not verified by this test.
  // GFX942: %[[GLOBAL_PTR:.*]] = llvm.getelementptr %[[GLOBAL_BASE]][%[[GLOBAL_OFFSET:.*]]] : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, f32
  // GFX942: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]][%[[LDS_OFFSET:.*]]] : (!llvm.ptr<3>, i32) -> !llvm.ptr<3>, f32
  // The third operand of the ROCDL op is the constant 4 (presumably the
  // transfer size in bytes for one f32 -- confirm against the op definition);
  // the last two operands are zero constants.
  // GFX942: %[[C4:.*]] = llvm.mlir.constant(4 : i32) : i32
  // GFX942: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
  // GFX942: %[[C0_2:.*]] = llvm.mlir.constant(0 : i32) : i32
  // GFX942: rocdl.global.load.lds %[[GLOBAL_PTR]], %[[LDS_PTR]], %[[C4]], %[[C0]], %[[C0_2]]
  amdgpu.global_load %global[%c12, %c0], %alloc[%c32, %c0] : memref<128x72xf32, #gpu_global_addrspace>, memref<64x64xf32, #gpu_lds_addrspace>
  func.return
}
0 commit comments