; RUN: llvm-as < %s | llvm-dis | FileCheck %s
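; Round-tripping through llvm-as/llvm-dis is expected to replace the old
; "enqueued-block" attribute scheme: each enqueued block kernel gets an
; externally initialized <kernel>.runtime.handle global in the
; ".amdgpu.kernel.runtime.handle" section, is appended to @llvm.used together
; with its handle, and carries !associated metadata pointing at the handle.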

%struct.ndrange_t = type { i32 }
%opencl.queue_t = type opaque

; CHECK: %block.runtime.handle.t = type { ptr, i32, i32 }
; CHECK: %block.runtime.handle.t.0 = type { ptr, i32, i32 }
; CHECK: %block.runtime.handle.t.1 = type { ptr, i32, i32 }
; CHECK: %block.runtime.handle.t.2 = type { ptr, i32, i32 }
; CHECK: %block.runtime.handle.t.3 = type { ptr, i32, i32 }
; CHECK: %block.runtime.handle.t.4 = type { ptr, i32, i32 }


; CHECK: @kernel_address_user = global [1 x ptr] [ptr @block_has_used_kernel_address]
; CHECK: @__test_block_invoke_kernel.runtime.handle = internal externally_initialized constant %block.runtime.handle.t zeroinitializer, section ".amdgpu.kernel.runtime.handle"
; CHECK: @__test_block_invoke_2_kernel.runtime.handle = internal externally_initialized constant %block.runtime.handle.t.0 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
; CHECK: @block_has_used_kernel_address.runtime.handle = internal externally_initialized constant %block.runtime.handle.t.1 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
; CHECK: @.runtime.handle = internal externally_initialized constant %block.runtime.handle.t.2 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
; CHECK: @.runtime.handle.1 = internal externally_initialized constant %block.runtime.handle.t.3 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
; CHECK: @kernel_linkonce_odr_block.runtime.handle = linkonce_odr externally_initialized constant %block.runtime.handle.t.4 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
; CHECK: @llvm.used = appending global [12 x ptr] [ptr @__test_block_invoke_kernel, ptr @__test_block_invoke_kernel.runtime.handle, ptr @__test_block_invoke_2_kernel, ptr @__test_block_invoke_2_kernel.runtime.handle, ptr @block_has_used_kernel_address, ptr @block_has_used_kernel_address.runtime.handle, ptr @0, ptr @.runtime.handle, ptr @1, ptr @.runtime.handle.1, ptr @kernel_linkonce_odr_block, ptr @kernel_linkonce_odr_block.runtime.handle], section "llvm.metadata"


define amdgpu_kernel void @non_caller(ptr addrspace(1) %a, i8 %b, ptr addrspace(1) %c, i64 %d) {
  ret void
}

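; The caller builds block descriptors in private (addrspace(5)) allocas: the
; block size, alignment, and captured values, which are then passed to
; __enqueue_kernel_basic together with the invoke kernel.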
define amdgpu_kernel void @caller(ptr addrspace(1) %a, i8 %b, ptr addrspace(1) %c, i64 %d) {
entry:
  %block = alloca <{ i32, i32, ptr addrspace(1), i8 }>, align 8, addrspace(5)
  %inst = alloca %struct.ndrange_t, align 4, addrspace(5)
  %block2 = alloca <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
  %inst3 = alloca %struct.ndrange_t, align 4, addrspace(5)
  %block.size = getelementptr inbounds <{ i32, i32, ptr addrspace(1), i8 }>, ptr addrspace(5) %block, i32 0, i32 0
  store i32 25, ptr addrspace(5) %block.size, align 8
  %block.align = getelementptr inbounds <{ i32, i32, ptr addrspace(1), i8 }>, ptr addrspace(5) %block, i32 0, i32 1
  store i32 8, ptr addrspace(5) %block.align, align 4
  %block.captured = getelementptr inbounds <{ i32, i32, ptr addrspace(1), i8 }>, ptr addrspace(5) %block, i32 0, i32 2
  store ptr addrspace(1) %a, ptr addrspace(5) %block.captured, align 8
  %block.captured1 = getelementptr inbounds <{ i32, i32, ptr addrspace(1), i8 }>, ptr addrspace(5) %block, i32 0, i32 3
  store i8 %b, ptr addrspace(5) %block.captured1, align 8
  %inst4 = addrspacecast ptr addrspace(5) %block to ptr
  %inst5 = call i32 @__enqueue_kernel_basic(ptr addrspace(1) poison, i32 0, ptr addrspace(5) byval(%struct.ndrange_t) nonnull %inst,
      ptr @__test_block_invoke_kernel, ptr nonnull %inst4) #2
  %inst10 = call i32 @__enqueue_kernel_basic(ptr addrspace(1) poison, i32 0, ptr addrspace(5) byval(%struct.ndrange_t) nonnull %inst,
      ptr @__test_block_invoke_kernel, ptr nonnull %inst4) #2
  %inst11 = call i32 @__enqueue_kernel_basic(ptr addrspace(1) poison, i32 0, ptr addrspace(5) byval(%struct.ndrange_t) nonnull %inst,
      ptr @0, ptr nonnull %inst4) #2
  %inst12 = call i32 @__enqueue_kernel_basic(ptr addrspace(1) poison, i32 0, ptr addrspace(5) byval(%struct.ndrange_t) nonnull %inst,
      ptr @1, ptr nonnull %inst4) #2
  %block.size4 = getelementptr inbounds <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) %block2, i32 0, i32 0
  store i32 41, ptr addrspace(5) %block.size4, align 8
  %block.align5 = getelementptr inbounds <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) %block2, i32 0, i32 1
  store i32 8, ptr addrspace(5) %block.align5, align 4
  %block.captured7 = getelementptr inbounds <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) %block2, i32 0, i32 2
  store ptr addrspace(1) %a, ptr addrspace(5) %block.captured7, align 8
  %block.captured8 = getelementptr inbounds <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) %block2, i32 0, i32 5
  store i8 %b, ptr addrspace(5) %block.captured8, align 8
  %block.captured9 = getelementptr inbounds <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) %block2, i32 0, i32 3
  store ptr addrspace(1) %c, ptr addrspace(5) %block.captured9, align 8
  %block.captured10 = getelementptr inbounds <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) %block2, i32 0, i32 4
  store i64 %d, ptr addrspace(5) %block.captured10, align 8
  %inst8 = addrspacecast ptr addrspace(5) %block2 to ptr
  %inst9 = call i32 @__enqueue_kernel_basic(ptr addrspace(1) poison, i32 0, ptr addrspace(5) byval(%struct.ndrange_t) nonnull %inst3,
      ptr @__test_block_invoke_2_kernel, ptr nonnull %inst8) #2
  ret void
}

; __enqueue_kernel* functions may get inlined
define amdgpu_kernel void @inlined_caller(ptr addrspace(1) %a, i8 %b, ptr addrspace(1) %c, i64 %d) {
entry:
  %inst = load i64, ptr addrspace(1) addrspacecast (ptr @__test_block_invoke_kernel to ptr addrspace(1))
  store i64 %inst, ptr addrspace(1) %c
  ret void
}

; CHECK: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, ptr addrspace(1), i8 }> %arg) !associated !0 {
define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, ptr addrspace(1), i8 }> %arg) #0 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> %arg, 2
  %.fca.4.extract = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> %arg, 3
  store i8 %.fca.4.extract, ptr addrspace(1) %.fca.3.extract, align 1
  ret void
}

declare i32 @__enqueue_kernel_basic(ptr addrspace(1), i32, ptr addrspace(5), ptr, ptr) local_unnamed_addr

; CHECK: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg) !associated !1 {
define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg) #0 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg, 2
  %.fca.4.extract = extractvalue <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg, 3
  %.fca.5.extract = extractvalue <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg, 4
  %.fca.6.extract = extractvalue <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg, 5
  store i8 %.fca.6.extract, ptr addrspace(1) %.fca.3.extract, align 1
  store i64 %.fca.5.extract, ptr addrspace(1) %.fca.4.extract, align 8
  ret void
}

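; A block kernel whose address is taken in a global initializer should still
; get a runtime handle and remain in @llvm.used.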
@kernel_address_user = global [1 x ptr] [ ptr @block_has_used_kernel_address ]

; CHECK: define internal amdgpu_kernel void @block_has_used_kernel_address(<{ i32, i32, ptr addrspace(1), i8 }> %arg) !associated !2 {
define internal amdgpu_kernel void @block_has_used_kernel_address(<{ i32, i32, ptr addrspace(1), i8 }> %arg) #0 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> %arg, 2
  %.fca.4.extract = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> %arg, 3
  store i8 %.fca.4.extract, ptr addrspace(1) %.fca.3.extract, align 1
  ret void
}

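; The same block kernel's address is also taken directly from another kernel's body.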
define amdgpu_kernel void @user_of_kernel_address(ptr addrspace(1) %arg) {
  store ptr @block_has_used_kernel_address, ptr addrspace(1) %arg
  ret void
}

; CHECK: define internal amdgpu_kernel void @0(<{ i32, i32, ptr addrspace(1), i8 }> %arg) !associated !3 {
define internal amdgpu_kernel void @0(<{ i32, i32, ptr addrspace(1), i8 }> %arg) #0 {
  ret void
}

; CHECK: define internal amdgpu_kernel void @1(<{ i32, i32, ptr addrspace(1), i8 }> %arg) !associated !4 {
define internal amdgpu_kernel void @1(<{ i32, i32, ptr addrspace(1), i8 }> %arg) #0 {
  ret void
}

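; The runtime handle of a linkonce_odr block kernel keeps linkonce_odr linkage
; instead of being internal.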
; CHECK: define linkonce_odr amdgpu_kernel void @kernel_linkonce_odr_block() !associated !5 {
define linkonce_odr amdgpu_kernel void @kernel_linkonce_odr_block() #0 {
  ret void
}

attributes #0 = { "enqueued-block" }

; CHECK: !0 = !{ptr @__test_block_invoke_kernel.runtime.handle}
; CHECK: !1 = !{ptr @__test_block_invoke_2_kernel.runtime.handle}
; CHECK: !2 = !{ptr @block_has_used_kernel_address.runtime.handle}
; CHECK: !3 = !{ptr @.runtime.handle}
; CHECK: !4 = !{ptr @.runtime.handle.1}
; CHECK: !5 = !{ptr @kernel_linkonce_odr_block.runtime.handle}