Skip to content

Commit f0366a6

Browse files
[OpaquePointers] Fix translation of kernel query instructions (#1848)
This specifies the correct operand types for GetKernelWorkGroupSize, GetKernelPreferredWorkGroupSizeMultiple, GetKernelNDrangeMaxSubGroupSize, and GetKernelNDrangeSubGroupCount, fixing a function signature mismatch on reverse translation.
1 parent: bdd7652; commit: f0366a6

File tree

2 files changed

+57
-51
lines changed

2 files changed

+57
-51
lines changed

lib/SPIRV/SPIRVTypeScavenger.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -182,6 +182,16 @@ bool SPIRVTypeScavenger::typeIntrinsicCall(
182182
}
183183
} else if (TargetFn->getName().startswith("_Z18__spirv_ocl_printf")) {
184184
ArgTys.emplace_back(0, Type::getInt8Ty(Ctx));
185+
} else if (TargetFn->getName() == "__spirv_GetKernelWorkGroupSize__") {
186+
ArgTys.emplace_back(1, Type::getInt8Ty(Ctx));
187+
} else if (TargetFn->getName() ==
188+
"__spirv_GetKernelPreferredWorkGroupSizeMultiple__") {
189+
ArgTys.emplace_back(1, Type::getInt8Ty(Ctx));
190+
} else if (TargetFn->getName() ==
191+
"__spirv_GetKernelNDrangeMaxSubGroupSize__") {
192+
ArgTys.emplace_back(2, Type::getInt8Ty(Ctx));
193+
} else if (TargetFn->getName() == "__spirv_GetKernelNDrangeSubGroupCount__") {
194+
ArgTys.emplace_back(2, Type::getInt8Ty(Ctx));
185195
} else
186196
return false;
187197

test/transcoding/kernel_query.ll

Lines changed: 47 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,10 @@
1515
; Compilation command:
1616
; clang -cc1 -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -emit-llvm kernel_query.cl
1717

18-
; RUN: llvm-as -opaque-pointers=0 %s -o %t.bc
19-
; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-text -o %t.spv.txt
18+
; RUN: llvm-as %s -o %t.bc
19+
; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt
2020
; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV
21-
; RUN: llvm-spirv %t.bc -opaque-pointers=0 -o %t.spv
21+
; RUN: llvm-spirv %t.bc -o %t.spv
2222
; RUN: llvm-spirv -r -emit-opaque-pointers %t.spv -o %t.rev.bc
2323
; RUN: llvm-dis %t.rev.bc
2424
; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM
@@ -68,119 +68,115 @@ entry:
6868

6969
%ndrange = alloca %struct.ndrange_t, align 4
7070

71-
; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit1Tmp:[0-9]+]] [[BlockGlb1]]
72-
; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] [[BlockLit1Tmp]]
71+
; CHECK-SPIRV: PtrCastToGeneric {{[0-9]+}} [[BlockLit1Tmp:[0-9]+]] [[BlockGlb1]]
72+
; CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] [[BlockLit1Tmp]]
7373
; CHECK-SPIRV: GetKernelWorkGroupSize [[Int32Ty]] {{[0-9]+}} [[BlockKer1]] [[BlockLit1]] [[ConstInt8]] [[ConstInt8]]
7474

7575
; CHECK-LLVM: call i32 @__get_kernel_work_group_size_impl(ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}})
7676

77-
%0 = call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*))
77+
%0 = call i32 @__get_kernel_work_group_size_impl(ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_kernel to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @__block_literal_global to ptr addrspace(4)))
7878

79-
; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit2Tmp:[0-9]+]] [[BlockGlb2]]
80-
; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit2:[0-9]+]] [[BlockLit2Tmp]]
79+
; CHECK-SPIRV: PtrCastToGeneric {{[0-9]+}} [[BlockLit2Tmp:[0-9]+]] [[BlockGlb2]]
80+
; CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit2:[0-9]+]] [[BlockLit2Tmp]]
8181
; CHECK-SPIRV: GetKernelPreferredWorkGroupSizeMultiple [[Int32Ty]] {{[0-9]+}} [[BlockKer2]] [[BlockLit2]] [[ConstInt8]] [[ConstInt8]]
8282

8383
; CHECK-LLVM: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}) #1
8484

85-
%1 = call i32 @__get_kernel_preferred_work_group_size_multiple_impl(i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*))
85+
%1 = call i32 @__get_kernel_preferred_work_group_size_multiple_impl(ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_2_kernel to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @__block_literal_global.1 to ptr addrspace(4)))
8686

87-
; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb3]]
88-
; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit3:[0-9]+]] [[BlockLit3Tmp]]
87+
; CHECK-SPIRV: PtrCastToGeneric {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb3]]
88+
; CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit3:[0-9]+]] [[BlockLit3Tmp]]
8989
; CHECK-SPIRV: GetKernelNDrangeMaxSubGroupSize [[Int32Ty]] {{[0-9]+}} [[NDRange]] [[BlockKer3]] [[BlockLit3]] [[ConstInt8]] [[ConstInt8]]
9090

9191
; CHECK-LLVM: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(ptr {{.*}}, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}})
9292

93-
%2 = call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.2 to i8 addrspace(1)*) to i8 addrspace(4)*))
93+
%2 = call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(ptr %ndrange, ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_3_kernel to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @__block_literal_global.2 to ptr addrspace(4)))
9494

95-
; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit4Tmp:[0-9]+]] [[BlockGlb4]]
96-
; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit4:[0-9]+]] [[BlockLit4Tmp]]
95+
; CHECK-SPIRV: PtrCastToGeneric {{[0-9]+}} [[BlockLit4Tmp:[0-9]+]] [[BlockGlb4]]
96+
; CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit4:[0-9]+]] [[BlockLit4Tmp]]
9797
; CHECK-SPIRV: GetKernelNDrangeSubGroupCount [[Int32Ty]] {{[0-9]+}} [[NDRange]] [[BlockKer4]] [[BlockLit4]] [[ConstInt8]] [[ConstInt8]]
9898

9999
; CHECK-LLVM: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(ptr {{.*}}, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}})
100100

101-
%3 = call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.3 to i8 addrspace(1)*) to i8 addrspace(4)*))
101+
%3 = call i32 @__get_kernel_sub_group_count_for_ndrange_impl(ptr %ndrange, ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_4_kernel to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @__block_literal_global.3 to ptr addrspace(4)))
102102
ret void
103103
}
104104

105105
; Function Attrs: convergent noinline nounwind optnone
106-
define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #1 {
106+
define internal spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) %.block_descriptor) #1 {
107107
entry:
108-
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
109-
%block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
110-
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
111-
%block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
112-
store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
108+
%.block_descriptor.addr = alloca ptr addrspace(4), align 4
109+
%block.addr = alloca ptr addrspace(4), align 4
110+
store ptr addrspace(4) %.block_descriptor, ptr %.block_descriptor.addr, align 4
111+
store ptr addrspace(4) %.block_descriptor, ptr %block.addr, align 4
113112
ret void
114113
}
115114

116115
; Function Attrs: nounwind
117-
define internal spir_kernel void @__device_side_enqueue_block_invoke_kernel(i8 addrspace(4)*) #2 {
116+
define internal spir_kernel void @__device_side_enqueue_block_invoke_kernel(ptr addrspace(4)) #2 {
118117
entry:
119-
call void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %0)
118+
call void @__device_side_enqueue_block_invoke(ptr addrspace(4) %0)
120119
ret void
121120
}
122121

123-
declare i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)*, i8 addrspace(4)*)
122+
declare i32 @__get_kernel_work_group_size_impl(ptr addrspace(4), ptr addrspace(4))
124123

125124
; Function Attrs: convergent noinline nounwind optnone
126-
define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #1 {
125+
define internal spir_func void @__device_side_enqueue_block_invoke_2(ptr addrspace(4) %.block_descriptor) #1 {
127126
entry:
128-
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
129-
%block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
130-
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
131-
%block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
132-
store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
127+
%.block_descriptor.addr = alloca ptr addrspace(4), align 4
128+
%block.addr = alloca ptr addrspace(4), align 4
129+
store ptr addrspace(4) %.block_descriptor, ptr %.block_descriptor.addr, align 4
130+
store ptr addrspace(4) %.block_descriptor, ptr %block.addr, align 4
133131
ret void
134132
}
135133

136134
; Function Attrs: nounwind
137-
define internal spir_kernel void @__device_side_enqueue_block_invoke_2_kernel(i8 addrspace(4)*) #2 {
135+
define internal spir_kernel void @__device_side_enqueue_block_invoke_2_kernel(ptr addrspace(4)) #2 {
138136
entry:
139-
call void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %0)
137+
call void @__device_side_enqueue_block_invoke_2(ptr addrspace(4) %0)
140138
ret void
141139
}
142140

143-
declare i32 @__get_kernel_preferred_work_group_size_multiple_impl(i8 addrspace(4)*, i8 addrspace(4)*)
141+
declare i32 @__get_kernel_preferred_work_group_size_multiple_impl(ptr addrspace(4), ptr addrspace(4))
144142

145143
; Function Attrs: convergent noinline nounwind optnone
146-
define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspace(4)* %.block_descriptor) #1 {
144+
define internal spir_func void @__device_side_enqueue_block_invoke_3(ptr addrspace(4) %.block_descriptor) #1 {
147145
entry:
148-
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
149-
%block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
150-
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
151-
%block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
152-
store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
146+
%.block_descriptor.addr = alloca ptr addrspace(4), align 4
147+
%block.addr = alloca ptr addrspace(4), align 4
148+
store ptr addrspace(4) %.block_descriptor, ptr %.block_descriptor.addr, align 4
149+
store ptr addrspace(4) %.block_descriptor, ptr %block.addr, align 4
153150
ret void
154151
}
155152

156153
; Function Attrs: nounwind
157-
define internal spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*) #2 {
154+
define internal spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(ptr addrspace(4)) #2 {
158155
entry:
159-
call void @__device_side_enqueue_block_invoke_3(i8 addrspace(4)* %0)
156+
call void @__device_side_enqueue_block_invoke_3(ptr addrspace(4) %0)
160157
ret void
161158
}
162159

163-
declare i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t*, i8 addrspace(4)*, i8 addrspace(4)*)
160+
declare i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(ptr, ptr addrspace(4), ptr addrspace(4))
164161

165162
; Function Attrs: convergent noinline nounwind optnone
166-
define internal spir_func void @__device_side_enqueue_block_invoke_4(i8 addrspace(4)* %.block_descriptor) #1 {
163+
define internal spir_func void @__device_side_enqueue_block_invoke_4(ptr addrspace(4) %.block_descriptor) #1 {
167164
entry:
168-
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
169-
%block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
170-
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
171-
%block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
172-
store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
165+
%.block_descriptor.addr = alloca ptr addrspace(4), align 4
166+
%block.addr = alloca ptr addrspace(4), align 4
167+
store ptr addrspace(4) %.block_descriptor, ptr %.block_descriptor.addr, align 4
168+
store ptr addrspace(4) %.block_descriptor, ptr %block.addr, align 4
173169
ret void
174170
}
175171

176172
; Function Attrs: nounwind
177-
define internal spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*) #2 {
173+
define internal spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(ptr addrspace(4)) #2 {
178174
entry:
179-
call void @__device_side_enqueue_block_invoke_4(i8 addrspace(4)* %0)
175+
call void @__device_side_enqueue_block_invoke_4(ptr addrspace(4) %0)
180176
ret void
181177
}
182178

183-
declare i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t*, i8 addrspace(4)*, i8 addrspace(4)*)
179+
declare i32 @__get_kernel_sub_group_count_for_ndrange_impl(ptr, ptr addrspace(4), ptr addrspace(4))
184180

185181
; CHECK-SPIRV-DAG: Function [[VoidTy]] [[BlockKer1]] 0 [[BlockKerTy]]
186182
; CHECK-SPIRV-DAG: Function [[VoidTy]] [[BlockKer2]] 0 [[BlockKerTy]]

0 commit comments

Comments
 (0)