Skip to content

Commit c714d03

Browse files
committed
[AMDGPU] Expose __builtin_amdgcn_perm for v_perm_b32
Differential Revision: https://reviews.llvm.org/D102022
1 parent d3e987c commit c714d03

File tree

8 files changed

+73
-2
lines changed

8 files changed

+73
-2
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
182182
TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
183183
TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
184184
TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "gfx8-insts")
185+
TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts")
185186

186187
//===----------------------------------------------------------------------===//
187188
// GFX9+ only builtins.

clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
88

99
typedef unsigned long ulong;
10+
typedef unsigned int uint;
1011

1112
// CHECK-LABEL: @test_div_fixup_f16
1213
// CHECK: call half @llvm.amdgcn.div.fixup.f16
@@ -137,3 +138,10 @@ void test_s_memtime(global ulong* out)
137138
{
138139
*out = __builtin_amdgcn_s_memtime();
139140
}
141+
142+
// Check that __builtin_amdgcn_perm is emitted as a call to llvm.amdgcn.perm
// with its three arguments passed through in the same order.
// CHECK-LABEL: @test_perm
143+
// CHECK: call i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
144+
void test_perm(global uint* out, uint a, uint b, uint s)
145+
{
146+
*out = __builtin_amdgcn_perm(a, b, s);
147+
}

clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
33
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu hawaii -verify -S -o - %s
44

5-
void test_vi_s_dcache_wb()
5+
void test_vi_builtins()
66
{
77
__builtin_amdgcn_s_dcache_wb(); // expected-error {{'__builtin_amdgcn_s_dcache_wb' needs target feature gfx8-insts}}
8+
(void)__builtin_amdgcn_perm(1, 2, 3); // expected-error {{'__builtin_amdgcn_perm' needs target feature gfx8-insts}}
89
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1716,6 +1716,12 @@ def int_amdgcn_ds_bpermute :
17161716
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
17171717
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
17181718

1719+
// llvm.amdgcn.perm <src0> <src1> <selector>
// Takes three i32 operands and produces an i32. Exposed to clang as
// __builtin_amdgcn_perm. Declared as touching no memory and speculatable.
// NOTE(review): presumably a byte permute of src0/src1 driven by <selector>
// (v_perm_b32) -- confirm exact byte selection against the ISA manual.
1720+
def int_amdgcn_perm :
1721+
GCCBuiltin<"__builtin_amdgcn_perm">,
1722+
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1723+
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
1724+
17191725
//===----------------------------------------------------------------------===//
17201726
// GFX10 Intrinsics
17211727
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
313313
SDTCisInt<4>]>,
314314
[]>;
315315

316-
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
316+
def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
317317

318318
// SI+ export
319319
def AMDGPUExportOp : SDTypeProfile<0, 8, [
@@ -463,3 +463,7 @@ def AMDGPUfdot2 : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$clamp)
463463
def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc),
464464
[(int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
465465
(AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)]>;
466+
467+
// Matches either the llvm.amdgcn.perm intrinsic or the AMDGPUperm_impl node,
// so a pattern written against AMDGPUperm covers both forms.
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
468+
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
469+
(AMDGPUperm_impl node:$src0, node:$src1, node:$src2)]>;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3949,6 +3949,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
39493949
case Intrinsic::amdgcn_cvt_pk_u8_f32:
39503950
case Intrinsic::amdgcn_alignbit:
39513951
case Intrinsic::amdgcn_alignbyte:
3952+
case Intrinsic::amdgcn_perm:
39523953
case Intrinsic::amdgcn_fdot2:
39533954
case Intrinsic::amdgcn_sdot2:
39543955
case Intrinsic::amdgcn_udot2:

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6695,6 +6695,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
66956695
case Intrinsic::amdgcn_alignbit:
66966696
return DAG.getNode(ISD::FSHR, DL, VT,
66976697
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
6698+
case Intrinsic::amdgcn_perm:
6699+
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
6700+
Op.getOperand(2), Op.getOperand(3));
66986701
case Intrinsic::amdgcn_reloc_constant: {
66996702
Module *M = const_cast<Module *>(MF.getFunction().getParent());
67006703
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2+
; RUN: llc -march=amdgcn -mcpu=tonga -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3+
4+
declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0
5+
6+
; All three operands are plain (non-inreg) arguments; the check expects them
; to be used directly from v0, v1 and v2.
; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
7+
; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
8+
define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, i32 addrspace(1)* %out) #1 {
9+
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) #0
10+
store i32 %val, i32 addrspace(1)* %out
11+
ret void
12+
}
13+
14+
; Selector is the constant 12345; the check accepts it in either a VGPR or an
; SGPR, with both sources expected in v0 and v1.
; GCN-LABEL: {{^}}v_perm_b32_v_v_c:
15+
; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, {{[vs][0-9]+}}
16+
define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
17+
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
18+
store i32 %val, i32 addrspace(1)* %out
19+
ret void
20+
}
21+
22+
; First source is an inreg argument and is expected in s0; the second source
; is expected in v0 and the constant selector 12345 in a VGPR.
; GCN-LABEL: {{^}}v_perm_b32_s_v_c:
23+
; GCN: v_perm_b32 v{{[0-9]+}}, s0, v0, v{{[0-9]+}}
24+
define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
25+
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
26+
store i32 %val, i32 addrspace(1)* %out
27+
ret void
28+
}
29+
30+
; Both sources are inreg arguments, but the check expects only the first to be
; used as an SGPR (s0); the second source and the constant selector 12345 are
; expected in VGPRs.
; NOTE(review): presumably a limit on scalar operands per instruction for this
; target -- confirm.
; GCN-LABEL: {{^}}v_perm_b32_s_s_c:
31+
; GCN: v_perm_b32 v{{[0-9]+}}, s0, v{{[0-9]+}}, v{{[0-9]+}}
32+
define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
33+
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
34+
store i32 %val, i32 addrspace(1)* %out
35+
ret void
36+
}
37+
38+
; Selector is the constant 1; the check expects it to appear as an immediate
; operand, with the sources in v0 and s0 (the second source is inreg).
; GCN-LABEL: {{^}}v_perm_b32_v_s_i:
39+
; GCN: v_perm_b32 v{{[0-9]+}}, v0, s0, 1
40+
define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
41+
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 inreg %src2, i32 1) #0
42+
store i32 %val, i32 addrspace(1)* %out
43+
ret void
44+
}
45+
46+
attributes #0 = { nounwind readnone }
47+
attributes #1 = { nounwind }

0 commit comments

Comments
 (0)