Skip to content

Commit de3b5d7

Browse files
committed
[AMDGPU] More GFX11 coverage for tests with generated checks
1 parent 03af9ba commit de3b5d7

File tree

7 files changed

+3607
-2120
lines changed

7 files changed

+3607
-2120
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll

Lines changed: 3202 additions & 1921 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll

Lines changed: 131 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -1,97 +1,166 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
3-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
2+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX10
3+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX10
4+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX11
45

56
; Test: llvm.amdgcn.global.atomic.csub called directly on %ptr with %data, with
; the intrinsic's result returned from the function.
; The removed GCN-prefixed checks are replaced by GFX10-prefixed checks with the
; same instruction sequence. The added GFX11 checks expect that sequence with
; the atomic spelled global_atomic_csub_u32 instead of global_atomic_csub.
; In both sets the atomic takes the address from v[0:1] with "off" as the base
; and carries the glc modifier.
define i32 @global_atomic_csub(i32 addrspace(1)* %ptr, i32 %data) {
6-
; GCN-LABEL: global_atomic_csub:
7-
; GCN: ; %bb.0:
8-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9-
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
10-
; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
11-
; GCN-NEXT: s_waitcnt vmcnt(0)
12-
; GCN-NEXT: s_setpc_b64 s[30:31]
7+
; GFX10-LABEL: global_atomic_csub:
8+
; GFX10: ; %bb.0:
9+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10+
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
11+
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
12+
; GFX10-NEXT: s_waitcnt vmcnt(0)
13+
; GFX10-NEXT: s_setpc_b64 s[30:31]
14+
;
15+
; GFX11-LABEL: global_atomic_csub:
16+
; GFX11: ; %bb.0:
17+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
19+
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
20+
; GFX11-NEXT: s_waitcnt vmcnt(0)
21+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1322
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data)
1423
ret i32 %ret
1524
}
1625

1726
; Test: same intrinsic call as global_atomic_csub, but on a pointer advanced by
; 1024 i32 elements (the 0x1000 constant in the checks), with the result returned.
; The checks expect the offset to be materialized with s_mov_b64, copied into
; VGPRs, and added to the 64-bit address with v_add_co_u32 / v_add_co_ci_u32.
; The atomic itself is emitted with "off" and no immediate offset.
; GFX11 differs from GFX10 in four ways: the SGPR pair is s[0:1] rather than
; s[4:5], the two moves are one v_dual_mov_b32, s_delay_alu lines are expected,
; and the atomic is spelled global_atomic_csub_u32.
define i32 @global_atomic_csub_offset(i32 addrspace(1)* %ptr, i32 %data) {
18-
; GCN-LABEL: global_atomic_csub_offset:
19-
; GCN: ; %bb.0:
20-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21-
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
22-
; GCN-NEXT: s_mov_b64 s[4:5], 0x1000
23-
; GCN-NEXT: v_mov_b32_e32 v3, s4
24-
; GCN-NEXT: v_mov_b32_e32 v4, s5
25-
; GCN-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
26-
; GCN-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
27-
; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
28-
; GCN-NEXT: s_waitcnt vmcnt(0)
29-
; GCN-NEXT: s_setpc_b64 s[30:31]
27+
; GFX10-LABEL: global_atomic_csub_offset:
28+
; GFX10: ; %bb.0:
29+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30+
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
31+
; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000
32+
; GFX10-NEXT: v_mov_b32_e32 v3, s4
33+
; GFX10-NEXT: v_mov_b32_e32 v4, s5
34+
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
35+
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
36+
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
37+
; GFX10-NEXT: s_waitcnt vmcnt(0)
38+
; GFX10-NEXT: s_setpc_b64 s[30:31]
39+
;
40+
; GFX11-LABEL: global_atomic_csub_offset:
41+
; GFX11: ; %bb.0:
42+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
44+
; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000
45+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
46+
; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
47+
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
48+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
49+
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
50+
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
51+
; GFX11-NEXT: s_waitcnt vmcnt(0)
52+
; GFX11-NEXT: s_setpc_b64 s[30:31]
3053
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
3154
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
3255
ret i32 %ret
3356
}
3457

3558
; Test: "no return" variant of global_atomic_csub. The function returns void and
; %ret is never used.
; The expected output is nevertheless the same as for the returning variant:
; the atomic still writes v0, still carries glc, and is still followed by
; s_waitcnt vmcnt(0).
; The removed GCN checks become GFX10 checks with the same sequence; the added
; GFX11 checks differ only in the mnemonic global_atomic_csub_u32.
define void @global_atomic_csub_nortn(i32 addrspace(1)* %ptr, i32 %data) {
36-
; GCN-LABEL: global_atomic_csub_nortn:
37-
; GCN: ; %bb.0:
38-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39-
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
40-
; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
41-
; GCN-NEXT: s_waitcnt vmcnt(0)
42-
; GCN-NEXT: s_setpc_b64 s[30:31]
59+
; GFX10-LABEL: global_atomic_csub_nortn:
60+
; GFX10: ; %bb.0:
61+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62+
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
63+
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
64+
; GFX10-NEXT: s_waitcnt vmcnt(0)
65+
; GFX10-NEXT: s_setpc_b64 s[30:31]
66+
;
67+
; GFX11-LABEL: global_atomic_csub_nortn:
68+
; GFX11: ; %bb.0:
69+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
71+
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
72+
; GFX11-NEXT: s_waitcnt vmcnt(0)
73+
; GFX11-NEXT: s_setpc_b64 s[30:31]
4374
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data)
4475
ret void
4576
}
4677

4778
; Test: "no return" variant of global_atomic_csub_offset. The pointer is advanced
; by 1024 i32 elements (0x1000 in the checks), the function returns void, and
; %ret is unused.
; The checks expect the offset to be materialized in SGPRs and added with
; v_add_co_u32 / v_add_co_ci_u32, and the atomic to be emitted with "off", a v0
; destination and glc, followed by s_waitcnt vmcnt(0).
; GFX11 differs from GFX10 in four ways: the SGPR pair is s[0:1] rather than
; s[4:5], the two moves are one v_dual_mov_b32, s_delay_alu lines are expected,
; and the atomic is spelled global_atomic_csub_u32.
define void @global_atomic_csub_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) {
48-
; GCN-LABEL: global_atomic_csub_offset_nortn:
49-
; GCN: ; %bb.0:
50-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51-
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
52-
; GCN-NEXT: s_mov_b64 s[4:5], 0x1000
53-
; GCN-NEXT: v_mov_b32_e32 v3, s4
54-
; GCN-NEXT: v_mov_b32_e32 v4, s5
55-
; GCN-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
56-
; GCN-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
57-
; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
58-
; GCN-NEXT: s_waitcnt vmcnt(0)
59-
; GCN-NEXT: s_setpc_b64 s[30:31]
79+
; GFX10-LABEL: global_atomic_csub_offset_nortn:
80+
; GFX10: ; %bb.0:
81+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82+
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
83+
; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000
84+
; GFX10-NEXT: v_mov_b32_e32 v3, s4
85+
; GFX10-NEXT: v_mov_b32_e32 v4, s5
86+
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
87+
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
88+
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
89+
; GFX10-NEXT: s_waitcnt vmcnt(0)
90+
; GFX10-NEXT: s_setpc_b64 s[30:31]
91+
;
92+
; GFX11-LABEL: global_atomic_csub_offset_nortn:
93+
; GFX11: ; %bb.0:
94+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
96+
; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000
97+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
98+
; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
99+
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
100+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
101+
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
102+
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
103+
; GFX11-NEXT: s_waitcnt vmcnt(0)
104+
; GFX11-NEXT: s_setpc_b64 s[30:31]
60105
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
61106
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
62107
ret void
63108
}
64109

65110
; Test: amdgpu_kernel variant. The intrinsic is called on %ptr advanced by 1024
; i32 elements and its result is stored through an undef global pointer.
; The checks expect %data and %ptr to arrive via scalar loads (presumably from
; the kernel argument segment; confirm against the AMDGPU ABI).
; The atomic uses the SGPR pair s[0:1] as base and the 0x1000 offset in VGPR v1,
; so no 64-bit VALU add is expected here.
; GFX11 differs from GFX10 in these ways: loads and store use the
; s_load_b32 / s_load_b64 / global_store_b32 spellings, the loads read from
; s[0:1] rather than s[4:5], the two VGPR moves are one v_dual_mov_b32 placed
; after the s_waitcnt, the atomic is global_atomic_csub_u32, and
; s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) is expected before s_endpgm.
define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(i32 addrspace(1)* %ptr, i32 %data) {
66-
; GCN-LABEL: global_atomic_csub_sgpr_base_offset:
67-
; GCN: ; %bb.0:
68-
; GCN-NEXT: s_clause 0x1
69-
; GCN-NEXT: s_load_dword s2, s[4:5], 0x8
70-
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
71-
; GCN-NEXT: v_mov_b32_e32 v1, 0x1000
72-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
73-
; GCN-NEXT: v_mov_b32_e32 v0, s2
74-
; GCN-NEXT: global_atomic_csub v0, v1, v0, s[0:1] glc
75-
; GCN-NEXT: s_waitcnt vmcnt(0)
76-
; GCN-NEXT: global_store_dword v[0:1], v0, off
77-
; GCN-NEXT: s_endpgm
111+
; GFX10-LABEL: global_atomic_csub_sgpr_base_offset:
112+
; GFX10: ; %bb.0:
113+
; GFX10-NEXT: s_clause 0x1
114+
; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8
115+
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
116+
; GFX10-NEXT: v_mov_b32_e32 v1, 0x1000
117+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
118+
; GFX10-NEXT: v_mov_b32_e32 v0, s2
119+
; GFX10-NEXT: global_atomic_csub v0, v1, v0, s[0:1] glc
120+
; GFX10-NEXT: s_waitcnt vmcnt(0)
121+
; GFX10-NEXT: global_store_dword v[0:1], v0, off
122+
; GFX10-NEXT: s_endpgm
123+
;
124+
; GFX11-LABEL: global_atomic_csub_sgpr_base_offset:
125+
; GFX11: ; %bb.0:
126+
; GFX11-NEXT: s_clause 0x1
127+
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8
128+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
129+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
130+
; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s2
131+
; GFX11-NEXT: global_atomic_csub_u32 v0, v1, v0, s[0:1] glc
132+
; GFX11-NEXT: s_waitcnt vmcnt(0)
133+
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
134+
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
135+
; GFX11-NEXT: s_endpgm
78136
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
79137
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
80138
store i32 %ret, i32 addrspace(1)* undef
81139
ret void
82140
}
83141

84142
define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) {
85-
; GCN-LABEL: global_atomic_csub_sgpr_base_offset_nortn:
86-
; GCN: ; %bb.0:
87-
; GCN-NEXT: s_clause 0x1
88-
; GCN-NEXT: s_load_dword s2, s[4:5], 0x8
89-
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
90-
; GCN-NEXT: v_mov_b32_e32 v1, 0x1000
91-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
92-
; GCN-NEXT: v_mov_b32_e32 v0, s2
93-
; GCN-NEXT: global_atomic_csub v0, v1, v0, s[0:1] glc
94-
; GCN-NEXT: s_endpgm
143+
; GFX10-LABEL: global_atomic_csub_sgpr_base_offset_nortn:
144+
; GFX10: ; %bb.0:
145+
; GFX10-NEXT: s_clause 0x1
146+
; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8
147+
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
148+
; GFX10-NEXT: v_mov_b32_e32 v1, 0x1000
149+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
150+
; GFX10-NEXT: v_mov_b32_e32 v0, s2
151+
; GFX10-NEXT: global_atomic_csub v0, v1, v0, s[0:1] glc
152+
; GFX10-NEXT: s_endpgm
153+
;
154+
; GFX11-LABEL: global_atomic_csub_sgpr_base_offset_nortn:
155+
; GFX11: ; %bb.0:
156+
; GFX11-NEXT: s_clause 0x1
157+
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8
158+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
159+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
160+
; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s2
161+
; GFX11-NEXT: global_atomic_csub_u32 v0, v1, v0, s[0:1] glc
162+
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
163+
; GFX11-NEXT: s_endpgm
95164
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
96165
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
97166
ret void

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll

Lines changed: 36 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,8 @@
22
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,CI %s
33
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI %s
44
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
5+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
67

78
define double @v_trig_preop_f64(double %a, i32 %b) {
89
; GCN-LABEL: v_trig_preop_f64:
@@ -11,12 +12,12 @@ define double @v_trig_preop_f64(double %a, i32 %b) {
1112
; GCN-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
1213
; GCN-NEXT: s_setpc_b64 s[30:31]
1314
;
14-
; GFX10-LABEL: v_trig_preop_f64:
15-
; GFX10: ; %bb.0:
16-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17-
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
18-
; GFX10-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
19-
; GFX10-NEXT: s_setpc_b64 s[30:31]
15+
; GFX10PLUS-LABEL: v_trig_preop_f64:
16+
; GFX10PLUS: ; %bb.0:
17+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18+
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
19+
; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
20+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
2021
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
2122
ret double %result
2223
}
@@ -28,12 +29,12 @@ define double @v_trig_preop_f64_imm(double %a, i32 %b) {
2829
; GCN-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
2930
; GCN-NEXT: s_setpc_b64 s[30:31]
3031
;
31-
; GFX10-LABEL: v_trig_preop_f64_imm:
32-
; GFX10: ; %bb.0:
33-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34-
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
35-
; GFX10-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
36-
; GFX10-NEXT: s_setpc_b64 s[30:31]
32+
; GFX10PLUS-LABEL: v_trig_preop_f64_imm:
33+
; GFX10PLUS: ; %bb.0:
34+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35+
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
36+
; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
37+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
3738
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
3839
ret double %result
3940
}
@@ -82,6 +83,18 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
8283
; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
8384
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8485
; GFX10-NEXT: s_endpgm
86+
;
87+
; GFX11-LABEL: s_trig_preop_f64:
88+
; GFX11: ; %bb.0:
89+
; GFX11-NEXT: s_clause 0x1
90+
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
91+
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x8
92+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
93+
; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[2:3], s0
94+
; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
95+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
96+
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
97+
; GFX11-NEXT: s_endpgm
8598
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
8699
store volatile double %result, double* undef
87100
ret void
@@ -105,6 +118,16 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
105118
; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
106119
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
107120
; GFX10-NEXT: s_endpgm
121+
;
122+
; GFX11-LABEL: s_trig_preop_f64_imm:
123+
; GFX11: ; %bb.0:
124+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
125+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
126+
; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
127+
; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
128+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
129+
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
130+
; GFX11-NEXT: s_endpgm
108131
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
109132
store volatile double %result, double* undef
110133
ret void

0 commit comments

Comments
 (0)