@@ -9,43 +9,34 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
99; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1010; GCN-NEXT: v_lshlrev_b32_e32 v64, 8, v0
1111; GCN-NEXT: s_waitcnt lgkmcnt(0)
12- ; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144
1312; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1]
14- ; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:16
15- ; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:32
16- ; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:48
17- ; GCN-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:64
18- ; GCN-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:80
19- ; GCN-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:96
20- ; GCN-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:112
21- ; GCN-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:128
13+ ; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:16
14+ ; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:32
15+ ; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:48
16+ ; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:64
17+ ; GCN-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:80
18+ ; GCN-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:96
19+ ; GCN-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:112
20+ ; GCN-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:128
21+ ; GCN-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:144
2222; GCN-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:160
2323; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:176
2424; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:192
2525; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:208
2626; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:224
2727; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:240
28- ; GCN-NEXT: s_waitcnt vmcnt(15)
29- ; GCN-NEXT: v_mov_b32_e32 v5, 0x3e7
30- ; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:144
31- ; GCN-NEXT: s_waitcnt vmcnt(15)
28+ ; GCN-NEXT: s_waitcnt vmcnt(6)
29+ ; GCN-NEXT: v_mov_b32_e32 v37, 0x3e7
3230; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3]
33- ; GCN-NEXT: s_waitcnt vmcnt(15)
34- ; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:16
35- ; GCN-NEXT: s_waitcnt vmcnt(15)
36- ; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:32
37- ; GCN-NEXT: s_waitcnt vmcnt(15)
38- ; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:48
39- ; GCN-NEXT: s_waitcnt vmcnt(15)
40- ; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:64
41- ; GCN-NEXT: s_waitcnt vmcnt(15)
42- ; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:80
43- ; GCN-NEXT: s_waitcnt vmcnt(15)
44- ; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:96
45- ; GCN-NEXT: s_waitcnt vmcnt(15)
46- ; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:112
47- ; GCN-NEXT: s_waitcnt vmcnt(15)
48- ; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:128
31+ ; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:16
32+ ; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:32
33+ ; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:48
34+ ; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:64
35+ ; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:80
36+ ; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:96
37+ ; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:112
38+ ; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:128
39+ ; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:144
4940; GCN-NEXT: s_waitcnt vmcnt(15)
5041; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:160
5142; GCN-NEXT: s_waitcnt vmcnt(15)
@@ -67,39 +58,45 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
6758; GFX10-NEXT: s_waitcnt lgkmcnt(0)
6859; GFX10-NEXT: s_clause 0xf
6960; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1]
70- ; GFX10-NEXT: global_load_dwordx4 v[8:11 ], v64, s[0:1] offset:16
71- ; GFX10-NEXT: global_load_dwordx4 v[12:15 ], v64, s[0:1] offset:32
72- ; GFX10-NEXT: global_load_dwordx4 v[16:19 ], v64, s[0:1] offset:48
73- ; GFX10-NEXT: global_load_dwordx4 v[20:23 ], v64, s[0:1] offset:64
74- ; GFX10-NEXT: global_load_dwordx4 v[24:27 ], v64, s[0:1] offset:80
75- ; GFX10-NEXT: global_load_dwordx4 v[28:31 ], v64, s[0:1] offset:96
76- ; GFX10-NEXT: global_load_dwordx4 v[32:35 ], v64, s[0:1] offset:112
77- ; GFX10-NEXT: global_load_dwordx4 v[36:39 ], v64, s[0:1] offset:160
78- ; GFX10-NEXT: global_load_dwordx4 v[40:43 ], v64, s[0:1] offset:176
79- ; GFX10-NEXT: global_load_dwordx4 v[44:47 ], v64, s[0:1] offset:192
80- ; GFX10-NEXT: global_load_dwordx4 v[48:51 ], v64, s[0:1] offset:208
81- ; GFX10-NEXT: global_load_dwordx4 v[52:55 ], v64, s[0:1] offset:224
82- ; GFX10-NEXT: global_load_dwordx4 v[56:59 ], v64, s[0:1] offset:240
83- ; GFX10-NEXT: global_load_dwordx4 v[60:63 ], v64, s[0:1] offset:128
84- ; GFX10-NEXT: global_load_dwordx4 v[4:7 ], v64, s[0:1] offset:144
85- ; GFX10-NEXT: s_waitcnt vmcnt(0 )
86- ; GFX10-NEXT: v_mov_b32_e32 v5 , 0x3e7
61+ ; GFX10-NEXT: global_load_dwordx4 v[4:7 ], v64, s[0:1] offset:16
62+ ; GFX10-NEXT: global_load_dwordx4 v[8:11 ], v64, s[0:1] offset:32
63+ ; GFX10-NEXT: global_load_dwordx4 v[12:15 ], v64, s[0:1] offset:48
64+ ; GFX10-NEXT: global_load_dwordx4 v[16:19 ], v64, s[0:1] offset:64
65+ ; GFX10-NEXT: global_load_dwordx4 v[20:23 ], v64, s[0:1] offset:80
66+ ; GFX10-NEXT: global_load_dwordx4 v[24:27 ], v64, s[0:1] offset:96
67+ ; GFX10-NEXT: global_load_dwordx4 v[28:31 ], v64, s[0:1] offset:112
68+ ; GFX10-NEXT: global_load_dwordx4 v[32:35 ], v64, s[0:1] offset:128
69+ ; GFX10-NEXT: global_load_dwordx4 v[36:39 ], v64, s[0:1] offset:144
70+ ; GFX10-NEXT: global_load_dwordx4 v[40:43 ], v64, s[0:1] offset:160
71+ ; GFX10-NEXT: global_load_dwordx4 v[44:47 ], v64, s[0:1] offset:176
72+ ; GFX10-NEXT: global_load_dwordx4 v[48:51 ], v64, s[0:1] offset:192
73+ ; GFX10-NEXT: global_load_dwordx4 v[52:55 ], v64, s[0:1] offset:208
74+ ; GFX10-NEXT: global_load_dwordx4 v[56:59 ], v64, s[0:1] offset:224
75+ ; GFX10-NEXT: global_load_dwordx4 v[60:63 ], v64, s[0:1] offset:240
76+ ; GFX10-NEXT: s_waitcnt vmcnt(6 )
77+ ; GFX10-NEXT: v_mov_b32_e32 v37 , 0x3e7
8778; GFX10-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3]
88- ; GFX10-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:16
89- ; GFX10-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:32
90- ; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:48
91- ; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:64
92- ; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:80
93- ; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:96
94- ; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:112
95- ; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:128
96- ; GFX10-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:144
97- ; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:160
98- ; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:176
99- ; GFX10-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:192
100- ; GFX10-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:208
101- ; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:224
102- ; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:240
79+ ; GFX10-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:16
80+ ; GFX10-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:32
81+ ; GFX10-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:48
82+ ; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:64
83+ ; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:80
84+ ; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:96
85+ ; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:112
86+ ; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:128
87+ ; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:144
88+ ; GFX10-NEXT: s_waitcnt vmcnt(5)
89+ ; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:160
90+ ; GFX10-NEXT: s_waitcnt vmcnt(4)
91+ ; GFX10-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:176
92+ ; GFX10-NEXT: s_waitcnt vmcnt(3)
93+ ; GFX10-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:192
94+ ; GFX10-NEXT: s_waitcnt vmcnt(2)
95+ ; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:208
96+ ; GFX10-NEXT: s_waitcnt vmcnt(1)
97+ ; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:224
98+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
99+ ; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:240
103100; GFX10-NEXT: s_endpgm
104101;
105102; GFX11-LABEL: v_insert_v64i32_37:
@@ -109,34 +106,34 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
109106; GFX11-NEXT: s_waitcnt lgkmcnt(0)
110107; GFX11-NEXT: s_clause 0xf
111108; GFX11-NEXT: global_load_b128 v[0:3], v64, s[0:1]
112- ; GFX11-NEXT: global_load_b128 v[8:11 ], v64, s[0:1] offset:16
113- ; GFX11-NEXT: global_load_b128 v[12:15 ], v64, s[0:1] offset:32
114- ; GFX11-NEXT: global_load_b128 v[16:19 ], v64, s[0:1] offset:48
115- ; GFX11-NEXT: global_load_b128 v[20:23 ], v64, s[0:1] offset:64
116- ; GFX11-NEXT: global_load_b128 v[24:27 ], v64, s[0:1] offset:80
117- ; GFX11-NEXT: global_load_b128 v[28:31 ], v64, s[0:1] offset:96
118- ; GFX11-NEXT: global_load_b128 v[32:35 ], v64, s[0:1] offset:112
119- ; GFX11-NEXT: global_load_b128 v[36:39 ], v64, s[0:1] offset:128
120- ; GFX11-NEXT: global_load_b128 v[4:7 ], v64, s[0:1] offset:144
109+ ; GFX11-NEXT: global_load_b128 v[4:7 ], v64, s[0:1] offset:16
110+ ; GFX11-NEXT: global_load_b128 v[8:11 ], v64, s[0:1] offset:32
111+ ; GFX11-NEXT: global_load_b128 v[12:15 ], v64, s[0:1] offset:48
112+ ; GFX11-NEXT: global_load_b128 v[16:19 ], v64, s[0:1] offset:64
113+ ; GFX11-NEXT: global_load_b128 v[20:23 ], v64, s[0:1] offset:80
114+ ; GFX11-NEXT: global_load_b128 v[24:27 ], v64, s[0:1] offset:96
115+ ; GFX11-NEXT: global_load_b128 v[28:31 ], v64, s[0:1] offset:112
116+ ; GFX11-NEXT: global_load_b128 v[32:35 ], v64, s[0:1] offset:128
117+ ; GFX11-NEXT: global_load_b128 v[36:39 ], v64, s[0:1] offset:144
121118; GFX11-NEXT: global_load_b128 v[40:43], v64, s[0:1] offset:160
122119; GFX11-NEXT: global_load_b128 v[44:47], v64, s[0:1] offset:176
123120; GFX11-NEXT: global_load_b128 v[48:51], v64, s[0:1] offset:192
124121; GFX11-NEXT: global_load_b128 v[52:55], v64, s[0:1] offset:208
125122; GFX11-NEXT: global_load_b128 v[56:59], v64, s[0:1] offset:224
126123; GFX11-NEXT: global_load_b128 v[60:63], v64, s[0:1] offset:240
127124; GFX11-NEXT: s_waitcnt vmcnt(6)
128- ; GFX11-NEXT: v_mov_b32_e32 v5 , 0x3e7
125+ ; GFX11-NEXT: v_mov_b32_e32 v37 , 0x3e7
129126; GFX11-NEXT: s_clause 0x9
130127; GFX11-NEXT: global_store_b128 v64, v[0:3], s[2:3]
131- ; GFX11-NEXT: global_store_b128 v64, v[8:11 ], s[2:3] offset:16
132- ; GFX11-NEXT: global_store_b128 v64, v[12:15 ], s[2:3] offset:32
133- ; GFX11-NEXT: global_store_b128 v64, v[16:19 ], s[2:3] offset:48
134- ; GFX11-NEXT: global_store_b128 v64, v[20:23 ], s[2:3] offset:64
135- ; GFX11-NEXT: global_store_b128 v64, v[24:27 ], s[2:3] offset:80
136- ; GFX11-NEXT: global_store_b128 v64, v[28:31 ], s[2:3] offset:96
137- ; GFX11-NEXT: global_store_b128 v64, v[32:35 ], s[2:3] offset:112
138- ; GFX11-NEXT: global_store_b128 v64, v[36:39 ], s[2:3] offset:128
139- ; GFX11-NEXT: global_store_b128 v64, v[4:7 ], s[2:3] offset:144
128+ ; GFX11-NEXT: global_store_b128 v64, v[4:7 ], s[2:3] offset:16
129+ ; GFX11-NEXT: global_store_b128 v64, v[8:11 ], s[2:3] offset:32
130+ ; GFX11-NEXT: global_store_b128 v64, v[12:15 ], s[2:3] offset:48
131+ ; GFX11-NEXT: global_store_b128 v64, v[16:19 ], s[2:3] offset:64
132+ ; GFX11-NEXT: global_store_b128 v64, v[20:23 ], s[2:3] offset:80
133+ ; GFX11-NEXT: global_store_b128 v64, v[24:27 ], s[2:3] offset:96
134+ ; GFX11-NEXT: global_store_b128 v64, v[28:31 ], s[2:3] offset:112
135+ ; GFX11-NEXT: global_store_b128 v64, v[32:35 ], s[2:3] offset:128
136+ ; GFX11-NEXT: global_store_b128 v64, v[36:39 ], s[2:3] offset:144
140137; GFX11-NEXT: s_waitcnt vmcnt(5)
141138; GFX11-NEXT: global_store_b128 v64, v[40:43], s[2:3] offset:160
142139; GFX11-NEXT: s_waitcnt vmcnt(4)
0 commit comments