@@ -117,3 +117,173 @@ body: |
117
117
S_ENDPGM 0
118
118
119
119
...
120
+
121
+ # The soft waitcnt 3952 (vmcnt(0)) placed after the LDS DMA load should be honored: expect S_WAITCNT 3952 emitted right before S_BARRIER.
122
+ # GCN-LABEL: name: buffer_load_dword_lds_ds_read_soft_wait
123
+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
124
+ # GCN-NEXT: S_WAITCNT 3952
125
+ # vmcnt(0)
126
+ # GCN-NEXT: S_BARRIER
127
+ ---
128
+ name : buffer_load_dword_lds_ds_read_soft_wait
129
+ body : |
130
+ bb.0:
131
+ $m0 = S_MOV_B32 0
132
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
133
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3952
134
+ S_BARRIER
135
+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
136
+ S_ENDPGM 0
137
+
138
+ ...
139
+
140
+ # No waitcnt is needed: the soft waitcnt is expected to be dropped so that S_BARRIER directly follows the store (its memory operands load from addrspace(3) and store into addrspace(1)).
141
+ # GCN-LABEL: name: buffer_store_lds_dword_ds_read_soft_wait
142
+ # GCN: BUFFER_STORE_LDS_DWORD
143
+ # GCN-NEXT: S_BARRIER
144
+ ---
145
+ name : buffer_store_lds_dword_ds_read_soft_wait
146
+ body : |
147
+ bb.0:
148
+ $m0 = S_MOV_B32 0
149
+ BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(3) poison` + 4), (store (s32) into `ptr addrspace(1) poison` + 4)
150
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3952
151
+ S_BARRIER
152
+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
153
+ S_ENDPGM 0
154
+
155
+ ...
156
+
157
+ # Three LDS DMA loads followed by a soft waitcnt 3953 (vmcnt(1)): expect S_WAITCNT 3953 kept before S_BARRIER and an S_WAITCNT 3952 (vmcnt(0)) after it, before the DS_READ.
158
+ # GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read_soft_wait
159
+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
160
+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
161
+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
162
+ # GCN-NEXT: S_WAITCNT 3953
163
+ # vmcnt(1)
164
+ # GCN-NEXT: S_BARRIER
165
+ # GCN-NEXT: S_WAITCNT 3952
166
+ # vmcnt(0)
167
+ # GCN-NEXT: DS_READ_B32_gfx9
168
+ ---
169
+ name : series_of_buffer_load_dword_lds_ds_read_soft_wait
170
+ body : |
171
+ bb.0:
172
+ $m0 = S_MOV_B32 0
173
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
174
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
175
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 8), (store (s32) into `ptr addrspace(3) poison` + 8)
176
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
177
+ S_BARRIER
178
+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
179
+ S_ENDPGM 0
180
+
181
+ ...
182
+
183
+ # Only one LDS DMA load is issued, so the soft waitcnt 3953 (vmcnt(1)) asks for a higher count than needed: expect no waitcnt before S_BARRIER and only S_WAITCNT 3952 (vmcnt(0)) before the DS_READ.
184
+ # GCN-LABEL: name: buffer_load_dword_lds_ds_read_soft_wait_redundant
185
+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
186
+ # GCN-NEXT: S_BARRIER
187
+ # GCN-NEXT: S_WAITCNT 3952
188
+ # vmcnt(0)
189
+ # GCN-NEXT: DS_READ_B32_gfx9
190
+ ---
191
+ name : buffer_load_dword_lds_ds_read_soft_wait_redundant
192
+ body : |
193
+ bb.0:
194
+ $m0 = S_MOV_B32 0
195
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
196
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
197
+ S_BARRIER
198
+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
199
+ S_ENDPGM 0
200
+
201
+ ...
202
+
203
+ # Repeated identical soft waitcnts (3953, vmcnt(1)) on both sides of S_BARRIER should be combined: expect a single S_WAITCNT 3953 before the barrier and a single S_WAITCNT 3952 (vmcnt(0)) before the DS_READ.
204
+ # GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read_soft_wait_repeat
205
+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
206
+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
207
+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
208
+ # GCN-NEXT: S_WAITCNT 3953
209
+ # vmcnt(1)
210
+ # GCN-NEXT: S_BARRIER
211
+ # GCN-NEXT: S_WAITCNT 3952
212
+ # vmcnt(0)
213
+ # GCN-NEXT: DS_READ_B32_gfx9
214
+ ---
215
+ name : series_of_buffer_load_dword_lds_ds_read_soft_wait_repeat
216
+ body : |
217
+ bb.0:
218
+ $m0 = S_MOV_B32 0
219
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
220
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
221
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 8), (store (s32) into `ptr addrspace(3) poison` + 8)
222
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
223
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
224
+ S_BARRIER
225
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
226
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
227
+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
228
+ S_ENDPGM 0
229
+
230
+ ...
231
+
232
+ # Adjacent soft waitcnts with different immediates should be merged: 3954 and 3953 before S_BARRIER become one S_WAITCNT 3953 (vmcnt(1)), and the two 3952 after it become one S_WAITCNT 3952 (vmcnt(0)).
233
+ # GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read_soft_wait_merge
234
+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
235
+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
236
+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
237
+ # GCN-NEXT: S_WAITCNT 3953
238
+ # vmcnt(1)
239
+ # GCN-NEXT: S_BARRIER
240
+ # GCN-NEXT: S_WAITCNT 3952
241
+ # vmcnt(0)
242
+ # GCN-NEXT: DS_READ_B32_gfx9
243
+ ---
244
+ name : series_of_buffer_load_dword_lds_ds_read_soft_wait_merge
245
+ body : |
246
+ bb.0:
247
+ $m0 = S_MOV_B32 0
248
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
249
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
250
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 8), (store (s32) into `ptr addrspace(3) poison` + 8)
251
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3954
252
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
253
+ S_BARRIER
254
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3952
255
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3952
256
+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
257
+ S_ENDPGM 0
258
+
259
+ ...
260
+
261
+
262
+ # Handle a preexisting non-soft S_WAITCNT 0 between the second and third loads: it must be kept, no waitcnt is expected before S_BARRIER, and a single S_WAITCNT 3952 (vmcnt(0)) is expected before the DS_READ.
263
+ # GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read_soft_wait_preexisting
264
+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
265
+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
266
+ # GCN-NEXT: S_WAITCNT 0
267
+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
268
+ # GCN-NEXT: S_BARRIER
269
+ # GCN-NEXT: S_WAITCNT 3952
270
+ # vmcnt(0)
271
+ # GCN-NEXT: DS_READ_B32_gfx9
272
+ ---
273
+ name : series_of_buffer_load_dword_lds_ds_read_soft_wait_preexisting
274
+ body : |
275
+ bb.0:
276
+ $m0 = S_MOV_B32 0
277
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
278
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
279
+ S_WAITCNT 0
280
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 8), (store (s32) into `ptr addrspace(3) poison` + 8)
281
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
282
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
283
+ S_BARRIER
284
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
285
+ S_WAITCNT_VMCNT_LDS_DMA_soft 3953
286
+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
287
+ S_ENDPGM 0
288
+
289
+ ...
0 commit comments