; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
68
79define i8 @atomic_load_monotonic_i8 (ptr addrspace (3 ) %ptr ) {
; CI-LABEL: atomic_load_monotonic_i8:
@@ -33,6 +35,14 @@ define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_i8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_u8 v0, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
3646 %load = load atomic i8 , ptr addrspace (3 ) %ptr monotonic , align 1
3747 ret i8 %load
3848}
@@ -66,6 +76,14 @@ define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0 offset:16
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_i8_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_u8 v0, v0 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
6987 %gep = getelementptr inbounds i8 , ptr addrspace (3 ) %ptr , i8 16
7088 %load = load atomic i8 , ptr addrspace (3 ) %gep monotonic , align 1
7189 ret i8 %load
@@ -100,6 +118,14 @@ define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_i16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_u16 v0, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
103129 %load = load atomic i16 , ptr addrspace (3 ) %ptr monotonic , align 2
104130 ret i16 %load
105131}
@@ -133,6 +159,14 @@ define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_i16_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
136170 %gep = getelementptr inbounds i16 , ptr addrspace (3 ) %ptr , i16 16
137171 %load = load atomic i16 , ptr addrspace (3 ) %gep monotonic , align 2
138172 ret i16 %load
@@ -160,6 +194,14 @@ define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_b32 v0, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
163205 %load = load atomic i32 , ptr addrspace (3 ) %ptr monotonic , align 4
164206 ret i32 %load
165207}
@@ -186,6 +228,14 @@ define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_i32_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
189239 %gep = getelementptr inbounds i32 , ptr addrspace (3 ) %ptr , i32 16
190240 %load = load atomic i32 , ptr addrspace (3 ) %gep monotonic , align 4
191241 ret i32 %load
@@ -213,6 +263,14 @@ define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_b64 v[0:1], v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
216274 %load = load atomic i64 , ptr addrspace (3 ) %ptr monotonic , align 8
217275 ret i64 %load
218276}
@@ -239,6 +297,14 @@ define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_i64_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
242308 %gep = getelementptr inbounds i64 , ptr addrspace (3 ) %ptr , i32 16
243309 %load = load atomic i64 , ptr addrspace (3 ) %gep monotonic , align 8
244310 ret i64 %load
@@ -266,6 +332,14 @@ define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_f32_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
269343 %gep = getelementptr inbounds float , ptr addrspace (3 ) %ptr , i32 16
270344 %load = load atomic float , ptr addrspace (3 ) %gep monotonic , align 4
271345 ret float %load
@@ -293,6 +367,14 @@ define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_f64_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
296378 %gep = getelementptr inbounds double , ptr addrspace (3 ) %ptr , i32 16
297379 %load = load atomic double , ptr addrspace (3 ) %gep monotonic , align 8
298380 ret double %load
@@ -320,6 +402,14 @@ define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_p0i8_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
323413 %gep = getelementptr inbounds ptr , ptr addrspace (3 ) %ptr , i32 16
324414 %load = load atomic ptr , ptr addrspace (3 ) %gep monotonic , align 8
325415 ret ptr %load
@@ -347,6 +437,14 @@ define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_p3i8_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
350448 %gep = getelementptr inbounds ptr addrspace (3 ), ptr addrspace (3 ) %ptr , i32 16
351449 %load = load atomic ptr addrspace (3 ), ptr addrspace (3 ) %gep monotonic , align 4
352450 ret ptr addrspace (3 ) %load
@@ -381,6 +479,14 @@ define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_f16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_u16 v0, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
384490 %load = load atomic half , ptr addrspace (3 ) %ptr monotonic , align 2
385491 %ret = bitcast half %load to i16
386492 ret i16 %ret
@@ -415,6 +521,14 @@ define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_f16_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
418532 %gep = getelementptr inbounds half , ptr addrspace (3 ) %ptr , i32 16
419533 %load = load atomic half , ptr addrspace (3 ) %gep monotonic , align 2
420534 %ret = bitcast half %load to i16
@@ -450,6 +564,14 @@ define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_bf16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_u16 v0, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
453575 %load = load atomic bfloat, ptr addrspace (3 ) %ptr monotonic , align 2
454576 %ret = bitcast bfloat %load to i16
455577 ret i16 %ret
@@ -484,10 +606,20 @@ define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: atomic_load_monotonic_bf16_offset:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
487617 %gep = getelementptr inbounds bfloat, ptr addrspace (3 ) %ptr , i32 16
488618 %load = load atomic bfloat, ptr addrspace (3 ) %gep monotonic , align 2
489619 %ret = bitcast bfloat %load to i16
490620 ret i16 %ret
491621}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX1250-FAKE16: {{.*}}
; GFX1250-TRUE16: {{.*}}
0 commit comments