@@ -60,13 +60,21 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
6060; GFX10-NEXT: global_store_short v[2:3], v0, off
6161; GFX10-NEXT: s_setpc_b64 s[30:31]
6262;
63- ; GFX11-LABEL: test_load_store:
64- ; GFX11: ; %bb.0:
65- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66- ; GFX11-NEXT: global_load_u16 v0, v[0:1], off
67- ; GFX11-NEXT: s_waitcnt vmcnt(0)
68- ; GFX11-NEXT: global_store_b16 v[2:3], v0, off
69- ; GFX11-NEXT: s_setpc_b64 s[30:31]
63+ ; GFX11TRUE16-LABEL: test_load_store:
64+ ; GFX11TRUE16: ; %bb.0:
65+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66+ ; GFX11TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
67+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
68+ ; GFX11TRUE16-NEXT: global_store_b16 v[2:3], v0, off
69+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
70+ ;
71+ ; GFX11FAKE16-LABEL: test_load_store:
72+ ; GFX11FAKE16: ; %bb.0:
73+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74+ ; GFX11FAKE16-NEXT: global_load_u16 v0, v[0:1], off
75+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
76+ ; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v0, off
77+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
7078 %val = load bfloat, ptr addrspace(1) %in
7179 store bfloat %val, ptr addrspace(1) %out
7280 ret void
@@ -2127,14 +2135,23 @@ define void @test_store_fpimm(ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) {
21272135; GFX10-NEXT: global_store_short v[2:3], v5, off
21282136; GFX10-NEXT: s_setpc_b64 s[30:31]
21292137;
2130- ; GFX11-LABEL: test_store_fpimm:
2131- ; GFX11: ; %bb.0:
2132- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2133- ; GFX11-NEXT: v_mov_b32_e32 v4, 0x3f80
2134- ; GFX11-NEXT: v_mov_b32_e32 v5, 0x4228
2135- ; GFX11-NEXT: global_store_b16 v[0:1], v4, off
2136- ; GFX11-NEXT: global_store_b16 v[2:3], v5, off
2137- ; GFX11-NEXT: s_setpc_b64 s[30:31]
2138+ ; GFX11TRUE16-LABEL: test_store_fpimm:
2139+ ; GFX11TRUE16: ; %bb.0:
2140+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, 0x3f80
2142+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.h, 0x4228
2143+ ; GFX11TRUE16-NEXT: global_store_b16 v[0:1], v4, off
2144+ ; GFX11TRUE16-NEXT: global_store_d16_hi_b16 v[2:3], v4, off
2145+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
2146+ ;
2147+ ; GFX11FAKE16-LABEL: test_store_fpimm:
2148+ ; GFX11FAKE16: ; %bb.0:
2149+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150+ ; GFX11FAKE16-NEXT: v_mov_b32_e32 v4, 0x3f80
2151+ ; GFX11FAKE16-NEXT: v_mov_b32_e32 v5, 0x4228
2152+ ; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v4, off
2153+ ; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v5, off
2154+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
21382155 store bfloat 1.0, ptr addrspace(1) %ptr0
21392156 store bfloat 42.0, ptr addrspace(1) %ptr1
21402157 ret void
@@ -3330,12 +3347,19 @@ define amdgpu_gfx void @test_inreg_arg_store(bfloat inreg %in, ptr addrspace(1)
33303347; GFX10-NEXT: global_store_short v[0:1], v2, off
33313348; GFX10-NEXT: s_setpc_b64 s[30:31]
33323349;
3333- ; GFX11-LABEL: test_inreg_arg_store:
3334- ; GFX11: ; %bb.0:
3335- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3336- ; GFX11-NEXT: v_mov_b32_e32 v2, s4
3337- ; GFX11-NEXT: global_store_b16 v[0:1], v2, off
3338- ; GFX11-NEXT: s_setpc_b64 s[30:31]
3350+ ; GFX11TRUE16-LABEL: test_inreg_arg_store:
3351+ ; GFX11TRUE16: ; %bb.0:
3352+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3353+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s4
3354+ ; GFX11TRUE16-NEXT: global_store_b16 v[0:1], v2, off
3355+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3356+ ;
3357+ ; GFX11FAKE16-LABEL: test_inreg_arg_store:
3358+ ; GFX11FAKE16: ; %bb.0:
3359+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3360+ ; GFX11FAKE16-NEXT: v_mov_b32_e32 v2, s4
3361+ ; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v2, off
3362+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
33393363 store bfloat %in, ptr addrspace(1) %out
33403364 ret void
33413365}
@@ -3379,11 +3403,18 @@ define bfloat @test_byval(ptr addrspace(5) byval(bfloat) %bv, bfloat %val) {
33793403; GFX10-NEXT: buffer_store_short v0, off, s[0:3], s32
33803404; GFX10-NEXT: s_setpc_b64 s[30:31]
33813405;
3382- ; GFX11-LABEL: test_byval:
3383- ; GFX11: ; %bb.0:
3384- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3385- ; GFX11-NEXT: scratch_store_b16 off, v0, s32
3386- ; GFX11-NEXT: s_setpc_b64 s[30:31]
3406+ ; GFX11TRUE16-LABEL: test_byval:
3407+ ; GFX11TRUE16: ; %bb.0:
3408+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3409+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
3410+ ; GFX11TRUE16-NEXT: scratch_store_b16 off, v1, s32
3411+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3412+ ;
3413+ ; GFX11FAKE16-LABEL: test_byval:
3414+ ; GFX11FAKE16: ; %bb.0:
3415+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3416+ ; GFX11FAKE16-NEXT: scratch_store_b16 off, v0, s32
3417+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
33873418 store bfloat %val, ptr addrspace(5) %bv
33883419 %retval = load bfloat, ptr addrspace(5) %bv
33893420 ret bfloat %retval
@@ -3490,13 +3521,21 @@ define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %ou
34903521; GFX10-NEXT: global_store_short v[2:3], v0, off
34913522; GFX10-NEXT: s_setpc_b64 s[30:31]
34923523;
3493- ; GFX11-LABEL: test_bitcast_from_bfloat:
3494- ; GFX11: ; %bb.0:
3495- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3496- ; GFX11-NEXT: global_load_u16 v0, v[0:1], off
3497- ; GFX11-NEXT: s_waitcnt vmcnt(0)
3498- ; GFX11-NEXT: global_store_b16 v[2:3], v0, off
3499- ; GFX11-NEXT: s_setpc_b64 s[30:31]
3524+ ; GFX11TRUE16-LABEL: test_bitcast_from_bfloat:
3525+ ; GFX11TRUE16: ; %bb.0:
3526+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3527+ ; GFX11TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
3528+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
3529+ ; GFX11TRUE16-NEXT: global_store_b16 v[2:3], v0, off
3530+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3531+ ;
3532+ ; GFX11FAKE16-LABEL: test_bitcast_from_bfloat:
3533+ ; GFX11FAKE16: ; %bb.0:
3534+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3535+ ; GFX11FAKE16-NEXT: global_load_u16 v0, v[0:1], off
3536+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
3537+ ; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v0, off
3538+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
35003539 %val = load bfloat, ptr addrspace(1) %in
35013540 %val_int = bitcast bfloat %val to i16
35023541 store i16 %val_int, ptr addrspace(1) %out
@@ -3556,13 +3595,21 @@ define void @test_bitcast_to_bfloat(ptr addrspace(1) %out, ptr addrspace(1) %in)
35563595; GFX10-NEXT: global_store_short v[0:1], v2, off
35573596; GFX10-NEXT: s_setpc_b64 s[30:31]
35583597;
3559- ; GFX11-LABEL: test_bitcast_to_bfloat:
3560- ; GFX11: ; %bb.0:
3561- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3562- ; GFX11-NEXT: global_load_u16 v2, v[2:3], off
3563- ; GFX11-NEXT: s_waitcnt vmcnt(0)
3564- ; GFX11-NEXT: global_store_b16 v[0:1], v2, off
3565- ; GFX11-NEXT: s_setpc_b64 s[30:31]
3598+ ; GFX11TRUE16-LABEL: test_bitcast_to_bfloat:
3599+ ; GFX11TRUE16: ; %bb.0:
3600+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3601+ ; GFX11TRUE16-NEXT: global_load_d16_b16 v2, v[2:3], off
3602+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
3603+ ; GFX11TRUE16-NEXT: global_store_b16 v[0:1], v2, off
3604+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3605+ ;
3606+ ; GFX11FAKE16-LABEL: test_bitcast_to_bfloat:
3607+ ; GFX11FAKE16: ; %bb.0:
3608+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3609+ ; GFX11FAKE16-NEXT: global_load_u16 v2, v[2:3], off
3610+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
3611+ ; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v2, off
3612+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
35663613 %val = load i16, ptr addrspace(1) %in
35673614 %val_fp = bitcast i16 %val to bfloat
35683615 store bfloat %val_fp, ptr addrspace(1) %out
@@ -5309,14 +5356,23 @@ define bfloat @test_alloca_load_store_ret(bfloat %in) {
53095356; GFX10-NEXT: s_waitcnt vmcnt(0)
53105357; GFX10-NEXT: s_setpc_b64 s[30:31]
53115358;
5312- ; GFX11-LABEL: test_alloca_load_store_ret:
5313- ; GFX11: ; %bb.0: ; %entry
5314- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5315- ; GFX11-NEXT: scratch_store_b16 off, v0, s32 dlc
5316- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
5317- ; GFX11-NEXT: scratch_load_u16 v0, off, s32 glc dlc
5318- ; GFX11-NEXT: s_waitcnt vmcnt(0)
5319- ; GFX11-NEXT: s_setpc_b64 s[30:31]
5359+ ; GFX11TRUE16-LABEL: test_alloca_load_store_ret:
5360+ ; GFX11TRUE16: ; %bb.0: ; %entry
5361+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5362+ ; GFX11TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
5363+ ; GFX11TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
5364+ ; GFX11TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
5365+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
5366+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
5367+ ;
5368+ ; GFX11FAKE16-LABEL: test_alloca_load_store_ret:
5369+ ; GFX11FAKE16: ; %bb.0: ; %entry
5370+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5371+ ; GFX11FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
5372+ ; GFX11FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
5373+ ; GFX11FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
5374+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
5375+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
53205376entry:
53215377 %in.addr = alloca bfloat, align 2, addrspace(5)
53225378 store volatile bfloat %in, ptr addrspace(5) %in.addr, align 2
@@ -5667,26 +5723,48 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) {
56675723; GFX10-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen offset:128
56685724; GFX10-NEXT: s_setpc_b64 s[30:31]
56695725;
5670- ; GFX11-LABEL: test_overflow_stack:
5671- ; GFX11: ; %bb.0:
5672- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5673- ; GFX11-NEXT: s_clause 0x2
5674- ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
5675- ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
5676- ; GFX11-NEXT: scratch_load_b32 v31, off, s32
5677- ; GFX11-NEXT: s_clause 0x5
5678- ; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
5679- ; GFX11-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
5680- ; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
5681- ; GFX11-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
5682- ; GFX11-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
5683- ; GFX11-NEXT: scratch_store_b128 v0, v[2:5], off
5684- ; GFX11-NEXT: s_waitcnt vmcnt(0)
5685- ; GFX11-NEXT: s_clause 0x2
5686- ; GFX11-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
5687- ; GFX11-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
5688- ; GFX11-NEXT: scratch_store_b16 v0, v1, off offset:128
5689- ; GFX11-NEXT: s_setpc_b64 s[30:31]
5726+ ; GFX11TRUE16-LABEL: test_overflow_stack:
5727+ ; GFX11TRUE16: ; %bb.0:
5728+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5729+ ; GFX11TRUE16-NEXT: s_clause 0x2
5730+ ; GFX11TRUE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
5731+ ; GFX11TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4
5732+ ; GFX11TRUE16-NEXT: scratch_load_b32 v31, off, s32
5733+ ; GFX11TRUE16-NEXT: s_clause 0x3
5734+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
5735+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
5736+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
5737+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
5738+ ; GFX11TRUE16-NEXT: s_clause 0x1
5739+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
5740+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[2:5], off
5741+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
5742+ ; GFX11TRUE16-NEXT: s_clause 0x2
5743+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
5744+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
5745+ ; GFX11TRUE16-NEXT: scratch_store_b16 v0, v1, off offset:128
5746+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
5747+ ;
5748+ ; GFX11FAKE16-LABEL: test_overflow_stack:
5749+ ; GFX11FAKE16: ; %bb.0:
5750+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5751+ ; GFX11FAKE16-NEXT: s_clause 0x2
5752+ ; GFX11FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
5753+ ; GFX11FAKE16-NEXT: scratch_load_b32 v32, off, s32 offset:4
5754+ ; GFX11FAKE16-NEXT: scratch_load_b32 v31, off, s32
5755+ ; GFX11FAKE16-NEXT: s_clause 0x5
5756+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
5757+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
5758+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
5759+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
5760+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
5761+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[2:5], off
5762+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
5763+ ; GFX11FAKE16-NEXT: s_clause 0x2
5764+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
5765+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
5766+ ; GFX11FAKE16-NEXT: scratch_store_b16 v0, v1, off offset:128
5767+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
56905768 %ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0
56915769 %ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1
56925770 ret { <32 x i32>, bfloat } %ins.1
@@ -42719,7 +42797,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4271942797; GFX11TRUE16: ; %bb.0:
4272042798; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4272142799; GFX11TRUE16-NEXT: s_clause 0x1f
42722- ; GFX11TRUE16-NEXT: scratch_load_u16 v31, off, s32
42800+ ; GFX11TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32
4272342801; GFX11TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:64
4272442802; GFX11TRUE16-NEXT: scratch_load_b32 v33, off, s32 offset:128
4272542803; GFX11TRUE16-NEXT: scratch_load_b32 v34, off, s32 offset:60
@@ -42752,16 +42830,17 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4275242830; GFX11TRUE16-NEXT: scratch_load_b32 v85, off, s32 offset:72
4275342831; GFX11TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:4
4275442832; GFX11TRUE16-NEXT: scratch_load_b32 v87, off, s32 offset:68
42833+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
4275542834; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
4275642835; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14
42757- ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
4275842836; GFX11TRUE16-NEXT: v_and_b32_e32 v18, 1, v18
4275942837; GFX11TRUE16-NEXT: v_and_b32_e32 v20, 1, v20
4276042838; GFX11TRUE16-NEXT: v_and_b32_e32 v22, 1, v22
4276142839; GFX11TRUE16-NEXT: v_and_b32_e32 v24, 1, v24
4276242840; GFX11TRUE16-NEXT: v_and_b32_e32 v26, 1, v26
4276342841; GFX11TRUE16-NEXT: v_and_b32_e32 v28, 1, v28
4276442842; GFX11TRUE16-NEXT: v_and_b32_e32 v30, 1, v30
42843+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v16
4276542844; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
4276642845; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
4276742846; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
@@ -42785,7 +42864,6 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4278542864; GFX11TRUE16-NEXT: v_and_b32_e32 v29, 1, v29
4278642865; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
4278742866; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s13, 1, v14
42788- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v16
4278942867; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s17, 1, v18
4279042868; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s19, 1, v20
4279142869; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s21, 1, v22
@@ -42815,14 +42893,15 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4281542893; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s26, 1, v27
4281642894; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s29, 1, v29
4281742895; GFX11TRUE16-NEXT: s_waitcnt vmcnt(32)
42818- ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v31
42896+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v31.l
4281942897; GFX11TRUE16-NEXT: s_waitcnt vmcnt(31)
4282042898; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v32
4282142899; GFX11TRUE16-NEXT: s_waitcnt vmcnt(30)
4282242900; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v33
4282342901; GFX11TRUE16-NEXT: v_cndmask_b16 v15.l, v33.l, v32.l, s28
4282442902; GFX11TRUE16-NEXT: s_waitcnt vmcnt(29)
4282542903; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v34
42904+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
4282642905; GFX11TRUE16-NEXT: s_waitcnt vmcnt(28)
4282742906; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v35
4282842907; GFX11TRUE16-NEXT: v_cndmask_b16 v14.l, v35.l, v34.l, s27
0 commit comments