@@ -1524,7 +1524,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %p
1524
1524
; GFX900-NEXT: v_lshrrev_b32_sdwa v1, s5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1525
1525
; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v1
1526
1526
; GFX900-NEXT: v_bfe_u32 v3, v1, 16, 1
1527
- ; GFX900-NEXT: v_and_b32_e32 v4, 0x80000000, v1
1527
+ ; GFX900-NEXT: v_and_b32_e32 v4, 0xff800000, v1
1528
1528
; GFX900-NEXT: v_add3_u32 v3, v3, v1, s4
1529
1529
; GFX900-NEXT: v_or_b32_e32 v4, 0x400000, v4
1530
1530
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
@@ -1566,7 +1566,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %p
1566
1566
; GFX908-NEXT: v_lshrrev_b32_sdwa v1, s5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1567
1567
; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v1
1568
1568
; GFX908-NEXT: v_bfe_u32 v3, v1, 16, 1
1569
- ; GFX908-NEXT: v_and_b32_e32 v4, 0x80000000, v1
1569
+ ; GFX908-NEXT: v_and_b32_e32 v4, 0xff800000, v1
1570
1570
; GFX908-NEXT: v_add3_u32 v3, v3, v1, s4
1571
1571
; GFX908-NEXT: v_or_b32_e32 v4, 0x400000, v4
1572
1572
; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
@@ -1608,7 +1608,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %p
1608
1608
; GFX90A-NEXT: v_lshrrev_b32_sdwa v1, s5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1609
1609
; GFX90A-NEXT: v_add_f32_e32 v1, 4.0, v1
1610
1610
; GFX90A-NEXT: v_bfe_u32 v2, v1, 16, 1
1611
- ; GFX90A-NEXT: v_and_b32_e32 v4, 0x80000000, v1
1611
+ ; GFX90A-NEXT: v_and_b32_e32 v4, 0xff800000, v1
1612
1612
; GFX90A-NEXT: v_add3_u32 v2, v2, v1, s4
1613
1613
; GFX90A-NEXT: v_or_b32_e32 v4, 0x400000, v4
1614
1614
; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
@@ -1632,7 +1632,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %p
1632
1632
; GFX10: ; %bb.0:
1633
1633
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
1634
1634
; GFX10-NEXT: v_mov_b32_e32 v0, 0
1635
- ; GFX10-NEXT: s_brev_b32 s5, 1
1635
+ ; GFX10-NEXT: s_mov_b32 s5, 0xff800000
1636
1636
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1637
1637
; GFX10-NEXT: s_and_b32 s0, s2, -4
1638
1638
; GFX10-NEXT: s_mov_b32 s1, s3
@@ -1673,7 +1673,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %p
1673
1673
; GFX11-LABEL: global_atomic_fadd_ret_bf16_agent:
1674
1674
; GFX11: ; %bb.0:
1675
1675
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
1676
- ; GFX11-NEXT: s_brev_b32 s5, 1
1676
+ ; GFX11-NEXT: s_mov_b32 s5, 0xff800000
1677
1677
; GFX11-NEXT: v_mov_b32_e32 v0, 0
1678
1678
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1679
1679
; GFX11-NEXT: s_and_b32 s0, s2, -4
@@ -1744,7 +1744,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) %
1744
1744
; GFX900-NEXT: v_lshrrev_b32_sdwa v1, s5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1745
1745
; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v1
1746
1746
; GFX900-NEXT: v_bfe_u32 v3, v1, 16, 1
1747
- ; GFX900-NEXT: v_and_b32_e32 v4, 0x80000000, v1
1747
+ ; GFX900-NEXT: v_and_b32_e32 v4, 0xff800000, v1
1748
1748
; GFX900-NEXT: v_add3_u32 v3, v3, v1, s4
1749
1749
; GFX900-NEXT: v_or_b32_e32 v4, 0x400000, v4
1750
1750
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
@@ -1786,7 +1786,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) %
1786
1786
; GFX908-NEXT: v_lshrrev_b32_sdwa v1, s5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1787
1787
; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v1
1788
1788
; GFX908-NEXT: v_bfe_u32 v3, v1, 16, 1
1789
- ; GFX908-NEXT: v_and_b32_e32 v4, 0x80000000, v1
1789
+ ; GFX908-NEXT: v_and_b32_e32 v4, 0xff800000, v1
1790
1790
; GFX908-NEXT: v_add3_u32 v3, v3, v1, s4
1791
1791
; GFX908-NEXT: v_or_b32_e32 v4, 0x400000, v4
1792
1792
; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
@@ -1828,7 +1828,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) %
1828
1828
; GFX90A-NEXT: v_lshrrev_b32_sdwa v1, s5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1829
1829
; GFX90A-NEXT: v_add_f32_e32 v1, 4.0, v1
1830
1830
; GFX90A-NEXT: v_bfe_u32 v2, v1, 16, 1
1831
- ; GFX90A-NEXT: v_and_b32_e32 v4, 0x80000000, v1
1831
+ ; GFX90A-NEXT: v_and_b32_e32 v4, 0xff800000, v1
1832
1832
; GFX90A-NEXT: v_add3_u32 v2, v2, v1, s4
1833
1833
; GFX90A-NEXT: v_or_b32_e32 v4, 0x400000, v4
1834
1834
; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
@@ -1854,7 +1854,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) %
1854
1854
; GFX10: ; %bb.0:
1855
1855
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
1856
1856
; GFX10-NEXT: v_mov_b32_e32 v0, 0
1857
- ; GFX10-NEXT: s_brev_b32 s5, 1
1857
+ ; GFX10-NEXT: s_mov_b32 s5, 0xff800000
1858
1858
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1859
1859
; GFX10-NEXT: s_and_b32 s0, s2, -4
1860
1860
; GFX10-NEXT: s_mov_b32 s1, s3
@@ -1895,7 +1895,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) %
1895
1895
; GFX11-LABEL: global_atomic_fadd_ret_bf16_system:
1896
1896
; GFX11: ; %bb.0:
1897
1897
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
1898
- ; GFX11-NEXT: s_brev_b32 s5, 1
1898
+ ; GFX11-NEXT: s_mov_b32 s5, 0xff800000
1899
1899
; GFX11-NEXT: v_mov_b32_e32 v0, 0
1900
1900
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1901
1901
; GFX11-NEXT: s_and_b32 s0, s2, -4
0 commit comments