Skip to content

Commit 687bf11

Browse files
committed
[AMDGPU] Add KnownBits simplification combines to RegBankCombiner
1 parent 9b283cd commit 687bf11

File tree

8 files changed

+104
-157
lines changed

8 files changed

+104
-157
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -210,5 +210,6 @@ def AMDGPURegBankCombiner : GICombiner<
210210
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
211211
identity_combines, redundant_and, constant_fold_cast_op,
212212
cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
213-
lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> {
213+
lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract,
214+
known_bits_simplifications]> {
214215
}

llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll

Lines changed: 30 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1744,63 +1744,64 @@ define i65 @v_lshr_i65_33(i65 %value) {
17441744
; GFX6-LABEL: v_lshr_i65_33:
17451745
; GFX6: ; %bb.0:
17461746
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1747-
; GFX6-NEXT: v_mov_b32_e32 v3, v1
1748-
; GFX6-NEXT: v_mov_b32_e32 v0, 1
1747+
; GFX6-NEXT: v_mov_b32_e32 v3, 1
1748+
; GFX6-NEXT: v_mov_b32_e32 v4, 0
1749+
; GFX6-NEXT: v_and_b32_e32 v3, 1, v2
1750+
; GFX6-NEXT: v_lshl_b64 v[2:3], v[3:4], 31
1751+
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v1
1752+
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
17491753
; GFX6-NEXT: v_mov_b32_e32 v1, 0
1750-
; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
1751-
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
1752-
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v3
1753-
; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
17541754
; GFX6-NEXT: v_mov_b32_e32 v2, 0
17551755
; GFX6-NEXT: s_setpc_b64 s[30:31]
17561756
;
17571757
; GFX8-LABEL: v_lshr_i65_33:
17581758
; GFX8: ; %bb.0:
17591759
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1760-
; GFX8-NEXT: v_mov_b32_e32 v3, v1
1761-
; GFX8-NEXT: v_mov_b32_e32 v0, 1
1760+
; GFX8-NEXT: v_mov_b32_e32 v3, 1
1761+
; GFX8-NEXT: v_mov_b32_e32 v4, 0
1762+
; GFX8-NEXT: v_and_b32_e32 v3, 1, v2
1763+
; GFX8-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
1764+
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v1
1765+
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
17621766
; GFX8-NEXT: v_mov_b32_e32 v1, 0
1763-
; GFX8-NEXT: v_and_b32_e32 v0, 1, v2
1764-
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1765-
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v3
1766-
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
17671767
; GFX8-NEXT: v_mov_b32_e32 v2, 0
17681768
; GFX8-NEXT: s_setpc_b64 s[30:31]
17691769
;
17701770
; GFX9-LABEL: v_lshr_i65_33:
17711771
; GFX9: ; %bb.0:
17721772
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1773-
; GFX9-NEXT: v_mov_b32_e32 v3, v1
1774-
; GFX9-NEXT: v_mov_b32_e32 v0, 1
1773+
; GFX9-NEXT: v_mov_b32_e32 v3, 1
1774+
; GFX9-NEXT: v_mov_b32_e32 v4, 0
1775+
; GFX9-NEXT: v_and_b32_e32 v3, 1, v2
1776+
; GFX9-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
1777+
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v1
1778+
; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
17751779
; GFX9-NEXT: v_mov_b32_e32 v1, 0
1776-
; GFX9-NEXT: v_and_b32_e32 v0, 1, v2
1777-
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1778-
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
1779-
; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
17801780
; GFX9-NEXT: v_mov_b32_e32 v2, 0
17811781
; GFX9-NEXT: s_setpc_b64 s[30:31]
17821782
;
17831783
; GFX10-LABEL: v_lshr_i65_33:
17841784
; GFX10: ; %bb.0:
17851785
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1786-
; GFX10-NEXT: v_mov_b32_e32 v3, v1
1787-
; GFX10-NEXT: v_mov_b32_e32 v0, 1
1786+
; GFX10-NEXT: v_mov_b32_e32 v3, 1
1787+
; GFX10-NEXT: v_mov_b32_e32 v4, 0
1788+
; GFX10-NEXT: v_and_b32_e32 v3, 1, v2
1789+
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v1
17881790
; GFX10-NEXT: v_mov_b32_e32 v1, 0
1789-
; GFX10-NEXT: v_and_b32_e32 v0, 1, v2
1790-
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v3
1791-
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1792-
; GFX10-NEXT: v_or_b32_e32 v0, v2, v0
1791+
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
1792+
; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
17931793
; GFX10-NEXT: v_mov_b32_e32 v2, 0
17941794
; GFX10-NEXT: s_setpc_b64 s[30:31]
17951795
;
17961796
; GFX11-LABEL: v_lshr_i65_33:
17971797
; GFX11: ; %bb.0:
17981798
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1799-
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, 1
1800-
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v2
1801-
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v3
1802-
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1803-
; GFX11-NEXT: v_or_b32_e32 v0, v2, v0
1799+
; GFX11-NEXT: v_mov_b32_e32 v3, 1
1800+
; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_and_b32 v3, 1, v2
1801+
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v1
1802+
; GFX11-NEXT: v_mov_b32_e32 v1, 0
1803+
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
1804+
; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
18041805
; GFX11-NEXT: v_mov_b32_e32 v2, 0
18051806
; GFX11-NEXT: s_setpc_b64 s[30:31]
18061807
%result = lshr i65 %value, 33

llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll

Lines changed: 21 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
8080
; GFX8-NEXT: s_min_i32 s2, s2, 0
8181
; GFX8-NEXT: s_lshl_b32 s1, s1, 9
8282
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
83+
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
8384
; GFX8-NEXT: s_sext_i32_i16 s2, s2
8485
; GFX8-NEXT: s_sext_i32_i16 s1, s1
85-
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
8686
; GFX8-NEXT: s_max_i32 s1, s2, s1
87-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
8887
; GFX8-NEXT: s_sext_i32_i16 s2, s3
8988
; GFX8-NEXT: s_min_i32 s1, s1, s2
9089
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
189188
; GFX8-NEXT: s_min_i32 s2, s2, 0
190189
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
191190
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
191+
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
192192
; GFX8-NEXT: s_sext_i32_i16 s2, s2
193193
; GFX8-NEXT: s_sext_i32_i16 s1, s1
194-
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
195194
; GFX8-NEXT: s_max_i32 s1, s2, s1
196-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
197195
; GFX8-NEXT: s_sext_i32_i16 s2, s3
198196
; GFX8-NEXT: s_min_i32 s1, s1, s2
199197
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -386,11 +384,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
386384
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
387385
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
388386
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
387+
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
389388
; GFX8-NEXT: s_sext_i32_i16 s4, s4
390389
; GFX8-NEXT: s_sext_i32_i16 s1, s1
391-
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
392390
; GFX8-NEXT: s_max_i32 s1, s4, s1
393-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
394391
; GFX8-NEXT: s_sext_i32_i16 s4, s5
395392
; GFX8-NEXT: s_min_i32 s1, s1, s4
396393
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -400,11 +397,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
400397
; GFX8-NEXT: s_max_i32 s4, s3, 0
401398
; GFX8-NEXT: s_min_i32 s3, s3, 0
402399
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
400+
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
403401
; GFX8-NEXT: s_sext_i32_i16 s3, s3
404402
; GFX8-NEXT: s_sext_i32_i16 s2, s2
405-
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
406403
; GFX8-NEXT: s_max_i32 s2, s3, s2
407-
; GFX8-NEXT: s_sext_i32_i16 s2, s2
408404
; GFX8-NEXT: s_sext_i32_i16 s3, s4
409405
; GFX8-NEXT: s_min_i32 s2, s2, s3
410406
; GFX8-NEXT: s_add_i32 s1, s1, s2
@@ -787,11 +783,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
787783
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
788784
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
789785
; GFX8-NEXT: s_sub_i32 s8, 0x8000, s8
786+
; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
790787
; GFX8-NEXT: s_sext_i32_i16 s8, s8
791788
; GFX8-NEXT: s_sext_i32_i16 s1, s1
792-
; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
793789
; GFX8-NEXT: s_max_i32 s1, s8, s1
794-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
795790
; GFX8-NEXT: s_sext_i32_i16 s8, s9
796791
; GFX8-NEXT: s_min_i32 s1, s1, s8
797792
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -801,11 +796,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
801796
; GFX8-NEXT: s_max_i32 s8, s5, 0
802797
; GFX8-NEXT: s_min_i32 s5, s5, 0
803798
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
799+
; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
804800
; GFX8-NEXT: s_sext_i32_i16 s5, s5
805801
; GFX8-NEXT: s_sext_i32_i16 s2, s2
806-
; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
807802
; GFX8-NEXT: s_max_i32 s2, s5, s2
808-
; GFX8-NEXT: s_sext_i32_i16 s2, s2
809803
; GFX8-NEXT: s_sext_i32_i16 s5, s8
810804
; GFX8-NEXT: s_min_i32 s2, s2, s5
811805
; GFX8-NEXT: s_add_i32 s1, s1, s2
@@ -815,11 +809,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
815809
; GFX8-NEXT: s_max_i32 s6, s5, 0
816810
; GFX8-NEXT: s_min_i32 s5, s5, 0
817811
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
812+
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
818813
; GFX8-NEXT: s_sext_i32_i16 s5, s5
819814
; GFX8-NEXT: s_sext_i32_i16 s3, s3
820-
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
821815
; GFX8-NEXT: s_max_i32 s3, s5, s3
822-
; GFX8-NEXT: s_sext_i32_i16 s3, s3
823816
; GFX8-NEXT: s_sext_i32_i16 s5, s6
824817
; GFX8-NEXT: s_min_i32 s3, s3, s5
825818
; GFX8-NEXT: s_add_i32 s2, s2, s3
@@ -829,14 +822,13 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
829822
; GFX8-NEXT: s_min_i32 s5, s5, 0
830823
; GFX8-NEXT: s_lshl_b32 s4, s7, 8
831824
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
832-
; GFX8-NEXT: s_sext_i32_i16 s5, s5
833-
; GFX8-NEXT: s_sext_i32_i16 s4, s4
834825
; GFX8-NEXT: s_sext_i32_i16 s1, s1
835826
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
836-
; GFX8-NEXT: s_max_i32 s4, s5, s4
827+
; GFX8-NEXT: s_sext_i32_i16 s5, s5
828+
; GFX8-NEXT: s_sext_i32_i16 s4, s4
837829
; GFX8-NEXT: s_sext_i32_i16 s0, s0
838830
; GFX8-NEXT: s_ashr_i32 s1, s1, 8
839-
; GFX8-NEXT: s_sext_i32_i16 s4, s4
831+
; GFX8-NEXT: s_max_i32 s4, s5, s4
840832
; GFX8-NEXT: s_sext_i32_i16 s5, s6
841833
; GFX8-NEXT: s_ashr_i32 s0, s0, 8
842834
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -2631,11 +2623,10 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
26312623
; GFX8-NEXT: s_max_i32 s3, s2, 0
26322624
; GFX8-NEXT: s_min_i32 s2, s2, 0
26332625
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
2626+
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
26342627
; GFX8-NEXT: s_sext_i32_i16 s2, s2
26352628
; GFX8-NEXT: s_sext_i32_i16 s1, s1
2636-
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
26372629
; GFX8-NEXT: s_max_i32 s1, s2, s1
2638-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
26392630
; GFX8-NEXT: s_sext_i32_i16 s2, s3
26402631
; GFX8-NEXT: s_min_i32 s1, s1, s2
26412632
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -2835,11 +2826,10 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
28352826
; GFX8-NEXT: s_max_i32 s4, s3, 0
28362827
; GFX8-NEXT: s_min_i32 s3, s3, 0
28372828
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
2829+
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
28382830
; GFX8-NEXT: s_sext_i32_i16 s3, s3
28392831
; GFX8-NEXT: s_sext_i32_i16 s5, s1
2840-
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
28412832
; GFX8-NEXT: s_max_i32 s3, s3, s5
2842-
; GFX8-NEXT: s_sext_i32_i16 s3, s3
28432833
; GFX8-NEXT: s_sext_i32_i16 s4, s4
28442834
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
28452835
; GFX8-NEXT: s_min_i32 s3, s3, s4
@@ -3190,11 +3180,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
31903180
; GFX8-NEXT: s_max_i32 s7, s6, 0
31913181
; GFX8-NEXT: s_min_i32 s6, s6, 0
31923182
; GFX8-NEXT: s_sub_i32 s6, 0x8000, s6
3183+
; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
31933184
; GFX8-NEXT: s_sext_i32_i16 s6, s6
31943185
; GFX8-NEXT: s_sext_i32_i16 s8, s2
3195-
; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
31963186
; GFX8-NEXT: s_max_i32 s6, s6, s8
3197-
; GFX8-NEXT: s_sext_i32_i16 s6, s6
31983187
; GFX8-NEXT: s_sext_i32_i16 s7, s7
31993188
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
32003189
; GFX8-NEXT: s_min_i32 s6, s6, s7
@@ -3215,11 +3204,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
32153204
; GFX8-NEXT: s_max_i32 s6, s2, 0
32163205
; GFX8-NEXT: s_min_i32 s2, s2, 0
32173206
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
3207+
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
32183208
; GFX8-NEXT: s_sext_i32_i16 s2, s2
32193209
; GFX8-NEXT: s_sext_i32_i16 s7, s3
3220-
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
32213210
; GFX8-NEXT: s_max_i32 s2, s2, s7
3222-
; GFX8-NEXT: s_sext_i32_i16 s2, s2
32233211
; GFX8-NEXT: s_sext_i32_i16 s6, s6
32243212
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
32253213
; GFX8-NEXT: s_min_i32 s2, s2, s6
@@ -3513,11 +3501,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
35133501
; GFX8-NEXT: s_max_i32 s10, s9, 0
35143502
; GFX8-NEXT: s_min_i32 s9, s9, 0
35153503
; GFX8-NEXT: s_sub_i32 s9, 0x8000, s9
3504+
; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
35163505
; GFX8-NEXT: s_sext_i32_i16 s9, s9
35173506
; GFX8-NEXT: s_sext_i32_i16 s11, s3
3518-
; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
35193507
; GFX8-NEXT: s_max_i32 s9, s9, s11
3520-
; GFX8-NEXT: s_sext_i32_i16 s9, s9
35213508
; GFX8-NEXT: s_sext_i32_i16 s10, s10
35223509
; GFX8-NEXT: s_lshr_b32 s6, s0, 16
35233510
; GFX8-NEXT: s_min_i32 s9, s9, s10
@@ -3538,11 +3525,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
35383525
; GFX8-NEXT: s_max_i32 s9, s3, 0
35393526
; GFX8-NEXT: s_min_i32 s3, s3, 0
35403527
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
3528+
; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
35413529
; GFX8-NEXT: s_sext_i32_i16 s3, s3
35423530
; GFX8-NEXT: s_sext_i32_i16 s10, s4
3543-
; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
35443531
; GFX8-NEXT: s_max_i32 s3, s3, s10
3545-
; GFX8-NEXT: s_sext_i32_i16 s3, s3
35463532
; GFX8-NEXT: s_sext_i32_i16 s9, s9
35473533
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
35483534
; GFX8-NEXT: s_min_i32 s3, s3, s9
@@ -3563,11 +3549,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
35633549
; GFX8-NEXT: s_max_i32 s4, s3, 0
35643550
; GFX8-NEXT: s_min_i32 s3, s3, 0
35653551
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
3552+
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
35663553
; GFX8-NEXT: s_sext_i32_i16 s3, s3
35673554
; GFX8-NEXT: s_sext_i32_i16 s9, s5
3568-
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
35693555
; GFX8-NEXT: s_max_i32 s3, s3, s9
3570-
; GFX8-NEXT: s_sext_i32_i16 s3, s3
35713556
; GFX8-NEXT: s_sext_i32_i16 s4, s4
35723557
; GFX8-NEXT: s_lshr_b32 s8, s2, 16
35733558
; GFX8-NEXT: s_min_i32 s3, s3, s4
@@ -3924,11 +3909,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
39243909
; GFX8-NEXT: s_max_i32 s13, s12, 0
39253910
; GFX8-NEXT: s_min_i32 s12, s12, 0
39263911
; GFX8-NEXT: s_sub_i32 s12, 0x8000, s12
3912+
; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
39273913
; GFX8-NEXT: s_sext_i32_i16 s12, s12
39283914
; GFX8-NEXT: s_sext_i32_i16 s14, s4
3929-
; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
39303915
; GFX8-NEXT: s_max_i32 s12, s12, s14
3931-
; GFX8-NEXT: s_sext_i32_i16 s12, s12
39323916
; GFX8-NEXT: s_sext_i32_i16 s13, s13
39333917
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
39343918
; GFX8-NEXT: s_min_i32 s12, s12, s13
@@ -3949,11 +3933,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
39493933
; GFX8-NEXT: s_max_i32 s12, s4, 0
39503934
; GFX8-NEXT: s_min_i32 s4, s4, 0
39513935
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
3936+
; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
39523937
; GFX8-NEXT: s_sext_i32_i16 s4, s4
39533938
; GFX8-NEXT: s_sext_i32_i16 s13, s5
3954-
; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
39553939
; GFX8-NEXT: s_max_i32 s4, s4, s13
3956-
; GFX8-NEXT: s_sext_i32_i16 s4, s4
39573940
; GFX8-NEXT: s_sext_i32_i16 s12, s12
39583941
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
39593942
; GFX8-NEXT: s_min_i32 s4, s4, s12
@@ -3974,11 +3957,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
39743957
; GFX8-NEXT: s_max_i32 s5, s4, 0
39753958
; GFX8-NEXT: s_min_i32 s4, s4, 0
39763959
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
3960+
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
39773961
; GFX8-NEXT: s_sext_i32_i16 s4, s4
39783962
; GFX8-NEXT: s_sext_i32_i16 s12, s6
3979-
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
39803963
; GFX8-NEXT: s_max_i32 s4, s4, s12
3981-
; GFX8-NEXT: s_sext_i32_i16 s4, s4
39823964
; GFX8-NEXT: s_sext_i32_i16 s5, s5
39833965
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
39843966
; GFX8-NEXT: s_min_i32 s4, s4, s5
@@ -3999,11 +3981,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
39993981
; GFX8-NEXT: s_max_i32 s5, s4, 0
40003982
; GFX8-NEXT: s_min_i32 s4, s4, 0
40013983
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
3984+
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
40023985
; GFX8-NEXT: s_sext_i32_i16 s4, s4
40033986
; GFX8-NEXT: s_sext_i32_i16 s6, s7
4004-
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
40053987
; GFX8-NEXT: s_max_i32 s4, s4, s6
4006-
; GFX8-NEXT: s_sext_i32_i16 s4, s4
40073988
; GFX8-NEXT: s_sext_i32_i16 s5, s5
40083989
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
40093990
; GFX8-NEXT: s_min_i32 s4, s4, s5

0 commit comments

Comments
 (0)