-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Add KnownBits simplification combines to RegBankCombiner #141591
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/pierre-vh/rbcomb-bfx
Are you sure you want to change the base?
Conversation
Warning: This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-globalisel — Author: Pierre van Houtryve (Pierre-vh). Changes: This patch is 38.92 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/141591.diff (8 files affected):
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 96be17c487130..df867aaa204b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -210,5 +210,6 @@ def AMDGPURegBankCombiner : GICombiner<
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
identity_combines, redundant_and, constant_fold_cast_op,
cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
- lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> {
+ lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract,
+ known_bits_simplifications]> {
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 6baa10bb48621..cc0f45681a3e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1744,63 +1744,64 @@ define i65 @v_lshr_i65_33(i65 %value) {
; GFX6-LABEL: v_lshr_i65_33:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v3, v1
-; GFX6-NEXT: v_mov_b32_e32 v0, 1
+; GFX6-NEXT: v_mov_b32_e32 v3, 1
+; GFX6-NEXT: v_mov_b32_e32 v4, 0
+; GFX6-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX6-NEXT: v_lshl_b64 v[2:3], v[3:4], 31
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_mov_b32_e32 v1, 0
-; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i65_33:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v3, v1
-; GFX8-NEXT: v_mov_b32_e32 v0, 1
+; GFX8-NEXT: v_mov_b32_e32 v3, 1
+; GFX8-NEXT: v_mov_b32_e32 v4, 0
+; GFX8-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX8-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v1, 0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i65_33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-NEXT: v_mov_b32_e32 v0, 1
+; GFX9-NEXT: v_mov_b32_e32 v3, 1
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX9-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_lshr_i65_33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v3, v1
-; GFX10-NEXT: v_mov_b32_e32 v0, 1
+; GFX10-NEXT: v_mov_b32_e32 v3, 1
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX10-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX10-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX10-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_lshr_i65_33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, 1
-; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v2
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX11-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX11-NEXT: v_mov_b32_e32 v3, 1
+; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_and_b32 v3, 1, v2
+; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = lshr i65 %value, 33
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 7b2872f081979..93629f3bf9548 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_lshl_b32 s1, s1, 9
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_max_i32 s1, s2, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s3
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_max_i32 s1, s2, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s3
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -386,11 +384,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_max_i32 s1, s4, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s4, s5
; GFX8-NEXT: s_min_i32 s1, s1, s4
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -400,11 +397,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s2, s3, s2
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_add_i32 s1, s1, s2
@@ -787,11 +783,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sub_i32 s8, 0x8000, s8
+; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_max_i32 s1, s8, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s8, s9
; GFX8-NEXT: s_min_i32 s1, s1, s8
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -801,11 +796,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s8, s5, 0
; GFX8-NEXT: s_min_i32 s5, s5, 0
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
; GFX8-NEXT: s_max_i32 s2, s5, s2
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s8
; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_add_i32 s1, s1, s2
@@ -815,11 +809,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s6, s5, 0
; GFX8-NEXT: s_min_i32 s5, s5, 0
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_max_i32 s3, s5, s3
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s6
; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_add_i32 s2, s2, s3
@@ -829,14 +822,13 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_min_i32 s5, s5, 0
; GFX8-NEXT: s_lshl_b32 s4, s7, 8
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
-; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
-; GFX8-NEXT: s_max_i32 s4, s5, s4
+; GFX8-NEXT: s_sext_i32_i16 s5, s5
+; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s1, s1, 8
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
+; GFX8-NEXT: s_max_i32 s4, s5, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s6
; GFX8-NEXT: s_ashr_i32 s0, s0, 8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -2631,11 +2623,10 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX8-NEXT: s_max_i32 s3, s2, 0
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_max_i32 s1, s2, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s3
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -2835,11 +2826,10 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s1
-; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s3, s3, s5
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_min_i32 s3, s3, s4
@@ -3190,11 +3180,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_max_i32 s7, s6, 0
; GFX8-NEXT: s_min_i32 s6, s6, 0
; GFX8-NEXT: s_sub_i32 s6, 0x8000, s6
+; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s2
-; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
; GFX8-NEXT: s_max_i32 s6, s6, s8
-; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
; GFX8-NEXT: s_min_i32 s6, s6, s7
@@ -3215,11 +3204,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_max_i32 s6, s2, 0
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s7, s3
-; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_max_i32 s2, s2, s7
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_min_i32 s2, s2, s6
@@ -3513,11 +3501,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_max_i32 s10, s9, 0
; GFX8-NEXT: s_min_i32 s9, s9, 0
; GFX8-NEXT: s_sub_i32 s9, 0x8000, s9
+; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s11, s3
-; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
; GFX8-NEXT: s_max_i32 s9, s9, s11
-; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_lshr_b32 s6, s0, 16
; GFX8-NEXT: s_min_i32 s9, s9, s10
@@ -3538,11 +3525,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_max_i32 s9, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s10, s4
-; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_max_i32 s3, s3, s10
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
; GFX8-NEXT: s_min_i32 s3, s3, s9
@@ -3563,11 +3549,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s9, s5
-; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s3, s3, s9
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_lshr_b32 s8, s2, 16
; GFX8-NEXT: s_min_i32 s3, s3, s4
@@ -3924,11 +3909,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s13, s12, 0
; GFX8-NEXT: s_min_i32 s12, s12, 0
; GFX8-NEXT: s_sub_i32 s12, 0x8000, s12
+; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s14, s4
-; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
; GFX8-NEXT: s_max_i32 s12, s12, s14
-; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s13, s13
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
; GFX8-NEXT: s_min_i32 s12, s12, s13
@@ -3949,11 +3933,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s12, s4, 0
; GFX8-NEXT: s_min_i32 s4, s4, 0
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s13, s5
-; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
; GFX8-NEXT: s_max_i32 s4, s4, s13
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
; GFX8-NEXT: s_min_i32 s4, s4, s12
@@ -3974,11 +3957,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s5, s4, 0
; GFX8-NEXT: s_min_i32 s4, s4, 0
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s12, s6
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_max_i32 s4, s4, s12
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
; GFX8-NEXT: s_min_i32 s4, s4, s5
@@ -3999,11 +3981,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s5, s4, 0
; GFX8-NEXT: s_min_i32 s4, s4, 0
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s7
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_max_i32 s4, s4, s6
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_min_i32 s4, s4, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index a60995ecde3a8..558c11ec9c300 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX8-NEXT: s_lshl_b32 s1, s1, 9
; GFX8-NEXT: s_add_i32 s3, s3, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, -1
+; GFX8-NEXT: s_add_i32 s2, s2, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_add_i32 s2, s2, 0x8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_add_i32 s3, s3, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, -1
+; GFX8-NEXT: s_add_i32 s2, s2, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_add_i32 s2, s2, 0x8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -387,11 +385,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_add_i32 s5, s5, 0x8001
; GFX8-NEXT: s_min_i32 s4, s4, -1
+; GFX8-NEXT: s_add_i32 s4, s4, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_add_i32 s4, s4, 0x8000
; GFX8-NEXT: s_max_i32 s1, s5, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_min_i32 s1, s1, s4
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -401,11 +398,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s4, s3, -1
; GFX8-NEXT: s_add_i32 s4, s4, 0x8001
; GFX8-NEXT: s_min_i32 s3, s3, -1
+; GFX8-NEXT: s_add_i32 s3, s3, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_add_i32 s3, s3, 0x8000
; GFX8-NEXT: s_max_i32 s2, s4, s2
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sub_i32 s1, s1, s2
@@ -788,11 +784,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_add_i32 s9, s9, 0x8001
; GFX8-NEXT: s_min_i32 s8, s8, -1
+; GFX8-NEXT: s_add_i32 s8, s8, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_add_i32 s8, s8, 0x8000
; GFX8-NEXT: s_max_i32 s1, s9, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_min_i32 s1, s1, s8
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -802,11 +797,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s8, s5, -1
; GFX8-NEXT: s_add_i32 s8, s8, 0x8001
; GFX8-NEXT: s_min_i32 s5, s5, -1
+; GFX8-NEXT: s_add_i32 s5, s5, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_add_i32 s5, s5, 0x8000
; GFX8-NEXT: s_max_i32 s2, s8, s2
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_sub_i32 s1, s1, s2
@@ -816,11 +810,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s6, s5, -1
; GFX8-NEXT: s_add_i32 s6, s6, 0x8001
; GFX8-NEXT: s_min_i32 s5, s5, -1
+; GFX8-NEXT: s_add_i32 s5, s5, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_add_i32 s5, s5, 0x8000
; GFX8-NEXT: s_max_i32 s3, s6, s3
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_sub_i32 s2, s2, s3
@@ -830,14 +823,13 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s4, s7, 8
; GFX8-NEXT: s_add_i32 s6, s6, 0x8001
; GFX8-NEXT:...
[truncated]
|
6e5a085
to
9b283cd
Compare
687bf11
to
62031c0
Compare
6c9e836
to
082f286
Compare
39ae19d
to
b7032eb
Compare
082f286
to
9731c98
Compare
b7032eb
to
ad05d05
Compare
9731c98
to
8a7e773
Compare
No description provided.