Skip to content

[AMDGPU] Add KnownBits simplification combines to RegBankCombiner #141591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: users/pierre-vh/rbcomb-bfx
Choose a base branch
from

Conversation

Pierre-vh
Copy link
Contributor

@Pierre-vh Pierre-vh commented May 27, 2025

No description provided.

Copy link
Contributor Author

Pierre-vh commented May 27, 2025

Warning

This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
Learn more

This stack of pull requests is managed by Graphite. Learn more about stacking.

@llvmbot
Copy link
Member

llvmbot commented May 27, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-globalisel

Author: Pierre van Houtryve (Pierre-vh)

Changes

This


Patch is 38.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141591.diff

8 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUCombine.td (+2-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll (+30-29)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll (+21-40)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (+22-41)
  • (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+13-17)
  • (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.ll (+5-6)
  • (modified) llvm/test/CodeGen/AMDGPU/lround.ll (+9-9)
  • (modified) llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll (+2-14)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 96be17c487130..df867aaa204b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -210,5 +210,6 @@ def AMDGPURegBankCombiner : GICombiner<
    fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
    identity_combines, redundant_and, constant_fold_cast_op,
    cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
-   lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> {
+   lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract,
+   known_bits_simplifications]> {
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 6baa10bb48621..cc0f45681a3e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1744,63 +1744,64 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX6-LABEL: v_lshr_i65_33:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v3, v1
-; GFX6-NEXT:    v_mov_b32_e32 v0, 1
+; GFX6-NEXT:    v_mov_b32_e32 v3, 1
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0
+; GFX6-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX6-NEXT:    v_lshl_b64 v[2:3], v[3:4], 31
+; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    v_mov_b32_e32 v1, 0
-; GFX6-NEXT:    v_and_b32_e32 v0, 1, v2
-; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 31
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX6-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_lshr_i65_33:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v3, v1
-; GFX8-NEXT:    v_mov_b32_e32 v0, 1
+; GFX8-NEXT:    v_mov_b32_e32 v3, 1
+; GFX8-NEXT:    v_mov_b32_e32 v4, 0
+; GFX8-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8-NEXT:    v_and_b32_e32 v0, 1, v2
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_lshr_i65_33:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1
+; GFX9-NEXT:    v_mov_b32_e32 v3, 1
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9-NEXT:    v_and_b32_e32 v0, 1, v2
-; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX9-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_lshr_i65_33:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v3, v1
-; GFX10-NEXT:    v_mov_b32_e32 v0, 1
+; GFX10-NEXT:    v_mov_b32_e32 v3, 1
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0
+; GFX10-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-NEXT:    v_and_b32_e32 v0, 1, v2
-; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX10-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_lshr_i65_33:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, 1
-; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX11-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX11-NEXT:    v_mov_b32_e32 v3, 1
+; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_and_b32 v3, 1, v2
+; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %result = lshr i65 %value, 33
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 7b2872f081979..93629f3bf9548 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 9
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s3
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s3
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -386,11 +384,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 8
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_max_i32 s1, s4, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s5
 ; GFX8-NEXT:    s_min_i32 s1, s1, s4
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -400,11 +397,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s4, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_max_i32 s2, s3, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s4
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_add_i32 s1, s1, s2
@@ -787,11 +783,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 24
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_sub_i32 s8, 0x8000, s8
+; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_max_i32 s1, s8, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s9
 ; GFX8-NEXT:    s_min_i32 s1, s1, s8
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -801,11 +796,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s8, s5, 0
 ; GFX8-NEXT:    s_min_i32 s5, s5, 0
 ; GFX8-NEXT:    s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT:    s_sub_i32 s8, 0x7fff, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sub_i32 s8, 0x7fff, s8
 ; GFX8-NEXT:    s_max_i32 s2, s5, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s8
 ; GFX8-NEXT:    s_min_i32 s2, s2, s5
 ; GFX8-NEXT:    s_add_i32 s1, s1, s2
@@ -815,11 +809,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s6, s5, 0
 ; GFX8-NEXT:    s_min_i32 s5, s5, 0
 ; GFX8-NEXT:    s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_max_i32 s3, s5, s3
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s6
 ; GFX8-NEXT:    s_min_i32 s3, s3, s5
 ; GFX8-NEXT:    s_add_i32 s2, s2, s3
@@ -829,14 +822,13 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_min_i32 s5, s5, 0
 ; GFX8-NEXT:    s_lshl_b32 s4, s7, 8
 ; GFX8-NEXT:    s_sub_i32 s5, 0x8000, s5
-; GFX8-NEXT:    s_sext_i32_i16 s5, s5
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
-; GFX8-NEXT:    s_max_i32 s4, s5, s4
+; GFX8-NEXT:    s_sext_i32_i16 s5, s5
+; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s0, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
+; GFX8-NEXT:    s_max_i32 s4, s5, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s6
 ; GFX8-NEXT:    s_ashr_i32 s0, s0, 8
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
@@ -2631,11 +2623,10 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX8-NEXT:    s_max_i32 s3, s2, 0
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s3
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -2835,11 +2826,10 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX8-NEXT:    s_max_i32 s4, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s1
-; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_max_i32 s3, s3, s5
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 ; GFX8-NEXT:    s_min_i32 s3, s3, s4
@@ -3190,11 +3180,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s7, s6, 0
 ; GFX8-NEXT:    s_min_i32 s6, s6, 0
 ; GFX8-NEXT:    s_sub_i32 s6, 0x8000, s6
+; GFX8-NEXT:    s_sub_i32 s7, 0x7fff, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s2
-; GFX8-NEXT:    s_sub_i32 s7, 0x7fff, s7
 ; GFX8-NEXT:    s_max_i32 s6, s6, s8
-; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 ; GFX8-NEXT:    s_min_i32 s6, s6, s7
@@ -3215,11 +3204,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s6, s2, 0
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s3
-; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_max_i32 s2, s2, s7
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 ; GFX8-NEXT:    s_min_i32 s2, s2, s6
@@ -3513,11 +3501,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s10, s9, 0
 ; GFX8-NEXT:    s_min_i32 s9, s9, 0
 ; GFX8-NEXT:    s_sub_i32 s9, 0x8000, s9
+; GFX8-NEXT:    s_sub_i32 s10, 0x7fff, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s11, s3
-; GFX8-NEXT:    s_sub_i32 s10, 0x7fff, s10
 ; GFX8-NEXT:    s_max_i32 s9, s9, s11
-; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s10
 ; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
 ; GFX8-NEXT:    s_min_i32 s9, s9, s10
@@ -3538,11 +3525,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s9, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s4
-; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_max_i32 s3, s3, s10
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT:    s_min_i32 s3, s3, s9
@@ -3563,11 +3549,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s4, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s5
-; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_max_i32 s3, s3, s9
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_lshr_b32 s8, s2, 16
 ; GFX8-NEXT:    s_min_i32 s3, s3, s4
@@ -3924,11 +3909,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s13, s12, 0
 ; GFX8-NEXT:    s_min_i32 s12, s12, 0
 ; GFX8-NEXT:    s_sub_i32 s12, 0x8000, s12
+; GFX8-NEXT:    s_sub_i32 s13, 0x7fff, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s14, s4
-; GFX8-NEXT:    s_sub_i32 s13, 0x7fff, s13
 ; GFX8-NEXT:    s_max_i32 s12, s12, s14
-; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s13
 ; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 ; GFX8-NEXT:    s_min_i32 s12, s12, s13
@@ -3949,11 +3933,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s12, s4, 0
 ; GFX8-NEXT:    s_min_i32 s4, s4, 0
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s12, 0x7fff, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s5
-; GFX8-NEXT:    s_sub_i32 s12, 0x7fff, s12
 ; GFX8-NEXT:    s_max_i32 s4, s4, s13
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s12
@@ -3974,11 +3957,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s5, s4, 0
 ; GFX8-NEXT:    s_min_i32 s4, s4, 0
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s6
-; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_max_i32 s4, s4, s12
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
@@ -3999,11 +3981,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s5, s4, 0
 ; GFX8-NEXT:    s_min_i32 s4, s4, 0
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s7
-; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_max_i32 s4, s4, s6
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index a60995ecde3a8..558c11ec9c300 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 9
 ; GFX8-NEXT:    s_add_i32 s3, s3, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_add_i32 s3, s3, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -387,11 +385,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_add_i32 s5, s5, 0x8001
 ; GFX8-NEXT:    s_min_i32 s4, s4, -1
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s5, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_min_i32 s1, s1, s4
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -401,11 +398,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s4, s3, -1
 ; GFX8-NEXT:    s_add_i32 s4, s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s3, s3, -1
+; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_max_i32 s2, s4, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s2
@@ -788,11 +784,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_add_i32 s9, s9, 0x8001
 ; GFX8-NEXT:    s_min_i32 s8, s8, -1
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s9, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_min_i32 s1, s1, s8
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -802,11 +797,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s8, s5, -1
 ; GFX8-NEXT:    s_add_i32 s8, s8, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, -1
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s2, s8, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_min_i32 s2, s2, s5
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s2
@@ -816,11 +810,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s6, s5, -1
 ; GFX8-NEXT:    s_add_i32 s6, s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, -1
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s3, s6, s3
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_min_i32 s3, s3, s5
 ; GFX8-NEXT:    s_sub_i32 s2, s2, s3
@@ -830,14 +823,13 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s4, s7, 8
 ; GFX8-NEXT:    s_add_i32 s6, s6, 0x8001
 ; GFX8-NEXT:...
[truncated]

@Pierre-vh Pierre-vh force-pushed the users/pierre-vh/rbcomb-bfx branch from 6e5a085 to 9b283cd Compare May 27, 2025 13:45
@Pierre-vh Pierre-vh force-pushed the users/pierre-vh/rbcomb-kb branch 2 times, most recently from 687bf11 to 62031c0 Compare May 28, 2025 08:31
@Pierre-vh Pierre-vh force-pushed the users/pierre-vh/rbcomb-bfx branch 2 times, most recently from 6c9e836 to 082f286 Compare May 28, 2025 09:12
@Pierre-vh Pierre-vh force-pushed the users/pierre-vh/rbcomb-kb branch 2 times, most recently from 39ae19d to b7032eb Compare May 28, 2025 09:17
@Pierre-vh Pierre-vh force-pushed the users/pierre-vh/rbcomb-bfx branch from 082f286 to 9731c98 Compare May 28, 2025 09:17
@Pierre-vh Pierre-vh force-pushed the users/pierre-vh/rbcomb-kb branch from b7032eb to ad05d05 Compare May 28, 2025 11:42
@Pierre-vh Pierre-vh force-pushed the users/pierre-vh/rbcomb-bfx branch from 9731c98 to 8a7e773 Compare May 28, 2025 11:42
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants