Revert "AMDGPU: Try constant fold after folding immediate (#141862)" #148197
Conversation
@llvm/pr-subscribers-backend-amdgpu

Author: Juan Manuel Martinez Caamaño (jmmartinez)

Changes

This reverts commit 80064b6. The patch triggers a crash when the folded use can have 2 operands in the fold list. See #148187 for more info.

SWDEV-542372

Full diff: https://github.com/llvm/llvm-project/pull/148197.diff

9 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 0ed06c37507af..b8fecc382c64a 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1782,12 +1782,6 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
<< static_cast<int>(Fold.UseOpNo) << " of "
<< *Fold.UseMI);
-
- if (Fold.isImm() && tryConstantFoldOp(Fold.UseMI)) {
- LLVM_DEBUG(dbgs() << "Constant folded " << *Fold.UseMI);
- Changed = true;
- }
-
} else if (Fold.Commuted) {
// Restoring instruction's original operand order if fold has failed.
TII->commuteInstruction(*Fold.UseMI, false);
diff --git a/llvm/test/CodeGen/AMDGPU/bit-op-reduce-width-known-bits.ll b/llvm/test/CodeGen/AMDGPU/bit-op-reduce-width-known-bits.ll
index ad26dfa7f93e8..ac5f9b6b483eb 100644
--- a/llvm/test/CodeGen/AMDGPU/bit-op-reduce-width-known-bits.ll
+++ b/llvm/test/CodeGen/AMDGPU/bit-op-reduce-width-known-bits.ll
@@ -105,8 +105,9 @@ define i64 @v_xor_i64_known_i32_from_range_use_out_of_block(i64 %x) {
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT: ; %bb.1: ; %inc
; CHECK-NEXT: v_not_b32_e32 v2, v4
+; CHECK-NEXT: v_not_b32_e32 v3, 0
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
-; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v1, vcc
+; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index e7177a5e7160e..fe2b0bb1ff6ae 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -961,25 +961,3 @@ body: |
S_ENDPGM 0, implicit %2, implicit %3
...
-
----
-name: constant_v_or_b32_uses_subreg_or_0_regression
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GCN-LABEL: name: constant_v_or_b32_uses_subreg_or_0_regression
- ; GCN: liveins: $vgpr0, $vgpr1
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]]
- %0:vgpr_32 = COPY $vgpr0
- %1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vreg_64 = REG_SEQUENCE %2:vgpr_32, %subreg.sub0, %0:vgpr_32, %subreg.sub1
- %4:vgpr_32 = V_OR_B32_e64 %3.sub0:vreg_64, %1, implicit $exec
- S_ENDPGM 0, implicit %4
-
-...
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index 74c4a2da50221..d00fd9b967f37 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -43,7 +43,8 @@ body: |
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[DEF]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1
- ; GCN-NEXT: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[DEF2]], [[REG_SEQUENCE]].sub0, implicit $exec
+ ; GCN-NEXT: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 0, [[DEF1]], implicit $exec
+ ; GCN-NEXT: [[V_XOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[DEF2]], [[REG_SEQUENCE]].sub0, implicit $exec
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir
index dc03eb74cbf11..b1aa88969c5bb 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir
@@ -8,8 +8,8 @@ body: |
; CHECK-LABEL: name: test_tryFoldZeroHighBits_skips_nonreg
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
- ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
+ ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 65535, 0, implicit $exec
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_AND_B32_e64_]]
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1
%2:vgpr_32 = V_AND_B32_e64 65535, %1.sub0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index df496258a2509..15eb41a1a5b65 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -404,11 +404,12 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, -1, v0
; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, -1, v1, vcc
-; GCN-IR-NEXT: v_not_b32_e32 v4, v10
+; GCN-IR-NEXT: v_not_b32_e32 v5, v10
; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[6:7], v8
-; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, v4, v11
+; GCN-IR-NEXT: v_not_b32_e32 v4, 0
+; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, v5, v11
; GCN-IR-NEXT: v_mov_b32_e32 v10, 0
-; GCN-IR-NEXT: v_addc_u32_e64 v7, s[4:5], -1, 0, vcc
+; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
; GCN-IR-NEXT: v_mov_b32_e32 v11, 0
; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 47dfa9f4fc2d3..c729c3fb8a4e4 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -380,11 +380,12 @@ define i64 @v_test_srem(i64 %x, i64 %y) {
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, -1, v2
; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, -1, v3, vcc
-; GCN-IR-NEXT: v_not_b32_e32 v6, v12
+; GCN-IR-NEXT: v_not_b32_e32 v7, v12
; GCN-IR-NEXT: v_lshr_b64 v[10:11], v[0:1], v8
-; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v6, v13
+; GCN-IR-NEXT: v_not_b32_e32 v6, 0
+; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v7, v13
; GCN-IR-NEXT: v_mov_b32_e32 v12, 0
-; GCN-IR-NEXT: v_addc_u32_e64 v9, s[4:5], -1, 0, vcc
+; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
; GCN-IR-NEXT: v_mov_b32_e32 v13, 0
; GCN-IR-NEXT: v_mov_b32_e32 v7, 0
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index e9017939f8a4a..5acbb044c1057 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -348,9 +348,10 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) {
; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[0:1], v10
; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v3, vcc
; GCN-IR-NEXT: v_not_b32_e32 v0, v14
+; GCN-IR-NEXT: v_not_b32_e32 v1, 0
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v15
; GCN-IR-NEXT: v_mov_b32_e32 v10, 0
-; GCN-IR-NEXT: v_addc_u32_e64 v1, s[4:5], -1, 0, vcc
+; GCN-IR-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
; GCN-IR-NEXT: v_mov_b32_e32 v11, 0
; GCN-IR-NEXT: v_mov_b32_e32 v7, 0
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 6480a88d40f5a..94f1b83ea2765 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -355,11 +355,12 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) {
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v2
; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, -1, v3, vcc
-; GCN-IR-NEXT: v_not_b32_e32 v6, v12
+; GCN-IR-NEXT: v_not_b32_e32 v7, v12
; GCN-IR-NEXT: v_lshr_b64 v[10:11], v[0:1], v8
-; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v6, v13
+; GCN-IR-NEXT: v_not_b32_e32 v6, 0
+; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v7, v13
; GCN-IR-NEXT: v_mov_b32_e32 v12, 0
-; GCN-IR-NEXT: v_addc_u32_e64 v9, s[4:5], -1, 0, vcc
+; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
; GCN-IR-NEXT: v_mov_b32_e32 v13, 0
; GCN-IR-NEXT: v_mov_b32_e32 v7, 0
Force-pushed from 063b053 to 031f323
This reverts commit 80064b6. The patch triggers a crash when the folded use can have 2 operands in the fold list. See llvm#148187 for more info. SWDEV-542372
Force-pushed from 031f323 to 2f8a888
@@ -0,0 +1,63 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
small nit: I'd avoid using the ticket number in the test name. Just name the test si-fold-operand-constant-fold-op and add a comment to describe what the test case is for.

Not needed anymore once #148205 lands.
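For reference, here is a sketch of what that suggestion could look like as the header of the renamed file (presumably under llvm/test/CodeGen/AMDGPU/si-fold-operand-constant-fold-op.ll). The NOTE and RUN lines are the ones from the test added in this PR; the descriptive comment text is hypothetical and only paraphrases the PR description:

; Regression test for a SIFoldOperands crash when the folded use has two
; operands in the fold list (see #148187).
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s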
This reverts commit 80064b6.
The patch triggers a crash when the folded use can have 2 operands in the fold list.
See #148187 for more info.
SWDEV-542372
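To make the failure mode easier to picture, below is a minimal, self-contained C++ sketch of the hazard described above. It is not LLVM's SIFoldOperands code: every type and helper in it (Inst, FoldCandidate, the toy tryConstantFoldOp rule) is invented for illustration, and the real crash mechanics may differ in detail. It only models the general shape of the problem: two fold candidates cache operand indices into the same use instruction, and an eager constant fold right after the first immediate fold rewrites that instruction, leaving the second candidate's cached index stale.

// Illustrative model only, not LLVM code: two fold candidates reference the
// same use instruction, and an eager constant fold after the first immediate
// fold rewrites that instruction before the second candidate is processed.
#include <cstdio>
#include <string>
#include <vector>

struct Operand {
  bool IsImm;
  int Val; // immediate value if IsImm, otherwise a register number
};

struct Inst {
  std::string Opcode;
  std::vector<Operand> Operands;
};

struct FoldCandidate {
  Inst *UseMI;    // instruction to fold an immediate into
  size_t UseOpNo; // operand index cached when the candidate was created
  int Imm;        // immediate to substitute
};

// Toy constant folder: "OR x, 0" collapses to a single-operand COPY of x.
static bool tryConstantFoldOp(Inst &I) {
  if (I.Opcode != "OR" || I.Operands.size() != 2)
    return false;
  for (size_t Zero = 0; Zero < 2; ++Zero) {
    if (I.Operands[Zero].IsImm && I.Operands[Zero].Val == 0) {
      Operand Other = I.Operands[1 - Zero];
      I.Opcode = "COPY";
      I.Operands = {Other};
      return true;
    }
  }
  return false;
}

int main() {
  Inst Or{"OR", {{false, 1}, {false, 2}}}; // OR r1, r2
  // Both operands of the same use were queued for immediate folding.
  std::vector<FoldCandidate> FoldList = {{&Or, 0, 0}, {&Or, 1, 7}};

  for (const FoldCandidate &Fold : FoldList) {
    // Guard: if an earlier fold's eager constant fold already rewrote this
    // use, the cached operand index may no longer exist.
    if (Fold.UseOpNo >= Fold.UseMI->Operands.size()) {
      std::printf("stale fold candidate for operand %zu, skipping\n",
                  Fold.UseOpNo);
      continue;
    }
    Fold.UseMI->Operands[Fold.UseOpNo] = {true, Fold.Imm};
    if (tryConstantFoldOp(*Fold.UseMI))
      std::printf("constant folded use into %s\n",
                  Fold.UseMI->Opcode.c_str());
  }
  return 0;
}

With the guard in place the program prints one "constant folded" line and then skips the stale second candidate; removing the guard turns that second candidate into an out-of-bounds access. That is the general kind of breakage this revert sidesteps by dropping the eager constant fold after immediate folding.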