@@ -10617,43 +10617,38 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1061710617 if (!Def || Def->getParent () != CmpInstr.getParent ())
1061810618 return false ;
1061910619
10620- if (!(Def->getOpcode () == AMDGPU::S_LSHL_B32 ||
10621- Def->getOpcode () == AMDGPU::S_LSHL_B64 ||
10622- Def->getOpcode () == AMDGPU::S_LSHR_B32 ||
10623- Def->getOpcode () == AMDGPU::S_LSHR_B64 ||
10624- Def->getOpcode () == AMDGPU::S_AND_B32 ||
10625- Def->getOpcode () == AMDGPU::S_AND_B64 ||
10626- Def->getOpcode () == AMDGPU::S_OR_B32 ||
10627- Def->getOpcode () == AMDGPU::S_OR_B64 ||
10628- Def->getOpcode () == AMDGPU::S_XOR_B32 ||
10629- Def->getOpcode () == AMDGPU::S_XOR_B64 ||
10630- Def->getOpcode () == AMDGPU::S_NAND_B32 ||
10631- Def->getOpcode () == AMDGPU::S_NAND_B64 ||
10632- Def->getOpcode () == AMDGPU::S_NOR_B32 ||
10633- Def->getOpcode () == AMDGPU::S_NOR_B64 ||
10634- Def->getOpcode () == AMDGPU::S_XNOR_B32 ||
10635- Def->getOpcode () == AMDGPU::S_XNOR_B64 ||
10636- Def->getOpcode () == AMDGPU::S_ANDN2_B32 ||
10637- Def->getOpcode () == AMDGPU::S_ANDN2_B64 ||
10638- Def->getOpcode () == AMDGPU::S_ORN2_B32 ||
10639- Def->getOpcode () == AMDGPU::S_ORN2_B64 ||
10640- Def->getOpcode () == AMDGPU::S_BFE_I32 ||
10641- Def->getOpcode () == AMDGPU::S_BFE_I64 ||
10642- Def->getOpcode () == AMDGPU::S_BFE_U32 ||
10643- Def->getOpcode () == AMDGPU::S_BFE_U64 ||
10644- Def->getOpcode () == AMDGPU::S_BCNT0_I32_B32 ||
10645- Def->getOpcode () == AMDGPU::S_BCNT0_I32_B64 ||
10646- Def->getOpcode () == AMDGPU::S_BCNT1_I32_B32 ||
10647- Def->getOpcode () == AMDGPU::S_BCNT1_I32_B64 ||
10648- Def->getOpcode () == AMDGPU::S_QUADMASK_B32 ||
10649- Def->getOpcode () == AMDGPU::S_QUADMASK_B64 ||
10650- Def->getOpcode () == AMDGPU::S_NOT_B32 ||
10651- Def->getOpcode () == AMDGPU::S_NOT_B64 ||
10652-
10653- ((Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10654- Def->getOpcode () == AMDGPU::S_CSELECT_B64) &&
10655- Def->getOperand (1 ).isImm () && Def->getOperand (1 ).getImm () &&
10656- !Def->getOperand (2 ).isImm () && !Def->getOperand (2 ).getImm ())))
10620+ bool CanOptimize = false ;
10621+ MachineOperand *SccDef =
10622+ Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr );
10623+
10624+ // For an S_OP that sets SCC = (DST != 0), do the transformation
10625+ //
10626+ // s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
10627+ if (SccDef && Def->getOpcode () != AMDGPU::S_ADD_I32 &&
10628+ Def->getOpcode () != AMDGPU::S_ADD_U32 &&
10629+ Def->getOpcode () != AMDGPU::S_ADDC_U32 &&
10630+ Def->getOpcode () != AMDGPU::S_SUB_I32 &&
10631+ Def->getOpcode () != AMDGPU::S_SUB_U32 &&
10632+ Def->getOpcode () != AMDGPU::S_SUBB_U32 &&
10633+ Def->getOpcode () != AMDGPU::S_MIN_I32 &&
10634+ Def->getOpcode () != AMDGPU::S_MIN_U32 &&
10635+ Def->getOpcode () != AMDGPU::S_MAX_I32 &&
10636+ Def->getOpcode () != AMDGPU::S_MAX_U32 &&
10637+ Def->getOpcode () != AMDGPU::S_ADDK_I32)
10638+ CanOptimize = true ;
10639+
10640+ // s_cmp_lg_* is redundant because the SCC input value for S_CSELECT* has
10641+ // the same value that will be calculated by s_cmp_lg_*
10642+ //
10643+ // s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
10644+ // imm), 0)
10645+ if ((Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10646+ Def->getOpcode () == AMDGPU::S_CSELECT_B64) &&
10647+ Def->getOperand (1 ).isImm () && Def->getOperand (1 ).getImm () &&
10648+ !Def->getOperand (2 ).isImm () && !Def->getOperand (2 ).getImm ())
10649+ CanOptimize = true ;
10650+
10651+ if (!CanOptimize)
1065710652 return false ;
1065810653
1065910654 for (auto I = std::next (Def->getIterator ()), E = CmpInstr.getIterator ();
@@ -10663,13 +10658,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1066310658 return false ;
1066410659 }
1066510660
10666- if (!(Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10667- Def->getOpcode () == AMDGPU::S_CSELECT_B64)) {
10668- MachineOperand *SccDef =
10669- Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr );
10670- assert (SccDef && " Def instruction must define SCC" );
10661+ if (SccDef)
1067110662 SccDef->setIsDead (false );
10672- }
1067310663
1067410664 CmpInstr.eraseFromParent ();
1067510665 return true ;
0 commit comments