@@ -10608,6 +10608,73 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1060810608 if (SrcReg2 && !getFoldableImm (SrcReg2, *MRI, CmpValue))
1060910609 return false ;
1061010610
10611+ const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10612+ this ]() -> bool {
10613+ if (CmpValue != 0 )
10614+ return false ;
10615+
10616+ MachineInstr *Def = MRI->getUniqueVRegDef (SrcReg);
10617+ if (!Def || Def->getParent () != CmpInstr.getParent ())
10618+ return false ;
10619+
10620+ if (!(Def->getOpcode () == AMDGPU::S_LSHL_B32 ||
10621+ Def->getOpcode () == AMDGPU::S_LSHL_B64 ||
10622+ Def->getOpcode () == AMDGPU::S_LSHR_B32 ||
10623+ Def->getOpcode () == AMDGPU::S_LSHR_B64 ||
10624+ Def->getOpcode () == AMDGPU::S_AND_B32 ||
10625+ Def->getOpcode () == AMDGPU::S_AND_B64 ||
10626+ Def->getOpcode () == AMDGPU::S_OR_B32 ||
10627+ Def->getOpcode () == AMDGPU::S_OR_B64 ||
10628+ Def->getOpcode () == AMDGPU::S_XOR_B32 ||
10629+ Def->getOpcode () == AMDGPU::S_XOR_B64 ||
10630+ Def->getOpcode () == AMDGPU::S_NAND_B32 ||
10631+ Def->getOpcode () == AMDGPU::S_NAND_B64 ||
10632+ Def->getOpcode () == AMDGPU::S_NOR_B32 ||
10633+ Def->getOpcode () == AMDGPU::S_NOR_B64 ||
10634+ Def->getOpcode () == AMDGPU::S_XNOR_B32 ||
10635+ Def->getOpcode () == AMDGPU::S_XNOR_B64 ||
10636+ Def->getOpcode () == AMDGPU::S_ANDN2_B32 ||
10637+ Def->getOpcode () == AMDGPU::S_ANDN2_B64 ||
10638+ Def->getOpcode () == AMDGPU::S_ORN2_B32 ||
10639+ Def->getOpcode () == AMDGPU::S_ORN2_B64 ||
10640+ Def->getOpcode () == AMDGPU::S_BFE_I32 ||
10641+ Def->getOpcode () == AMDGPU::S_BFE_I64 ||
10642+ Def->getOpcode () == AMDGPU::S_BFE_U32 ||
10643+ Def->getOpcode () == AMDGPU::S_BFE_U64 ||
10644+ Def->getOpcode () == AMDGPU::S_BCNT0_I32_B32 ||
10645+ Def->getOpcode () == AMDGPU::S_BCNT0_I32_B64 ||
10646+ Def->getOpcode () == AMDGPU::S_BCNT1_I32_B32 ||
10647+ Def->getOpcode () == AMDGPU::S_BCNT1_I32_B64 ||
10648+ Def->getOpcode () == AMDGPU::S_QUADMASK_B32 ||
10649+ Def->getOpcode () == AMDGPU::S_QUADMASK_B64 ||
10650+ Def->getOpcode () == AMDGPU::S_NOT_B32 ||
10651+ Def->getOpcode () == AMDGPU::S_NOT_B64 ||
10652+
10653+ ((Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10654+ Def->getOpcode () == AMDGPU::S_CSELECT_B64) &&
10655+ Def->getOperand (1 ).isImm () && Def->getOperand (1 ).getImm () &&
10656+ !Def->getOperand (2 ).isImm () && !Def->getOperand (2 ).getImm ())))
10657+ return false ;
10658+
10659+ for (auto I = std::next (Def->getIterator ()), E = CmpInstr.getIterator ();
10660+ I != E; ++I) {
10661+ if (I->modifiesRegister (AMDGPU::SCC, &RI) ||
10662+ I->killsRegister (AMDGPU::SCC, &RI))
10663+ return false ;
10664+ }
10665+
10666+ if (!(Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10667+ Def->getOpcode () == AMDGPU::S_CSELECT_B64)) {
10668+ MachineOperand *SccDef =
10669+ Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr );
10670+ assert (SccDef && " Def instruction must define SCC" );
10671+ SccDef->setIsDead (false );
10672+ }
10673+
10674+ CmpInstr.eraseFromParent ();
10675+ return true ;
10676+ };
10677+
1061110678 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
1061210679 this ](int64_t ExpectedValue, unsigned SrcSize,
1061310680 bool IsReversible, bool IsSigned) -> bool {
@@ -10735,15 +10802,15 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1073510802 case AMDGPU::S_CMP_LG_I32:
1073610803 case AMDGPU::S_CMPK_LG_U32:
1073710804 case AMDGPU::S_CMPK_LG_I32:
10738- return optimizeCmpAnd (0 , 32 , true , false );
10805+ return optimizeCmpAnd (0 , 32 , true , false ) || optimizeCmpSelect () ;
1073910806 case AMDGPU::S_CMP_GT_U32:
1074010807 case AMDGPU::S_CMPK_GT_U32:
1074110808 return optimizeCmpAnd (0 , 32 , false , false );
1074210809 case AMDGPU::S_CMP_GT_I32:
1074310810 case AMDGPU::S_CMPK_GT_I32:
1074410811 return optimizeCmpAnd (0 , 32 , false , true );
1074510812 case AMDGPU::S_CMP_LG_U64:
10746- return optimizeCmpAnd (0 , 64 , true , false );
10813+ return optimizeCmpAnd (0 , 64 , true , false ) || optimizeCmpSelect () ;
1074710814 }
1074810815
1074910816 return false ;
0 commit comments