@@ -10163,7 +10163,7 @@ static bool followSubRegDef(MachineInstr &MI,
 }
 
 MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
-                                     MachineRegisterInfo &MRI) {
+                                     const MachineRegisterInfo &MRI) {
   assert(MRI.isSSA());
   if (!P.Reg.isVirtual())
     return nullptr;
@@ -10628,6 +10628,8 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
 static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
                         const SIRegisterInfo &RI) {
   MachineInstr *KillsSCC = nullptr;
+  if (SCCValid->getParent() != SCCRedefine->getParent())
+    return false;
   for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
                                      SCCRedefine->getIterator())) {
     if (MI.modifiesRegister(AMDGPU::SCC, &RI))
@@ -10672,8 +10674,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   if (CmpValue != 0)
     return false;
 
-  MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
-  if (!Def || Def->getParent() != CmpInstr.getParent())
+  MachineInstr *Def = MRI->getVRegDef(SrcReg);
+  if (!Def)
     return false;
 
   // For S_OP that set SCC = DST!=0, do the transformation
@@ -10692,6 +10694,32 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   if (!optimizeSCC(Def, &CmpInstr, RI))
     return false;
 
+  // If s_or_b32 result, sY, is unused (i.e. it is effectively a 64-bit
+  // s_cmp_lg of a register pair) and the inputs are the hi and lo-halves of a
+  // 64-bit foldableSelect then delete s_or_b32 in the sequence:
+  //   sX = s_cselect_b64 (non-zero imm), 0
+  //   sLo = copy sX.sub0
+  //   sHi = copy sX.sub1
+  //   sY = s_or_b32 sLo, sHi
+  if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
+      MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
+    const MachineOperand &OrOpnd1 = Def->getOperand(1);
+    const MachineOperand &OrOpnd2 = Def->getOperand(2);
+    if (OrOpnd1.isReg() && OrOpnd2.isReg()) {
+      MachineInstr *Def1 = MRI->getVRegDef(OrOpnd1.getReg());
+      MachineInstr *Def2 = MRI->getVRegDef(OrOpnd2.getReg());
+      if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
+          Def2->getOpcode() == AMDGPU::COPY && Def1->getOperand(1).isReg() &&
+          Def2->getOperand(1).isReg() &&
+          Def1->getOperand(1).getSubReg() == AMDGPU::sub0 &&
+          Def2->getOperand(1).getSubReg() == AMDGPU::sub1 &&
+          Def1->getOperand(1).getReg() == Def2->getOperand(1).getReg()) {
+        MachineInstr *Select = MRI->getVRegDef(Def1->getOperand(1).getReg());
+        if (Select && foldableSelect(*Select))
+          optimizeSCC(Select, Def, RI);
+      }
+    }
+  }
   return true;
 };
 
@@ -10721,8 +10749,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n
   // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 1 << n => s_bitcmp0_b64 $src, n
 
-  MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
-  if (!Def || Def->getParent() != CmpInstr.getParent())
+  MachineInstr *Def = MRI->getVRegDef(SrcReg);
+  if (!Def)
     return false;
 
   if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
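As a rough illustration (not part of the patch, with made-up virtual register names and operands), the sequence the new block in optimizeCompareInstr targets looks roughly like this in MIR, after a 64-bit compare-with-zero of an s_cselect_b64 result has been expanded into sub-register copies, an s_or_b32 and a 32-bit compare:

    ; illustrative sketch only; names and exact operands are assumptions
    %sel:sreg_64 = S_CSELECT_B64 -1, 0, implicit $scc     ; foldableSelect
    %lo:sreg_32 = COPY %sel.sub0
    %hi:sreg_32 = COPY %sel.sub1
    %or:sreg_32 = S_OR_B32 %lo, %hi, implicit-def $scc    ; %or otherwise unused
    S_CMP_LG_U32 %or, 0, implicit-def $scc

The pre-existing fold already removes the S_CMP_LG_U32, since S_OR_B32 sets SCC to "result != 0". The new code then recognizes that %or merely re-derives the SCC that was already live at the foldable select, so the second optimizeSCC(Select, Def, RI) call can drop the S_OR_B32 as well, provided the select and the or are in the same block and nothing between them clobbers SCC.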