Skip to content

Commit a5aaadf

Browse files
committed
added sgpr case, refactoring
1 parent 6c6a477 commit a5aaadf

13 files changed

+782
-783
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 25 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,8 @@ class SIShrinkInstructions {
5252
bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
5353
unsigned SubReg) const;
5454
Register trySwapCndOperands(MachineInstr &MI) const;
55-
bool
56-
shouldSwapCndOperands(MachineInstr &MI,
57-
SmallVector<MachineOperand *, 4> &UsesToProcess) const;
55+
bool shouldSwapCndOperands(Register Reg,
56+
std::vector<MachineInstr *> &UsesToProcess) const;
5857
unsigned getInverseCompareOpcode(MachineInstr &MI) const;
5958
TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
6059
unsigned I) const;
@@ -954,31 +953,34 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
954953
}
955954

956955
bool SIShrinkInstructions::shouldSwapCndOperands(
957-
MachineInstr &MI, SmallVector<MachineOperand *, 4> &UsesToProcess) const {
958-
auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
956+
Register Reg, std::vector<MachineInstr *> &UsesToProcess) const {
957+
auto AllUses = MRI->use_nodbg_instructions(Reg);
959958
int InstsToSwap = 0;
960959

961-
for (auto &Use : AllUses) {
962-
MachineInstr *UseInst = Use.getParent();
963-
if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
960+
for (auto &UseInst : AllUses) {
961+
if (UseInst.getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
964962
return false;
965963

966-
UsesToProcess.push_back(&Use);
964+
UsesToProcess.push_back(&UseInst);
967965

968-
MachineOperand &Src0 = UseInst->getOperand(2);
969-
MachineOperand &Src1 = UseInst->getOperand(4);
966+
MachineOperand &Src0 = UseInst.getOperand(2);
967+
MachineOperand &Src1 = UseInst.getOperand(4);
970968

971-
bool Src0Imm = Src0.isImm();
972-
bool Src1Imm = Src1.isImm();
969+
//if instruction has source modifiers it cannot be converted to VOP2
970+
if (UseInst.getOperand(1).getImm() != SISrcMods::NONE ||
971+
UseInst.getOperand(3).getImm() != SISrcMods::NONE)
972+
continue;
973+
974+
bool Src0IsVGPR = Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg());
975+
bool Src1IsVGPR = Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg());
973976

974-
if (!Src1Imm && Src0Imm)
977+
//Src1 always has to be VGPR in VOP2
978+
if (!Src0IsVGPR && Src1IsVGPR)
975979
InstsToSwap--;
976-
else if (Src1Imm && !Src0Imm &&
977-
UseInst->getOperand(1).getImm() == SISrcMods::NONE &&
978-
TRI->isVGPR(*MRI, Src0.getReg()))
980+
else if (Src0IsVGPR && !Src1IsVGPR)
979981
InstsToSwap++;
980982
}
981-
return (InstsToSwap > 0);
983+
return InstsToSwap > 0;
982984
}
983985

984986
static void swapCndOperands(MachineInstr &MI) {
@@ -1013,9 +1015,9 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
10131015
Register Reg = MI.getOperand(0).getReg();
10141016

10151017
unsigned Opcode = getInverseCompareOpcode(MI);
1016-
SmallVector<MachineOperand *, 4> UsesToProcess;
1018+
std::vector<MachineInstr *> UsesToProcess;
10171019
if (!Opcode ||
1018-
!SIShrinkInstructions::shouldSwapCndOperands(MI, UsesToProcess))
1020+
!SIShrinkInstructions::shouldSwapCndOperands(Reg, UsesToProcess))
10191021
return Reg;
10201022

10211023
auto DL = MI.getDebugLoc();
@@ -1027,15 +1029,14 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
10271029

10281030
unsigned OpNum = MI.getNumExplicitOperands();
10291031
for (unsigned Idx = 1; Idx < OpNum; Idx++) {
1030-
MachineOperand Op = MI.getOperand(Idx);
1032+
MachineOperand &Op = MI.getOperand(Idx);
10311033
InverseCompare.add(Op);
10321034
if (Op.isReg() && Op.isKill())
10331035
InverseCompare->getOperand(Idx).setIsKill(false);
10341036
}
10351037

1036-
for (auto &Use : UsesToProcess) {
1037-
MachineInstr *Inst = Use->getParent();
1038-
swapCndOperands(*Inst);
1038+
for (auto Use : UsesToProcess) {
1039+
swapCndOperands(*Use);
10391040
}
10401041

10411042
MRI->replaceRegWith(Reg, NewVCC);

llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,8 @@ define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %sr
145145
; GFX10PLUS-LABEL: select_vcc_s_s:
146146
; GFX10PLUS: ; %bb.0:
147147
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, s3
148-
; GFX10PLUS-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1
149-
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v2, s2, vcc_lo
148+
; GFX10PLUS-NEXT: v_cmp_neq_f32_e32 vcc_lo, v0, v1
149+
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, s2, v2, vcc_lo
150150
; GFX10PLUS-NEXT: ; return to shader part epilog
151151
%cmp = fcmp oeq float %cmp0, %cmp1
152152
%result = select i1 %cmp, float %src0, float %src1

0 commit comments

Comments
 (0)