@@ -7227,27 +7227,52 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
7227
7227
return DeferredList.contains (MI);
7228
7228
}
7229
7229
7230
- // 16bit SALU use sgpr32. If a 16bit SALU get lowered to VALU in true16 mode,
7231
- // sgpr32 is replaced to vgpr32 which is illegal in t16 inst. Need to add
7232
- // subreg access properly. This can be removed after we have sgpr16 in place
7233
- void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &Inst,
7230
+ // Legalize size mismatches between 16bit and 32bit registers in v2s copy
7231
+ // lowering (change sgpr to vgpr).
7232
+ // This is mainly caused by 16bit SALU and 16bit VALU using regs of different
7233
+ // sizes. Need to legalize the size of the operands during the vgpr lowering
7234
+ // chain. This can be removed after we have sgpr16 in place
7235
+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &MI, unsigned OpIdx,
7234
7236
MachineRegisterInfo &MRI) const {
7235
- unsigned Opcode = Inst.getOpcode ();
7236
- if (!AMDGPU::isTrue16Inst (Opcode) || !ST.useRealTrue16Insts ())
7237
+ if (!ST.useRealTrue16Insts ())
7237
7238
return ;
7238
7239
7239
- for (MachineOperand &Op : Inst.explicit_operands ()) {
7240
- unsigned OpIdx = Op.getOperandNo ();
7241
- if (!OpIdx)
7242
- continue ;
7243
- if (Op.isReg () && RI.isVGPR (MRI, Op.getReg ())) {
7244
- unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ;
7245
- const TargetRegisterClass *RC = RI.getRegClass (RCID);
7246
- if (RI.getRegSizeInBits (*RC) == 16 ) {
7247
- Op.setSubReg (AMDGPU::lo16);
7248
- }
7249
- }
7250
- }
7240
+ unsigned Opcode = MI.getOpcode ();
7241
+ MachineBasicBlock *MBB = MI.getParent ();
7242
+ // Legalize operands and check for size mismatch
7243
+ if (!OpIdx || OpIdx >= MI.getNumExplicitOperands () ||
7244
+ OpIdx >= get (Opcode).getNumOperands ())
7245
+ return ;
7246
+
7247
+ MachineOperand &Op = MI.getOperand (OpIdx);
7248
+ if (!Op.isReg () || !Op.getReg ().isVirtual ())
7249
+ return ;
7250
+
7251
+ const TargetRegisterClass *CurrRC = MRI.getRegClass (Op.getReg ());
7252
+ if (!RI.isVGPRClass (CurrRC))
7253
+ return ;
7254
+
7255
+ unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ;
7256
+ const TargetRegisterClass *ExpectedRC = RI.getRegClass (RCID);
7257
+ if (RI.getMatchingSuperRegClass (CurrRC, ExpectedRC, AMDGPU::lo16)) {
7258
+ Op.setSubReg (AMDGPU::lo16);
7259
+ } else if (RI.getMatchingSuperRegClass (ExpectedRC, CurrRC, AMDGPU::lo16)) {
7260
+ const DebugLoc &DL = MI.getDebugLoc ();
7261
+ Register NewDstReg = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
7262
+ Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7263
+ BuildMI (*MBB, MI, DL, get (AMDGPU::IMPLICIT_DEF), Undef);
7264
+ BuildMI (*MBB, MI, DL, get (AMDGPU::REG_SEQUENCE), NewDstReg)
7265
+ .addReg (Op.getReg ())
7266
+ .addImm (AMDGPU::lo16)
7267
+ .addReg (Undef)
7268
+ .addImm (AMDGPU::hi16);
7269
+ Op.setReg (NewDstReg);
7270
+ }
7271
+ }
7272
+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &MI,
7273
+ MachineRegisterInfo &MRI) const {
7274
+ for (unsigned OpIdx = 1 ; OpIdx < MI.getNumExplicitOperands (); OpIdx++)
7275
+ legalizeOperandsVALUt16 (MI, OpIdx, MRI);
7251
7276
}
7252
7277
7253
7278
void SIInstrInfo::moveToVALU (SIInstrWorklist &Worklist,
@@ -7769,15 +7794,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
7769
7794
return ;
7770
7795
}
7771
7796
7772
- // If this is a v2s copy src from 16bit to 32bit,
7773
- // replace vgpr copy to reg_sequence
7797
+ // If this is a v2s copy between 16bit and 32bit reg,
7798
+ // replace the vgpr copy with reg_sequence/extract_subreg
7774
7799
// This can be remove after we have sgpr16 in place
7775
7800
if (ST.useRealTrue16Insts () && Inst.isCopy () &&
7776
7801
Inst.getOperand (1 ).getReg ().isVirtual () &&
7777
7802
RI.isVGPR (MRI, Inst.getOperand (1 ).getReg ())) {
7778
7803
const TargetRegisterClass *SrcRegRC = getOpRegClass (Inst, 1 );
7779
- if (16 == RI.getRegSizeInBits (*SrcRegRC) &&
7780
- 32 == RI.getRegSizeInBits (*NewDstRC)) {
7804
+ if (RI.getMatchingSuperRegClass (NewDstRC, SrcRegRC, AMDGPU::lo16)) {
7781
7805
Register NewDstReg = MRI.createVirtualRegister (NewDstRC);
7782
7806
Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7783
7807
BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
@@ -7789,7 +7813,13 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
7789
7813
.addReg (Undef)
7790
7814
.addImm (AMDGPU::hi16);
7791
7815
Inst.eraseFromParent ();
7792
-
7816
+ MRI.replaceRegWith (DstReg, NewDstReg);
7817
+ addUsersToMoveToVALUWorklist (NewDstReg, MRI, Worklist);
7818
+ return ;
7819
+ } else if (RI.getMatchingSuperRegClass (SrcRegRC, NewDstRC,
7820
+ AMDGPU::lo16)) {
7821
+ Inst.getOperand (1 ).setSubReg (AMDGPU::lo16);
7822
+ Register NewDstReg = MRI.createVirtualRegister (NewDstRC);
7793
7823
MRI.replaceRegWith (DstReg, NewDstReg);
7794
7824
addUsersToMoveToVALUWorklist (NewDstReg, MRI, Worklist);
7795
7825
return ;
@@ -7885,23 +7915,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
7885
7915
assert (NewDstRC);
7886
7916
NewDstReg = MRI.createVirtualRegister (NewDstRC);
7887
7917
MRI.replaceRegWith (DstReg, NewDstReg);
7888
-
7889
- // Check useMI of NewInstr. If used by a true16 instruction,
7890
- // add a lo16 subreg access if size mismatched
7891
- // This can be remove after we have sgpr16 in place
7892
- if (ST.useRealTrue16Insts () && NewDstRC == &AMDGPU::VGPR_32RegClass) {
7893
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin (NewDstReg),
7894
- E = MRI.use_end ();
7895
- I != E; ++I) {
7896
- MachineInstr &UseMI = *I->getParent ();
7897
- unsigned UseMIOpcode = UseMI.getOpcode ();
7898
- if (AMDGPU::isTrue16Inst (UseMIOpcode) &&
7899
- (16 ==
7900
- RI.getRegSizeInBits (*getOpRegClass (UseMI, I.getOperandNo ())))) {
7901
- I->setSubReg (AMDGPU::lo16);
7902
- }
7903
- }
7904
- }
7905
7918
}
7906
7919
fixImplicitOperands (*NewInstr);
7907
7920
@@ -8709,6 +8722,8 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
8709
8722
++I;
8710
8723
} while (I != E && I->getParent () == &UseMI);
8711
8724
} else {
8725
+ legalizeOperandsVALUt16 (UseMI, OpNo, MRI);
8726
+
8712
8727
++I;
8713
8728
}
8714
8729
}
0 commit comments