@@ -15677,133 +15677,6 @@ static int getAlignedAGPRClassID(unsigned UnalignedClassID) {
15677
15677
}
15678
15678
}
15679
15679
15680
- static void removeInitWholeWaveBranch(MachineFunction &MF,
15681
- MachineRegisterInfo &MRI,
15682
- const SIInstrInfo *TII) {
15683
- // Remove SI_INIT_WHOLE_WAVE and the following SI_IF/END_CF and instead set
15684
- // EXEC to -1 at SI_END_CF.
15685
- auto IWWIt = find_if(MF.begin()->instrs(), [](const MachineInstr &MI) {
15686
- return MI.getOpcode() == AMDGPU::SI_INIT_WHOLE_WAVE;
15687
- });
15688
- if (IWWIt == MF.begin()->instr_end())
15689
- return; // We've been here before (GISel runs finalizeLowering twice).
15690
-
15691
- MachineInstr &If = *MRI.use_begin(IWWIt->getOperand(0).getReg())->getParent();
15692
- assert(If.getOpcode() == AMDGPU::SI_IF &&
15693
- "Unexpected user for init.whole.wave result");
15694
- assert(MRI.hasOneUse(IWWIt->getOperand(0).getReg()) &&
15695
- "Expected simple control flow");
15696
-
15697
- MachineInstr &EndCf = *MRI.use_begin(If.getOperand(0).getReg())->getParent();
15698
- MachineBasicBlock *EndBB = EndCf.getParent();
15699
-
15700
- // Update all the Phis: since we're removing a predecessor, we need to remove
15701
- // the corresponding pair of operands. However, we can't just drop the value
15702
- // coming from the 'if' block - that's going to be the value of the inactive
15703
- // lanes.
15704
- // %v = phi (%inactive, %if), (%active1, %shader1), ... (%activeN, %shaderN)
15705
- // should become
15706
- // %t = phi (%active1, %shader1), ... (%activeN, %shaderN)
15707
- // %v = v_set_inactive %t, %inactive
15708
- // Note that usually EndCf will be the first instruction after the phis and as
15709
- // such will serve as the end of the range when iterating over phis.
15710
- // Therefore, we shouldn't introduce any new instructions before it.
15711
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
15712
- auto AfterEndCf = std::next(EndCf.getIterator());
15713
- for (auto &Phi : EndBB->phis()) {
15714
- Register PhiDest = Phi.getOperand(0).getReg();
15715
- const TargetRegisterClass *PhiRC = MRI.getRegClass(PhiDest);
15716
-
15717
- Register NewPhiDest = MRI.createVirtualRegister(PhiRC);
15718
- Phi.getOperand(0).setReg(NewPhiDest);
15719
-
15720
- unsigned InactiveOpIdx = 0;
15721
- for (unsigned I = 1; I < Phi.getNumOperands(); I += 2) {
15722
- if (Phi.getOperand(I + 1).getMBB() == If.getParent()) {
15723
- InactiveOpIdx = I;
15724
- break;
15725
- }
15726
- }
15727
- assert(InactiveOpIdx != 0 && "Broken phi?");
15728
-
15729
- // At this point, the register class could be larger than 32 or 64, so we
15730
- // might have to use more than one V_SET_INACTIVE instruction.
15731
- unsigned Size = TRI.getRegSizeInBits(*PhiRC);
15732
- switch (Size) {
15733
- case 32:
15734
- BuildMI(*EndBB, AfterEndCf, Phi.getDebugLoc(),
15735
- TII->get(AMDGPU::V_SET_INACTIVE_B32), PhiDest)
15736
- .addReg(NewPhiDest)
15737
- .add(Phi.getOperand(InactiveOpIdx));
15738
- break;
15739
- case 64:
15740
- BuildMI(*EndBB, AfterEndCf, Phi.getDebugLoc(),
15741
- TII->get(AMDGPU::V_SET_INACTIVE_B64), PhiDest)
15742
- .addReg(NewPhiDest)
15743
- .add(Phi.getOperand(InactiveOpIdx));
15744
- break;
15745
- default: {
15746
- // For each 32-bit subregister of the register at InactiveOpIdx, insert
15747
- // a COPY to a new register, and a V_SET_INACTIVE_B32 using the
15748
- // corresponding subregisters of PhiDest and NewPhiDest.
15749
- // FIXME: There has to be a better way to iterate over this...
15750
- llvm::SmallVector<Register, 16> PhiSubRegs;
15751
- const unsigned SubRegIndices[] = {
15752
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
15753
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
15754
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
15755
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
15756
- AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
15757
- AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
15758
- AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
15759
- AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31};
15760
- const unsigned NumSubRegs = Size / 32;
15761
- assert(sizeof(SubRegIndices) / sizeof(SubRegIndices[0]) >= NumSubRegs &&
15762
- "Not enough subregister indices");
15763
- for (unsigned I = 0; I != NumSubRegs; ++I) {
15764
- unsigned SubRegIdx = SubRegIndices[I];
15765
- Register InactiveSubReg =
15766
- MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
15767
- BuildMI(*EndBB, AfterEndCf, Phi.getDebugLoc(), TII->get(AMDGPU::COPY),
15768
- InactiveSubReg)
15769
- .addReg(Phi.getOperand(InactiveOpIdx).getReg(), 0, SubRegIdx);
15770
-
15771
- Register AllLanesSubReg =
15772
- MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
15773
- BuildMI(*EndBB, AfterEndCf, Phi.getDebugLoc(),
15774
- TII->get(AMDGPU::V_SET_INACTIVE_B32), AllLanesSubReg)
15775
- .addReg(NewPhiDest, 0, SubRegIdx)
15776
- .addReg(InactiveSubReg);
15777
- PhiSubRegs.push_back(AllLanesSubReg);
15778
- }
15779
- // Now we need to combine the subregisters into the original register.
15780
- auto RegSequence = BuildMI(*EndBB, AfterEndCf, Phi.getDebugLoc(),
15781
- TII->get(AMDGPU::REG_SEQUENCE), PhiDest);
15782
- for (unsigned I = 0; I < NumSubRegs; ++I) {
15783
- RegSequence.addReg(PhiSubRegs[I]);
15784
- RegSequence.addImm(SubRegIndices[I]);
15785
- }
15786
- break;
15787
- }
15788
- }
15789
-
15790
- Phi.removeOperand(InactiveOpIdx + 1);
15791
- Phi.removeOperand(InactiveOpIdx);
15792
- }
15793
- If.getParent()->removeSuccessor(EndBB);
15794
-
15795
- BuildMI(*EndBB, AfterEndCf, IWWIt->getDebugLoc(),
15796
- TII->get(MF.getSubtarget<GCNSubtarget>().isWave32()
15797
- ? AMDGPU::S_MOV_B32
15798
- : AMDGPU::S_MOV_B64),
15799
- TII->getRegisterInfo().getExec())
15800
- .addImm(-1);
15801
-
15802
- EndCf.eraseFromParent();
15803
- If.eraseFromParent();
15804
- IWWIt->eraseFromParent();
15805
- }
15806
-
15807
15680
// Figure out which registers should be reserved for stack access. Only after
15808
15681
// the function is legalized do we know all of the non-spill stack objects or if
15809
15682
// calls are present.
@@ -15814,12 +15687,6 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
15814
15687
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
15815
15688
const SIInstrInfo *TII = ST.getInstrInfo();
15816
15689
15817
- if (Info->hasInitWholeWave()) {
15818
- assert(Info->isChainFunction() &&
15819
- "init.whole.wave may only be used in chain functions");
15820
- removeInitWholeWaveBranch(MF, MRI, TII);
15821
- }
15822
-
15823
15690
if (Info->isEntryFunction()) {
15824
15691
// Callable functions have fixed registers used for stack access.
15825
15692
reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
0 commit comments