@@ -362,6 +362,9 @@ void GCNScheduleDAGMILive::schedule() {
362
362
if (PressureAfter.getSGPRNum () <= S.SGPRCriticalLimit &&
363
363
PressureAfter.getVGPRNum (ST.hasGFX90AInsts ()) <= S.VGPRCriticalLimit ) {
364
364
Pressure[RegionIdx] = PressureAfter;
365
+ RegionsWithMinOcc[RegionIdx] =
366
+ PressureAfter.getOccupancy (ST) == MinOccupancy;
367
+
365
368
LLVM_DEBUG (dbgs () << " Pressure in desired limits, done.\n " );
366
369
return ;
367
370
}
@@ -378,6 +381,7 @@ void GCNScheduleDAGMILive::schedule() {
378
381
// occupancy before was higher, or if the current schedule has register
379
382
// pressure higher than the excess limits which could lead to more spilling.
380
383
unsigned NewOccupancy = std::max (WavesAfter, WavesBefore);
384
+
381
385
// Allow memory bound functions to drop to 4 waves if not limited by an
382
386
// attribute.
383
387
if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
@@ -390,6 +394,7 @@ void GCNScheduleDAGMILive::schedule() {
390
394
if (NewOccupancy < MinOccupancy) {
391
395
MinOccupancy = NewOccupancy;
392
396
MFI.limitOccupancy (MinOccupancy);
397
+ RegionsWithMinOcc.reset ();
393
398
LLVM_DEBUG (dbgs () << " Occupancy lowered for the function to "
394
399
<< MinOccupancy << " .\n " );
395
400
}
@@ -416,6 +421,8 @@ void GCNScheduleDAGMILive::schedule() {
416
421
PressureAfter.less (ST, PressureBefore) ||
417
422
!RescheduleRegions[RegionIdx]) {
418
423
Pressure[RegionIdx] = PressureAfter;
424
+ RegionsWithMinOcc[RegionIdx] =
425
+ PressureAfter.getOccupancy (ST) == MinOccupancy;
419
426
if (!RegionsWithClusters[RegionIdx] &&
420
427
(Stage + 1 ) == UnclusteredReschedule)
421
428
RescheduleRegions[RegionIdx] = false ;
@@ -425,6 +432,8 @@ void GCNScheduleDAGMILive::schedule() {
425
432
}
426
433
}
427
434
435
+ RegionsWithMinOcc[RegionIdx] =
436
+ PressureBefore.getOccupancy (ST) == MinOccupancy;
428
437
LLVM_DEBUG (dbgs () << " Attempting to revert scheduling.\n " );
429
438
RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
430
439
(Stage + 1 ) != UnclusteredReschedule;
@@ -585,9 +594,11 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
585
594
RescheduleRegions.resize (Regions.size ());
586
595
RegionsWithClusters.resize (Regions.size ());
587
596
RegionsWithHighRP.resize (Regions.size ());
597
+ RegionsWithMinOcc.resize (Regions.size ());
588
598
RescheduleRegions.set ();
589
599
RegionsWithClusters.reset ();
590
600
RegionsWithHighRP.reset ();
601
+ RegionsWithMinOcc.reset ();
591
602
592
603
if (!Regions.empty ())
593
604
BBLiveInMap = getBBLiveInMap ();
@@ -624,13 +635,42 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
624
635
<< " Retrying function scheduling with lowest recorded occupancy "
625
636
<< MinOccupancy << " .\n " );
626
637
}
638
+
639
+ if (Stage == PreRARematerialize) {
640
+ if (RegionsWithMinOcc.count () != 1 || Regions.size () == 1 )
641
+ break ;
642
+
643
+ const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
644
+ const TargetInstrInfo *TII = MF.getSubtarget ().getInstrInfo ();
645
+ // Check maximum occupancy
646
+ if (ST.computeOccupancy (MF.getFunction (), MFI.getLDSSize ()) ==
647
+ MinOccupancy)
648
+ break ;
649
+
650
+ // FIXME: This pass will invalidate cached LiveIns, MBBLiveIns and
651
+ // Pressure for regions inbetween the defs and region we sinked the def
652
+ // to. Will need to be fixed if there is another pass after this pass.
653
+ static_assert (LastStage == PreRARematerialize,
654
+ " Passes after PreRARematerialize are not supported" );
655
+
656
+ unsigned HighRPIdx = RegionsWithMinOcc.find_first ();
657
+ collectRematerializableInstructions (HighRPIdx);
658
+ if (RematerializableInsts.empty () ||
659
+ !sinkTriviallyRematInsts (ST, TII, HighRPIdx))
660
+ break ;
661
+
662
+ LLVM_DEBUG (
663
+ dbgs () << " Retrying function scheduling with improved occupancy of "
664
+ << MinOccupancy << " from rematerializing\n " );
665
+ }
627
666
}
628
667
629
668
if (Stage == UnclusteredReschedule)
630
669
SavedMutations.swap (Mutations);
631
670
632
671
for (auto Region : Regions) {
633
- if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
672
+ if (((Stage == UnclusteredReschedule || Stage == PreRARematerialize) &&
673
+ !RescheduleRegions[RegionIdx]) ||
634
674
(Stage == ClusteredLowOccupancyReschedule &&
635
675
!RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
636
676
@@ -655,6 +695,7 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
655
695
// Skip empty scheduling regions (0 or 1 schedulable instructions).
656
696
if (begin () == end () || begin () == std::prev (end ())) {
657
697
exitRegion ();
698
+ ++RegionIdx;
658
699
continue ;
659
700
}
660
701
@@ -677,3 +718,211 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
677
718
SavedMutations.swap (Mutations);
678
719
} while (Stage != LastStage);
679
720
}
721
+
722
+ void GCNScheduleDAGMILive::collectRematerializableInstructions (
723
+ unsigned HighRPIdx) {
724
+ const SIRegisterInfo *SRI = static_cast <const SIRegisterInfo *>(TRI);
725
+ const GCNRPTracker::LiveRegSet &HighRPLiveIns = LiveIns[HighRPIdx];
726
+ for (unsigned I = 0 , E = MRI.getNumVirtRegs (); I != E; ++I) {
727
+ Register Reg = Register::index2VirtReg (I);
728
+ if (!LIS->hasInterval (Reg))
729
+ continue ;
730
+
731
+ // TODO: Handle AGPR and SGPR rematerialization
732
+ if (!SRI->isVGPRClass (MRI.getRegClass (Reg)) || !MRI.hasOneDef (Reg) ||
733
+ !MRI.hasOneUse (Reg))
734
+ continue ;
735
+
736
+ // We are only collecting defs that are live-through or defined in another
737
+ // block and used inside this region. This means that the register must be
738
+ // in the live-in set for this region, else skip this def.
739
+ if (HighRPLiveIns.find (Reg) == HighRPLiveIns.end ())
740
+ continue ;
741
+
742
+ MachineInstr *Def = MRI.getOneDef (Reg)->getParent ();
743
+ if (!Def || !isTriviallyReMaterializable (*Def, AA))
744
+ continue ;
745
+
746
+ MachineInstr *UseI = &*MRI.use_instr_begin (Reg);
747
+ if (Def->getParent () == UseI->getParent ())
748
+ continue ;
749
+
750
+ RematerializableInsts.push_back (std::make_pair (Def, UseI));
751
+ }
752
+ }
753
+
754
+ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts (const GCNSubtarget &ST,
755
+ const TargetInstrInfo *TII,
756
+ unsigned HighRPIdx) {
757
+ RescheduleRegions.reset ();
758
+ GCNRPTracker::LiveRegSet NewLiveIns;
759
+ // We may not need to rematerialize all instructions. Keep a list of
760
+ // instructions we are rematerializing at the end.
761
+ SmallVector<std::pair<MachineInstr *, MachineInstr *>, 4 >
762
+ TrivialRematDefsToSink;
763
+
764
+ GCNRegPressure RegionPressure = Pressure[HighRPIdx];
765
+ int VGPRUsage = RegionPressure.getVGPRNum (ST.hasGFX90AInsts ());
766
+ int SGPRUsage = RegionPressure.getSGPRNum ();
767
+
768
+ // TODO: Handle occupancy drop due to AGPR and SGPR.
769
+ // Check if cause of occupancy drop is due to VGPR usage.
770
+ if (ST.getOccupancyWithNumVGPRs (VGPRUsage) > MinOccupancy ||
771
+ ST.getOccupancyWithNumSGPRs (SGPRUsage) == MinOccupancy)
772
+ return false ;
773
+
774
+ NewLiveIns.copyFrom (LiveIns[HighRPIdx]);
775
+ // First check if we have enough trivially rematerializable instructions to
776
+ // improve occupancy. Optimistically assume all instructions we are able to
777
+ // sink decreased RP.
778
+ int TotalSinkableRegs = 0 ;
779
+ for (auto &It : RematerializableInsts) {
780
+ Register DefReg = It.first ->getOperand (0 ).getReg ();
781
+ TotalSinkableRegs += SIRegisterInfo::getNumCoveredRegs (NewLiveIns[DefReg]);
782
+ }
783
+ int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
784
+ unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs (VGPRsAfterSink);
785
+ // If in the most optimistic scenario, we cannot improve occupancy, then do
786
+ // not attempt to sink any instructions.
787
+ if (OptimisticOccupancy <= MinOccupancy)
788
+ return false ;
789
+
790
+ // Keep a list of newly rematerialized instructions so that we can easily
791
+ // undo if occupancy is not improved.
792
+ DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
793
+ GCNDownwardRPTracker RPT (*LIS);
794
+ auto *NonDbgMI = &*skipDebugInstructionsForward (Regions[HighRPIdx].first ,
795
+ Regions[HighRPIdx].second );
796
+ unsigned ImproveOccupancy = 0 ;
797
+ for (auto &It : RematerializableInsts) {
798
+ MachineInstr *Def = It.first ;
799
+ MachineBasicBlock::iterator InsertPos =
800
+ MachineBasicBlock::iterator (It.second );
801
+ Register Reg = Def->getOperand (0 ).getReg ();
802
+ // Rematerialize MI to its use block. Since we are only rematerializing
803
+ // instructions that do not have any virtual reg uses, we do not need to
804
+ // call LiveRangeEdit::allUsesAvailableAt() and
805
+ // LiveRangeEdit::canRematerializeAt().
806
+ NewLiveIns[Reg] = LaneBitmask::getNone ();
807
+ TII->reMaterialize (*InsertPos->getParent (), InsertPos, Reg,
808
+ Def->getOperand (0 ).getSubReg (), *Def, *TRI);
809
+ MachineInstr *NewMI = &*(--InsertPos);
810
+ LIS->InsertMachineInstrInMaps (*NewMI);
811
+ LIS->removeInterval (Reg);
812
+ LIS->createAndComputeVirtRegInterval (Reg);
813
+ InsertedMIToOldDef[NewMI] = Def;
814
+
815
+ // FIXME: Need better way to update RP without re-iterating over region
816
+ RPT.reset (*NonDbgMI, &NewLiveIns);
817
+ RPT.advance (Regions[HighRPIdx].second );
818
+ GCNRegPressure RPAfterSinking = RPT.moveMaxPressure ();
819
+ ImproveOccupancy = RPAfterSinking.getOccupancy (ST);
820
+ if (ImproveOccupancy > MinOccupancy)
821
+ break ;
822
+ }
823
+
824
+ if (ImproveOccupancy <= MinOccupancy) {
825
+ // Occupancy is not improved. Undo sinking for the region
826
+ for (auto &Entry : InsertedMIToOldDef) {
827
+ MachineInstr *MI = Entry.first ;
828
+ MachineInstr *OldMI = Entry.second ;
829
+ Register Reg = MI->getOperand (0 ).getReg ();
830
+ LIS->RemoveMachineInstrFromMaps (*MI);
831
+ MI->eraseFromParent ();
832
+ OldMI->clearRegisterDeads (Reg);
833
+ LIS->removeInterval (Reg);
834
+ LIS->createAndComputeVirtRegInterval (Reg);
835
+ }
836
+ return false ;
837
+ }
838
+
839
+ // Occupancy is improved.
840
+ for (auto &Entry : InsertedMIToOldDef) {
841
+ MachineInstr *MI = Entry.first ;
842
+ MachineInstr *OldMI = Entry.second ;
843
+ // Update region boundaries in scheduling region we sinked from since we
844
+ // may sink an instruction that was at the beginning or end of its region
845
+ updateRegionBoundaries (OldMI, /* NewMI =*/ nullptr , /* Removing =*/ true );
846
+
847
+ // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
848
+ BBLiveInMap.erase (OldMI);
849
+
850
+ // Remove OldMI and update LIS
851
+ Register Reg = MI->getOperand (0 ).getReg ();
852
+ LIS->RemoveMachineInstrFromMaps (*OldMI);
853
+ OldMI->eraseFromParent ();
854
+ LIS->removeInterval (Reg);
855
+ LIS->createAndComputeVirtRegInterval (Reg);
856
+
857
+ // Update region boundaries in region we sinked to.
858
+ MachineBasicBlock::iterator InsertPos =
859
+ std::next (MachineBasicBlock::iterator (MI));
860
+ updateRegionBoundaries (InsertPos, MI);
861
+ }
862
+
863
+ // Update cached live-ins and register pressure after rematerializing
864
+ LiveIns[HighRPIdx].copyFrom (NewLiveIns);
865
+ MBBLiveIns.erase (Regions[HighRPIdx].first ->getParent ());
866
+
867
+ GCNDownwardRPTracker RPTracker (*LIS);
868
+ RPTracker.advance (Regions[HighRPIdx].first , Regions[HighRPIdx].second ,
869
+ &LiveIns[HighRPIdx]);
870
+ Pressure[HighRPIdx] = RPTracker.moveMaxPressure ();
871
+
872
+ SIMachineFunctionInfo &MFI = *MF.getInfo <SIMachineFunctionInfo>();
873
+ MFI.increaseOccupancy (MF, ++MinOccupancy);
874
+ RescheduleRegions[HighRPIdx] = true ;
875
+
876
+ return true ;
877
+ }
878
+
879
+ // Copied from MachineLICM
880
+ bool GCNScheduleDAGMILive::isTriviallyReMaterializable (const MachineInstr &MI,
881
+ AAResults *AA) {
882
+ if (!TII->isTriviallyReMaterializable (MI, AA))
883
+ return false ;
884
+
885
+ for (const MachineOperand &MO : MI.operands ())
886
+ if (MO.isReg () && MO.isUse () && MO.getReg ().isVirtual ())
887
+ return false ;
888
+
889
+ return true ;
890
+ }
891
+
892
// When removing, we will have to check both beginning and ending of the region.
// When inserting, we will only have to check if we are inserting NewMI in front
// of a scheduling region and do not need to check the ending since we will only
// ever be inserting before an already existing MI.
//
// \p MI        iterator anchoring the update: the instruction being removed,
//              or the position in front of which \p NewMI was inserted.
// \p NewMI     newly inserted instruction (ignored when \p Removing is true).
// \p Removing  true when \p MI is being removed from its block.
void GCNScheduleDAGMILive::updateRegionBoundaries(
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
  unsigned I = 0, E = Regions.size();
  // Search for first region of the block where MI is located
  while (I != E && MI->getParent() != Regions[I].first->getParent())
    ++I;

  // Walk only the regions of MI's block; stop once we leave that block.
  for (; I != E; ++I) {
    if (MI->getParent() != Regions[I].first->getParent())
      return;

    if (Removing && MI == Regions[I].first && MI == Regions[I].second) {
      // MI is in a region with size 1, after removing, the region will be
      // size 0, set RegionBegin and RegionEnd to pass end of block iterator.
      Regions[I] =
          std::make_pair(MI->getParent()->end(), MI->getParent()->end());
      return;
    }
    if (MI == Regions[I].first) {
      if (Removing)
        // Removing the region's first instruction: begin moves to the next one.
        Regions[I] = std::make_pair(std::next(MI), Regions[I].second);
      else
        // Inserted NewMI in front of region, set new RegionBegin to NewMI
        Regions[I] = std::make_pair(MachineBasicBlock::iterator(NewMI),
                                    Regions[I].second);
      return;
    }
    if (Removing && MI == Regions[I].second) {
      // MI is the region's (exclusive) end iterator and is being erased, so
      // the stored iterator must be re-anchored to a surviving instruction.
      // NOTE(review): std::prev(MI) re-anchors to the instruction before MI,
      // which appears to shrink the region by one; confirm whether
      // std::next(MI) was intended for an exclusive end bound.
      Regions[I] = std::make_pair(Regions[I].first, std::prev(MI));
      return;
    }
  }
}
0 commit comments