
Commit 4a68eae

[CodeGen] Fix register pressure computation in MachinePipeliner
MachinePipeliner called `RegisterClassInfo::getRegPressureSetLimit` to obtain the limit of a register pressure set and then decreased the value by the weight of the fixed registers. However, `RegisterClassInfo::getRegPressureSetLimit` itself calls `TargetRegisterInfo::getRegPressureSetLimit` and already adjusts the result to remove the influence of reserved registers. As a result, the impact of some registers is counted twice; in the worst case the limit drops below zero and an assertion failure occurs. Therefore, MachinePipeliner should use `TargetRegisterInfo::getRegPressureSetLimit` directly instead of the current call. Also improve the tests.
1 parent 39e8137 commit 4a68eae
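
For intuition, the double subtraction can be sketched as below. This is only an illustrative C++ fragment with hypothetical names (`RawLimit`, `ReservedWeight`, `FixedWeight`), not the actual LLVM code: `RegisterClassInfo::getRegPressureSetLimit` already folds the reserved-register weight into the limit it returns, and fixed registers such as the stack pointer are typically also reserved, so subtracting their weight a second time counts them twice.

// Illustrative sketch only; names are hypothetical.
// Before the fix: fixed registers (usually a subset of the reserved
// registers) are effectively subtracted twice, and the unsigned limit
// can underflow, which is what triggered the assertion failure.
unsigned limitBefore(unsigned RawLimit,       // TargetRegisterInfo's raw limit
                     unsigned ReservedWeight, // already removed by RegisterClassInfo
                     unsigned FixedWeight) {  // removed again by the pipeliner
  unsigned RCILimit = RawLimit - ReservedWeight;
  return RCILimit - FixedWeight;
}

// After the fix: start from the unadjusted TargetRegisterInfo limit, so the
// fixed-register weight is subtracted exactly once.
unsigned limitAfter(unsigned RawLimit, unsigned FixedWeight) {
  return RawLimit - FixedWeight;
}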

File tree

3 files changed: +180 -166 lines changed


llvm/lib/CodeGen/MachinePipeliner.cpp

Lines changed: 1 addition & 1 deletion
@@ -1268,7 +1268,7 @@ class HighRegisterPressureDetector {
   // Calculate the upper limit of each pressure set
   void computePressureSetLimit(const RegisterClassInfo &RCI) {
     for (unsigned PSet = 0; PSet < PSetNum; PSet++)
-      PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet);
+      PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet);
 
     // We assume fixed registers, such as stack pointer, are already in use.
     // Therefore subtracting the weight of the fixed registers from the limit of
Lines changed: 158 additions & 0 deletions
@@ -0,0 +1,158 @@
# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-max-mii=40 -pipeliner-register-pressure -pipeliner-ii-search-range=30 -debug-only=pipeliner 2>&1 | FileCheck %s

# Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues.
# The specific value of II is not important.

# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}}
# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}}
# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}}
# CHECK: {{^ *}}Schedule Found? 1 (II={{[0-9]+}}){{$}}

--- |
  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

  define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr {
  entry:
    %0 = load double, ptr %a, align 8
    %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8
    %1 = load double, ptr %arrayidx1, align 8
    %cmp133 = icmp sgt i32 %n, 0
    br i1 %cmp133, label %for.body.preheader, label %for.cond.cleanup

  for.body.preheader: ; preds = %entry
    %wide.trip.count = zext nneg i32 %n to i64
    br label %for.body

  for.cond.cleanup: ; preds = %for.body, %entry
    %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add54, %for.body ]
    ret double %res.0.lcssa

  for.body: ; preds = %for.body.preheader, %for.body
    %lsr.iv137 = phi i64 [ %wide.trip.count, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
    %lsr.iv = phi ptr [ %b, %for.body.preheader ], [ %scevgep, %for.body ]
    %res.0135 = phi double [ 0.000000e+00, %for.body.preheader ], [ %add54, %for.body ]
    %2 = load double, ptr %lsr.iv, align 8
    %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %0)
    %4 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %3)
    %5 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %4)
    %6 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %5)
    %7 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %6)
    %8 = tail call double @llvm.fmuladd.f64(double %7, double %2, double %7)
    %9 = tail call double @llvm.fmuladd.f64(double %8, double %2, double %8)
    %10 = tail call double @llvm.fmuladd.f64(double %9, double %2, double %9)
    %11 = tail call double @llvm.fmuladd.f64(double %10, double %2, double %10)
    %12 = tail call double @llvm.fmuladd.f64(double %11, double %2, double %11)
    %13 = tail call double @llvm.fmuladd.f64(double %12, double %2, double %12)
    %14 = tail call double @llvm.fmuladd.f64(double %13, double %2, double %13)
    %15 = tail call double @llvm.fmuladd.f64(double %14, double %2, double %14)
    %16 = tail call double @llvm.fmuladd.f64(double %15, double %2, double %15)
    %17 = tail call double @llvm.fmuladd.f64(double %16, double %2, double %16)
    %18 = tail call double @llvm.fmuladd.f64(double %17, double %2, double %17)
    %add = fadd double %17, %18
    %19 = tail call double @llvm.fmuladd.f64(double %18, double %2, double %add)
    %add35 = fadd double %10, %19
    %20 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %add35)
    %add38 = fadd double %11, %20
    %21 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %add38)
    %add41 = fadd double %12, %21
    %22 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %add41)
    %add44 = fadd double %14, %15
    %add45 = fadd double %13, %add44
    %add46 = fadd double %add45, %22
    %23 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %add46)
    %mul = fmul double %2, %7
    %mul51 = fmul double %1, %mul
    %24 = tail call double @llvm.fmuladd.f64(double %mul51, double %9, double %23)
    %25 = tail call double @llvm.fmuladd.f64(double %8, double %1, double %24)
    %add54 = fadd double %res.0135, %25
    %scevgep = getelementptr i8, ptr %lsr.iv, i64 8
    %lsr.iv.next = add nsw i64 %lsr.iv137, -1
    %exitcond.not = icmp eq i64 %lsr.iv.next, 0
    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
  }

  declare double @llvm.fmuladd.f64(double, double, double)

...
---
name: kernel
tracksRegLiveness: true
liveins:
  - { reg: '$x0', virtual-reg: '%10' }
  - { reg: '$x1', virtual-reg: '%11' }
  - { reg: '$w2', virtual-reg: '%12' }
body: |
  bb.0.entry:
    successors: %bb.1, %bb.4
    liveins: $x0, $x1, $w2

    %12:gpr32common = COPY $w2
    %11:gpr64 = COPY $x1
    %10:gpr64common = COPY $x0
    dead $wzr = SUBSWri %12, 1, 0, implicit-def $nzcv
    Bcc 10, %bb.1, implicit $nzcv

  bb.4:
    %13:fpr64 = FMOVD0
    B %bb.2

  bb.1.for.body.preheader:
    %0:fpr64 = LDRDui %10, 0 :: (load (s64) from %ir.a)
    %1:fpr64 = LDRDui %10, 1 :: (load (s64) from %ir.arrayidx1)
    %16:gpr32 = ORRWrs $wzr, %12, 0
    %2:gpr64all = SUBREG_TO_REG 0, killed %16, %subreg.sub_32
    %15:fpr64 = FMOVD0
    B %bb.3

  bb.2.for.cond.cleanup:
    %3:fpr64 = PHI %13, %bb.4, %7, %bb.3
    $d0 = COPY %3
    RET_ReallyLR implicit $d0

  bb.3.for.body:
    successors: %bb.2, %bb.3

    %4:gpr64sp = PHI %2, %bb.1, %9, %bb.3
    %5:gpr64sp = PHI %11, %bb.1, %8, %bb.3
    %6:fpr64 = PHI %15, %bb.1, %7, %bb.3
    early-clobber %17:gpr64sp, %18:fpr64 = LDRDpost %5, 8 :: (load (s64) from %ir.lsr.iv)
    %19:fpr64 = nofpexcept FMADDDrrr %0, %18, %0, implicit $fpcr
    %20:fpr64 = nofpexcept FMADDDrrr %19, %18, %19, implicit $fpcr
    %21:fpr64 = nofpexcept FMADDDrrr %20, %18, %20, implicit $fpcr
    %22:fpr64 = nofpexcept FMADDDrrr %21, %18, %21, implicit $fpcr
    %23:fpr64 = nofpexcept FMADDDrrr %22, %18, %22, implicit $fpcr
    %24:fpr64 = nofpexcept FMADDDrrr %23, %18, %23, implicit $fpcr
    %25:fpr64 = nofpexcept FMADDDrrr %24, %18, %24, implicit $fpcr
    %26:fpr64 = nofpexcept FMADDDrrr %25, %18, %25, implicit $fpcr
    %27:fpr64 = nofpexcept FMADDDrrr %26, %18, %26, implicit $fpcr
    %28:fpr64 = nofpexcept FMADDDrrr %27, %18, %27, implicit $fpcr
    %29:fpr64 = nofpexcept FMADDDrrr %28, %18, %28, implicit $fpcr
    %30:fpr64 = nofpexcept FMADDDrrr %29, %18, %29, implicit $fpcr
    %31:fpr64 = nofpexcept FMADDDrrr %30, %18, %30, implicit $fpcr
    %32:fpr64 = nofpexcept FMADDDrrr %31, %18, %31, implicit $fpcr
    %33:fpr64 = nofpexcept FMADDDrrr %32, %18, %32, implicit $fpcr
    %34:fpr64 = nofpexcept FMADDDrrr %33, %18, %33, implicit $fpcr
    %35:fpr64 = nofpexcept FADDDrr %33, %34, implicit $fpcr
    %36:fpr64 = nofpexcept FMADDDrrr %34, %18, killed %35, implicit $fpcr
    %37:fpr64 = nofpexcept FADDDrr %26, killed %36, implicit $fpcr
    %38:fpr64 = nofpexcept FMADDDrrr %19, %18, killed %37, implicit $fpcr
    %39:fpr64 = nofpexcept FADDDrr %27, killed %38, implicit $fpcr
    %40:fpr64 = nofpexcept FMADDDrrr %20, %18, killed %39, implicit $fpcr
    %41:fpr64 = nofpexcept FADDDrr %28, killed %40, implicit $fpcr
    %42:fpr64 = nofpexcept FMADDDrrr %21, %18, killed %41, implicit $fpcr
    %43:fpr64 = nofpexcept FADDDrr %30, %31, implicit $fpcr
    %44:fpr64 = nofpexcept FADDDrr %29, killed %43, implicit $fpcr
    %45:fpr64 = nofpexcept FADDDrr killed %44, killed %42, implicit $fpcr
    %46:fpr64 = nofpexcept FMADDDrrr %22, %18, killed %45, implicit $fpcr
    %47:fpr64 = nofpexcept FMULDrr %18, %23, implicit $fpcr
    %48:fpr64 = nofpexcept FMULDrr %1, killed %47, implicit $fpcr
    %49:fpr64 = nofpexcept FMADDDrrr killed %48, %25, killed %46, implicit $fpcr
    %50:fpr64 = nofpexcept FMADDDrrr %24, %1, killed %49, implicit $fpcr
    %7:fpr64 = nofpexcept FADDDrr %6, killed %50, implicit $fpcr
    %8:gpr64all = COPY %17
    %51:gpr64 = nsw SUBSXri %4, 1, 0, implicit-def $nzcv
    %9:gpr64all = COPY %51
    Bcc 0, %bb.2, implicit $nzcv
    B %bb.3

...
