Skip to content

MachineScheduler: Reset next cluster candidate for each node #139513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/lib/CodeGen/MachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,12 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {

/// releaseSuccessors - Invoke releaseSucc for every successor edge of SU.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  // Clear the recorded next cluster successor before walking the edges.
  // Example: suppose we want to cluster A, B and C. Once A is picked, B is
  // recorded as the next cluster successor. If D is picked after A instead of
  // B, pickNode() has already decided against the cluster candidate B, so the
  // stale entry must be reset rather than left pointing at B.
  NextClusterSucc = nullptr;
  for (SDep &Edge : SU->Succs)
    releaseSucc(SU, &Edge);
}
Expand Down Expand Up @@ -1004,6 +1010,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {

/// releasePredecessors - Invoke releasePred for every predecessor edge of SU.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  // Reset the next cluster predecessor for each node before its predecessor
  // edges are released, so no candidate recorded earlier is carried over.
  NextClusterPred = nullptr;
  for (SDep &Edge : SU->Preds)
    releasePred(SU, &Edge);
}
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AArch64/expand-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ define void @foo(i32 %In1, <2 x i128> %In2, <2 x i128> %In3, ptr %Out) {
; CHECK-NEXT: fmov s0, wzr
; CHECK-NEXT: ldr x11, [sp]
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: ldp x9, x10, [sp, #8]
; CHECK-NEXT: ldp x8, x10, [sp, #8]
; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: csel x8, x5, x9, ne
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: tst w9, #0x1
; CHECK-NEXT: csel x8, x5, x8, ne
; CHECK-NEXT: csel x9, x4, x11, ne
; CHECK-NEXT: stp x9, x8, [x10, #16]
; CHECK-NEXT: csel x8, x3, x7, ne
Expand All @@ -36,14 +36,14 @@ define void @bar(i32 %In1, <2 x i96> %In2, <2 x i96> %In3, ptr %Out) {
; CHECK-NEXT: ldr x10, [sp, #16]
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: ldp x9, x8, [sp]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: tst w9, #0x1
; CHECK-NEXT: ldp x8, x9, [sp]
; CHECK-NEXT: csel x11, x2, x6, ne
; CHECK-NEXT: str x11, [x10]
; CHECK-NEXT: csel x9, x4, x9, ne
; CHECK-NEXT: csel x8, x5, x8, ne
; CHECK-NEXT: stur x9, [x10, #12]
; CHECK-NEXT: csel x8, x4, x8, ne
; CHECK-NEXT: stur x8, [x10, #12]
; CHECK-NEXT: csel x8, x5, x9, ne
; CHECK-NEXT: csel x9, x3, x7, ne
; CHECK-NEXT: str w8, [x10, #20]
; CHECK-NEXT: str w9, [x10, #8]
Expand Down
85 changes: 43 additions & 42 deletions llvm/test/CodeGen/AArch64/extbinopload.ll
Original file line number Diff line number Diff line change
Expand Up @@ -667,30 +667,30 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: add x10, x3, #12
; CHECK-NEXT: bic v1.8h, #255, lsl #8
; CHECK-NEXT: ld1 { v0.s }[3], [x3], #4
; CHECK-NEXT: ldr s3, [x0, #12]
; CHECK-NEXT: ldp s2, s7, [x0, #4]
; CHECK-NEXT: ldr s4, [x0, #12]
; CHECK-NEXT: ldp s5, s2, [x2, #4]
; CHECK-NEXT: ldr s6, [x2, #12]
; CHECK-NEXT: ldp s5, s4, [x2, #4]
; CHECK-NEXT: ld1 { v3.s }[1], [x11]
; CHECK-NEXT: ldp s3, s7, [x0, #4]
; CHECK-NEXT: ld1 { v4.s }[1], [x11]
; CHECK-NEXT: ld1 { v6.s }[1], [x10]
; CHECK-NEXT: ld1 { v2.s }[1], [x9]
; CHECK-NEXT: ld1 { v4.s }[1], [x8]
; CHECK-NEXT: ld1 { v2.s }[1], [x8]
; CHECK-NEXT: ld1 { v5.s }[1], [x3]
; CHECK-NEXT: add x8, x1, #8
; CHECK-NEXT: ld1 { v3.s }[1], [x9]
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
; CHECK-NEXT: ushll v4.8h, v4.8b, #0
; CHECK-NEXT: uaddl v3.8h, v5.8b, v6.8b
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
; CHECK-NEXT: uaddl v3.8h, v3.8b, v4.8b
; CHECK-NEXT: uaddl v4.8h, v5.8b, v6.8b
; CHECK-NEXT: uaddw v1.8h, v1.8h, v7.8b
; CHECK-NEXT: uaddw2 v4.8h, v4.8h, v0.16b
; CHECK-NEXT: ushll v0.4s, v2.4h, #3
; CHECK-NEXT: ushll v5.4s, v3.4h, #3
; CHECK-NEXT: uaddw2 v2.8h, v2.8h, v0.16b
; CHECK-NEXT: ushll v0.4s, v3.4h, #3
; CHECK-NEXT: ushll v5.4s, v4.4h, #3
; CHECK-NEXT: ushll2 v4.4s, v4.8h, #3
; CHECK-NEXT: ushll2 v3.4s, v3.8h, #3
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3
; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
; CHECK-NEXT: uaddw2 v1.4s, v2.4s, v1.8h
; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v4.8h
; CHECK-NEXT: uaddw v2.4s, v5.4s, v4.4h
; CHECK-NEXT: uaddw2 v1.4s, v3.4s, v1.8h
; CHECK-NEXT: uaddw2 v3.4s, v4.4s, v2.8h
; CHECK-NEXT: uaddw v2.4s, v5.4s, v2.4h
; CHECK-NEXT: ret
%lp1 = load <4 x i8>, ptr %p
store <4 x i8> %lp1, ptr %z
Expand Down Expand Up @@ -1073,24 +1073,24 @@ define <16 x i32> @extrause_ext2(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: ld1 { v6.s }[1], [x10]
; CHECK-NEXT: ld1 { v5.s }[1], [x9]
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
; CHECK-NEXT: uaddl v16.8h, v2.8b, v3.8b
; CHECK-NEXT: uaddl v3.8h, v1.8b, v6.8b
; CHECK-NEXT: uaddl v2.8h, v4.8b, v5.8b
; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
; CHECK-NEXT: uaddl v1.8h, v1.8b, v6.8b
; CHECK-NEXT: uaddl v3.8h, v4.8b, v5.8b
; CHECK-NEXT: uaddl v4.8h, v0.8b, v7.8b
; CHECK-NEXT: ushll v0.4s, v16.4h, #3
; CHECK-NEXT: ushll2 v1.4s, v16.8h, #3
; CHECK-NEXT: ushll2 v18.4s, v16.8h, #0
; CHECK-NEXT: ushll v6.4s, v2.4h, #3
; CHECK-NEXT: ushll2 v7.4s, v2.8h, #3
; CHECK-NEXT: ushll2 v5.4s, v2.8h, #0
; CHECK-NEXT: ushll2 v0.4s, v2.8h, #0
; CHECK-NEXT: ushll v5.4s, v2.4h, #3
; CHECK-NEXT: ushll2 v16.4s, v2.8h, #3
; CHECK-NEXT: ushll v6.4s, v3.4h, #3
; CHECK-NEXT: ushll2 v7.4s, v3.8h, #3
; CHECK-NEXT: ushll v17.4s, v2.4h, #0
; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v3.8h
; CHECK-NEXT: uaddw v0.4s, v0.4s, v3.4h
; CHECK-NEXT: ushll2 v18.4s, v3.8h, #0
; CHECK-NEXT: ushll v19.4s, v3.4h, #0
; CHECK-NEXT: stp q17, q0, [x4]
; CHECK-NEXT: uaddw v0.4s, v5.4s, v1.4h
; CHECK-NEXT: uaddw2 v1.4s, v16.4s, v1.8h
; CHECK-NEXT: uaddw2 v3.4s, v7.4s, v4.8h
; CHECK-NEXT: uaddw v2.4s, v6.4s, v4.4h
; CHECK-NEXT: ushll v4.4s, v16.4h, #0
; CHECK-NEXT: stp q17, q5, [x4, #32]
; CHECK-NEXT: stp q4, q18, [x4]
; CHECK-NEXT: stp q19, q18, [x4, #32]
; CHECK-NEXT: ret
%lp1 = load <4 x i8>, ptr %p
%p2 = getelementptr i8, ptr %p, i32 4
Expand Down Expand Up @@ -1176,19 +1176,20 @@ define <16 x i32> @extrause_shl(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: ld1 { v5.s }[1], [x9]
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
; CHECK-NEXT: uaddl v1.8h, v1.8b, v6.8b
; CHECK-NEXT: uaddl v3.8h, v4.8b, v5.8b
; CHECK-NEXT: uaddl v4.8h, v1.8b, v6.8b
; CHECK-NEXT: ushll v5.4s, v2.4h, #3
; CHECK-NEXT: ushll2 v6.4s, v2.8h, #3
; CHECK-NEXT: uaddl v2.8h, v0.8b, v7.8b
; CHECK-NEXT: ushll v7.4s, v3.4h, #3
; CHECK-NEXT: ushll2 v16.4s, v3.8h, #3
; CHECK-NEXT: uaddw2 v1.4s, v6.4s, v4.8h
; CHECK-NEXT: uaddw v0.4s, v5.4s, v4.4h
; CHECK-NEXT: stp q5, q6, [x4]
; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v2.8h
; CHECK-NEXT: uaddw v2.4s, v7.4s, v2.4h
; CHECK-NEXT: stp q7, q16, [x4, #32]
; CHECK-NEXT: uaddl v5.8h, v0.8b, v7.8b
; CHECK-NEXT: ushll v4.4s, v2.4h, #3
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3
; CHECK-NEXT: ushll v6.4s, v3.4h, #3
; CHECK-NEXT: ushll2 v7.4s, v3.8h, #3
; CHECK-NEXT: uaddw v0.4s, v4.4s, v1.4h
; CHECK-NEXT: uaddw2 v1.4s, v2.4s, v1.8h
; CHECK-NEXT: str q4, [x4]
; CHECK-NEXT: stp q2, q6, [x4, #16]
; CHECK-NEXT: uaddw2 v3.4s, v7.4s, v5.8h
; CHECK-NEXT: uaddw v2.4s, v6.4s, v5.4h
; CHECK-NEXT: str q7, [x4, #48]
; CHECK-NEXT: ret
%lp1 = load <4 x i8>, ptr %p
%p2 = getelementptr i8, ptr %p, i32 4
Expand Down
Loading
Loading