-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[HEXAGON] [MachinePipeliner] Fix the DAG in case of dependent phis. #135925
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
31cb22e
0678ae2
a180432
86ccfc4
50ad52c
a0abb43
6f0fe17
698e8c2
db4ba49
bfe355a
22d31a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
;RUN: llc -march=hexagon -mv71t -O2 < %s -o - 2>&1 > /dev/null | ||
|
||
; Validate that we do not crash while running this test. | ||
;%3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 | ||
;%7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 | ||
;%27:intregs = A2_zxtb %3:intregs - SU2 | ||
;%13:intregs = C2_muxri %45:predregs, 0, %46:intreg | ||
;If we have dependent phis, SU0 should be the successor of SU1 not | ||
;the other way around. (it used to be SU1 is the successor of SU0). | ||
;In some cases, SU0 is scheduled earlier than SU1 resulting in bad | ||
;IR as we do not have a value that can be used by SU2. | ||
|
||
@global = common dso_local local_unnamed_addr global ptr null, align 4 | ||
@global.1 = common dso_local local_unnamed_addr global i32 0, align 4 | ||
@global.2 = common dso_local local_unnamed_addr global i16 0, align 2 | ||
@global.3 = common dso_local local_unnamed_addr global i16 0, align 2 | ||
@global.4 = common dso_local local_unnamed_addr global i32 0, align 4 | ||
|
||
; Function Attrs: nofree norecurse nosync nounwind | ||
define dso_local i16 @wombat(i8 zeroext %arg, i16 %dummy) local_unnamed_addr #0 { | ||
bb: | ||
%load = load ptr, ptr @global, align 4 | ||
%load1 = load i32, ptr @global.1, align 4 | ||
%add2 = add nsw i32 %load1, -1 | ||
store i32 %add2, ptr @global.1, align 4 | ||
%icmp = icmp eq i32 %load1, 0 | ||
br i1 %icmp, label %bb36, label %bb3 | ||
|
||
bb3: ; preds = %bb3, %bb | ||
%phi = phi i32 [ %add30, %bb3 ], [ %add2, %bb ] | ||
%phi4 = phi i8 [ %phi8, %bb3 ], [ %arg, %bb ] | ||
%phi5 = phi i16 [ %select23, %bb3 ], [ %dummy, %bb ] | ||
%phi6 = phi i16 [ %select26, %bb3 ], [ %dummy, %bb ] | ||
%phi7 = phi i16 [ %select, %bb3 ], [ %dummy, %bb ] | ||
%phi8 = phi i8 [ %select29, %bb3 ], [ %arg, %bb ] | ||
%zext = zext i8 %phi4 to i32 | ||
%getelementptr = getelementptr inbounds i32, ptr %load, i32 %zext | ||
%getelementptr9 = getelementptr inbounds i32, ptr %getelementptr, i32 2 | ||
%ptrtoint = ptrtoint ptr %getelementptr9 to i32 | ||
%trunc = trunc i32 %ptrtoint to i16 | ||
%sext10 = sext i16 %phi7 to i32 | ||
%shl11 = shl i32 %ptrtoint, 16 | ||
%ashr = ashr exact i32 %shl11, 16 | ||
%icmp12 = icmp slt i32 %ashr, %sext10 | ||
%select = select i1 %icmp12, i16 %trunc, i16 %phi7 | ||
%getelementptr13 = getelementptr inbounds i32, ptr %getelementptr, i32 3 | ||
%load14 = load i32, ptr %getelementptr13, align 4 | ||
%shl = shl i32 %load14, 8 | ||
%getelementptr15 = getelementptr inbounds i32, ptr %getelementptr, i32 1 | ||
%load16 = load i32, ptr %getelementptr15, align 4 | ||
%shl17 = shl i32 %load16, 16 | ||
%ashr18 = ashr exact i32 %shl17, 16 | ||
%add = add nsw i32 %ashr18, %load14 | ||
%lshr = lshr i32 %add, 8 | ||
%or = or i32 %lshr, %shl | ||
%sub = sub i32 %or, %load16 | ||
%trunc19 = trunc i32 %sub to i16 | ||
%sext = sext i16 %phi5 to i32 | ||
%shl20 = shl i32 %sub, 16 | ||
%ashr21 = ashr exact i32 %shl20, 16 | ||
%icmp22 = icmp sgt i32 %ashr21, %sext | ||
%select23 = select i1 %icmp22, i16 %trunc19, i16 %phi5 | ||
%sext24 = sext i16 %phi6 to i32 | ||
%icmp25 = icmp slt i32 %ashr21, %sext24 | ||
%select26 = select i1 %icmp25, i16 %trunc19, i16 %phi6 | ||
%icmp27 = icmp eq i8 %phi8, 0 | ||
%add28 = add i8 %phi8, -1 | ||
%select29 = select i1 %icmp27, i8 0, i8 %add28 | ||
%add30 = add nsw i32 %phi, -1 | ||
%icmp31 = icmp eq i32 %phi, 0 | ||
br i1 %icmp31, label %bb32, label %bb3 | ||
|
||
bb32: ; preds = %bb3 | ||
store i16 %trunc, ptr @global.2, align 2 | ||
store i16 %trunc19, ptr @global.3, align 2 | ||
store i32 -1, ptr @global.1, align 4 | ||
%sext33 = sext i16 %select to i32 | ||
%sext34 = sext i16 %select23 to i32 | ||
%sext35 = sext i16 %select26 to i32 | ||
br label %bb36 | ||
|
||
bb36: ; preds = %bb32, %bb | ||
%phi37 = phi i32 [ %sext33, %bb32 ], [ 0, %bb ] | ||
%phi38 = phi i32 [ %sext35, %bb32 ], [ 0, %bb ] | ||
%phi39 = phi i32 [ %sext34, %bb32 ], [ 0, %bb ] | ||
%sub40 = sub nsw i32 %phi39, %phi38 | ||
%icmp41 = icmp slt i32 %sub40, %phi37 | ||
br i1 %icmp41, label %bb43, label %bb42 | ||
|
||
bb42: ; preds = %bb36 | ||
store i32 0, ptr @global.4, align 4 | ||
br label %bb43 | ||
|
||
bb43: ; preds = %bb42, %bb36 | ||
ret i16 %dummy | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,12 +4,13 @@ | |
; blocks, when there are 3 epilog blocks. The Phi was scheduled in stage | ||
; 2, so the computation for the number of Phis needs to be adjusted when | ||
; the incoming prolog block is from prolog 0 or prolog 1. | ||
; Note: the pipeliner no longer generates a 3 stage pipeline for this test. | ||
; Note: the pipeliner has been generating a 4-stage pipelined loop. | ||
|
||
; CHECK: loop0 | ||
; CHECK: [[REG0:r([0-9]+)]] = add(r{{[0-8]+}},#8) | ||
; CHECK: r{{[0-9]+}} = [[REG0]] | ||
; CHECK: endloop0 | ||
; CHECK: [[REG0]] = add(r{{[0-9]+}},#8) | ||
; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},#8) | ||
Comment on lines
+11
to
+13
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this change correct? The purpose of this test seems to verify if the value is properly propagated to epilog blocks. If it is no longer possible to check it with this case, I think it's better to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @kasuga-fj Can we keep this test to continue to check whether this particular loop is getting pipelined? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If ensuring that this test is pipelined (i.e., the pipeliner can find a valid schedule) is sufficient, you can use other outputs such as debug messages or pass remarks instead of the generated code. However, judging from the filename and the CHECK directives, I believe the purpose of this test is to verify that the expander works correctly for a given schedule, particularly that the PHIs are generated properly in the epilogue blocks. If you want to check this as well, I think executing the generated code is the most reliable method. Using update_mir_test_checks.py would be another way (in this case, we need to check whether the generated assembly is correct every time the generated code changes). |
||
|
||
; Function Attrs: nounwind | ||
define void @f0(ptr noalias nocapture readonly %a0, i32 %a1, ptr noalias %a2, ptr %a3) #0 { | ||
|
@@ -50,7 +51,3 @@ declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32) #1 | |
|
||
; Function Attrs: nounwind readnone | ||
declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32) #1 | ||
|
||
attributes #0 = { nounwind "target-cpu"="hexagonv60" } | ||
attributes #1 = { nounwind readnone } | ||
attributes #2 = { nounwind } |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried running this test in my local environment with the |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
; RUN: llc -march=hexagon -mv71 -O2 < %s -o - 2>&1 > /dev/null | ||
; Ensure we do not invalidate a DAG by forming a circuit. | ||
; If we form a circuit, this test crashes while creating the DAG | ||
; with topological sorting. | ||
|
||
%struct.quux = type { i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, [2 x i8], %struct.ham, %struct.bar, i8 } | ||
%struct.ham = type { i8, [2 x i8], [2 x i8], [2 x i8] } | ||
%struct.bar = type { [2 x i8], [2 x i8], [2 x i8] } | ||
|
||
define dso_local void @blam(i32 %arg, i8 %dummy, i32 %tmp) local_unnamed_addr #0 { | ||
bb: | ||
br label %bb1 | ||
|
||
bb1: ; preds = %bb1, %bb | ||
%phi = phi i8 [ %phi6, %bb1 ], [ %dummy, %bb ] | ||
%phi2 = phi i8 [ %phi, %bb1 ], [ %dummy, %bb ] | ||
%phi3 = phi i8 [ %phi2, %bb1 ], [ 0, %bb ] | ||
%phi4 = phi i8 [ %phi3, %bb1 ], [ %dummy, %bb ] | ||
%phi5 = phi i8 [ %phi4, %bb1 ], [ %dummy, %bb ] | ||
%phi6 = phi i8 [ %phi5, %bb1 ], [ %dummy, %bb ] | ||
%phi7 = phi i32 [ %add, %bb1 ], [ %tmp, %bb ] | ||
%getelementptr = getelementptr inbounds %struct.quux, ptr null, i32 %arg, i32 12, i32 1, i8 %dummy | ||
store i8 %phi4, ptr %getelementptr, align 1 | ||
%add = add i32 %phi7, -1 | ||
%icmp = icmp eq i32 %add, 0 | ||
br i1 %icmp, label %bb8, label %bb1 | ||
|
||
bb8: ; preds = %bb1 | ||
ret void | ||
} | ||
|
||
attributes #0 = { "target-features"="+v71,-long-calls,-small-data" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the comment one line above is wrong and can be deleted.