-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LV] Add early-exit-with-store tests #140899
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Adds some additional LoopVectorizeLegality tests for early exit loops with a store that we don't vectorize.
@llvm/pr-subscribers-llvm-transforms Author: Graham Hunter (huntergr-arm) ChangesAdds some additional LoopVectorizeLegality tests for early exit loops Test precommit split from #137774 Full diff: https://github.com/llvm/llvm-project/pull/140899.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
index de455c81d363e..b6a497fe4a672 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
@@ -470,6 +470,192 @@ loop.end:
ret i64 %retval
}
+define void @loop_contains_store_single_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_single_user'
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_multi_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_multi_user'
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ %unused = add i16 %ee.val, 42
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_fcmp(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp'
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw half, ptr %pred, i64 %iv
+ %ee.val = load half, ptr %ee.addr, align 2
+ %ee.cond = fcmp ugt half %ee.val, 500.0
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+ %forward = getelementptr i16, ptr %pred, i64 -8
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ %some.addr = getelementptr inbounds nuw i16, ptr %forward, i64 %iv
+ store i16 42, ptr %some.addr, align 2
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds'
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+ %n_bytes = mul nuw nsw i32 %n, 2
+ call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ]
+ %tc = sext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, %tc
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile'
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store volatile i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @exit_conditions_combined(ptr noalias dereferenceable(40) %array, ptr readonly align 2 dereferenceable(40) %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'exit_conditions_combined'
+; CHECK: LV: Not vectorizing: Cannot vectorize uncountable loop.
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ %or.cond = select i1 %ee.cond, i1 true, i1 %counted.cond
+ br i1 %or.cond, label %exit, label %for.body
+
+exit: ; preds = %for.body
+ ret void
+}
define i64 @uncountable_exit_in_conditional_block(ptr %mask) {
; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_in_conditional_block'
|
@@ -470,6 +470,192 @@ loop.end: | |||
ret i64 %retval | |||
} | |||
|
|||
define void @loop_contains_store_single_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is already an existing @loop_contains_store
test for a simple store in the loop. Do we still need this test? It looks very similar.
Also what does single_user
refer to in this case because the store can't have users. Are you referring to a single user of something else?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, the name could be clearer, will change it. I'm referring to the number of users of the load that forms the exit IR; in the function above (@loop_contains_store
) %ld1
is used both as part of the exit condition and as data to be stored. The transformation code I have right now checks recipes in the exit IR for a single user (except for the IV), because otherwise I'd need to introduce a PHI node for the values -- after the transform, we're doing the load for the next vector iteration, not the current one.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed the name to @loop_contains_store_condition_load_has_single_user
ret void | ||
} | ||
|
||
define void @loop_contains_store_multi_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Again, would be good to say what this is a user of?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I deleted this one, since @loop_contains_store
already covers the condition load having multiple users.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's probably worth adding these tests too:
- A test for a store to an invariant address if that matters for your early exit work.
- A loop with a store in the latch block instead of the early exiting block.
- A test with a store to an unknown address that isn't safe to vectorise.
- A test with a store where the loop condition is invariant (since I believe this will be important for your follow-on work?).
- Do we need a test that requires runtime memory checks? I don't know if this is possible, but if two
ptr dereferenceable(40)
pointer arguments are passed they could still potentially alias, even if it's something as simple as they are just identical pointers. I just want to make sure we don't silently vectorise such a loop without runtime checks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might be worth pulling out the store related tests into separate files.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added the tests, and moved most store tests to a new file.
ret void | ||
} | ||
|
||
define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about a test with an unsafe dependency?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
* Improved test function names to clarify * Removed redundant test
I left |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the updates! Just a few more comments ...
%ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv | ||
%ee.val = load i16, ptr %ee.addr, align 2 | ||
%ee.cond = icmp sgt i16 %ee.val, 500 | ||
%some.addr = getelementptr inbounds nuw i16, ptr %forward, i64 %iv |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This cannot possibly ever get vectorised, right? Even if we can prove the loop is legal to vectorise with the store, the first 4 stores will be out of bounds of %pred
due to
%forward = getelementptr i16, ptr %pred, i64 -8
I'd expect the dereferenceability checks to fail here. Also, %pred
is marked as readonly
, which doesn't seem right.
You should be able to achieve the same effect by adding an offset to %iv
before doing the load and have the stores trailing, right?
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency' | ||
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. | ||
entry: | ||
%unknown.offset = call i64 @get_an_unknown_offset() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you might need to clamp this to 20 so that the analysis code knows it's guaranteed to be dereferenceable, otherwise this will never be legal to vectorise.
ret void | ||
} | ||
|
||
define void @loop_contains_store_to_pointer_with_no_deref_info(ptr noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this test you're also implicitly testing loads from a pointer with no deref info too. It's probably more concise to have separate arrays for the load and store so that you're strictly limiting it to what you care about?
ret void | ||
} | ||
|
||
define void @loop_contains_store_unknown_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are two problems here, right? The first is the pointers are not known to be dereferenceable at all, and the second is the trip count is unknown. If you specifically want to test the latter, perhaps it's worth marking the pointers as dereferenceable(100) or some other random number? That way the analysis code is guaranteed to get to the point where the trip count matters. I'm just a bit worried the test is relying upon the order in which we do the checks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It just occurred to me as well that it might be worth having a test containing a store in a reverse loop, just in case that matters for your follow-on work?
Adds some additional LoopVectorizeLegality tests for early exit loops
with a store that we don't vectorize.
Test precommit split from #137774