Skip to content

[LV] Add early-exit-with-store tests #140899

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from

Conversation

huntergr-arm
Copy link
Collaborator

Adds some additional LoopVectorizeLegality tests for early exit loops
with a store that we don't vectorize.

Test precommit split from #137774

Adds some additional LoopVectorizeLegality tests for early exit loops
with a store that we don't vectorize.
@llvmbot
Copy link
Member

llvmbot commented May 21, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Graham Hunter (huntergr-arm)

Changes

Adds some additional LoopVectorizeLegality tests for early exit loops
with a store that we don't vectorize.

Test precommit split from #137774


Full diff: https://github.com/llvm/llvm-project/pull/140899.diff

1 Files Affected:

  • (modified) llvm/test/Transforms/LoopVectorize/early_exit_legality.ll (+186)
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
index de455c81d363e..b6a497fe4a672 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
@@ -470,6 +470,192 @@ loop.end:
   ret i64 %retval
 }
 
+define void @loop_contains_store_single_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_single_user'
+; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_multi_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_multi_user'
+; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  %unused = add i16 %ee.val, 42
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_fcmp(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp'
+; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw half, ptr %pred, i64 %iv
+  %ee.val = load half, ptr %ee.addr, align 2
+  %ee.cond = fcmp ugt half %ee.val, 500.0
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
+; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+  %forward = getelementptr i16, ptr %pred, i64 -8
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  %some.addr = getelementptr inbounds nuw i16, ptr %forward, i64 %iv
+  store i16 42, ptr %some.addr, align 2
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds'
+; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+  %n_bytes = mul nuw nsw i32 %n, 2
+  call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ]
+  %tc = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, %tc
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile'
+; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store volatile i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @exit_conditions_combined(ptr noalias dereferenceable(40) %array, ptr readonly align 2 dereferenceable(40) %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'exit_conditions_combined'
+; CHECK:       LV: Not vectorizing: Cannot vectorize uncountable loop.
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  %or.cond = select i1 %ee.cond, i1 true, i1 %counted.cond
+  br i1 %or.cond, label %exit, label %for.body
+
+exit:                                             ; preds = %for.body
+  ret void
+}
 
 define i64 @uncountable_exit_in_conditional_block(ptr %mask) {
 ; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_in_conditional_block'

@@ -470,6 +470,192 @@ loop.end:
ret i64 %retval
}

define void @loop_contains_store_single_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is already an existing @loop_contains_store test for a simple store in the loop. Do we still need this test? It looks very similar.

Also what does single_user refer to in this case because the store can't have users. Are you referring to a single user of something else?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the name could be clearer, will change it. I'm referring to the number of users of the load that forms the exit IR; in the function above (@loop_contains_store) %ld1 is used both as part of the exit condition and as data to be stored. The transformation code I have right now checks recipes in the exit IR for a single user (except for the IV), because otherwise I'd need to introduce a PHI node for the values -- after the transform, we're doing the load for the next vector iteration, not the current one.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the name to @loop_contains_store_condition_load_has_single_user

ret void
}

define void @loop_contains_store_multi_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, would be good to say what this is a user of?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I deleted this one, since @loop_contains_store already covers the condition load having multiple users.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably worth adding these tests too:

  1. A test for a store to an invariant address if that matters for your early exit work.
  2. A loop with a store in the latch block instead of the early exiting block.
  3. A test with a store to an unknown address that isn't safe to vectorise.
  4. A test with a store where the loop condition is invariant (since I believe this will be important for your follow-on work?).
  5. Do we need a test that requires runtime memory checks? I don't know if this is possible, but if two ptr dereferenceable(40) pointer arguments are passed they could still potentially alias, even if it's something as simple as they are just identical pointers. I just want to make sure we don't silently vectorise such a loop without runtime checks.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth pulling out the store related tests into separate files.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the tests, and moved most store tests to a new file.

ret void
}

define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about a test with an unsafe dependency?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

@huntergr-arm
Copy link
Collaborator Author

I left @exit_conditions_combined_in_single_branch in early_exit_legality.ll, since the point of the test is having two exit conditions for a single branch (which just happens to have a store in it; could be any operation producing a side effect).

Copy link
Contributor

@david-arm david-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the updates! Just a few more comments ...

%ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
%ee.val = load i16, ptr %ee.addr, align 2
%ee.cond = icmp sgt i16 %ee.val, 500
%some.addr = getelementptr inbounds nuw i16, ptr %forward, i64 %iv
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This cannot possibly ever get vectorised, right? Even if we can prove the loop is legal to vectorise with the store, the first 4 stores will be out of bounds of %pred due to

  %forward = getelementptr i16, ptr %pred, i64 -8

I'd expect the dereferenceability checks to fail here. Also, %pred is marked as readonly, which doesn't seem right.

You should be able to achieve the same effect by adding an offset to %iv before doing the load and have the stores trailing, right?

; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
entry:
%unknown.offset = call i64 @get_an_unknown_offset()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you might need to clamp this to 20 so that the analysis code knows it's guaranteed to be dereferenceable, otherwise this will never be legal to vectorise.

ret void
}

define void @loop_contains_store_to_pointer_with_no_deref_info(ptr noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this test you're also implicitly testing loads from a pointer with no deref info too. It's probably more concise to have separate arrays for the load and store so that you're strictly limiting it to what you care about?

ret void
}

define void @loop_contains_store_unknown_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two problems here, right? The first is the pointers are not known to be dereferenceable at all, and the second is the trip count is unknown. If you specifically want to test the latter, perhaps it's worth marking the pointers as dereferenceable(100) or some other random number? That way the analysis code is guaranteed to get to the point where the trip count matters. I'm just a bit worried the test is relying upon the order in which we do the checks.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It just occurred to me as well that it might be worth having a test containing a store in a reverse loop, just in case that matters for your follow-on work?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants