InferAddressSpaces: Fix mishandling stores of pointers to themselves #101877
Conversation
If a store was using the flat pointer also as the value operand, the pass would incorrectly replace both operands with the non-flat pointer. This happened to work correctly for atomics because they place the pointer operand first. We could improve the handling of the multi-pointer-operand cases, but I'll leave that for a separate change.
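For reference, the following is a minimal, self-contained sketch (not part of the patch) of what the one-line change amounts to: it rebuilds the store from the new test with LLVM's C++ API and contrasts `User::replaceUsesOfWith` with `Use::set` when the flat pointer is both the value and the pointer operand. The standalone `main`, the module name, and the omission of alignment are illustrative assumptions only.

```cpp
// Sketch only: builds `store ptr %flat, ptr %flat` by hand and shows why
// rewriting the single Use (the pointer operand) is the correct fix.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("store-pointer-to-self", Ctx);
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
      Function::ExternalLinkage, "store_flat_pointer_to_self", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  // %alloca = alloca ptr, addrspace(5)
  // %flat   = addrspacecast ptr addrspace(5) %alloca to ptr
  PointerType *FlatPtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
  AllocaInst *Alloca =
      B.CreateAlloca(FlatPtrTy, /*AddrSpace=*/5, /*ArraySize=*/nullptr, "alloca");
  Value *Flat = B.CreateAddrSpaceCast(Alloca, FlatPtrTy, "flat");

  // store ptr %flat, ptr %flat -- %flat is both the value and the pointer operand.
  StoreInst *SI = B.CreateStore(Flat, Flat);
  B.CreateRetVoid();

  // Old behavior: replaceUsesOfWith rewrites *every* operand equal to %flat,
  // so the value operand is changed as well and the wrong pointer gets stored:
  //   SI->replaceUsesOfWith(Flat, Alloca);
  //   // -> store ptr addrspace(5) %alloca, ptr addrspace(5) %alloca

  // New behavior: Use::set rewrites only the use being visited -- here the
  // store's pointer operand -- and the stored value stays the flat pointer.
  Use &PtrUse = SI->getOperandUse(StoreInst::getPointerOperandIndex());
  PtrUse.set(Alloca);
  // -> store ptr %flat, ptr addrspace(5) %alloca

  M.print(outs(), nullptr);
  return 0;
}
```

`Use::set` is the narrower operation: the rewrite loop already has the specific `Use` in hand, so there is no need to rescan all operands of the user and risk touching a value operand that happens to be the same pointer.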
@llvm/pr-subscribers-llvm-transforms

Author: Matt Arsenault (arsenm)

Changes

If a store was using the flat pointer also as the value operand, the pass would incorrectly replace both operands with the non-flat pointer. This happened to work correctly for atomics because they place the pointer operand first.

We could improve the handling of the multi-pointer-operand cases, but I'll leave that for a separate change.

Full diff: https://github.com/llvm/llvm-project/pull/101877.diff

2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp (+1-1)
- (added) llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll (+71)
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index c9be8ee00cdc7..6b9566f1ae461 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1233,7 +1233,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
// If V is used as the pointer operand of a compatible memory operation,
// sets the pointer operand to NewV. This replacement does not change
// the element type, so the resultant load/store is still valid.
- CurUser->replaceUsesOfWith(V, NewV);
+ U.set(NewV);
continue;
}
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
new file mode 100644
index 0000000000000..bce0e4ec1fe16
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+; Make sure memory instructions where the pointer appears in both a
+; pointer and value operand work correctly.
+
+declare void @user(ptr)
+
+; Make sure only the pointer operand use of the store is replaced
+define void @store_flat_pointer_to_self() {
+; CHECK-LABEL: define void @store_flat_pointer_to_self() {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT]])
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ store ptr %flat, ptr %flat, align 8
+ call void @user(ptr %flat)
+ ret void
+}
+
+; FIXME: Should be able to optimize the pointer operand to flat.
+define ptr @atomicrmw_xchg_flat_pointer_to_self() {
+; CHECK-LABEL: define ptr @atomicrmw_xchg_flat_pointer_to_self() {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[XCHG:%.*]] = atomicrmw xchg ptr [[FLAT]], ptr [[FLAT]] seq_cst, align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT]])
+; CHECK-NEXT: ret ptr [[XCHG]]
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ %xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8
+ call void @user(ptr %flat)
+ ret ptr %xchg
+}
+
+define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(ptr %cmp) {
+; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(
+; CHECK-SAME: ptr [[CMP:%.*]]) {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT]])
+; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ %cmpx = cmpxchg ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
+ call void @user(ptr %flat)
+ ret { ptr, i1 } %cmpx
+}
+
+define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(ptr %new) {
+; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(
+; CHECK-SAME: ptr [[NEW:%.*]]) {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[FLAT]], ptr [[NEW]] seq_cst seq_cst, align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT]])
+; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ %cmpx = cmpxchg ptr %flat, ptr %flat, ptr %new seq_cst seq_cst, align 8
+ call void @user(ptr %flat)
+ ret { ptr, i1 } %cmpx
+}
Fixes the failures I observed locally, thanks!
…lvm#101877) (cherry picked from commit 3c483b8)
If a store was using the flat pointer also as the value operand, the pass would incorrectly replace both operands with the non-flat pointer. This happened to work correctly for atomics because they place the pointer operand first.

We could improve the handling of the multi-pointer-operand cases, but I'll leave that for a separate change.