
Conversation

@arsenm (Contributor) commented Aug 5, 2024

No description provided.

@arsenm arsenm marked this pull request as ready for review August 5, 2024 16:57
@llvmbot (Member) commented Aug 5, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/101982.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp (+8)
  • (added) llvm/test/Transforms/InferAddressSpaces/AMDGPU/prefetch.ll (+59)
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 830c15249582c..87b885447cc02 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -401,6 +401,13 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
     II->setCalledFunction(NewDecl);
     return true;
   }
+  case Intrinsic::prefetch: {
+    Function *NewDecl =
+        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {NewV->getType()});
+    II->setArgOperand(0, NewV);
+    II->setCalledFunction(NewDecl);
+    return true;
+  }
   default: {
     Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
     if (!Rewrite)
@@ -423,6 +430,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
                                                  PostorderStack, Visited);
     break;
   case Intrinsic::masked_gather:
+  case Intrinsic::prefetch:
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                  PostorderStack, Visited);
     break;
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/prefetch.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/prefetch.ll
new file mode 100644
index 0000000000000..e2c12c4c37141
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/prefetch.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+define void @prefetch_shared_to_flat(ptr addrspace(3) %group.ptr) {
+; CHECK-LABEL: define void @prefetch_shared_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]]) {
+; CHECK-NEXT:    tail call void @llvm.prefetch.p3(ptr addrspace(3) [[GROUP_PTR]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
+  tail call void @llvm.prefetch.p0(ptr %cast, i32 0, i32 0, i32 1)
+  ret void
+}
+
+define void @prefetch_global_to_flat(ptr addrspace(1) %global.ptr) {
+; CHECK-LABEL: define void @prefetch_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]]) {
+; CHECK-NEXT:    tail call void @llvm.prefetch.p1(ptr addrspace(1) [[GLOBAL_PTR]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
+  tail call void @llvm.prefetch.p0(ptr addrspace(0) %cast, i32 0, i32 0, i32 1)
+  ret void
+}
+
+define void @prefetch_constant_to_flat(ptr addrspace(4) %const.ptr) {
+; CHECK-LABEL: define void @prefetch_constant_to_flat(
+; CHECK-SAME: ptr addrspace(4) [[CONST_PTR:%.*]]) {
+; CHECK-NEXT:    tail call void @llvm.prefetch.p4(ptr addrspace(4) [[CONST_PTR]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr addrspace(4) %const.ptr to ptr
+  tail call void @llvm.prefetch.p0(ptr %cast, i32 0, i32 0, i32 1)
+  ret void
+}
+
+define void @prefetch_flat_to_shared(ptr %flat.ptr) {
+; CHECK-LABEL: define void @prefetch_flat_to_shared(
+; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) {
+; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
+; CHECK-NEXT:    tail call void @llvm.prefetch.p3(ptr addrspace(3) [[CAST]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr %flat.ptr to ptr addrspace(3)
+  tail call void @llvm.prefetch.p3(ptr addrspace(3) %cast, i32 0, i32 0, i32 1)
+  ret void
+}
+
+define void @prefetch_flat_to_global(ptr %flat.ptr) {
+; CHECK-LABEL: define void @prefetch_flat_to_global(
+; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) {
+; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(1)
+; CHECK-NEXT:    tail call void @llvm.prefetch.p1(ptr addrspace(1) [[CAST]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr %flat.ptr to ptr addrspace(1)
+  tail call void @llvm.prefetch.p1(ptr addrspace(1) %cast, i32 0, i32 0, i32 1)
+  ret void
+}

@arsenm (Contributor, Author) commented Aug 5, 2024

Merge activity

  • Aug 5, 4:10 PM EDT: @arsenm started a stack merge that includes this pull request via Graphite.
  • Aug 5, 4:12 PM EDT: Graphite rebased this pull request as part of a merge.
  • Aug 5, 4:14 PM EDT: @arsenm merged this pull request with Graphite.

@arsenm force-pushed the users/arsenm/infer-address-spaces-handle-prefetch branch from 2df6107 to cd69508 on August 5, 2024 20:11
@arsenm merged commit f01a6f5 into main on August 5, 2024
@arsenm deleted the users/arsenm/infer-address-spaces-handle-prefetch branch on August 5, 2024 20:14
