@@ -48,7 +48,8 @@ define amdgpu_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.
48
48
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group (ptr %dest , ptr addrspace (3 ) %src.group.ptr , i64 %size ) #0 {
49
49
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(
50
50
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
51
- ; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
51
+ ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
52
+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
52
53
; CHECK-NEXT: ret void
53
54
;
54
55
%cast.src = addrspacecast ptr addrspace (3 ) %src.group.ptr to ptr
@@ -59,7 +60,8 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest,
59
60
define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group (ptr %dest , ptr addrspace (3 ) %src.group.ptr ) #0 {
60
61
; CHECK-LABEL: define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(
61
62
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]]) #[[ATTR0]] {
62
- ; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
63
+ ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
64
+ ; CHECK-NEXT: call void @llvm.memcpy.inline.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
63
65
; CHECK-NEXT: ret void
64
66
;
65
67
%cast.src = addrspacecast ptr addrspace (3 ) %src.group.ptr to ptr
@@ -70,7 +72,8 @@ define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr
70
72
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group (ptr addrspace (3 ) %dest.group.ptr , ptr %src.ptr , i64 %size ) #0 {
71
73
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(
72
74
; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr [[SRC_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
73
- ; CHECK-NEXT: call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr align 4 [[SRC_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
75
+ ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[SRC_PTR]] to ptr addrspace(1)
76
+ ; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
74
77
; CHECK-NEXT: ret void
75
78
;
76
79
%cast.dest = addrspacecast ptr addrspace (3 ) %dest.group.ptr to ptr
@@ -116,7 +119,8 @@ define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspac
116
119
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct (ptr %dest , ptr addrspace (3 ) %src.group.ptr , i64 %size ) #0 {
117
120
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
118
121
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
119
- ; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
122
+ ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
123
+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
120
124
; CHECK-NEXT: ret void
121
125
;
122
126
%cast.src = addrspacecast ptr addrspace (3 ) %src.group.ptr to ptr
@@ -127,7 +131,8 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struc
127
131
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md (ptr %dest , ptr addrspace (3 ) %src.group.ptr , i64 %size ) #0 {
128
132
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(
129
133
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
130
- ; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
134
+ ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
135
+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
131
136
; CHECK-NEXT: ret void
132
137
;
133
138
%cast.src = addrspacecast ptr addrspace (3 ) %src.group.ptr to ptr
@@ -138,8 +143,10 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr
138
143
define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md (ptr %dest0 , ptr %dest1 , ptr addrspace (3 ) %src.group.ptr , i64 %size ) #0 {
139
144
; CHECK-LABEL: define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
140
145
; CHECK-SAME: ptr [[DEST0:%.*]], ptr [[DEST1:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
141
- ; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST0]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
142
- ; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
146
+ ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST0]] to ptr addrspace(1)
147
+ ; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DEST1]] to ptr addrspace(1)
148
+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
149
+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP2]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
143
150
; CHECK-NEXT: ret void
144
151
;
145
152
%cast.src = addrspacecast ptr addrspace (3 ) %src.group.ptr to ptr
@@ -162,7 +169,8 @@ define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %grou
162
169
define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group (ptr %dest , ptr addrspace (3 ) %src.group.ptr , i64 %size ) #0 {
163
170
; CHECK-LABEL: define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(
164
171
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
165
- ; CHECK-NEXT: call void @llvm.memmove.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
172
+ ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
173
+ ; CHECK-NEXT: call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
166
174
; CHECK-NEXT: ret void
167
175
;
168
176
%cast.src = addrspacecast ptr addrspace (3 ) %src.group.ptr to ptr
0 commit comments