@@ -20,8 +20,6 @@ define amdgpu_ps float @readanylane_to_physical_vgpr(ptr addrspace(1) inreg %ptr
20
20
; CHECK-NEXT: v_mov_b32_e32 v0, 0
21
21
; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc
22
22
; CHECK-NEXT: s_waitcnt vmcnt(0)
23
- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
24
- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
25
23
; CHECK-NEXT: ; return to shader part epilog
26
24
%load = load volatile float , ptr addrspace (1 ) %ptr
27
25
ret float %load
@@ -33,8 +31,6 @@ define amdgpu_ps void @readanylane_to_bitcast_to_virtual_vgpr(ptr addrspace(1) i
33
31
; CHECK-NEXT: v_mov_b32_e32 v0, 0
34
32
; CHECK-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
35
33
; CHECK-NEXT: s_waitcnt vmcnt(0)
36
- ; CHECK-NEXT: v_readfirstlane_b32 s0, v1
37
- ; CHECK-NEXT: v_mov_b32_e32 v1, s0
38
34
; CHECK-NEXT: global_store_dword v0, v1, s[2:3]
39
35
; CHECK-NEXT: s_endpgm
40
36
%load = load volatile <2 x i16 >, ptr addrspace (1 ) %ptr0
@@ -49,8 +45,6 @@ define amdgpu_ps float @readanylane_to_bitcast_to_physical_vgpr(ptr addrspace(1)
49
45
; CHECK-NEXT: v_mov_b32_e32 v0, 0
50
46
; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc
51
47
; CHECK-NEXT: s_waitcnt vmcnt(0)
52
- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
53
- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
54
48
; CHECK-NEXT: ; return to shader part epilog
55
49
%load = load volatile <2 x i16 >, ptr addrspace (1 ) %ptr0
56
50
%bitcast = bitcast <2 x i16 > %load to float
@@ -63,10 +57,6 @@ define amdgpu_ps void @unmerge_readanylane_merge_to_virtual_vgpr(ptr addrspace(1
63
57
; CHECK-NEXT: v_mov_b32_e32 v2, 0
64
58
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
65
59
; CHECK-NEXT: s_waitcnt vmcnt(0)
66
- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
67
- ; CHECK-NEXT: v_readfirstlane_b32 s1, v1
68
- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
69
- ; CHECK-NEXT: v_mov_b32_e32 v1, s1
70
60
; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
71
61
; CHECK-NEXT: s_endpgm
72
62
%load = load volatile i64 , ptr addrspace (1 ) %ptr0
@@ -85,10 +75,6 @@ define amdgpu_ps void @unmerge_readanylane_merge_bitcast_to_virtual_vgpr(ptr add
85
75
; CHECK-NEXT: v_mov_b32_e32 v2, 0
86
76
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
87
77
; CHECK-NEXT: s_waitcnt vmcnt(0)
88
- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
89
- ; CHECK-NEXT: v_readfirstlane_b32 s1, v1
90
- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
91
- ; CHECK-NEXT: v_mov_b32_e32 v1, s1
92
78
; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
93
79
; CHECK-NEXT: s_endpgm
94
80
%load = load volatile <2 x i32 >, ptr addrspace (1 ) %ptr0
@@ -109,9 +95,7 @@ define amdgpu_ps void @unmerge_readanylane_merge_extract_to_virtual_vgpr(ptr add
109
95
; CHECK-NEXT: v_mov_b32_e32 v2, 0
110
96
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
111
97
; CHECK-NEXT: s_waitcnt vmcnt(0)
112
- ; CHECK-NEXT: v_readfirstlane_b32 s0, v1
113
- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
114
- ; CHECK-NEXT: global_store_dword v2, v0, s[2:3]
98
+ ; CHECK-NEXT: global_store_dword v2, v1, s[2:3]
115
99
; CHECK-NEXT: s_endpgm
116
100
%load = load volatile <2 x i32 >, ptr addrspace (1 ) %ptr0
117
101
%extracted = extractelement <2 x i32 > %load , i32 1
@@ -125,8 +109,7 @@ define amdgpu_ps float @unmerge_readanylane_merge_extract_to_physical_vgpr(ptr a
125
109
; CHECK-NEXT: v_mov_b32_e32 v0, 0
126
110
; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
127
111
; CHECK-NEXT: s_waitcnt vmcnt(0)
128
- ; CHECK-NEXT: v_readfirstlane_b32 s0, v1
129
- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
112
+ ; CHECK-NEXT: v_mov_b32_e32 v0, v1
130
113
; CHECK-NEXT: ; return to shader part epilog
131
114
%load = load volatile <2 x float >, ptr addrspace (1 ) %ptr0
132
115
%extracted = extractelement <2 x float > %load , i32 1
@@ -139,8 +122,6 @@ define amdgpu_ps void @unmerge_readanylane_merge_extract_bitcast_to_virtual_vgpr
139
122
; CHECK-NEXT: v_mov_b32_e32 v2, 0
140
123
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
141
124
; CHECK-NEXT: s_waitcnt vmcnt(0)
142
- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
143
- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
144
125
; CHECK-NEXT: global_store_dword v2, v0, s[2:3]
145
126
; CHECK-NEXT: s_endpgm
146
127
%load = load volatile <4 x i16 >, ptr addrspace (1 ) %ptr0
@@ -156,8 +137,6 @@ define amdgpu_ps float @unmerge_readanylane_merge_extract_bitcast_to_physical_vg
156
137
; CHECK-NEXT: v_mov_b32_e32 v0, 0
157
138
; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
158
139
; CHECK-NEXT: s_waitcnt vmcnt(0)
159
- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
160
- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
161
140
; CHECK-NEXT: ; return to shader part epilog
162
141
%load = load volatile <4 x i16 >, ptr addrspace (1 ) %ptr0
163
142
%extracted = shufflevector <4 x i16 > %load , <4 x i16 > %load , <2 x i32 > <i32 0 , i32 1 >
0 commit comments