@@ -134,6 +134,117 @@ entry:
134
134
ret float %cond6
135
135
}
136
136
137
+ ; Fract pattern whose NaN guard is written as `fcmp ord` with the select arms
+ ; in (min, x) order. The returned value is minnum(x - floor(x), largest f32
+ ; below 1.0) when x is ordered, x itself when x is NaN, and 0.0 when |x| is
+ ; +inf. floor(x) is additionally stored through %ip.
+ ; Per the assertions below, the IR-FRACT checks expect the sub/minnum/select
+ ; sequence to become a call to llvm.amdgcn.fract.f32, while the GFX6 IR checks
+ ; expect the expanded sequence to be kept.
+ define float @safe_math_fract_f32_swap(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
138
+ ; GFX6-IR-LABEL: define float @safe_math_fract_f32_swap(
139
+ ; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
140
+ ; GFX6-IR-NEXT: [[ENTRY:.*:]]
141
+ ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
142
+ ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
143
+ ; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
144
+ ; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00
145
+ ; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
146
+ ; GFX6-IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
147
+ ; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
148
+ ; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
149
+ ; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
150
+ ; GFX6-IR-NEXT: ret float [[COND6]]
151
+ ;
152
+ ; IR-FRACT-LABEL: define float @safe_math_fract_f32_swap(
153
+ ; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
154
+ ; IR-FRACT-NEXT: [[ENTRY:.*:]]
155
+ ; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
156
+ ; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
157
+ ; IR-FRACT-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
158
+ ; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
159
+ ; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
160
+ ; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
161
+ ; IR-FRACT-NEXT: ret float [[COND6]]
162
+ ;
163
+ ; GFX6-LABEL: safe_math_fract_f32_swap:
164
+ ; GFX6: ; %bb.0: ; %entry
165
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166
+ ; GFX6-NEXT: v_floor_f32_e32 v3, v0
167
+ ; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
168
+ ; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
169
+ ; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
170
+ ; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
171
+ ; GFX6-NEXT: s_mov_b32 s6, 0
172
+ ; GFX6-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
173
+ ; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
174
+ ; GFX6-NEXT: s_mov_b32 s7, 0xf000
175
+ ; GFX6-NEXT: s_mov_b32 s4, s6
176
+ ; GFX6-NEXT: s_mov_b32 s5, s6
177
+ ; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
178
+ ; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
179
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
180
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
181
+ ;
182
+ ; GFX7-LABEL: safe_math_fract_f32_swap:
183
+ ; GFX7: ; %bb.0: ; %entry
184
+ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185
+ ; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
186
+ ; GFX7-NEXT: s_mov_b32 s6, 0
187
+ ; GFX7-NEXT: v_fract_f32_e32 v4, v0
188
+ ; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
189
+ ; GFX7-NEXT: s_mov_b32 s7, 0xf000
190
+ ; GFX7-NEXT: s_mov_b32 s4, s6
191
+ ; GFX7-NEXT: s_mov_b32 s5, s6
192
+ ; GFX7-NEXT: v_floor_f32_e32 v3, v0
193
+ ; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
194
+ ; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
195
+ ; GFX7-NEXT: s_waitcnt vmcnt(0)
196
+ ; GFX7-NEXT: s_setpc_b64 s[30:31]
197
+ ;
198
+ ; GFX8-LABEL: safe_math_fract_f32_swap:
199
+ ; GFX8: ; %bb.0: ; %entry
200
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201
+ ; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
202
+ ; GFX8-NEXT: v_fract_f32_e32 v4, v0
203
+ ; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
204
+ ; GFX8-NEXT: v_floor_f32_e32 v3, v0
205
+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
206
+ ; GFX8-NEXT: global_store_dword v[1:2], v3, off
207
+ ; GFX8-NEXT: s_waitcnt vmcnt(0)
208
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
209
+ ;
210
+ ; GFX11-LABEL: safe_math_fract_f32_swap:
211
+ ; GFX11: ; %bb.0: ; %entry
212
+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213
+ ; GFX11-NEXT: v_fract_f32_e32 v3, v0
214
+ ; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
215
+ ; GFX11-NEXT: v_floor_f32_e32 v4, v0
216
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
217
+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
218
+ ; GFX11-NEXT: global_store_b32 v[1:2], v4, off
219
+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
220
+ ;
221
+ ; GFX12-LABEL: safe_math_fract_f32_swap:
222
+ ; GFX12: ; %bb.0: ; %entry
223
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
224
+ ; GFX12-NEXT: s_wait_expcnt 0x0
225
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
226
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
227
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
228
+ ; GFX12-NEXT: v_fract_f32_e32 v3, v0
229
+ ; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
230
+ ; GFX12-NEXT: v_floor_f32_e32 v4, v0
231
+ ; GFX12-NEXT: s_wait_alu 0xfffd
232
+ ; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
233
+ ; GFX12-NEXT: global_store_b32 v[1:2], v4, off
234
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
235
+ entry:
236
+ %floor = tail call float @llvm.floor.f32(float %x)
237
+ %sub = fsub float %x, %floor
238
+ %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
+ ; NOTE: despite its name, %uno holds an `ord` result; the select below
+ ; therefore picks %min for ordered inputs and passes %x through otherwise.
239
+ %uno = fcmp ord float %x, 0.000000e+00
240
+ %cond = select i1 %uno, float %min, float %x
241
+ %fabs = tail call float @llvm.fabs.f32(float %x)
242
+ %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
243
+ %cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond
+ ; The floor result stays live through this store, independent of the fract result.
244
+ store float %floor, ptr addrspace(1) %ip, align 4
245
+ ret float %cond6
246
+ }
247
+
137
248
define float @safe_math_fract_f32_noinf_check (float %x , ptr addrspace (1 ) writeonly captures(none) %ip ) {
138
249
; GFX6-IR-LABEL: define float @safe_math_fract_f32_noinf_check(
139
250
; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
0 commit comments