Skip to content

Commit 2b6e81e

Browse files
committed
AMDGPU: Add missing fract test
The fract-match tests were missing the case where the fcmp condition and the select operands are both inverted (an ordered compare with the select arms swapped).
1 parent c8c377f commit 2b6e81e

File tree

1 file changed

+111
-0
lines changed

1 file changed

+111
-0
lines changed

llvm/test/CodeGen/AMDGPU/fract-match.ll

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,117 @@ entry:
134134
ret float %cond6
135135
}
136136

137+
; Fract pattern with the NaN guard written as an ordered compare: the select
; takes the clamped x - floor(x) on the true side and %x itself on the false
; side. A second select returns 0.0 when |x| compares equal to infinity.
; The checks below expect the guarded sequence to become
; llvm.amdgcn.fract.f32 in the fract-matching IR run and to stay expanded in
; the GFX6 IR run; the ISA runs other than GFX6 expect v_fract_f32.
; floor(%x) is also stored through %ip, and every run expects the floor
; call/instruction to remain.
define float @safe_math_fract_f32_swap(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
138+
; GFX6-IR-LABEL: define float @safe_math_fract_f32_swap(
139+
; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
140+
; GFX6-IR-NEXT: [[ENTRY:.*:]]
141+
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
142+
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
143+
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
144+
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00
145+
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
146+
; GFX6-IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
147+
; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
148+
; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
149+
; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
150+
; GFX6-IR-NEXT: ret float [[COND6]]
151+
;
152+
; IR-FRACT-LABEL: define float @safe_math_fract_f32_swap(
153+
; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
154+
; IR-FRACT-NEXT: [[ENTRY:.*:]]
155+
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
156+
; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
157+
; IR-FRACT-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
158+
; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
159+
; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
160+
; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
161+
; IR-FRACT-NEXT: ret float [[COND6]]
162+
;
163+
; GFX6-LABEL: safe_math_fract_f32_swap:
164+
; GFX6: ; %bb.0: ; %entry
165+
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166+
; GFX6-NEXT: v_floor_f32_e32 v3, v0
167+
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
168+
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
169+
; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
170+
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
171+
; GFX6-NEXT: s_mov_b32 s6, 0
172+
; GFX6-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
173+
; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
174+
; GFX6-NEXT: s_mov_b32 s7, 0xf000
175+
; GFX6-NEXT: s_mov_b32 s4, s6
176+
; GFX6-NEXT: s_mov_b32 s5, s6
177+
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
178+
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
179+
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
180+
; GFX6-NEXT: s_setpc_b64 s[30:31]
181+
;
182+
; GFX7-LABEL: safe_math_fract_f32_swap:
183+
; GFX7: ; %bb.0: ; %entry
184+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185+
; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
186+
; GFX7-NEXT: s_mov_b32 s6, 0
187+
; GFX7-NEXT: v_fract_f32_e32 v4, v0
188+
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
189+
; GFX7-NEXT: s_mov_b32 s7, 0xf000
190+
; GFX7-NEXT: s_mov_b32 s4, s6
191+
; GFX7-NEXT: s_mov_b32 s5, s6
192+
; GFX7-NEXT: v_floor_f32_e32 v3, v0
193+
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
194+
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
195+
; GFX7-NEXT: s_waitcnt vmcnt(0)
196+
; GFX7-NEXT: s_setpc_b64 s[30:31]
197+
;
198+
; GFX8-LABEL: safe_math_fract_f32_swap:
199+
; GFX8: ; %bb.0: ; %entry
200+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201+
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
202+
; GFX8-NEXT: v_fract_f32_e32 v4, v0
203+
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
204+
; GFX8-NEXT: v_floor_f32_e32 v3, v0
205+
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
206+
; GFX8-NEXT: global_store_dword v[1:2], v3, off
207+
; GFX8-NEXT: s_waitcnt vmcnt(0)
208+
; GFX8-NEXT: s_setpc_b64 s[30:31]
209+
;
210+
; GFX11-LABEL: safe_math_fract_f32_swap:
211+
; GFX11: ; %bb.0: ; %entry
212+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213+
; GFX11-NEXT: v_fract_f32_e32 v3, v0
214+
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
215+
; GFX11-NEXT: v_floor_f32_e32 v4, v0
216+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
217+
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
218+
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
219+
; GFX11-NEXT: s_setpc_b64 s[30:31]
220+
;
221+
; GFX12-LABEL: safe_math_fract_f32_swap:
222+
; GFX12: ; %bb.0: ; %entry
223+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
224+
; GFX12-NEXT: s_wait_expcnt 0x0
225+
; GFX12-NEXT: s_wait_samplecnt 0x0
226+
; GFX12-NEXT: s_wait_bvhcnt 0x0
227+
; GFX12-NEXT: s_wait_kmcnt 0x0
228+
; GFX12-NEXT: v_fract_f32_e32 v3, v0
229+
; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
230+
; GFX12-NEXT: v_floor_f32_e32 v4, v0
231+
; GFX12-NEXT: s_wait_alu 0xfffd
232+
; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
233+
; GFX12-NEXT: global_store_b32 v[1:2], v4, off
234+
; GFX12-NEXT: s_setpc_b64 s[30:31]
235+
entry:
236+
%floor = tail call float @llvm.floor.f32(float %x)
237+
%sub = fsub float %x, %floor
238+
; Clamp x - floor(x) to the largest float below 1.0 (the constant appears as
; 0x3f7fffff in the ISA checks above).
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
239+
; Ordered compare against 0.0: true exactly when %x is not a NaN.
; NOTE(review): the value is named %uno although the predicate is ord.
%uno = fcmp ord float %x, 0.000000e+00
240+
; Non-NaN input takes the clamped fraction; a NaN %x is passed through as is.
%cond = select i1 %uno, float %min, float %x
241+
%fabs = tail call float @llvm.fabs.f32(float %x)
242+
; Infinity guard: |x| equal to +inf selects 0.0 in place of the fraction.
%cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
243+
%cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond
244+
store float %floor, ptr addrspace(1) %ip, align 4
245+
ret float %cond6
246+
}
247+
137248
define float @safe_math_fract_f32_noinf_check(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
138249
; GFX6-IR-LABEL: define float @safe_math_fract_f32_noinf_check(
139250
; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {

0 commit comments

Comments
 (0)