33; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX942 %s
44; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s
55; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s
6- ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
6+ ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
7+ ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
78; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
89
910declare float @llvm.amdgcn.cvt.f32.bf8 (i32 , i32 )
@@ -275,17 +276,29 @@ define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
275276; GFX9X-NEXT: v_mov_b32_e32 v0, v2
276277; GFX9X-NEXT: s_setpc_b64 s[30:31]
277278;
278- ; GFX12-LABEL: test_cvt_pk_bf8_f32_word0:
279- ; GFX12: ; %bb.0:
280- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
281- ; GFX12-NEXT: s_wait_expcnt 0x0
282- ; GFX12-NEXT: s_wait_samplecnt 0x0
283- ; GFX12-NEXT: s_wait_bvhcnt 0x0
284- ; GFX12-NEXT: s_wait_kmcnt 0x0
285- ; GFX12-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1
286- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
287- ; GFX12-NEXT: v_mov_b32_e32 v0, v2
288- ; GFX12-NEXT: s_setpc_b64 s[30:31]
279+ ; GFX12-TRUE16-LABEL: test_cvt_pk_bf8_f32_word0:
280+ ; GFX12-TRUE16: ; %bb.0:
281+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
282+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
283+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
284+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
285+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
286+ ; GFX12-TRUE16-NEXT: v_cvt_pk_bf8_f32 v2.l, v0, v1
287+ ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
288+ ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
289+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
290+ ;
291+ ; GFX12-FAKE16-LABEL: test_cvt_pk_bf8_f32_word0:
292+ ; GFX12-FAKE16: ; %bb.0:
293+ ; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
294+ ; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
295+ ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
296+ ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
297+ ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
298+ ; GFX12-FAKE16-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1
299+ ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
300+ ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
301+ ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
289302 %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32 (float %x , float %y , i32 %old , i1 false )
290303 ret i32 %ret
291304}
@@ -299,17 +312,29 @@ define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
299312; GFX9X-NEXT: v_mov_b32_e32 v0, v2
300313; GFX9X-NEXT: s_setpc_b64 s[30:31]
301314;
302- ; GFX12-LABEL: test_cvt_pk_bf8_f32_word1:
303- ; GFX12: ; %bb.0:
304- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
305- ; GFX12-NEXT: s_wait_expcnt 0x0
306- ; GFX12-NEXT: s_wait_samplecnt 0x0
307- ; GFX12-NEXT: s_wait_bvhcnt 0x0
308- ; GFX12-NEXT: s_wait_kmcnt 0x0
309- ; GFX12-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
310- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
311- ; GFX12-NEXT: v_mov_b32_e32 v0, v2
312- ; GFX12-NEXT: s_setpc_b64 s[30:31]
315+ ; GFX12-TRUE16-LABEL: test_cvt_pk_bf8_f32_word1:
316+ ; GFX12-TRUE16: ; %bb.0:
317+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
318+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
319+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
320+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
321+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
322+ ; GFX12-TRUE16-NEXT: v_cvt_pk_bf8_f32 v2.h, v0, v1 op_sel:[0,0,1]
323+ ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
324+ ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
325+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
326+ ;
327+ ; GFX12-FAKE16-LABEL: test_cvt_pk_bf8_f32_word1:
328+ ; GFX12-FAKE16: ; %bb.0:
329+ ; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
330+ ; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
331+ ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
332+ ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
333+ ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
334+ ; GFX12-FAKE16-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
335+ ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
336+ ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
337+ ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
313338 %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32 (float %x , float %y , i32 %old , i1 true )
314339 ret i32 %ret
315340}
@@ -322,17 +347,29 @@ define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
322347; GFX9X-NEXT: v_mov_b32_e32 v0, v2
323348; GFX9X-NEXT: s_setpc_b64 s[30:31]
324349;
325- ; GFX12-LABEL: test_cvt_pk_fp8_f32_word0:
326- ; GFX12: ; %bb.0:
327- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
328- ; GFX12-NEXT: s_wait_expcnt 0x0
329- ; GFX12-NEXT: s_wait_samplecnt 0x0
330- ; GFX12-NEXT: s_wait_bvhcnt 0x0
331- ; GFX12-NEXT: s_wait_kmcnt 0x0
332- ; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1
333- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
334- ; GFX12-NEXT: v_mov_b32_e32 v0, v2
335- ; GFX12-NEXT: s_setpc_b64 s[30:31]
350+ ; GFX12-TRUE16-LABEL: test_cvt_pk_fp8_f32_word0:
351+ ; GFX12-TRUE16: ; %bb.0:
352+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
353+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
354+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
355+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
356+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
357+ ; GFX12-TRUE16-NEXT: v_cvt_pk_fp8_f32 v2.l, v0, v1
358+ ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
359+ ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
360+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
361+ ;
362+ ; GFX12-FAKE16-LABEL: test_cvt_pk_fp8_f32_word0:
363+ ; GFX12-FAKE16: ; %bb.0:
364+ ; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
365+ ; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
366+ ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
367+ ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
368+ ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
369+ ; GFX12-FAKE16-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1
370+ ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
371+ ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
372+ ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
336373 %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32 (float %x , float %y , i32 %old , i1 false )
337374 ret i32 %ret
338375}
@@ -346,17 +383,29 @@ define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
346383; GFX9X-NEXT: v_mov_b32_e32 v0, v2
347384; GFX9X-NEXT: s_setpc_b64 s[30:31]
348385;
349- ; GFX12-LABEL: test_cvt_pk_fp8_f32_word1:
350- ; GFX12: ; %bb.0:
351- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
352- ; GFX12-NEXT: s_wait_expcnt 0x0
353- ; GFX12-NEXT: s_wait_samplecnt 0x0
354- ; GFX12-NEXT: s_wait_bvhcnt 0x0
355- ; GFX12-NEXT: s_wait_kmcnt 0x0
356- ; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
357- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
358- ; GFX12-NEXT: v_mov_b32_e32 v0, v2
359- ; GFX12-NEXT: s_setpc_b64 s[30:31]
386+ ; GFX12-TRUE16-LABEL: test_cvt_pk_fp8_f32_word1:
387+ ; GFX12-TRUE16: ; %bb.0:
388+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
389+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
390+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
391+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
392+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
393+ ; GFX12-TRUE16-NEXT: v_cvt_pk_fp8_f32 v2.h, v0, v1 op_sel:[0,0,1]
394+ ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
395+ ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
396+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
397+ ;
398+ ; GFX12-FAKE16-LABEL: test_cvt_pk_fp8_f32_word1:
399+ ; GFX12-FAKE16: ; %bb.0:
400+ ; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
401+ ; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
402+ ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
403+ ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
404+ ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
405+ ; GFX12-FAKE16-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
406+ ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
407+ ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
408+ ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
360409 %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32 (float %x , float %y , i32 %old , i1 true )
361410 ret i32 %ret
362411}
0 commit comments