|
3 | 3 | ; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
|
4 | 4 | ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s
|
5 | 5 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
|
6 |
| -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s |
| 6 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s |
| 7 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s |
7 | 8 |
|
8 | 9 | define <3 x half> @bitcast_v3bf16_to_v3f16(<3 x bfloat> %a, i32 %b) {
|
9 | 10 | ; GCN-LABEL: bitcast_v3bf16_to_v3f16:
|
@@ -134,47 +135,92 @@ define <3 x half> @bitcast_v3bf16_to_v3f16(<3 x bfloat> %a, i32 %b) {
|
134 | 135 | ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
135 | 136 | ; GFX9-NEXT: s_setpc_b64 s[30:31]
|
136 | 137 | ;
|
137 |
| -; GFX11-LABEL: bitcast_v3bf16_to_v3f16: |
138 |
| -; GFX11: ; %bb.0: |
139 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
140 |
| -; GFX11-NEXT: s_mov_b32 s0, exec_lo |
141 |
| -; GFX11-NEXT: v_cmpx_ne_u32_e32 0, v2 |
142 |
| -; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 |
143 |
| -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
144 |
| -; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 |
145 |
| -; GFX11-NEXT: s_cbranch_execz .LBB0_2 |
146 |
| -; GFX11-NEXT: ; %bb.1: ; %cmp.true |
147 |
| -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
148 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
149 |
| -; GFX11-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v0 |
150 |
| -; GFX11-NEXT: v_add_f32_e32 v2, 0x40c00000, v2 |
151 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) |
152 |
| -; GFX11-NEXT: v_bfe_u32 v4, v1, 16, 1 |
153 |
| -; GFX11-NEXT: v_or_b32_e32 v6, 0x400000, v1 |
154 |
| -; GFX11-NEXT: v_bfe_u32 v3, v2, 16, 1 |
155 |
| -; GFX11-NEXT: v_or_b32_e32 v7, 0x400000, v2 |
156 |
| -; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 |
157 |
| -; GFX11-NEXT: v_add3_u32 v4, v4, v1, 0x7fff |
158 |
| -; GFX11-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 |
159 |
| -; GFX11-NEXT: v_add3_u32 v3, v3, v2, 0x7fff |
160 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
161 |
| -; GFX11-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 |
162 |
| -; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo |
163 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) |
164 |
| -; GFX11-NEXT: v_bfe_u32 v5, v0, 16, 1 |
165 |
| -; GFX11-NEXT: v_or_b32_e32 v8, 0x400000, v0 |
166 |
| -; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 |
167 |
| -; GFX11-NEXT: v_add3_u32 v5, v5, v0, 0x7fff |
168 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) |
169 |
| -; GFX11-NEXT: v_cndmask_b32_e32 v0, v5, v8, vcc_lo |
170 |
| -; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 |
171 |
| -; GFX11-NEXT: v_perm_b32 v0, v0, v2, 0x7060302 |
172 |
| -; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo |
173 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
174 |
| -; GFX11-NEXT: v_alignbit_b32 v1, 0x7fc0, v1, 16 |
175 |
| -; GFX11-NEXT: .LBB0_2: ; %end |
176 |
| -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
177 |
| -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 138 | +; GFX11-TRUE16-LABEL: bitcast_v3bf16_to_v3f16: |
| 139 | +; GFX11-TRUE16: ; %bb.0: |
| 140 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 141 | +; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo |
| 142 | +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v2 |
| 143 | +; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 |
| 144 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| 145 | +; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 |
| 146 | +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB0_2 |
| 147 | +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.true |
| 148 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l |
| 149 | +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| 150 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 151 | +; GFX11-TRUE16-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v2 |
| 152 | +; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2 |
| 153 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) |
| 154 | +; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1 |
| 155 | +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v1 |
| 156 | +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1 |
| 157 | +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2 |
| 158 | +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 |
| 159 | +; GFX11-TRUE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff |
| 160 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 161 | +; GFX11-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff |
| 162 | +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v2, v3, v7 :: v_dual_mov_b32 v3, 0x7fc0 |
| 163 | +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 |
| 164 | +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 |
| 165 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| 166 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h |
| 167 | +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v1, v4, v5 :: v_dual_add_f32 v0, 0x40c00000, v0 |
| 168 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| 169 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h |
| 170 | +; GFX11-TRUE16-NEXT: v_bfe_u32 v6, v0, 16, 1 |
| 171 | +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 |
| 172 | +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 |
| 173 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v3.l |
| 174 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 175 | +; GFX11-TRUE16-NEXT: v_add3_u32 v6, v6, v0, 0x7fff |
| 176 | +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo |
| 177 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 178 | +; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
| 179 | +; GFX11-TRUE16-NEXT: .LBB0_2: ; %end |
| 180 | +; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
| 181 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 182 | +; |
| 183 | +; GFX11-FAKE16-LABEL: bitcast_v3bf16_to_v3f16: |
| 184 | +; GFX11-FAKE16: ; %bb.0: |
| 185 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 186 | +; GFX11-FAKE16-NEXT: s_mov_b32 s0, exec_lo |
| 187 | +; GFX11-FAKE16-NEXT: v_cmpx_ne_u32_e32 0, v2 |
| 188 | +; GFX11-FAKE16-NEXT: s_xor_b32 s0, exec_lo, s0 |
| 189 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| 190 | +; GFX11-FAKE16-NEXT: s_and_not1_saveexec_b32 s0, s0 |
| 191 | +; GFX11-FAKE16-NEXT: s_cbranch_execz .LBB0_2 |
| 192 | +; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.true |
| 193 | +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| 194 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 195 | +; GFX11-FAKE16-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v0 |
| 196 | +; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2 |
| 197 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) |
| 198 | +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v1, 16, 1 |
| 199 | +; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, 0x400000, v1 |
| 200 | +; GFX11-FAKE16-NEXT: v_bfe_u32 v3, v2, 16, 1 |
| 201 | +; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v2 |
| 202 | +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 |
| 203 | +; GFX11-FAKE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff |
| 204 | +; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 |
| 205 | +; GFX11-FAKE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff |
| 206 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| 207 | +; GFX11-FAKE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 |
| 208 | +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo |
| 209 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) |
| 210 | +; GFX11-FAKE16-NEXT: v_bfe_u32 v5, v0, 16, 1 |
| 211 | +; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 |
| 212 | +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 |
| 213 | +; GFX11-FAKE16-NEXT: v_add3_u32 v5, v5, v0, 0x7fff |
| 214 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) |
| 215 | +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v5, v8, vcc_lo |
| 216 | +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 |
| 217 | +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x7060302 |
| 218 | +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo |
| 219 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 220 | +; GFX11-FAKE16-NEXT: v_alignbit_b32 v1, 0x7fc0, v1, 16 |
| 221 | +; GFX11-FAKE16-NEXT: .LBB0_2: ; %end |
| 222 | +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
| 223 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
178 | 224 | %cmp = icmp eq i32 %b, 0
|
179 | 225 | br i1 %cmp, label %cmp.true, label %cmp.false
|
180 | 226 |
|
@@ -426,47 +472,95 @@ define <3 x i16> @bitcast_v3bf16_to_v3i16(<3 x bfloat> %a, i32 %b) {
|
426 | 472 | ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
427 | 473 | ; GFX9-NEXT: s_setpc_b64 s[30:31]
|
428 | 474 | ;
|
429 |
| -; GFX11-LABEL: bitcast_v3bf16_to_v3i16: |
430 |
| -; GFX11: ; %bb.0: |
431 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
432 |
| -; GFX11-NEXT: s_mov_b32 s0, exec_lo |
433 |
| -; GFX11-NEXT: v_cmpx_ne_u32_e32 0, v2 |
434 |
| -; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 |
435 |
| -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
436 |
| -; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 |
437 |
| -; GFX11-NEXT: s_cbranch_execz .LBB2_2 |
438 |
| -; GFX11-NEXT: ; %bb.1: ; %cmp.true |
439 |
| -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
440 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
441 |
| -; GFX11-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v0 |
442 |
| -; GFX11-NEXT: v_add_f32_e32 v2, 0x40c00000, v2 |
443 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) |
444 |
| -; GFX11-NEXT: v_bfe_u32 v4, v1, 16, 1 |
445 |
| -; GFX11-NEXT: v_or_b32_e32 v6, 0x400000, v1 |
446 |
| -; GFX11-NEXT: v_bfe_u32 v3, v2, 16, 1 |
447 |
| -; GFX11-NEXT: v_or_b32_e32 v7, 0x400000, v2 |
448 |
| -; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 |
449 |
| -; GFX11-NEXT: v_add3_u32 v4, v4, v1, 0x7fff |
450 |
| -; GFX11-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 |
451 |
| -; GFX11-NEXT: v_add3_u32 v3, v3, v2, 0x7fff |
452 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
453 |
| -; GFX11-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 |
454 |
| -; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo |
455 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) |
456 |
| -; GFX11-NEXT: v_bfe_u32 v5, v0, 16, 1 |
457 |
| -; GFX11-NEXT: v_or_b32_e32 v8, 0x400000, v0 |
458 |
| -; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 |
459 |
| -; GFX11-NEXT: v_add3_u32 v5, v5, v0, 0x7fff |
460 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) |
461 |
| -; GFX11-NEXT: v_cndmask_b32_e32 v0, v5, v8, vcc_lo |
462 |
| -; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 |
463 |
| -; GFX11-NEXT: v_perm_b32 v0, v0, v2, 0x7060302 |
464 |
| -; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo |
465 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
466 |
| -; GFX11-NEXT: v_alignbit_b32 v1, 0x7fc0, v1, 16 |
467 |
| -; GFX11-NEXT: .LBB2_2: ; %end |
468 |
| -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
469 |
| -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 475 | +; GFX11-TRUE16-LABEL: bitcast_v3bf16_to_v3i16: |
| 476 | +; GFX11-TRUE16: ; %bb.0: |
| 477 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 478 | +; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo |
| 479 | +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v2 |
| 480 | +; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 |
| 481 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| 482 | +; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 |
| 483 | +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB2_2 |
| 484 | +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.true |
| 485 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l |
| 486 | +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| 487 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 488 | +; GFX11-TRUE16-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v2 |
| 489 | +; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2 |
| 490 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) |
| 491 | +; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1 |
| 492 | +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v1 |
| 493 | +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v2, 16, 1 |
| 494 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) |
| 495 | +; GFX11-TRUE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff |
| 496 | +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 |
| 497 | +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v2 |
| 498 | +; GFX11-TRUE16-NEXT: v_add3_u32 v5, v5, v2, 0x7fff |
| 499 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 500 | +; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 |
| 501 | +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1 |
| 502 | +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0 |
| 503 | +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 |
| 504 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 505 | +; GFX11-TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff |
| 506 | +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc_lo |
| 507 | +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 |
| 508 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2) |
| 509 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h |
| 510 | +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v5, v8, vcc_lo |
| 511 | +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 |
| 512 | +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
| 513 | +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo |
| 514 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| 515 | +; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v2 |
| 516 | +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| 517 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 518 | +; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, 0x7fc0, 16, v1 |
| 519 | +; GFX11-TRUE16-NEXT: .LBB2_2: ; %end |
| 520 | +; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
| 521 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 522 | +; |
| 523 | +; GFX11-FAKE16-LABEL: bitcast_v3bf16_to_v3i16: |
| 524 | +; GFX11-FAKE16: ; %bb.0: |
| 525 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 526 | +; GFX11-FAKE16-NEXT: s_mov_b32 s0, exec_lo |
| 527 | +; GFX11-FAKE16-NEXT: v_cmpx_ne_u32_e32 0, v2 |
| 528 | +; GFX11-FAKE16-NEXT: s_xor_b32 s0, exec_lo, s0 |
| 529 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| 530 | +; GFX11-FAKE16-NEXT: s_and_not1_saveexec_b32 s0, s0 |
| 531 | +; GFX11-FAKE16-NEXT: s_cbranch_execz .LBB2_2 |
| 532 | +; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.true |
| 533 | +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| 534 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 535 | +; GFX11-FAKE16-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v0 |
| 536 | +; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2 |
| 537 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) |
| 538 | +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v1, 16, 1 |
| 539 | +; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, 0x400000, v1 |
| 540 | +; GFX11-FAKE16-NEXT: v_bfe_u32 v3, v2, 16, 1 |
| 541 | +; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v2 |
| 542 | +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 |
| 543 | +; GFX11-FAKE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff |
| 544 | +; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 |
| 545 | +; GFX11-FAKE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff |
| 546 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| 547 | +; GFX11-FAKE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 |
| 548 | +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo |
| 549 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) |
| 550 | +; GFX11-FAKE16-NEXT: v_bfe_u32 v5, v0, 16, 1 |
| 551 | +; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 |
| 552 | +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 |
| 553 | +; GFX11-FAKE16-NEXT: v_add3_u32 v5, v5, v0, 0x7fff |
| 554 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) |
| 555 | +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v5, v8, vcc_lo |
| 556 | +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 |
| 557 | +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x7060302 |
| 558 | +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo |
| 559 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 560 | +; GFX11-FAKE16-NEXT: v_alignbit_b32 v1, 0x7fc0, v1, 16 |
| 561 | +; GFX11-FAKE16-NEXT: .LBB2_2: ; %end |
| 562 | +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
| 563 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
470 | 564 | %cmp = icmp eq i32 %b, 0
|
471 | 565 | br i1 %cmp, label %cmp.true, label %cmp.false
|
472 | 566 |
|
|
0 commit comments