22; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s 
33; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s 
44; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s 
5- ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX11 %s 
5+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-TRUE16 %s 
6+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-FAKE16 %s 
67
78; FIXME: GFX9 should be producing v_mad_u16 instead of v_mad_legacy_u16. 
89
@@ -65,22 +66,44 @@ define amdgpu_kernel void @mad_u16(
6566; GFX10-NEXT:    global_store_short v0, v1, s[8:9] 
6667; GFX10-NEXT:    s_endpgm 
6768; 
68- ; GFX11-LABEL: mad_u16: 
69- ; GFX11:       ; %bb.0: ; %entry 
70- ; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24 
71- ; GFX11-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0 
72- ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
73- ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0 
74- ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) 
75- ; GFX11-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc 
76- ; GFX11-NEXT:    s_waitcnt vmcnt(0) 
77- ; GFX11-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc 
78- ; GFX11-NEXT:    s_waitcnt vmcnt(0) 
79- ; GFX11-NEXT:    global_load_u16 v0, v0, s[6:7] glc dlc 
80- ; GFX11-NEXT:    s_waitcnt vmcnt(0) 
81- ; GFX11-NEXT:    v_mad_u16 v0, v1, v2, v0 
82- ; GFX11-NEXT:    global_store_b16 v3, v0, s[0:1] 
83- ; GFX11-NEXT:    s_endpgm 
69+ ; GFX11-TRUE16-LABEL: mad_u16: 
70+ ; GFX11-TRUE16:       ; %bb.0: ; %entry 
71+ ; GFX11-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24 
72+ ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0 
73+ ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
74+ ; GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0 
75+ ; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0) 
76+ ; GFX11-TRUE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc 
77+ ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) 
78+ ; GFX11-TRUE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc 
79+ ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) 
80+ ; GFX11-TRUE16-NEXT:    global_load_u16 v3, v0, s[6:7] glc dlc 
81+ ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) 
82+ ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v1.l 
83+ ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v2.l 
84+ ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v3.l 
85+ ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
86+ ; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l 
87+ ; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0 
88+ ; GFX11-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1] 
89+ ; GFX11-TRUE16-NEXT:    s_endpgm 
90+ ; 
91+ ; GFX11-FAKE16-LABEL: mad_u16: 
92+ ; GFX11-FAKE16:       ; %bb.0: ; %entry 
93+ ; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24 
94+ ; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0 
95+ ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
96+ ; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0 
97+ ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0) 
98+ ; GFX11-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc 
99+ ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) 
100+ ; GFX11-FAKE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc 
101+ ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) 
102+ ; GFX11-FAKE16-NEXT:    global_load_u16 v0, v0, s[6:7] glc dlc 
103+ ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) 
104+ ; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v1, v2, v0 
105+ ; GFX11-FAKE16-NEXT:    global_store_b16 v3, v0, s[0:1] 
106+ ; GFX11-FAKE16-NEXT:    s_endpgm 
84107    ptr  addrspace (1 ) %r ,
85108    ptr  addrspace (1 ) %a ,
86109    ptr  addrspace (1 ) %b ,
@@ -121,11 +144,20 @@ define i16 @v_mad_u16(i16 %arg0, i16 %arg1, i16 %arg2) {
121144; GFX10-NEXT:    v_mad_u16 v0, v0, v1, v2 
122145; GFX10-NEXT:    s_setpc_b64 s[30:31] 
123146; 
124- ; GFX11-LABEL: v_mad_u16: 
125- ; GFX11:       ; %bb.0: 
126- ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
127- ; GFX11-NEXT:    v_mad_u16 v0, v0, v1, v2 
128- ; GFX11-NEXT:    s_setpc_b64 s[30:31] 
147+ ; GFX11-TRUE16-LABEL: v_mad_u16: 
148+ ; GFX11-TRUE16:       ; %bb.0: 
149+ ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
150+ ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l 
151+ ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l 
152+ ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
153+ ; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l 
154+ ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31] 
155+ ; 
156+ ; GFX11-FAKE16-LABEL: v_mad_u16: 
157+ ; GFX11-FAKE16:       ; %bb.0: 
158+ ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
159+ ; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v0, v1, v2 
160+ ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31] 
129161  %mul  = mul  i16  %arg0 , %arg1 
130162  %add  = add  i16  %mul , %arg2 
131163  ret  i16  %add 
@@ -151,13 +183,23 @@ define i32 @v_mad_u16_zext(i16 %arg0, i16 %arg1, i16 %arg2) {
151183; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0 
152184; GFX10-NEXT:    s_setpc_b64 s[30:31] 
153185; 
154- ; GFX11-LABEL: v_mad_u16_zext: 
155- ; GFX11:       ; %bb.0: 
156- ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
157- ; GFX11-NEXT:    v_mad_u16 v0, v0, v1, v2 
158- ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
159- ; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0 
160- ; GFX11-NEXT:    s_setpc_b64 s[30:31] 
186+ ; GFX11-TRUE16-LABEL: v_mad_u16_zext: 
187+ ; GFX11-TRUE16:       ; %bb.0: 
188+ ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
189+ ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l 
190+ ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l 
191+ ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 
192+ ; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l 
193+ ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0 
194+ ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31] 
195+ ; 
196+ ; GFX11-FAKE16-LABEL: v_mad_u16_zext: 
197+ ; GFX11-FAKE16:       ; %bb.0: 
198+ ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
199+ ; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v0, v1, v2 
200+ ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
201+ ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0 
202+ ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31] 
161203  %mul  = mul  i16  %arg0 , %arg1 
162204  %add  = add  i16  %mul , %arg2 
163205  %zext  = zext  i16  %add  to  i32 
@@ -187,13 +229,23 @@ define i64 @v_mad_u16_zext64(i16 %arg0, i16 %arg1, i16 %arg2) {
187229; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0 
188230; GFX10-NEXT:    s_setpc_b64 s[30:31] 
189231; 
190- ; GFX11-LABEL: v_mad_u16_zext64: 
191- ; GFX11:       ; %bb.0: 
192- ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
193- ; GFX11-NEXT:    v_mad_u16 v0, v0, v1, v2 
194- ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
195- ; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0 
196- ; GFX11-NEXT:    s_setpc_b64 s[30:31] 
232+ ; GFX11-TRUE16-LABEL: v_mad_u16_zext64: 
233+ ; GFX11-TRUE16:       ; %bb.0: 
234+ ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
235+ ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l 
236+ ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l 
237+ ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 
238+ ; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l 
239+ ; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0 
240+ ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31] 
241+ ; 
242+ ; GFX11-FAKE16-LABEL: v_mad_u16_zext64: 
243+ ; GFX11-FAKE16:       ; %bb.0: 
244+ ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
245+ ; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v0, v1, v2 
246+ ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
247+ ; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0 
248+ ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31] 
197249  %mul  = mul  i16  %arg0 , %arg1 
198250  %add  = add  i16  %mul , %arg2 
199251  %zext  = zext  i16  %add  to  i64 
0 commit comments