@@ -22,28 +22,28 @@ def CC_SI_Gfx : CallingConv<[
2222 // 32 is reserved for the stack pointer
2323 // 33 is reserved for the frame pointer
2424 // 34 is reserved for the base pointer
25- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
25+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
2626 SGPR4, SGPR5, SGPR6, SGPR7,
2727 SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
2828 SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
2929 SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29
3030 ]>>>,
3131
32- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
32+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
3333 VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
3434 VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
3535 VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
3636 VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
3737 ]>>>,
3838
39- CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>
39+ CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16 ], CCAssignToStack<4, 4>>
4040]>;
4141
4242def RetCC_SI_Gfx : CallingConv<[
4343 CCIfType<[i1], CCPromoteToType<i32>>,
4444 CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
4545
46- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
46+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
4747 VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
4848 VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
4949 VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -66,7 +66,7 @@ def RetCC_SI_Gfx : CallingConv<[
6666
6767def CC_SI_SHADER : CallingConv<[
6868
69- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
69+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
7070 SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
7171 SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
7272 SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -76,7 +76,7 @@ def CC_SI_SHADER : CallingConv<[
7676 ]>>>,
7777
7878 // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
79- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
79+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
8080 VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
8181 VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
8282 VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -109,7 +109,7 @@ def RetCC_SI_Shader : CallingConv<[
109109 ]>>,
110110
111111 // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
112- CCIfType<[f32, f16, v2f16] , CCAssignToReg<[
112+ CCIfType<[f32, f16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
113113 VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
114114 VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
115115 VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -188,23 +188,23 @@ def CC_AMDGPU_Func : CallingConv<[
188188 CCIfType<[i1], CCPromoteToType<i32>>,
189189 CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
190190
191- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
191+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<
192192 !foreach(i, !range(0, 30), !cast<Register>("SGPR"#i)) // SGPR0-29
193193 >>>,
194194
195- CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
195+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16 ], CCAssignToReg<[
196196 VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
197197 VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
198198 VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
199199 VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
200- CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>
200+ CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16 ], CCAssignToStack<4, 4>>
201201]>;
202202
203203// Calling convention for leaf functions
204204def RetCC_AMDGPU_Func : CallingConv<[
205205 CCIfType<[i1], CCPromoteToType<i32>>,
206206 CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
207- CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[
207+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16 ], CCAssignToReg<[
208208 VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
209209 VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
210210 VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -223,11 +223,11 @@ def CC_AMDGPU : CallingConv<[
223223]>;
224224
// CC_AMDGPU_CS_CHAIN: argument-to-register assignment rules, tried in order.
// This block is shown as a diff: each '-' line is the previous rule text and
// the '+' line below it is the replacement, which adds bf16 and v2bf16 to the
// accepted value types.
//   * Values marked inreg with one of the listed types are assigned to
//     SGPR0..SGPR104 (!range(105) enumerates 0 through 104).
//   * Values not marked inreg with one of the listed types are assigned to
//     VGPR8..VGPR254 (!range(8, 255) excludes its upper bound).
// No CCAssignToStack rule is listed in this definition.
225225def CC_AMDGPU_CS_CHAIN : CallingConv<[
226- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
226+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<
227227 !foreach(i, !range(105), !cast<Register>("SGPR"#i))
228228 >>>,
229229
230- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
230+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<
231231 !foreach(i, !range(8, 255), !cast<Register>("VGPR"#i))
232232 >>>
233233]>;
0 commit comments