@@ -376,7 +376,7 @@ def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
376376 let HasSGPR = 1;
377377}
378378
379- def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
379+ def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add M0_LO16)> {
380380 let CopyCost = 1;
381381 let Size = 16;
382382 let isAllocatable = 0;
@@ -385,15 +385,15 @@ def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
385385
386386// TODO: Do we need to set DwarfRegAlias on register tuples?
387387
388- def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
388+ def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
389389 (add (sequence "SGPR%u_LO16", 0, 105))> {
390390 let AllocationPriority = 0;
391391 let Size = 16;
392392 let GeneratePressureSet = 0;
393393 let HasSGPR = 1;
394394}
395395
396- def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
396+ def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
397397 (add (sequence "SGPR%u_HI16", 0, 105))> {
398398 let isAllocatable = 0;
399399 let Size = 16;
@@ -402,7 +402,7 @@ def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
402402}
403403
404404// SGPR 32-bit registers
405- def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
405+ def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
406406 (add (sequence "SGPR%u", 0, 105))> {
407407 // Give all SGPR classes higher priority than VGPR classes, because
408408 // we want to spill SGPRs to VGPRs.
@@ -451,14 +451,14 @@ def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s"
451451def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">;
452452
453453// Trap handler TMP 32-bit registers
454- def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
454+ def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16, v2bf16], 32,
455455 (add (sequence "TTMP%u", 0, 15))> {
456456 let isAllocatable = 0;
457457 let HasSGPR = 1;
458458}
459459
460460// Trap handler TMP 16-bit registers
461- def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
461+ def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
462462 (add (sequence "TTMP%u_LO16", 0, 15))> {
463463 let Size = 16;
464464 let isAllocatable = 0;
@@ -584,8 +584,8 @@ class RegisterTypes<list<ValueType> reg_types> {
584584 list<ValueType> types = reg_types;
585585}
586586
587- def Reg16Types : RegisterTypes<[i16, f16]>;
588- def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
587+ def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
588+ def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
589589
590590let HasVGPR = 1 in {
591591// VOP3 and VINTERP can access 256 lo and 256 hi registers.
@@ -683,7 +683,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
683683}
684684
685685// AccVGPR 32-bit registers
686- def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
686+ def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
687687 (add (sequence "AGPR%u", 0, 255))> {
688688 let AllocationPriority = 0;
689689 let Size = 32;
@@ -735,15 +735,15 @@ def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;
735735// Register classes used as source and destination
736736//===----------------------------------------------------------------------===//
737737
738- def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
738+ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
739739 (add FP_REG, SP_REG)> {
740740 let isAllocatable = 0;
741741 let CopyCost = -1;
742742 let HasSGPR = 1;
743743 let BaseClassOrder = 10000;
744744}
745745
746- def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32,
746+ def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16, v8bf16], 32,
747747 (add PRIVATE_RSRC_REG)> {
748748 let isAllocatable = 0;
749749 let CopyCost = -1;
@@ -760,7 +760,7 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
760760let GeneratePressureSet = 0, HasSGPR = 1 in {
761761// Subset of SReg_32 without M0 for SMRD instructions and alike.
762762// See comments in SIInstructions.td for more info.
763- def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
763+ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
764764 (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
765765 SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
766766 SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
@@ -769,7 +769,7 @@ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2
769769 let AllocationPriority = 0;
770770}
771771
772- def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
772+ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
773773 (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
774774 XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
775775 TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
@@ -782,39 +782,39 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
782782 let BaseClassOrder = 16;
783783}
784784
785- def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
785+ def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
786786 (add SReg_32_XM0_XEXEC, M0_CLASS)> {
787787 let AllocationPriority = 0;
788788}
789789
790- def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
790+ def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
791791 (add SReg_32_XEXEC, EXEC_LO)> {
792792 let AllocationPriority = 0;
793793}
794794
795- def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
795+ def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
796796 (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
797797 let AllocationPriority = 0;
798798}
799799
800800} // End GeneratePressureSet = 0
801801
802802// Register class for all scalar registers (SGPRs + Special Registers)
803- def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
803+ def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
804804 (add SReg_32_XM0, M0_CLASS)> {
805805 let AllocationPriority = 0;
806806 let HasSGPR = 1;
807807 let BaseClassOrder = 32;
808808}
809809
810810let GeneratePressureSet = 0 in {
811- def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
811+ def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
812812 (add SReg_32, LDS_DIRECT_CLASS)> {
813813 let isAllocatable = 0;
814814 let HasSGPR = 1;
815815}
816816
817- def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
817+ def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16, v4bf16], 32,
818818 (add SGPR_64Regs)> {
819819 let CopyCost = 1;
820820 let AllocationPriority = 1;
@@ -836,21 +836,21 @@ def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
836836 let HasSGPR = 1;
837837}
838838
839- def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
839+ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16], 32,
840840 (add TTMP_64Regs)> {
841841 let isAllocatable = 0;
842842 let HasSGPR = 1;
843843}
844844
845- def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
845+ def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
846846 (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
847847 SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
848848 let CopyCost = 1;
849849 let AllocationPriority = 1;
850850 let HasSGPR = 1;
851851}
852852
853- def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
853+ def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
854854 (add SReg_64_XEXEC, EXEC)> {
855855 let CopyCost = 1;
856856 let AllocationPriority = 1;
@@ -905,11 +905,11 @@ multiclass SRegClass<int numRegs,
905905}
906906
907907defm "" : SRegClass<3, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
908- defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
908+ defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], SGPR_128Regs, TTMP_128Regs>;
909909defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
910910defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
911911defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
912- defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;
912+ defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], SGPR_256Regs, TTMP_256Regs>;
913913defm "" : SRegClass<9, [v9i32, v9f32], SGPR_288Regs, TTMP_288Regs>;
914914defm "" : SRegClass<10, [v10i32, v10f32], SGPR_320Regs, TTMP_320Regs>;
915915defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
@@ -920,7 +920,7 @@ defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512
920920defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
921921}
922922
923- def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
923+ def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
924924 (add VGPR_32, LDS_DIRECT_CLASS)> {
925925 let isAllocatable = 0;
926926 let HasVGPR = 1;
@@ -955,15 +955,15 @@ multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
955955 }
956956}
957957
958- defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
958+ defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4bf16, v4i16, p0, p1, p4],
959959 (add VGPR_64)>;
960960defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
961- defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>;
961+ defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add VGPR_128)>;
962962defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
963963
964964defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
965965defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>;
966- defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>;
966+ defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], (add VGPR_256)>;
967967defm VReg_288 : VRegClass<9, [v9i32, v9f32], (add VGPR_288)>;
968968defm VReg_320 : VRegClass<10, [v10i32, v10f32], (add VGPR_320)>;
969969defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
@@ -993,7 +993,7 @@ multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
993993defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
994994 (add AGPR_64)>;
995995defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
996- defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>;
996+ defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add AGPR_128)>;
997997defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
998998defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;
999999defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;
@@ -1032,14 +1032,14 @@ def VS_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
10321032 let HasVGPR = 1;
10331033}
10341034
1035- def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
1035+ def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
10361036 (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
10371037 let isAllocatable = 0;
10381038 let HasVGPR = 1;
10391039 let HasSGPR = 1;
10401040}
10411041
1042- def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
1042+ def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
10431043 (add VGPR_32_Lo128, SReg_32, LDS_DIRECT_CLASS)> {
10441044 let isAllocatable = 0;
10451045 let HasVGPR = 1;
0 commit comments