Skip to content

Commit e29228e

Browse files
authored
[AMDGPU][MC] Allow VOP3C dpp src1 to be imm or SGPR (#87418)
Allows src1 of VOP3-encoded VOPC to be an SGPR or inline immediate on GFX1150Plus. The w32 and w64 _e64_dpp assembler-only real instructions were unused, and were erroneously constructed in a way that broke parsing of the new instructions. They are removed. This patch is a follow-up to PR #87382.
1 parent 7c68a95 commit e29228e

14 files changed

+3218
-86
lines changed

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -496,9 +496,7 @@ bool isVOPC64DPP(unsigned Opc) {
496496
return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
497497
}
498498

499-
bool isVOPCAsmOnly(unsigned Opc) {
500-
return isVOPCAsmOnlyOpcodeHelper(Opc) || isVOP3CAsmOnlyOpcodeHelper(Opc);
501-
}
499+
bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }
502500

503501
bool getMAIIsDGEMM(unsigned Opc) {
504502
const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Lines changed: 1 addition & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,6 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
7575
let HasDst32 = 0;
7676
// VOPC disallows dst_sel and dst_unused as they have no effect on destination
7777
let EmitDstSel = 0;
78-
// FIXME: work around AsmParser bug
79-
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
8078
let Outs64 = (outs VOPDstS64orS32:$sdst);
8179
let OutsVOP3DPP = Outs64;
8280
let OutsVOP3DPP8 = Outs64;
@@ -114,8 +112,6 @@ class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
114112
"$src0, $src1");
115113
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
116114
let EmitDst = 0;
117-
// FIXME: work around AsmParser bug
118-
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
119115
}
120116

121117
multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt0> {
@@ -776,7 +772,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
776772
// DPP8 forbids modifiers and can inherit from VOPC_Profile
777773

778774
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
779-
dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VRegSrc_32:$src1);
775+
dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VCSrc_b32:$src1);
780776
let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
781777
(ins)));
782778
let AsmVOP3Base = "$sdst, $src0_modifiers, $src1";
@@ -789,8 +785,6 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
789785
let HasSrc1Mods = 0;
790786
let HasClamp = 0;
791787
let HasOMod = 0;
792-
// FIXME: work around AsmParser bug
793-
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
794788
}
795789

796790
multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
@@ -818,8 +812,6 @@ class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType src0VT, Va
818812
let AsmVOP3Base = "$src0_modifiers, $src1";
819813
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
820814
let EmitDst = 0;
821-
// FIXME: work around AsmParser bug
822-
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
823815
}
824816

825817
multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
@@ -1385,31 +1377,9 @@ multiclass VOPC_Real_Base<GFXGen Gen, bits<9> op> {
13851377
}
13861378
if ps64.Pfl.HasExtVOP3DPP then {
13871379
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e64" #"_dpp");
1388-
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
13891380
def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP>,
13901381
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
1391-
def _e64_dpp_w32#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
1392-
let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
1393-
let isAsmParserOnly = 1;
1394-
let WaveSizePredicate = isWave32;
1395-
}
1396-
def _e64_dpp_w64#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
1397-
let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
1398-
let isAsmParserOnly = 1;
1399-
let WaveSizePredicate = isWave64;
1400-
}
1401-
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
14021382
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64>;
1403-
def _e64_dpp8_w32#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
1404-
let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
1405-
let isAsmParserOnly = 1;
1406-
let WaveSizePredicate = isWave32;
1407-
}
1408-
def _e64_dpp8_w64#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
1409-
let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
1410-
let isAsmParserOnly = 1;
1411-
let WaveSizePredicate = isWave64;
1412-
}
14131383
}
14141384
} // AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace
14151385
}
@@ -1480,35 +1450,9 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
14801450

14811451
if ps64.Pfl.HasExtVOP3DPP then {
14821452
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
1483-
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
14841453
def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
14851454
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
1486-
def _e64_dpp_w32#Gen.Suffix
1487-
: VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
1488-
let AsmString = asm_name # " vcc_lo, " # AsmDPP;
1489-
let isAsmParserOnly = 1;
1490-
let WaveSizePredicate = isWave32;
1491-
}
1492-
def _e64_dpp_w64#Gen.Suffix
1493-
: VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
1494-
let AsmString = asm_name # " vcc, " # AsmDPP;
1495-
let isAsmParserOnly = 1;
1496-
let WaveSizePredicate = isWave64;
1497-
}
1498-
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
14991455
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
1500-
def _e64_dpp8_w32#Gen.Suffix
1501-
: VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
1502-
let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
1503-
let isAsmParserOnly = 1;
1504-
let WaveSizePredicate = isWave32;
1505-
}
1506-
def _e64_dpp8_w64#Gen.Suffix
1507-
: VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
1508-
let AsmString = asm_name # " vcc, " # AsmDPP8;
1509-
let isAsmParserOnly = 1;
1510-
let WaveSizePredicate = isWave64;
1511-
}
15121456
}
15131457
} // End AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace
15141458
}

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1680,7 +1680,6 @@ class AsmOnlyInfoTable <string Format, string Class>: GenericTable {
16801680
}
16811681

16821682
def VOPCAsmOnlyInfoTable : AsmOnlyInfoTable <"VOPC", "VOPC_DPPe_Common">;
1683-
def VOP3CAsmOnlyInfoTable : AsmOnlyInfoTable <"VOP3C", "VOP3_DPPe_Common_Base">;
16841683

16851684
def VOPTrue16Table : GenericTable {
16861685
let FilterClass = "VOP_Pseudo";

llvm/test/MC/AMDGPU/gfx1150_asm_features.s

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,17 @@ v_add_f32_e64_dpp v5, v1, s2 row_mirror
3030
v_min3_f16 v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf
3131
// GFX1150: encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff]
3232

33-
// This is a regression test for potential changes in the future.
3433
v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
3534
// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
35+
36+
v_cmp_le_f32 vcc_lo, v1, s2 row_mirror
37+
// GFX1150: encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff]
38+
39+
v_cmp_le_f32 vcc_lo, v1, s2 quad_perm:[1,1,1,1]
40+
// GFX1150: encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x55,0x00,0xff]
41+
42+
v_cmpx_neq_f16 v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
43+
// GFX1150: encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
44+
45+
v_cmpx_class_f16 v1, 2.0 quad_perm:[1,1,1,1]
46+
// GFX1150: encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x55,0x00,0xff]

llvm/test/MC/AMDGPU/gfx12_asm_features.s

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,49 @@
66
//
77

88
v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
9-
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
9+
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
1010

1111
v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
12-
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff]
12+
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff]
1313

1414
v_add3_u32_e64_dpp v5, v1, s2, v0 dpp8:[7,6,5,4,3,2,1,0]
15-
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05]
15+
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05]
1616

1717
v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0]
18-
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
18+
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
1919

2020
v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
21-
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
21+
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
2222

2323
v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
24-
// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
24+
// GFX12: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
2525

26-
// This is a regression test for potential changes in the future.
2726
v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
28-
// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
27+
// GFX12: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
28+
29+
v_cmp_eq_f32_e64_dpp s5, v1, s99 row_mirror
30+
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x40,0x01,0xff]
31+
32+
v_cmp_eq_f32_e64_dpp s5, v1, s99 row_half_mirror
33+
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x41,0x01,0xff]
34+
35+
v_cmp_eq_f32_e64_dpp s5, v1, s99 row_shl:15
36+
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x0f,0x01,0xff]
37+
38+
v_cmp_eq_f32_e64_dpp s5, v1, s99 row_shr:1
39+
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x11,0x01,0xff]
40+
41+
v_cmp_eq_f32_e64_dpp s5, v1, s99 row_ror:1
42+
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x21,0x01,0xff]
43+
44+
v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -s99 row_share:15 row_mask:0x0 bank_mask:0x1
45+
// GFX12: encoding: [0x6b,0x01,0x12,0xd4,0xfa,0xc6,0x00,0x40,0x01,0x5f,0x01,0x01]
46+
47+
v_cmp_eq_f32_e64_dpp ttmp15, -v1, |s99| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
48+
// GFX12: encoding: [0x7b,0x02,0x12,0xd4,0xfa,0xc6,0x00,0x20,0x01,0x60,0x09,0x13]
49+
50+
v_cmpx_gt_f32_e64_dpp v255, 4.0 dpp8:[0,0,0,0,0,0,0,0] fi:0
51+
// GFX12: encoding: [0x7e,0x00,0x94,0xd4,0xe9,0xec,0x01,0x00,0xff,0x00,0x00,0x00]
2952

3053
//
3154
// Elements of CPol operand can be given in any order

0 commit comments

Comments
 (0)