Skip to content

[AMDGPU][MC] Allow VOP3C dpp src1 to be imm or SGPR #87418

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,7 @@ bool isVOPC64DPP(unsigned Opc) {
return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) {
return isVOPCAsmOnlyOpcodeHelper(Opc) || isVOP3CAsmOnlyOpcodeHelper(Opc);
}
bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
Expand Down
58 changes: 1 addition & 57 deletions llvm/lib/Target/AMDGPU/VOPCInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
let HasDst32 = 0;
// VOPC disallows dst_sel and dst_unused as they have no effect on destination
let EmitDstSel = 0;
// FIXME: work around AsmParser bug
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
let Outs64 = (outs VOPDstS64orS32:$sdst);
let OutsVOP3DPP = Outs64;
let OutsVOP3DPP8 = Outs64;
Expand Down Expand Up @@ -114,8 +112,6 @@ class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
"$src0, $src1");
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let EmitDst = 0;
// FIXME: work around AsmParser bug
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
}

multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt0> {
Expand Down Expand Up @@ -776,7 +772,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
// DPP8 forbids modifiers and can inherit from VOPC_Profile

let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VRegSrc_32:$src1);
dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VCSrc_b32:$src1);
let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
(ins)));
let AsmVOP3Base = "$sdst, $src0_modifiers, $src1";
Expand All @@ -789,8 +785,6 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
let HasSrc1Mods = 0;
let HasClamp = 0;
let HasOMod = 0;
// FIXME: work around AsmParser bug
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
}

multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
Expand Down Expand Up @@ -818,8 +812,6 @@ class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType src0VT, Va
let AsmVOP3Base = "$src0_modifiers, $src1";
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let EmitDst = 0;
// FIXME: work around AsmParser bug
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
}

multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
Expand Down Expand Up @@ -1385,31 +1377,9 @@ multiclass VOPC_Real_Base<GFXGen Gen, bits<9> op> {
}
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP>,
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
def _e64_dpp_w32#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
def _e64_dpp_w64#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave64;
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64>;
def _e64_dpp8_w32#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
def _e64_dpp8_w64#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave64;
}
}
} // AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace
}
Expand Down Expand Up @@ -1480,35 +1450,9 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,

if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
def _e64_dpp_w32#Gen.Suffix
: VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
let AsmString = asm_name # " vcc_lo, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
def _e64_dpp_w64#Gen.Suffix
: VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
let AsmString = asm_name # " vcc, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave64;
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
def _e64_dpp8_w32#Gen.Suffix
: VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
def _e64_dpp8_w64#Gen.Suffix
: VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
let AsmString = asm_name # " vcc, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave64;
}
}
} // End AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace
}
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1680,7 +1680,6 @@ class AsmOnlyInfoTable <string Format, string Class>: GenericTable {
}

def VOPCAsmOnlyInfoTable : AsmOnlyInfoTable <"VOPC", "VOPC_DPPe_Common">;
def VOP3CAsmOnlyInfoTable : AsmOnlyInfoTable <"VOP3C", "VOP3_DPPe_Common_Base">;

def VOPTrue16Table : GenericTable {
let FilterClass = "VOP_Pseudo";
Expand Down
13 changes: 12 additions & 1 deletion llvm/test/MC/AMDGPU/gfx1150_asm_features.s
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ v_add_f32_e64_dpp v5, v1, s2 row_mirror
v_min3_f16 v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf
// GFX1150: encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff]

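// Regression test: with a VGPR src1, the DPP compare must keep assembling to the short 8-byte encoding below (the SGPR-src1 form that follows uses the 12-byte encoding).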
v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]

v_cmp_le_f32 vcc_lo, v1, s2 row_mirror
// GFX1150: encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff]

v_cmp_le_f32 vcc_lo, v1, s2 quad_perm:[1,1,1,1]
// GFX1150: encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x55,0x00,0xff]

v_cmpx_neq_f16 v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
// GFX1150: encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]

v_cmpx_class_f16 v1, 2.0 quad_perm:[1,1,1,1]
// GFX1150: encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x55,0x00,0xff]
39 changes: 31 additions & 8 deletions llvm/test/MC/AMDGPU/gfx12_asm_features.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,49 @@
//

v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]

v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff]
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff]

v_add3_u32_e64_dpp v5, v1, s2, v0 dpp8:[7,6,5,4,3,2,1,0]
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05]
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05]

v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0]
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]

v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]

v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]

// Regression test: with a VGPR src1, the DPP compare must keep assembling to the short 8-byte encoding below rather than a 12-byte _e64_dpp encoding.
v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
// GFX12: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]

v_cmp_eq_f32_e64_dpp s5, v1, s99 row_mirror
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x40,0x01,0xff]

v_cmp_eq_f32_e64_dpp s5, v1, s99 row_half_mirror
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x41,0x01,0xff]

v_cmp_eq_f32_e64_dpp s5, v1, s99 row_shl:15
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x0f,0x01,0xff]

v_cmp_eq_f32_e64_dpp s5, v1, s99 row_shr:1
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x11,0x01,0xff]

v_cmp_eq_f32_e64_dpp s5, v1, s99 row_ror:1
// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x21,0x01,0xff]

v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -s99 row_share:15 row_mask:0x0 bank_mask:0x1
// GFX12: encoding: [0x6b,0x01,0x12,0xd4,0xfa,0xc6,0x00,0x40,0x01,0x5f,0x01,0x01]

v_cmp_eq_f32_e64_dpp ttmp15, -v1, |s99| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX12: encoding: [0x7b,0x02,0x12,0xd4,0xfa,0xc6,0x00,0x20,0x01,0x60,0x09,0x13]

v_cmpx_gt_f32_e64_dpp v255, 4.0 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: encoding: [0x7e,0x00,0x94,0xd4,0xe9,0xec,0x01,0x00,0xff,0x00,0x00,0x00]

//
// Elements of CPol operand can be given in any order
Expand Down
Loading