Skip to content

[AMDGPU][True16][MC] 16bit vsrc and vdst support in MC #104510

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 27 additions & 9 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
}

bool isRegOrImmWithIntT16InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
/// Predicate for an i16-typed source with input modifiers. The register class
/// passed to isRegOrImmWithInputMods is VS_32 when IsFake16 is set and VS_16
/// otherwise.
template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
  constexpr auto SrcRCID =
      IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID;
  return isRegOrImmWithInputMods(SrcRCID, MVT::i16);
}

bool isRegOrImmWithInt32InputMods() const {
Expand All @@ -292,6 +293,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
}

/// Predicate for an i16-typed source checked through isRegOrInline. The
/// register class is VS_32 when IsFake16 is set and VS_16 otherwise.
template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
  constexpr auto SrcRCID =
      IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID;
  return isRegOrInline(SrcRCID, MVT::i16);
}

/// Checks this operand with isRegOrInline against the VS_32 register class
/// and the i32 value type.
bool isRegOrInlineImmWithInt32InputMods() const {
  constexpr auto SrcRCID = AMDGPU::VS_32RegClassID;
  return isRegOrInline(SrcRCID, MVT::i32);
}
Expand All @@ -304,8 +310,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
}

bool isRegOrImmWithFPT16InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
/// Predicate for an f16-typed source with input modifiers. The register class
/// passed to isRegOrImmWithInputMods is VS_32 when IsFake16 is set and VS_16
/// otherwise.
template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
  constexpr auto SrcRCID =
      IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID;
  return isRegOrImmWithInputMods(SrcRCID, MVT::f16);
}

bool isRegOrImmWithFP32InputMods() const {
Expand Down Expand Up @@ -354,6 +361,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
}

// Register-class predicates for operands with input modifiers; these are
// defined out of line as AMDGPUOperand:: members.
bool isVRegWithInputMods() const;
// For both templates, IsFake16 picks the 32-bit VGPR register class and
// !IsFake16 picks the 16-bit one; the Lo128 predicate uses the *_Lo128
// variants of those classes.
template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
template <bool IsFake16> bool isT16VRegWithInputMods() const;

// Presumably reports whether the operand is usable as an SDWA operand of the
// given value type -- confirm against the out-of-line definition.
bool isSDWAOperand(MVT type) const;
Expand Down Expand Up @@ -515,7 +523,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}

bool isVCSrcTB16() const {
/// VCSrc predicate for b16 operands: defers to isRegOrInlineNoMods with the
/// VS_16 register class and the i16 value type.
bool isVCSrcT_b16() const {
  constexpr auto SrcRCID = AMDGPU::VS_16RegClassID;
  return isRegOrInlineNoMods(SrcRCID, MVT::i16);
}

Expand Down Expand Up @@ -545,7 +553,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
}

bool isVCSrcTF16() const {
/// VCSrc predicate for f16 operands: defers to isRegOrInlineNoMods with the
/// VS_16 register class and the f16 value type.
bool isVCSrcT_f16() const {
  constexpr auto SrcRCID = AMDGPU::VS_16RegClassID;
  return isRegOrInlineNoMods(SrcRCID, MVT::f16);
}

/// VCSrc predicate for bf16 operands: defers to isRegOrInlineNoMods with the
/// VS_16 register class and the bf16 value type.
///
/// The value type must be MVT::bf16 (not MVT::f16) so that the check is done
/// for the bf16 format, matching isVCSrcTBF16() and the MVT::bf16 literal
/// check in isVSrcT_bf16().
bool isVCSrcT_bf16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
}

Expand Down Expand Up @@ -583,7 +595,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {

/// Accepts anything isVCSrcF64() accepts, and additionally an i64 literal
/// immediate.
bool isVSrc_b64() const {
  if (isVCSrcF64())
    return true;
  return isLiteralImm(MVT::i64);
}

bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
/// Accepts anything isVCSrcT_b16() accepts, and additionally an i16 literal
/// immediate.
bool isVSrcT_b16() const {
  if (isVCSrcT_b16())
    return true;
  return isLiteralImm(MVT::i16);
}

bool isVSrcT_b16_Lo128() const {
return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
Expand Down Expand Up @@ -617,7 +629,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {

/// Accepts anything isVCSrcTBF16() accepts, and additionally a bf16 literal
/// immediate.
bool isVSrcT_bf16() const {
  if (isVCSrcTBF16())
    return true;
  return isLiteralImm(MVT::bf16);
}

bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
/// Accepts anything isVCSrcT_f16() accepts, and additionally an f16 literal
/// immediate.
bool isVSrcT_f16() const {
  if (isVCSrcT_f16())
    return true;
  return isLiteralImm(MVT::f16);
}

bool isVSrcT_bf16_Lo128() const {
return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
Expand Down Expand Up @@ -2162,11 +2174,17 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
/// Register-class check for the Lo128 16-bit VGPR operand with input
/// modifiers: VGPR_32_Lo128 when IsFake16 is set, VGPR_16_Lo128 otherwise.
template <bool IsFake16>
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
  if (IsFake16)
    return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
  return isRegClass(AMDGPU::VGPR_16_Lo128RegClassID);
}

/// Register-class check for the 16-bit VGPR operand with input modifiers:
/// VGPR_32 when IsFake16 is set, VGPR_16 otherwise.
template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  if (IsFake16)
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  return isRegClass(AMDGPU::VGPR_16RegClassID);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
if (AsmParser->isVI())
return isVReg32();
Expand Down
22 changes: 14 additions & 8 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,36 +328,40 @@ DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

// Decodes a 9-bit source-operand encoding and appends the resulting operand
// to Inst. When the VGPR flag (tested as bit 8 / EncValues::IS_VGPR) is set,
// bit 7 supplies the IsHi flag and the low 7 bits the register index for
// createVGPR16Operand. Otherwise the low 8 bits are handed to
// decodeNonVGPRSrcOp together with the operand width, immediate width and
// operand semantics.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
uint64_t /*Addr*/,
const MCDisassembler *Decoder) {
assert(isUInt<9>(Imm) && "9-bit encoding expected");

const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
bool IsVGPR = Imm & (1 << 8);
if (IsVGPR) {
if (Imm & AMDGPU::EncValues::IS_VGPR) {
bool IsHi = Imm & (1 << 7);
unsigned RegIdx = Imm & 0x7f;
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
Imm & 0xFF, false, 16));
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
OpWidth, Imm & 0xFF, false, ImmWidth,
(AMDGPU::OperandSemantics)OperandSemantics));
}

// Decodes a 10-bit source-operand encoding and appends the resulting operand
// to Inst. When the VGPR flag (tested as bit 8 / EncValues::IS_VGPR) is set,
// bit 9 supplies the IsHi flag and the low 8 bits the register index for
// createVGPR16Operand. Otherwise the low 8 bits are handed to
// decodeNonVGPRSrcOp together with the operand width, immediate width and
// operand semantics.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
uint64_t /*Addr*/,
const MCDisassembler *Decoder) {
assert(isUInt<10>(Imm) && "10-bit encoding expected");

const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
bool IsVGPR = Imm & (1 << 8);
if (IsVGPR) {
if (Imm & AMDGPU::EncValues::IS_VGPR) {
bool IsHi = Imm & (1 << 9);
unsigned RegIdx = Imm & 0xff;
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
Imm & 0xFF, false, 16));
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
OpWidth, Imm & 0xFF, false, ImmWidth,
(AMDGPU::OperandSemantics)OperandSemantics));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
Expand Down Expand Up @@ -628,6 +632,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
convertVOP3DPPInst(MI); // Regular VOP3 case
}

convertTrue16OpSel(MI);

if (AMDGPU::isMAC(MI.getOpcode())) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
Expand Down
10 changes: 7 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5411,9 +5411,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
case AMDGPU::S_CVT_F32_F16:
case AMDGPU::S_CVT_HI_F32_F16:
return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
: AMDGPU::V_CVT_F32_F16_fake16_e64;
case AMDGPU::S_CVT_F16_F32:
return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
: AMDGPU::V_CVT_F16_F32_fake16_e64;
case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
Expand Down
Loading
Loading