Skip to content

Commit bbb58a2

Browse files
Manolis Tsamis authored and ptomsich committed
[RISCV] Add vendor-defined XTheadMemPair (two-GPR Memory Operations) extension
The vendor-defined XTHeadMemPair (no comparable standard extension exists at the time of writing) extension adds two-GPR load/store pair instructions. It is supported by the C9xx cores (e.g., found in the wild in the Allwinner D1) by Alibaba T-Head. The current (as of this commit) public documentation for this extension is available at: https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf Support for these instructions has already landed in GNU Binutils: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=6e17ae625570ff8f3c12c8765b8d45d4db8694bd Depends on D143847 Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D144002
1 parent d567e06 commit bbb58a2

15 files changed

+711
-1
lines changed

llvm/docs/RISCVUsage.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,9 @@ The current vendor extensions supported are:
189189
``XTheadMac``
190190
LLVM implements `the XTheadMac (multiply-accumulate instructions) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification.
191191

192+
``XTHeadMemPair``
193+
LLVM implements `the XTHeadMemPair (two-GPR memory operations) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification.
194+
192195
``XTHeadVdot``
193196
LLVM implements `version 1.0.0 of the THeadV-family custom instructions specification <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.0/xthead-2022-12-04-2.2.0.pdf>`_ by T-HEAD of Alibaba. All instructions are prefixed with `th.` as described in the specification, and the riscv-toolchain-convention document linked above.
194197

llvm/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ Changes to the RISC-V Backend
112112
* Adds support for the vendor-defined XTHeadBb (basic bit-manipulation) extension.
113113
* Adds support for the vendor-defined XTHeadBs (single-bit) extension.
114114
* Adds support for the vendor-defined XTHeadMac (multiply-accumulate instructions) extension.
115+
* Adds support for the vendor-defined XTHeadMemPair (two-GPR memory operations)
116+
extension disassembler/assembler.
115117
* Support for the now-ratified Zawrs extension is no longer experimental.
116118

117119
Changes to the WebAssembly Backend

llvm/lib/Support/RISCVISAInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
117117
{"xtheadbb", RISCVExtensionVersion{1, 0}},
118118
{"xtheadbs", RISCVExtensionVersion{1, 0}},
119119
{"xtheadmac", RISCVExtensionVersion{1, 0}},
120+
{"xtheadmempair", RISCVExtensionVersion{1, 0}},
120121
{"xtheadvdot", RISCVExtensionVersion{1, 0}},
121122
{"xventanacondops", RISCVExtensionVersion{1, 0}},
122123
};

llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2664,6 +2664,34 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst,
26642664
}
26652665
}
26662666

2667+
unsigned Opcode = Inst.getOpcode();
2668+
2669+
if (Opcode == RISCV::TH_LDD || Opcode == RISCV::TH_LWUD ||
2670+
Opcode == RISCV::TH_LWD) {
2671+
unsigned Rd1 = Inst.getOperand(0).getReg();
2672+
unsigned Rd2 = Inst.getOperand(1).getReg();
2673+
unsigned Rs1 = Inst.getOperand(2).getReg();
2674+
// The encoding with rd1 == rd2 == rs1 is reserved for XTHead load pair.
2675+
if (Rs1 == Rd1 && Rs1 == Rd2) {
2676+
SMLoc Loc = Operands[1]->getStartLoc();
2677+
return Error(Loc, "The source register and destination registers "
2678+
"cannot be equal.");
2679+
}
2680+
}
2681+
2682+
bool IsTHeadMemPair32 = (Opcode == RISCV::TH_LWD ||
2683+
Opcode == RISCV::TH_LWUD || Opcode == RISCV::TH_SWD);
2684+
bool IsTHeadMemPair64 = (Opcode == RISCV::TH_LDD || Opcode == RISCV::TH_SDD);
2685+
// The last operand of XTHeadMemPair instructions must be constant 3 or 4
2686+
// depending on the data width.
2687+
if (IsTHeadMemPair32 && Inst.getOperand(4).getImm() != 3) {
2688+
SMLoc Loc = Operands.back()->getStartLoc();
2689+
return Error(Loc, "Operand must be constant 3.");
2690+
} else if (IsTHeadMemPair64 && Inst.getOperand(4).getImm() != 4) {
2691+
SMLoc Loc = Operands.back()->getStartLoc();
2692+
return Error(Loc, "Operand must be constant 4.");
2693+
}
2694+
26672695
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
26682696
RISCVII::VConstraintType Constraints = RISCVII::getConstraint(MCID.TSFlags);
26692697
if (Constraints == RISCVII::NoConstraint)
@@ -2687,7 +2715,6 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst,
26872715
if ((Constraints & RISCVII::VMConstraint) && (DestReg == RISCV::V0)) {
26882716
// vadc, vsbc are special cases. These instructions have no mask register.
26892717
// The destination register could not be V0.
2690-
unsigned Opcode = Inst.getOpcode();
26912718
if (Opcode == RISCV::VADC_VVM || Opcode == RISCV::VADC_VXM ||
26922719
Opcode == RISCV::VADC_VIM || Opcode == RISCV::VSBC_VVM ||
26932720
Opcode == RISCV::VSBC_VXM || Opcode == RISCV::VFMERGE_VFM ||

llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,10 @@ static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn,
367367
uint64_t Address,
368368
const MCDisassembler *Decoder);
369369

370+
static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
371+
uint64_t Address,
372+
const MCDisassembler *Decoder);
373+
370374
#include "RISCVGenDisassemblerTables.inc"
371375

372376
static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
@@ -427,6 +431,32 @@ static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn,
427431
return MCDisassembler::Success;
428432
}
429433

434+
// Custom decoder for the XTHeadMemPair two-GPR load/store instructions.
//
// Operands are appended to Inst in this order: the register in bits [11:7],
// the register in bits [24:20], the register in bits [19:15], the 2-bit
// immediate in bits [26:25], and finally an implicit constant that is 3 for
// the word opcodes (TH_LWD, TH_LWUD, TH_SWD) and 4 for every other opcode.
// Always reports MCDisassembler::Success.
static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
                                        uint64_t Address,
                                        const MCDisassembler *Decoder) {
  const uint32_t FirstReg = fieldFromInstruction(Insn, 7, 5);
  const uint32_t BaseReg = fieldFromInstruction(Insn, 15, 5);
  const uint32_t SecondReg = fieldFromInstruction(Insn, 20, 5);
  const uint32_t Index = fieldFromInstruction(Insn, 25, 2);

  // Note the operand order: both data registers come before the base.
  DecodeGPRRegisterClass(Inst, FirstReg, Address, Decoder);
  DecodeGPRRegisterClass(Inst, SecondReg, Address, Decoder);
  DecodeGPRRegisterClass(Inst, BaseReg, Address, Decoder);

  // The status is only inspected by the assertion below.
  [[maybe_unused]] DecodeStatus ImmStatus =
      decodeUImmOperand<2>(Inst, Index, Address, Decoder);
  assert(ImmStatus == MCDisassembler::Success && "Invalid immediate");

  // The trailing operand is not read from Insn; it is derived from the opcode.
  int64_t ImplicitImm;
  switch (Inst.getOpcode()) {
  case RISCV::TH_LWD:
  case RISCV::TH_LWUD:
  case RISCV::TH_SWD:
    ImplicitImm = 3;
    break;
  default:
    ImplicitImm = 4;
    break;
  }
  Inst.addOperand(MCOperand::createImm(ImplicitImm));

  return MCDisassembler::Success;
}
459+
430460
DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
431461
ArrayRef<uint8_t> Bytes,
432462
uint64_t Address,
@@ -497,6 +527,13 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
497527
if (Result != MCDisassembler::Fail)
498528
return Result;
499529
}
530+
if (STI.hasFeature(RISCV::FeatureVendorXTHeadMemPair)) {
531+
LLVM_DEBUG(dbgs() << "Trying XTHeadMemPair custom opcode table:\n");
532+
Result = decodeInstruction(DecoderTableTHeadMemPair32, MI, Insn, Address,
533+
this, STI);
534+
if (Result != MCDisassembler::Fail)
535+
return Result;
536+
}
500537
if (STI.hasFeature(RISCV::FeatureVendorXTHeadVdot)) {
501538
LLVM_DEBUG(dbgs() << "Trying XTHeadVdot custom opcode table:\n");
502539
Result =

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,13 @@ def HasVendorXTHeadMac : Predicate<"Subtarget->hasVendorXTHeadMac()">,
512512
AssemblerPredicate<(all_of FeatureVendorXTHeadMac),
513513
"'xtheadmac' (T-Head Multiply-Accumulate Instructions)">;
514514

515+
def FeatureVendorXTHeadMemPair
516+
: SubtargetFeature<"xtheadmempair", "HasVendorXTHeadMemPair", "true",
517+
"'xtheadmempair' (T-Head two-GPR Memory Operations)">;
518+
def HasVendorXTHeadMemPair : Predicate<"Subtarget->hasVendorXTHeadMemPair()">,
519+
AssemblerPredicate<(all_of FeatureVendorXTHeadMemPair),
520+
"'xtheadmempair' (T-Head two-GPR Memory Operations)">;
521+
515522
def FeatureVendorXTHeadVdot
516523
: SubtargetFeature<"xtheadvdot", "HasVendorXTHeadVdot", "true",
517524
"'xtheadvdot' (T-Head Vector Extensions for Dot)",

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
10641064
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
10651065
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
10661066
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
1067+
if (Subtarget.hasVendorXTHeadMemPair())
1068+
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
10671069
if (Subtarget.useRVVForFixedLengthVectors())
10681070
setTargetDAGCombine(ISD::BITCAST);
10691071

@@ -9653,6 +9655,143 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
96539655
return InputRootReplacement;
96549656
}
96559657

9658+
// Helper function for performMemPairCombine.
9659+
// Try to combine the memory loads/stores LSNode1 and LSNode2
9660+
// into a single memory pair operation.
9661+
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
9662+
LSBaseSDNode *LSNode2, SDValue BasePtr,
9663+
uint64_t Imm) {
9664+
SmallPtrSet<const SDNode *, 32> Visited;
9665+
SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
9666+
9667+
if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
9668+
SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
9669+
return SDValue();
9670+
9671+
MachineFunction &MF = DAG.getMachineFunction();
9672+
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
9673+
9674+
// The new operation has twice the width.
9675+
MVT XLenVT = Subtarget.getXLenVT();
9676+
EVT MemVT = LSNode1->getMemoryVT();
9677+
EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
9678+
MachineMemOperand *MMO = LSNode1->getMemOperand();
9679+
MachineMemOperand *NewMMO = MF.getMachineMemOperand(
9680+
MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
9681+
9682+
if (LSNode1->getOpcode() == ISD::LOAD) {
9683+
auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
9684+
unsigned Opcode;
9685+
if (MemVT == MVT::i32)
9686+
Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
9687+
else
9688+
Opcode = RISCVISD::TH_LDD;
9689+
9690+
SDValue Res = DAG.getMemIntrinsicNode(
9691+
Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
9692+
{LSNode1->getChain(), BasePtr,
9693+
DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
9694+
NewMemVT, NewMMO);
9695+
9696+
SDValue Node1 =
9697+
DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
9698+
SDValue Node2 =
9699+
DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
9700+
9701+
DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
9702+
return Node1;
9703+
} else {
9704+
unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
9705+
9706+
SDValue Res = DAG.getMemIntrinsicNode(
9707+
Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
9708+
{LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
9709+
BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
9710+
NewMemVT, NewMMO);
9711+
9712+
DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
9713+
return Res;
9714+
}
9715+
}
9716+
9717+
// Try to combine two adjacent loads/stores to a single pair instruction from
9718+
// the XTHeadMemPair vendor extension.
9719+
static SDValue performMemPairCombine(SDNode *N,
9720+
TargetLowering::DAGCombinerInfo &DCI) {
9721+
SelectionDAG &DAG = DCI.DAG;
9722+
MachineFunction &MF = DAG.getMachineFunction();
9723+
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
9724+
9725+
// Target does not support load/store pair.
9726+
if (!Subtarget.hasVendorXTHeadMemPair())
9727+
return SDValue();
9728+
9729+
LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
9730+
EVT MemVT = LSNode1->getMemoryVT();
9731+
unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
9732+
9733+
// No volatile, indexed or atomic loads/stores.
9734+
if (!LSNode1->isSimple() || LSNode1->isIndexed())
9735+
return SDValue();
9736+
9737+
// Function to get a base + constant representation from a memory value.
9738+
auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
9739+
if (Ptr->getOpcode() == ISD::ADD)
9740+
if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
9741+
return {Ptr->getOperand(0), C1->getZExtValue()};
9742+
return {Ptr, 0};
9743+
};
9744+
9745+
auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
9746+
9747+
SDValue Chain = N->getOperand(0);
9748+
for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
9749+
UI != UE; ++UI) {
9750+
SDUse &Use = UI.getUse();
9751+
if (Use.getUser() != N && Use.getResNo() == 0 &&
9752+
Use.getUser()->getOpcode() == N->getOpcode()) {
9753+
LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
9754+
9755+
// No volatile, indexed or atomic loads/stores.
9756+
if (!LSNode2->isSimple() || LSNode2->isIndexed())
9757+
continue;
9758+
9759+
// Check if LSNode1 and LSNode2 have the same type and extension.
9760+
if (LSNode1->getOpcode() == ISD::LOAD)
9761+
if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
9762+
cast<LoadSDNode>(LSNode1)->getExtensionType())
9763+
continue;
9764+
9765+
if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
9766+
continue;
9767+
9768+
auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
9769+
9770+
// Check if the base pointer is the same for both instruction.
9771+
if (Base1 != Base2)
9772+
continue;
9773+
9774+
// Check if the offsets match the XTHeadMemPair encoding contraints.
9775+
if (MemVT == MVT::i32) {
9776+
// Check for adjacent i32 values and a 2-bit index.
9777+
if ((Offset1 + 4 != Offset2) || !isShiftedUInt<2, 3>(Offset1))
9778+
continue;
9779+
} else if (MemVT == MVT::i64) {
9780+
// Check for adjacent i64 values and a 2-bit index.
9781+
if ((Offset1 + 8 != Offset2) || !isShiftedUInt<2, 4>(Offset1))
9782+
continue;
9783+
}
9784+
9785+
// Try to combine.
9786+
if (SDValue Res =
9787+
tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
9788+
return Res;
9789+
}
9790+
}
9791+
9792+
return SDValue();
9793+
}
9794+
96569795
// Fold
96579796
// (fp_to_int (froundeven X)) -> fcvt X, rne
96589797
// (fp_to_int (ftrunc X)) -> fcvt X, rtz
@@ -10622,7 +10761,15 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1062210761
return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
1062310762
VL);
1062410763
}
10764+
case ISD::LOAD:
1062510765
case ISD::STORE: {
10766+
if (DCI.isAfterLegalizeDAG())
10767+
if (SDValue V = performMemPairCombine(N, DCI))
10768+
return V;
10769+
10770+
if (N->getOpcode() != ISD::STORE)
10771+
break;
10772+
1062610773
auto *Store = cast<StoreSDNode>(N);
1062710774
SDValue Val = Store->getValue();
1062810775
// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
@@ -13452,6 +13599,11 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
1345213599
NODE_NAME_CASE(ORC_B)
1345313600
NODE_NAME_CASE(ZIP)
1345413601
NODE_NAME_CASE(UNZIP)
13602+
NODE_NAME_CASE(TH_LWD)
13603+
NODE_NAME_CASE(TH_LWUD)
13604+
NODE_NAME_CASE(TH_LDD)
13605+
NODE_NAME_CASE(TH_SWD)
13606+
NODE_NAME_CASE(TH_SDD)
1345513607
NODE_NAME_CASE(VMV_V_X_VL)
1345613608
NODE_NAME_CASE(VFMV_V_F_VL)
1345713609
NODE_NAME_CASE(VMV_X_S)

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,12 @@ enum NodeType : unsigned {
338338
// Load address.
339339
LA = ISD::FIRST_TARGET_MEMORY_OPCODE,
340340
LA_TLS_IE,
341+
342+
TH_LWD,
343+
TH_LWUD,
344+
TH_LDD,
345+
TH_SWD,
346+
TH_SDD,
341347
};
342348
} // namespace RISCVISD
343349

0 commit comments

Comments
 (0)