Skip to content

Commit

Permalink
[PowerPC] Add paired vector load and store builtins and intrinsics
Browse files Browse the repository at this point in the history
This patch adds the Clang builtins and LLVM intrinsics to load and store vector pairs.

Differential Revision: https://reviews.llvm.org/D90799
  • Loading branch information
Baptiste Saleil committed Nov 13, 2020
1 parent 66b876c commit 3f78605
Show file tree
Hide file tree
Showing 15 changed files with 848 additions and 8 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/BuiltinsPPC.def
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,8 @@ MMA_BUILTIN(pmxvbf16ger2pp, "vW512*VVi15i15i3", true)
MMA_BUILTIN(pmxvbf16ger2pn, "vW512*VVi15i15i3", true)
MMA_BUILTIN(pmxvbf16ger2np, "vW512*VVi15i15i3", true)
MMA_BUILTIN(pmxvbf16ger2nn, "vW512*VVi15i15i3", true)
MMA_BUILTIN(lxvp, "W256SLLiW256C*", false)
MMA_BUILTIN(stxvp, "vW256SLLiW256C*", false)

// FIXME: Obviously incomplete.

Expand Down
13 changes: 13 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14776,6 +14776,19 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
break;
#include "clang/Basic/BuiltinsPPC.def"
}
if (BuiltinID == PPC::BI__builtin_mma_lxvp ||
BuiltinID == PPC::BI__builtin_mma_stxvp) {
if (BuiltinID == PPC::BI__builtin_mma_lxvp) {
Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
} else {
Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
}
Ops.pop_back();
llvm::Function *F = CGM.getIntrinsic(ID);
return Builder.CreateCall(F, Ops, "");
}
SmallVector<Value*, 4> CallOps;
if (Accumulate) {
Address Addr = EmitPointerWithAlignment(E->getArg(0));
Expand Down
159 changes: 159 additions & 0 deletions clang/test/CodeGen/builtins-ppc-mma.c
Original file line number Diff line number Diff line change
Expand Up @@ -1036,3 +1036,162 @@ void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
__builtin_mma_pmxvbf16ger2nn(&vq, vc, vc, 0, 0, 0);
*((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test66(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]])
// CHECK-NEXT: ret void
//
void test66(const __vector_pair *vpp, const __vector_pair *vp2) {
__vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
__builtin_mma_stxvp(vp, 0LL, vp2);
}

// CHECK-LABEL: @test67(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]]
// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
// CHECK-NEXT: ret void
//
void test67(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) {
__vector_pair vp = __builtin_mma_lxvp(offset, vpp);
__builtin_mma_stxvp(vp, offset, vp2);
}

// CHECK-LABEL: @test68(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18
// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
// CHECK-NEXT: ret void
//
void test68(const __vector_pair *vpp, const __vector_pair *vp2) {
__vector_pair vp = __builtin_mma_lxvp(18LL, vpp);
__builtin_mma_stxvp(vp, 18LL, vp2);
}

// CHECK-LABEL: @test69(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 1
// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
// CHECK-NEXT: ret void
//
void test69(const __vector_pair *vpp, const __vector_pair *vp2) {
__vector_pair vp = __builtin_mma_lxvp(1LL, vpp);
__builtin_mma_stxvp(vp, 1LL, vp2);
}

// CHECK-LABEL: @test70(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42
// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
// CHECK-NEXT: ret void
//
void test70(const __vector_pair *vpp, const __vector_pair *vp2) {
__vector_pair vp = __builtin_mma_lxvp(42LL, vpp);
__builtin_mma_stxvp(vp, 42LL, vp2);
}

// CHECK-LABEL: @test71(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 128
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8*
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 128
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8*
// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
// CHECK-NEXT: ret void
//
void test71(const __vector_pair *vpp, const __vector_pair *vp2) {
__vector_pair vp = __builtin_mma_lxvp(32768LL, vpp);
__builtin_mma_stxvp(vp, 32768LL, vp2);
}

// CHECK-LABEL: @test72(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799
// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799
// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
// CHECK-NEXT: ret void
//
void test72(const __vector_pair *vpp, const __vector_pair *vp2) {
__vector_pair vp = __builtin_mma_lxvp(32799LL, vpp);
__builtin_mma_stxvp(vp, 32799LL, vp2);
}

// CHECK-LABEL: @test73(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8
// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
// CHECK-NEXT: ret void
//
void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
__vector_quad vq = *((__vector_quad *)vqp);
__vector_pair vp = __builtin_mma_lxvp(8LL, vpp);
__builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
*((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test74(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, [[TBAA2]]
// CHECK-NEXT: ret void
//
void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
__vector_quad vq = *((__vector_quad *)vqp);
__vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
__builtin_mma_xvf64gernp(&vq, vp, vc);
*((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test75(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
// CHECK-NEXT: ret void
//
void test75(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
__vector_quad vq = *((__vector_quad *)vqp);
__vector_pair vp = __builtin_mma_lxvp(offs, vpp);
__builtin_mma_xvf64gernp(&vq, vp, vc);
*((__vector_quad *)resp) = vq;
}
14 changes: 14 additions & 0 deletions clang/test/Sema/ppc-mma-types.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,17 @@ void testVPOperators4(int v, void *ptr) {
__vector_pair vp2 = (__vector_pair)vpp; // expected-error {{used type '__vector_pair' where arithmetic or pointer type is required}}
}

void testBuiltinTypes1(const __vector_pair *vpp, const __vector_pair *vp2, float f) {
__vector_pair vp = __builtin_mma_lxvp(f, vpp); // expected-error {{passing 'float' to parameter of incompatible type 'long long'}}
__builtin_mma_stxvp(vp, 32799, vp2); // expected-error {{passing 'int' to parameter of incompatible type 'long long'}}
}

void testBuiltinTypes2(__vector_pair *vpp, const __vector_pair *vp2, unsigned char c) {
__vector_pair vp = __builtin_mma_lxvp(6LL, vpp); // expected-error {{passing '__vector_pair *' to parameter of incompatible type 'const __vector_pair *'}}
__builtin_mma_stxvp(vp, c, vp2); // expected-error {{passing 'unsigned char' to parameter of incompatible type 'long long'}}
}

void testBuiltinTypes3(vector int v, __vector_pair *vp2, signed long long ll, unsigned short s) {
__vector_pair vp = __builtin_mma_lxvp(ll, v); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type 'const __vector_pair *'}}
__builtin_mma_stxvp(vp, ll, s); // expected-error {{passing 'unsigned short' to parameter of incompatible type 'const __vector_pair *'}}
}
8 changes: 8 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsPowerPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -1422,6 +1422,14 @@ let TargetPrefix = "ppc" in {
def int_ppc_mma_xxsetaccz :
Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;

def int_ppc_mma_lxvp :
Intrinsic<[llvm_v256i1_ty], [llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;

def int_ppc_mma_stxvp :
Intrinsic<[], [llvm_v256i1_ty, llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly]>;

// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
defm int_ppc_mma_xvi4ger8 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,13 @@ namespace {
Align(16));
}

/// SelectAddrImmX34 - Returns true if the address N can be represented by
/// a base register plus a signed 34-bit displacement. Suitable for use by
/// PSTXVP and friends.
bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
}

// Select an address into a single register.
bool SelectAddr(SDValue N, SDValue &Base) {
Base = N;
Expand Down
63 changes: 63 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2399,6 +2399,20 @@ bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
return false;
}

/// isIntS34Immediate - This method tests if value of node given can be
/// accurately represented as a sign extension from a 34-bit value. If so,
/// this returns true and the immediate.
bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
if (!isa<ConstantSDNode>(N))
return false;

Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
return isInt<34>(Imm);
}
bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
return isIntS34Immediate(Op.getNode(), Imm);
}

/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
Expand Down Expand Up @@ -2599,6 +2613,55 @@ bool PPCTargetLowering::SelectAddressRegImm(
return true; // [r+0]
}

/// Similar to the 16-bit case but for instructions that take a 34-bit
/// displacement field (prefixed loads/stores).
bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG) const {
// Only on 64-bit targets.
if (N.getValueType() != MVT::i64)
return false;

SDLoc dl(N);
int64_t Imm = 0;

if (N.getOpcode() == ISD::ADD) {
if (!isIntS34Immediate(N.getOperand(1), Imm))
return false;
Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
else
Base = N.getOperand(0);
return true;
}

if (N.getOpcode() == ISD::OR) {
if (!isIntS34Immediate(N.getOperand(1), Imm))
return false;
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
return false;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
else
Base = N.getOperand(0);
Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
return true;
}

if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
return true;
}

return false;
}

/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,8 @@ namespace llvm {
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG,
MaybeAlign EncodingAlignment) const;
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;

/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
Expand Down Expand Up @@ -1325,6 +1327,8 @@ namespace llvm {

bool isIntS16Immediate(SDNode *N, int16_t &Imm);
bool isIntS16Immediate(SDValue Op, int16_t &Imm);
bool isIntS34Immediate(SDNode *N, int64_t &Imm);
bool isIntS34Immediate(SDValue Op, int64_t &Imm);

bool convertToNonDenormSingle(APInt &ArgAPInt);
bool convertToNonDenormSingle(APFloat &ArgAPFloat);
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1031,11 +1031,13 @@ def pred : Operand<OtherVT> {
// Define PowerPC specific addressing mode.

// d-form
def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
// ds-form
def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
// dq-form
def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
// 8LS:d-form
def iaddrX34 : ComplexPattern<iPTR, 2, "SelectAddrImmX34", [], []>; // "pstxvp"

// Below forms are all x-form addressing mode, use three different ones so we
// can make a accurate check for x-form instructions in ISEL.
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrPrefix.td
Original file line number Diff line number Diff line change
Expand Up @@ -1654,6 +1654,24 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] i
"pstxvp $XTp, $D_RA", IIC_LdStLFD>;
}

let Predicates = [PairedVectorMemops] in {
// Intrinsics for Paired Vector Loads.
def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
def : Pat<(v256i1 (int_ppc_mma_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
let Predicates = [PairedVectorMemops, PrefixInstrs] in {
def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
}
// Intrinsics for Paired Vector Stores.
def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX16:$dst),
(STXVP $XSp, memrix16:$dst)>;
def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, xaddrX16:$dst),
(STXVPX $XSp, xaddrX16:$dst)>;
let Predicates = [PairedVectorMemops, PrefixInstrs] in {
def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX34:$dst),
(PSTXVP $XSp, memri34:$dst)>;
}
}

// TODO: We have an added complexity of 500 here. This is only a temporary
// solution to have tablegen consider these patterns first. The way we do
// addressing for PowerPC is complex depending on available D form, X form, or
Expand Down
Loading

0 comments on commit 3f78605

Please sign in to comment.