[RISCV] Add DAG combine for forming VAADDU_VL from VP intrinsics. #124848
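The combine added here recognizes a zero-extend / add / add-one / logical-shift-right-by-one / truncate chain of VP nodes that all share the same mask and VL, and replaces it with RISCVISD::AVGCEILU_VL, which selects to vaaddu.vv with vxrm set to round-to-nearest-up (the "csrwi vxrm, 0" in the tests below). In scalar terms the chain computes the rounding-up unsigned average of two narrow values through a wider type. Below is a minimal standalone C++ sketch of that identity; it is not part of the patch and the helper names are illustrative.

#include <cassert>
#include <cstdint>

// What the matched DAG computes, written for scalars: widen, add, add the
// rounding bias, shift right by one, truncate back to the narrow type.
static uint8_t AvgCeilUWidened(uint8_t A, uint8_t B) {
  uint32_t Wide = (uint32_t)A + (uint32_t)B + 1;
  return (uint8_t)(Wide >> 1);
}

// The same value computed without ever leaving the narrow type; this is the
// rounding-up averaging add that vaaddu.vv performs under the RNU rounding mode.
static uint8_t AvgCeilUNarrow(uint8_t A, uint8_t B) {
  return (uint8_t)((A >> 1) + (B >> 1) + ((A | B) & 1));
}

int main() {
  // Exhaustively check the identity over all i8 pairs.
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      assert(AvgCeilUWidened((uint8_t)A, (uint8_t)B) ==
             AvgCeilUNarrow((uint8_t)A, (uint8_t)B));
  return 0;
}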
Changes from 4 commits (8185a93, ffa8ae0, 13c6a51, 39cef88, 7192ca8)
@@ -1526,18 +1526,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
                        ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
   if (Subtarget.hasVInstructions())
-    setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER,
-                         ISD::MSCATTER, ISD::VP_GATHER,
-                         ISD::VP_SCATTER, ISD::SRA,
-                         ISD::SRL, ISD::SHL,
-                         ISD::STORE, ISD::SPLAT_VECTOR,
-                         ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
-                         ISD::VP_STORE, ISD::EXPERIMENTAL_VP_REVERSE,
-                         ISD::MUL, ISD::SDIV,
-                         ISD::UDIV, ISD::SREM,
-                         ISD::UREM, ISD::INSERT_VECTOR_ELT,
-                         ISD::ABS, ISD::CTPOP,
-                         ISD::VECTOR_SHUFFLE, ISD::VSELECT});
+    setTargetDAGCombine(
+        {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
+         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
+         ISD::SRL, ISD::SHL, ISD::STORE,
+         ISD::SPLAT_VECTOR, ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
+         ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
+         ISD::MUL, ISD::SDIV, ISD::UDIV,
+         ISD::SREM, ISD::UREM, ISD::INSERT_VECTOR_ELT,
+         ISD::ABS, ISD::CTPOP, ISD::VECTOR_SHUFFLE,
+         ISD::VSELECT});
 
   if (Subtarget.hasVendorXTHeadMemPair())
     setTargetDAGCombine({ISD::LOAD, ISD::STORE});
@@ -16373,6 +16371,92 @@ static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
                             VPStore->isTruncatingStore(), VPStore->isCompressingStore());
 }
 
+// Peephole avgceil pattern.
+// %1 = zext <N x i8> %a to <N x i32>
+// %2 = zext <N x i8> %b to <N x i32>
+// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
+// %4 = add nuw nsw <N x i32> %3, %2
+// %5 = lshr <N x i32> %4, splat (i32 1)
+// %6 = trunc <N x i32> %5 to <N x i8>
+static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG,
+                                         const RISCVSubtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+
+  // Ignore fixed vectors.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
+    return SDValue();
+
+  SDValue In = N->getOperand(0);
+  SDValue Mask = N->getOperand(1);
+  SDValue VL = N->getOperand(2);
+
+  // Input should be a vp_srl with same mask and VL.
+  if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
+      In.getOperand(3) != VL)
+    return SDValue();
+
+  // Shift amount should be 1.
+  if (!isOneOrOneSplat(In.getOperand(1)))
+    return SDValue();
+
+  // Shifted value should be a vp_add with same mask and VL.
+  SDValue LHS = In.getOperand(0);
+  if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
+      LHS.getOperand(3) != VL)
+    return SDValue();
+
+  SDValue Operands[3];
+
+  // Matches another VP_ADD with same VL and Mask.
+  auto FindAdd = [&](SDValue V, SDValue Other) {
+    if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
+        V.getOperand(3) != VL)
+      return false;
+
+    Operands[0] = Other;
+    Operands[1] = V.getOperand(1);
+    Operands[2] = V.getOperand(0);
+    return true;
+  };
+
+  // We need to find another VP_ADD in one of the operands.
+  SDValue LHS0 = LHS.getOperand(0);
+  SDValue LHS1 = LHS.getOperand(1);
+  if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
+    return SDValue();
+
+  // Now we have three operands of two additions. Check that one of them is a
+  // constant vector with ones.
+  auto I = llvm::find_if(Operands,
+                         [](const SDValue &Op) { return isOneOrOneSplat(Op); });
Review comment on lines +16431 to +16432: Can you pass the predicate directly?

Reply: This doesn't compile. Possibly because ...
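A note on the exchange above: llvm::isOneOrOneSplat is declared with a second, defaulted AllowUndefs parameter (an assumption here; the declaration is not part of this diff). Default arguments are not part of a function's type, so passing the function name straight to a unary-predicate algorithm such as find_if leaves a callable that still expects two arguments, while the lambda fixes the arity at one. A self-contained sketch with a stand-in predicate:

#include <algorithm>
#include <vector>

// Stand-in for a helper with a defaulted second parameter, analogous to the
// assumed declaration bool isOneOrOneSplat(SDValue, bool AllowUndefs = false).
static bool isOneLike(int V, bool AllowWildcard = false) {
  return V == 1 || (AllowWildcard && V == -1);
}

int main() {
  std::vector<int> Ops = {4, 1, 7};

  // Does not compile: the default argument is not part of the function type,
  // so find_if would have to invoke a bool(*)(int, bool) with a single int.
  // auto I = std::find_if(Ops.begin(), Ops.end(), isOneLike);

  // Compiles: the lambda presents a unary predicate to the algorithm.
  auto I = std::find_if(Ops.begin(), Ops.end(),
                        [](int V) { return isOneLike(V); });
  return I == Ops.end() ? 1 : 0;
}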
+  if (I == std::end(Operands))
+    return SDValue();
+  // We found a vector with ones, move it to the end of the Operands array.
+  std::swap(*I, Operands[2]);
+
+  // Make sure the other 2 operands can be promoted from the result type.
+  for (SDValue Op : drop_end(Operands)) {
+    if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
+        Op.getOperand(2) != VL)
+      return SDValue();
+    // Input must be the same size or smaller than our result.
+    if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
+      return SDValue();
+  }
+
+  // Pattern is detected.
+  // Rebuild the zero extends in case the inputs are smaller than our result.
+  SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
+                               Operands[0].getOperand(0), Mask, VL);
+  SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
+                               Operands[1].getOperand(0), Mask, VL);
+  // Build a VAADDU with RNU rounding mode.
+  SDLoc DL(N);
+  return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
Review comment: AVGCEILU_VL appears to be previously unused code. Do we have any other such nodes that need to be deleted?

Reply: It's not unused, it's used via a macro in ...
+                     {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
+}
+
 // Convert from one FMA opcode to another based on whether we are negating the
 // multiply result and/or the accumulator.
 // NOTE: Only supports RVV operations with VL.
@@ -17930,6 +18014,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     if (SDValue V = combineTruncOfSraSext(N, DAG))
       return V;
     return combineTruncToVnclip(N, DAG, Subtarget);
+  case ISD::VP_TRUNCATE:
+    return performVP_TRUNCATECombine(N, DAG, Subtarget);
   case ISD::TRUNCATE:
     return performTRUNCATECombine(N, DAG, Subtarget);
   case ISD::SELECT:
@@ -0,0 +1,169 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s

declare <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i16> @llvm.vp.lshr.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i32> @llvm.vp.lshr.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @vaaddu_1(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vaaddu_1:
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vaaddu.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
  %xz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 %vl)
  %yz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 %vl)
  %a = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %xz, <vscale x 2 x i16> %yz, <vscale x 2 x i1> %m, i32 %vl)
  %b = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i1> %m, i32 %vl)
  %c = call <vscale x 2 x i16> @llvm.vp.lshr.nxv2i16(<vscale x 2 x i16> %b, <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i1> %m, i32 %vl)
  %d = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i8> %d
}

define <vscale x 2 x i8> @vaaddu_2(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vaaddu_2:
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vaaddu.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
  %xz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 %vl)
  %yz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 %vl)
  %a = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %xz, <vscale x 2 x i16> %yz, <vscale x 2 x i1> %m, i32 %vl)
  %b = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> splat (i16 1), <vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
  %c = call <vscale x 2 x i16> @llvm.vp.lshr.nxv2i16(<vscale x 2 x i16> %b, <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i1> %m, i32 %vl)
  %d = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i8> %d
}

define <vscale x 2 x i8> @vaaddu_3(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vaaddu_3:
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vaaddu.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
  %xz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 %vl)
  %yz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 %vl)
  %a = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %xz, <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i1> %m, i32 %vl)
  %b = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %yz, <vscale x 2 x i1> %m, i32 %vl)
  %c = call <vscale x 2 x i16> @llvm.vp.lshr.nxv2i16(<vscale x 2 x i16> %b, <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i1> %m, i32 %vl)
  %d = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i8> %d
}

define <vscale x 2 x i8> @vaaddu_4(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vaaddu_4:
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vaaddu.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
  %xz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 %vl)
  %yz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 %vl)
  %a = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %xz, <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i1> %m, i32 %vl)
  %b = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %yz, <vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
  %c = call <vscale x 2 x i16> @llvm.vp.lshr.nxv2i16(<vscale x 2 x i16> %b, <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i1> %m, i32 %vl)
  %d = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i8> %d
}

define <vscale x 2 x i8> @vaaddu_5(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vaaddu_5:
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vaaddu.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
  %xz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 %vl)
  %yz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 %vl)
  %a = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> splat (i16 1), <vscale x 2 x i16> %xz, <vscale x 2 x i1> %m, i32 %vl)
  %b = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %yz, <vscale x 2 x i1> %m, i32 %vl)
  %c = call <vscale x 2 x i16> @llvm.vp.lshr.nxv2i16(<vscale x 2 x i16> %b, <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i1> %m, i32 %vl)
  %d = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i8> %d
}

define <vscale x 2 x i8> @vaaddu_6(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vaaddu_6:
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vaaddu.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
  %xz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 %vl)
  %yz = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 %vl)
  %a = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> splat (i16 1), <vscale x 2 x i16> %xz, <vscale x 2 x i1> %m, i32 %vl)
  %b = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %yz, <vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
  %c = call <vscale x 2 x i16> @llvm.vp.lshr.nxv2i16(<vscale x 2 x i16> %b, <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i1> %m, i32 %vl)
  %d = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i8> %d
}

; Test where the size is reduced by 4x instead of 2x.
define <vscale x 2 x i8> @vaaddu_7(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vaaddu_7:
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vaaddu.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
  %xz = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 %vl)
  %yz = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 %vl)
  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %xz, <vscale x 2 x i32> %yz, <vscale x 2 x i1> %m, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %vl)
  %c = call <vscale x 2 x i32> @llvm.vp.lshr.nxv2i32(<vscale x 2 x i32> %b, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %vl)
  %d = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i8> %d
}

; Test where the zext can't be completely removed.
define <vscale x 2 x i16> @vaaddu_8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vaaddu_8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vzext.vf2 v8, v9, v0.t
; CHECK-NEXT: vaaddu.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
  %xz = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 %vl)
  %yz = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %y, <vscale x 2 x i1> %m, i32 %vl)
  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %xz, <vscale x 2 x i32> %yz, <vscale x 2 x i1> %m, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %vl)
  %c = call <vscale x 2 x i32> @llvm.vp.lshr.nxv2i32(<vscale x 2 x i32> %b, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %vl)
  %d = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i16> %d
}

; Negative test. The truncate has a smaller type than the zero extend.
; TODO: Could still handle this by truncating after an i16 vaaddu.
define <vscale x 2 x i8> @vaaddu_9(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vaaddu_9:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vi v8, v10, 1, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 1, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
; CHECK-NEXT: ret
  %xz = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i1> %m, i32 %vl)
  %yz = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i16(<vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 %vl)
  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %xz, <vscale x 2 x i32> %yz, <vscale x 2 x i1> %m, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %vl)
  %c = call <vscale x 2 x i32> @llvm.vp.lshr.nxv2i32(<vscale x 2 x i32> %b, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %vl)
  %d = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i8> %d
}
Review comment: Stray format change.

Reply: I added VP_TRUNCATE to it; it ended up in the middle because that's where it was in my downstream.