Skip to content

Commit 5527139

Browse files
victor-eds authored and topperc committed
[RISCV][VP] Add RVV codegen for [nX]vXi1 vp.select
Expand [nX]vXi1 vp.select the same way as [nX]vXi1 vselect. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D115546
1 parent 42a4f51 commit 5527139

File tree

4 files changed

+205
-0
lines changed

4 files changed

+205
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ class VectorLegalizer {
133133
/// Implement vselect in terms of XOR, AND, OR when blend is not
134134
/// supported by the target.
135135
SDValue ExpandVSELECT(SDNode *Node);
136+
SDValue ExpandVP_SELECT(SDNode *Node);
136137
SDValue ExpandSELECT(SDNode *Node);
137138
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
138139
SDValue ExpandStore(SDNode *N);
@@ -349,6 +350,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
349350
case ISD::CTPOP:
350351
case ISD::SELECT:
351352
case ISD::VSELECT:
353+
case ISD::VP_SELECT:
352354
case ISD::SELECT_CC:
353355
case ISD::ZERO_EXTEND:
354356
case ISD::ANY_EXTEND:
@@ -718,6 +720,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
718720
case ISD::VSELECT:
719721
Results.push_back(ExpandVSELECT(Node));
720722
return;
723+
case ISD::VP_SELECT:
724+
Results.push_back(ExpandVP_SELECT(Node));
725+
return;
721726
case ISD::SELECT:
722727
Results.push_back(ExpandSELECT(Node));
723728
return;
@@ -1195,6 +1200,37 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
11951200
return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
11961201
}
11971202

1203+
/// Expand a VP_SELECT node whose operands are boolean (i1-element) vectors
/// into VP_XOR, VP_AND and VP_OR on targets that do not support it natively.
///
/// Operand layout read from \p Node: 0 = select condition, 1 = value chosen
/// where the condition is true, 2 = value chosen where it is false,
/// 3 = explicit vector length (EVL).
///
/// Computes (Op1 & Cond) | (Op2 & ~Cond). Falls back to
/// DAG.UnrollVectorOp(Node) when any of VP_AND/VP_XOR/VP_OR is marked Expand
/// for the condition's type, or when the data operands are not i1 vectors.
SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
  // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
  // do not support it natively.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
  if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  // This operation also isn't safe when the operands aren't also booleans.
  if (Op1.getValueType().getVectorElementType() != MVT::i1)
    return DAG.UnrollVectorOp(Node);

  // The select condition must NOT be used as the predicate mask of the VP
  // nodes below: lanes disabled by a VP mask yield an undefined result, so
  // every lane where the condition is false (exactly the lanes that must take
  // Op2) would be lost. Use an all-ones predicate mask instead, leaving EVL as
  // the only thing that limits the active lanes.
  SDValue Ones = DAG.getAllOnesConstant(DL, VT);
  SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);

  Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
  Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
  return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
}
1233+
11981234
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
11991235
SmallVectorImpl<SDValue> &Results) {
12001236
// Attempt to expand using TargetLowering.

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
561561
setOperationAction(ISD::SELECT, VT, Custom);
562562
setOperationAction(ISD::SELECT_CC, VT, Expand);
563563
setOperationAction(ISD::VSELECT, VT, Expand);
564+
setOperationAction(ISD::VP_SELECT, VT, Expand);
564565

565566
setOperationAction(ISD::VP_AND, VT, Custom);
566567
setOperationAction(ISD::VP_OR, VT, Custom);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,76 @@
44
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
55
; RUN: -verify-machineinstrs < %s | FileCheck %s
66

7+
declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
8+
9+
define <1 x i1> @select_v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 zeroext %evl) {
10+
; CHECK-LABEL: select_v1i1:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
13+
; CHECK-NEXT: vmandn.mm v9, v9, v0
14+
; CHECK-NEXT: vmand.mm v8, v8, v0
15+
; CHECK-NEXT: vmor.mm v0, v8, v9
16+
; CHECK-NEXT: ret
17+
%v = call <1 x i1> @llvm.vp.select.v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 %evl)
18+
ret <1 x i1> %v
19+
}
20+
21+
declare <2 x i1> @llvm.vp.select.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
22+
23+
define <2 x i1> @select_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 zeroext %evl) {
24+
; CHECK-LABEL: select_v2i1:
25+
; CHECK: # %bb.0:
26+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
27+
; CHECK-NEXT: vmandn.mm v9, v9, v0
28+
; CHECK-NEXT: vmand.mm v8, v8, v0
29+
; CHECK-NEXT: vmor.mm v0, v8, v9
30+
; CHECK-NEXT: ret
31+
%v = call <2 x i1> @llvm.vp.select.v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 %evl)
32+
ret <2 x i1> %v
33+
}
34+
35+
declare <4 x i1> @llvm.vp.select.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
36+
37+
define <4 x i1> @select_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 zeroext %evl) {
38+
; CHECK-LABEL: select_v4i1:
39+
; CHECK: # %bb.0:
40+
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
41+
; CHECK-NEXT: vmandn.mm v9, v9, v0
42+
; CHECK-NEXT: vmand.mm v8, v8, v0
43+
; CHECK-NEXT: vmor.mm v0, v8, v9
44+
; CHECK-NEXT: ret
45+
%v = call <4 x i1> @llvm.vp.select.v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 %evl)
46+
ret <4 x i1> %v
47+
}
48+
49+
declare <8 x i1> @llvm.vp.select.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
50+
51+
define <8 x i1> @select_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 zeroext %evl) {
52+
; CHECK-LABEL: select_v8i1:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
55+
; CHECK-NEXT: vmandn.mm v9, v9, v0
56+
; CHECK-NEXT: vmand.mm v8, v8, v0
57+
; CHECK-NEXT: vmor.mm v0, v8, v9
58+
; CHECK-NEXT: ret
59+
%v = call <8 x i1> @llvm.vp.select.v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 %evl)
60+
ret <8 x i1> %v
61+
}
62+
63+
declare <16 x i1> @llvm.vp.select.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
64+
65+
define <16 x i1> @select_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 zeroext %evl) {
66+
; CHECK-LABEL: select_v16i1:
67+
; CHECK: # %bb.0:
68+
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
69+
; CHECK-NEXT: vmandn.mm v9, v9, v0
70+
; CHECK-NEXT: vmand.mm v8, v8, v0
71+
; CHECK-NEXT: vmor.mm v0, v8, v9
72+
; CHECK-NEXT: ret
73+
%v = call <16 x i1> @llvm.vp.select.v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 %evl)
74+
ret <16 x i1> %v
75+
}
76+
777
declare <2 x i8> @llvm.vp.select.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
878

979
define <2 x i8> @select_v2i8(<2 x i1> %a, <2 x i8> %b, <2 x i8> %c, i32 zeroext %evl) {

llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,104 @@
44
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
55
; RUN: -verify-machineinstrs < %s | FileCheck %s
66

7+
declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
8+
9+
define <vscale x 1 x i1> @select_nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> %c, i32 zeroext %evl) {
10+
; CHECK-LABEL: select_nxv1i1:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
13+
; CHECK-NEXT: vmandn.mm v9, v9, v0
14+
; CHECK-NEXT: vmand.mm v8, v8, v0
15+
; CHECK-NEXT: vmor.mm v0, v8, v9
16+
; CHECK-NEXT: ret
17+
%v = call <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> %c, i32 %evl)
18+
ret <vscale x 1 x i1> %v
19+
}
20+
21+
declare <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
22+
23+
define <vscale x 2 x i1> @select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c, i32 zeroext %evl) {
24+
; CHECK-LABEL: select_nxv2i1:
25+
; CHECK: # %bb.0:
26+
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
27+
; CHECK-NEXT: vmandn.mm v9, v9, v0
28+
; CHECK-NEXT: vmand.mm v8, v8, v0
29+
; CHECK-NEXT: vmor.mm v0, v8, v9
30+
; CHECK-NEXT: ret
31+
%v = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c, i32 %evl)
32+
ret <vscale x 2 x i1> %v
33+
}
34+
35+
declare <vscale x 4 x i1> @llvm.vp.select.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
36+
37+
define <vscale x 4 x i1> @select_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, <vscale x 4 x i1> %c, i32 zeroext %evl) {
38+
; CHECK-LABEL: select_nxv4i1:
39+
; CHECK: # %bb.0:
40+
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
41+
; CHECK-NEXT: vmandn.mm v9, v9, v0
42+
; CHECK-NEXT: vmand.mm v8, v8, v0
43+
; CHECK-NEXT: vmor.mm v0, v8, v9
44+
; CHECK-NEXT: ret
45+
%v = call <vscale x 4 x i1> @llvm.vp.select.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, <vscale x 4 x i1> %c, i32 %evl)
46+
ret <vscale x 4 x i1> %v
47+
}
48+
49+
declare <vscale x 8 x i1> @llvm.vp.select.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32)
50+
51+
define <vscale x 8 x i1> @select_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, <vscale x 8 x i1> %c, i32 zeroext %evl) {
52+
; CHECK-LABEL: select_nxv8i1:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
55+
; CHECK-NEXT: vmandn.mm v9, v9, v0
56+
; CHECK-NEXT: vmand.mm v8, v8, v0
57+
; CHECK-NEXT: vmor.mm v0, v8, v9
58+
; CHECK-NEXT: ret
59+
%v = call <vscale x 8 x i1> @llvm.vp.select.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, <vscale x 8 x i1> %c, i32 %evl)
60+
ret <vscale x 8 x i1> %v
61+
}
62+
63+
declare <vscale x 16 x i1> @llvm.vp.select.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32)
64+
65+
define <vscale x 16 x i1> @select_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c, i32 zeroext %evl) {
66+
; CHECK-LABEL: select_nxv16i1:
67+
; CHECK: # %bb.0:
68+
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
69+
; CHECK-NEXT: vmandn.mm v9, v9, v0
70+
; CHECK-NEXT: vmand.mm v8, v8, v0
71+
; CHECK-NEXT: vmor.mm v0, v8, v9
72+
; CHECK-NEXT: ret
73+
%v = call <vscale x 16 x i1> @llvm.vp.select.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c, i32 %evl)
74+
ret <vscale x 16 x i1> %v
75+
}
76+
77+
declare <vscale x 32 x i1> @llvm.vp.select.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32)
78+
79+
define <vscale x 32 x i1> @select_nxv32i1(<vscale x 32 x i1> %a, <vscale x 32 x i1> %b, <vscale x 32 x i1> %c, i32 zeroext %evl) {
80+
; CHECK-LABEL: select_nxv32i1:
81+
; CHECK: # %bb.0:
82+
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
83+
; CHECK-NEXT: vmandn.mm v9, v9, v0
84+
; CHECK-NEXT: vmand.mm v8, v8, v0
85+
; CHECK-NEXT: vmor.mm v0, v8, v9
86+
; CHECK-NEXT: ret
87+
%v = call <vscale x 32 x i1> @llvm.vp.select.nxv32i1(<vscale x 32 x i1> %a, <vscale x 32 x i1> %b, <vscale x 32 x i1> %c, i32 %evl)
88+
ret <vscale x 32 x i1> %v
89+
}
90+
91+
declare <vscale x 64 x i1> @llvm.vp.select.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
92+
93+
define <vscale x 64 x i1> @select_nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b, <vscale x 64 x i1> %c, i32 zeroext %evl) {
94+
; CHECK-LABEL: select_nxv64i1:
95+
; CHECK: # %bb.0:
96+
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
97+
; CHECK-NEXT: vmandn.mm v9, v9, v0
98+
; CHECK-NEXT: vmand.mm v8, v8, v0
99+
; CHECK-NEXT: vmor.mm v0, v8, v9
100+
; CHECK-NEXT: ret
101+
%v = call <vscale x 64 x i1> @llvm.vp.select.nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b, <vscale x 64 x i1> %c, i32 %evl)
102+
ret <vscale x 64 x i1> %v
103+
}
104+
7105
declare <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
8106

9107
define <vscale x 1 x i8> @select_nxv1i8(<vscale x 1 x i1> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, i32 zeroext %evl) {

0 commit comments

Comments
 (0)