-
Notifications
You must be signed in to change notification settings - Fork 11.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Support multiple levels of truncates in combineTruncToVnclip. #93752
Conversation
We can use multiple vnclips to saturate an i32 value into an i8 value.
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: We can use multiple vnclips to saturate an i32 value into an i8 value. Full diff: https://github.com/llvm/llvm-project/pull/93752.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0e7713509e969..ba9b3665c6ce2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16263,22 +16263,39 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
return SDValue();
};
+ SDValue Src = N->getOperand(0);
+
+ // Look through multiple layers of truncates.
+ while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
+ Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
+ Src.hasOneUse())
+ Src = Src.getOperand(0);
+
SDValue Val;
unsigned ClipOpc;
- if ((Val = DetectUSatPattern(N->getOperand(0))))
+ if ((Val = DetectUSatPattern(Src)))
ClipOpc = RISCVISD::VNCLIPU_VL;
- else if ((Val = DetectSSatPattern(N->getOperand(0))))
+ else if ((Val = DetectSSatPattern(Src)))
ClipOpc = RISCVISD::VNCLIP_VL;
else
return SDValue();
SDLoc DL(N);
- // Rounding mode here is arbitrary since we aren't shifting out any bits.
- return DAG.getNode(
- ClipOpc, DL, VT,
- {Val, DAG.getConstant(0, DL, VT), DAG.getUNDEF(VT), Mask,
- DAG.getTargetConstant(RISCVVXRndMode::RNU, DL, Subtarget.getXLenVT()),
- VL});
+
+ MVT ValVT = Val.getSimpleValueType();
+
+ do {
+ MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
+ ValVT = MVT::getVectorVT(ValEltVT, ValVT.getVectorElementCount());
+ // Rounding mode here is arbitrary since we aren't shifting out any bits.
+ Val = DAG.getNode(
+ ClipOpc, DL, ValVT,
+ {Val, DAG.getConstant(0, DL, ValVT), DAG.getUNDEF(VT), Mask,
+ DAG.getTargetConstant(RISCVVXRndMode::RNU, DL, Subtarget.getXLenVT()),
+ VL});
+ } while (ValVT != VT);
+
+ return Val;
}
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
index 9f82eddf432da..4a95ef58122c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
@@ -414,16 +414,11 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -437,16 +432,11 @@ define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -460,14 +450,11 @@ define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -526,18 +513,13 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -551,18 +533,13 @@ define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -576,16 +553,13 @@ define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -648,17 +622,11 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 32
@@ -672,17 +640,11 @@ define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 32
@@ -717,15 +679,11 @@ define void @trunc_sat_u16u64_notopt(ptr %x, ptr %y) {
define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index 78e8f0fbbbdd7..a51f0a8ea7e9d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -415,15 +415,10 @@ define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -438,15 +433,10 @@ define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -461,13 +451,10 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -527,17 +514,12 @@ define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclip.wi v8, v12, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -552,17 +534,12 @@ define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclip.wi v8, v12, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -577,15 +554,12 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclipu.wi v8, v12, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -649,16 +623,10 @@ define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclip.wi v8, v12, 0
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 32
@@ -673,16 +641,10 @@ define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclip.wi v8, v12, 0
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 32
@@ -697,14 +659,10 @@ define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclipu.wi v8, v12, 0
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 32
do {
MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
ValVT = MVT::getVectorVT(ValEltVT, ValVT.getVectorElementCount());
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use changeVectorElementType here?
Or we could add a helper, getHalfVectorElementSizeVT, analogous to the existing getHalfNumVectorElementsVT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
We can use multiple vnclips to saturate an i32 value into an i8 value.