From 7db4887dbbdc3b05d2e7ef854a3990164af6a2c4 Mon Sep 17 00:00:00 2001
From: hanbeom
Date: Tue, 16 Jul 2024 14:14:40 +0900
Subject: [PATCH] [RISCV] Support saturated truncate

Add support for saturating truncate with the following changes:

- Mark `TRUNCATE_[SU]SAT_[SU]` as Custom for the same vector types as
  `TRUNCATE`
- Handle `TRUNCATE_[SU]SAT_[SU]` alongside `TRUNCATE` in the custom
  lowering path (`lowerVectorTruncLike`)
- Lower `TRUNCATE_SSAT_S` to `TRUNCATE_VECTOR_VL_SSAT`
- Lower `TRUNCATE_SSAT_U` and `TRUNCATE_USAT_U` to
  `TRUNCATE_VECTOR_VL_USAT`
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 25 +++++--
 .../RISCV/rvv/fixed-vectors-trunc-sat-clip.ll | 32 +++------
 .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll     | 68 +++++++------
 .../RISCV/rvv/trunc-sat-clip-sdnode.ll        | 32 +++------
 .../RISCV/rvv/trunc-select-to-max-usat.ll     | 57 +++++-----
 5 files changed, 80 insertions(+), 134 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d40d4997d76149..704caeab90bb6e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -853,7 +853,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
       // nodes which truncate by one power of two at a time.
-      setOperationAction(ISD::TRUNCATE, VT, Custom);
+      setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
+                          ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
+                         VT, Custom);
 
       // Custom-lower insert/extract operations to simplify patterns.
       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
@@ -1168,7 +1170,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
         setOperationAction(ISD::SELECT, VT, Custom);
 
-        setOperationAction(ISD::TRUNCATE, VT, Custom);
+        setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
+                            ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
+                           VT, Custom);
 
         setOperationAction(ISD::BITCAST, VT, Custom);
 
@@ -6395,6 +6399,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
   }
   case ISD::TRUNCATE:
+  case ISD::TRUNCATE_SSAT_S:
+  case ISD::TRUNCATE_SSAT_U:
+  case ISD::TRUNCATE_USAT_U:
     // Only custom-lower vector truncates
     if (!Op.getSimpleValueType().isVector())
       return Op;
@@ -8234,7 +8241,8 @@ SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
 
 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
                                                   SelectionDAG &DAG) const {
-  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
+  unsigned Opc = Op.getOpcode();
+  bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
   SDLoc DL(Op);
 
   MVT VT = Op.getSimpleValueType();
@@ -8279,11 +8287,18 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
   }
 
+  unsigned NewOpc;
+  if (Opc == ISD::TRUNCATE_SSAT_S)
+    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
+  else if (Opc == ISD::TRUNCATE_SSAT_U || Opc == ISD::TRUNCATE_USAT_U)
+    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
+  else
+    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
+
   do {
     SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
     MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
-    Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
-                         Mask, VL);
+    Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
   } while (SrcEltVT != DstEltVT);
 
   if (SrcVT.isFixedLengthVector())
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
index 4e367bb0d70cd1..e2f540e991fd0b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
@@ -101,10 +101,8 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
 define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
@@ -119,10 +117,8 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
@@ -356,10 +352,8 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
 define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vse32.v v10, (a1)
 ; CHECK-NEXT:    ret
@@ -374,10 +368,8 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vse32.v v10, (a1)
 ; CHECK-NEXT:    ret
@@ -445,10 +437,8 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
 define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u32_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
@@ -465,10 +455,8 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
 define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u32_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
@@ -544,10 +532,8 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
 define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u64_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -566,10 +552,8 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
 define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u64_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 3e2db3fa4685dd..ffbcebf621fd7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -113,7 +113,6 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -304,9 +303,6 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT:    vmax.vx v10, v10, zero
-; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    ret
 entry:
@@ -801,17 +797,16 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    vmv.s.x v10, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
@@ -944,9 +939,8 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT:    vmax.vx v8, v9, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v9, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -1139,7 +1133,6 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -2114,24 +2107,23 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    vmv.s.x v10, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
@@ -3473,7 +3465,6 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -3659,9 +3650,6 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT:    vmax.vx v10, v10, zero
-; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    ret
 entry:
@@ -4151,17 +4139,16 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    vmv.s.x v10, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
@@ -4289,9 +4276,8 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT:    vmax.vx v8, v9, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v9, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -4479,7 +4465,6 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -5449,24 +5434,23 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    vmv.s.x v10, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index 01a90d8a33b6ec..f43faadc532f26 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -102,9 +102,7 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
@@ -120,9 +118,7 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
@@ -357,9 +353,7 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
@@ -375,9 +369,7 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
@@ -446,9 +438,7 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u32_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -466,9 +456,7 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u32_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -545,9 +533,7 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u64_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v12, 0
@@ -567,9 +553,7 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u64_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v12, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
index 28d7588b9347a7..992ea8f8c18a5e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
@@ -4,9 +4,7 @@
 define <4 x i8> @test_v4i16_v4i8(<4 x i16> %x) {
 ; CHECK-LABEL: test_v4i16_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <4 x i16> %x, zeroinitializer
@@ -20,9 +18,7 @@ define <4 x i8> @test_v4i16_v4i8(<4 x i16> %x) {
 define <4 x i8> @test_v4i32_v4i8(<4 x i32> %x) {
 ; CHECK-LABEL: test_v4i32_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
@@ -38,9 +34,7 @@ define <4 x i8> @test_v4i32_v4i8(<4 x i32> %x) {
 define <4 x i8> @test_v4i64_v4i8(<4 x i64> %x) {
 ; CHECK-LABEL: test_v4i64_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -58,9 +52,7 @@ define <4 x i8> @test_v4i64_v4i8(<4 x i64> %x) {
 define <4 x i16> @test_v4i32_v4i16(<4 x i32> %x) {
 ; CHECK-LABEL: test_v4i32_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <4 x i32> %x, zeroinitializer
@@ -74,9 +66,7 @@ define <4 x i16> @test_v4i32_v4i16(<4 x i32> %x) {
 define <4 x i16> @test_v4i64_v4i16(<4 x i64> %x) {
 ; CHECK-LABEL: test_v4i64_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -92,10 +82,9 @@ define <4 x i16> @test_v4i64_v4i16(<4 x i64> %x) {
 define <4 x i32> @test_v4i64_v4i32(<4 x i64> %x) {
 ; CHECK-LABEL: test_v4i64_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v10, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %a = icmp sgt <4 x i64> %x, zeroinitializer
   %b = sext <4 x i1> %a to <4 x i64>
@@ -108,9 +97,7 @@
 define <vscale x 4 x i8> @test_nxv4i16_nxv4i8(<vscale x 4 x i16> %x) {
 ; CHECK-LABEL: test_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i16> %x, zeroinitializer
@@ -124,9 +111,7 @@
 define <vscale x 4 x i8> @test_nxv4i32_nxv4i8(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: test_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -142,9 +127,7 @@
 define <vscale x 4 x i8> @test_nxv4i64_nxv4i8(<vscale x 4 x i64> %x) {
 ; CHECK-LABEL: test_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v12, 0
@@ -162,10 +145,9 @@
 define <vscale x 4 x i16> @test_nxv4i32_nxv4i16(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: test_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v10, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i32> %x, zeroinitializer
   %b = sext <vscale x 4 x i1> %a to <vscale x 4 x i32>
@@ -178,9 +160,7 @@
 define <vscale x 4 x i16> @test_nxv4i64_nxv4i16(<vscale x 4 x i64> %x) {
 ; CHECK-LABEL: test_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v12, 0
@@ -196,10 +176,9 @@
 define <vscale x 4 x i32> @test_nxv4i64_nxv4i32(<vscale x 4 x i64> %x) {
 ; CHECK-LABEL: test_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v12, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i64> %x, zeroinitializer
   %b = sext <vscale x 4 x i1> %a to <vscale x 4 x i64>
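
A minimal IR sketch of the shape this patch improves (illustrative only; the
function below is not part of the patch or its tests, though it mirrors the
trunc_sat_u8u16_maxmin case above). The generic DAG combiner folds a clamp to
[0, 255] followed by a truncate into ISD::TRUNCATE_USAT_U; with this patch the
RISC-V backend lowers that node through TRUNCATE_VECTOR_VL_USAT, so the whole
sequence selects to a single vnclipu.wi instead of vmax.vx plus vnclipu.wi:

  ; smax clamps below at 0, umin clamps above at 255, then trunc narrows;
  ; together these form an unsigned saturating truncate to i8.
  define <4 x i8> @example_trunc_sat_u8u16(<4 x i16> %x) {
    %max = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %x, <4 x i16> zeroinitializer)
    %min = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %max, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
    %trunc = trunc <4 x i16> %min to <4 x i8>
    ret <4 x i8> %trunc
  }

  declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
  declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)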