Skip to content

Commit

Permalink
[RISCV] Support saturated truncate
Browse files Browse the repository at this point in the history
Add support for saturated truncate by implementing the following changes:

- Register `TRUNCATE_SSAT_S`, `TRUNCATE_SSAT_U`, and `TRUNCATE_USAT_U` for Custom lowering alongside `TRUNCATE` in `setOperationAction`
- Handle `TRUNCATE_SSAT_S`, `TRUNCATE_SSAT_U`, and `TRUNCATE_USAT_U` in `LowerOperation` via the same path as `TRUNCATE`
- Convert `TRUNCATE_SSAT_S` to `TRUNCATE_VECTOR_VL_SSAT`
- Convert `TRUNCATE_[SU]SAT_U` to `TRUNCATE_VECTOR_VL_USAT`
  • Loading branch information
ParkHanbum committed Aug 9, 2024
1 parent 49c4481 commit 7db4887
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 134 deletions.
25 changes: 20 additions & 5 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,

// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
VT, Custom);

// Custom-lower insert/extract operations to simplify patterns.
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
Expand Down Expand Up @@ -1168,7 +1170,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,

setOperationAction(ISD::SELECT, VT, Custom);

setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
VT, Custom);

setOperationAction(ISD::BITCAST, VT, Custom);

Expand Down Expand Up @@ -6395,6 +6399,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
}
case ISD::TRUNCATE:
case ISD::TRUNCATE_SSAT_S:
case ISD::TRUNCATE_SSAT_U:
case ISD::TRUNCATE_USAT_U:
// Only custom-lower vector truncates
if (!Op.getSimpleValueType().isVector())
return Op;
Expand Down Expand Up @@ -8234,7 +8241,8 @@ SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,

SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
SelectionDAG &DAG) const {
bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
unsigned Opc = Op.getOpcode();
bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
SDLoc DL(Op);

MVT VT = Op.getSimpleValueType();
Expand Down Expand Up @@ -8279,11 +8287,18 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
}

unsigned NewOpc;
if (Opc == ISD::TRUNCATE_SSAT_S)
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
else if (Opc == ISD::TRUNCATE_SSAT_U || Opc == ISD::TRUNCATE_USAT_U)
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
else
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;

do {
SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
Mask, VL);
Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
} while (SrcEltVT != DstEltVT);

if (SrcVT.isFixedLengthVector())
Expand Down
32 changes: 8 additions & 24 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,8 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
Expand All @@ -119,10 +117,8 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
Expand Down Expand Up @@ -356,10 +352,8 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
Expand All @@ -374,10 +368,8 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
Expand Down Expand Up @@ -445,10 +437,8 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
Expand All @@ -465,10 +455,8 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
Expand Down Expand Up @@ -544,10 +532,8 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
Expand All @@ -566,10 +552,8 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
Expand Down
68 changes: 26 additions & 42 deletions llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
Expand Down Expand Up @@ -304,9 +303,6 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-V-NEXT: vmax.vx v10, v10, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
Expand Down Expand Up @@ -801,17 +797,16 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vmv.s.x v10, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
Expand Down Expand Up @@ -944,9 +939,8 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: vmax.vx v8, v9, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
Expand Down Expand Up @@ -1139,7 +1133,6 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
Expand Down Expand Up @@ -2114,24 +2107,23 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vmv.s.x v10, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v9, 2
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 4
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
Expand Down Expand Up @@ -3473,7 +3465,6 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
Expand Down Expand Up @@ -3659,9 +3650,6 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-V-NEXT: vmax.vx v10, v10, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
Expand Down Expand Up @@ -4151,17 +4139,16 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vmv.s.x v10, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
Expand Down Expand Up @@ -4289,9 +4276,8 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: vmax.vx v8, v9, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
Expand Down Expand Up @@ -4479,7 +4465,6 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
Expand Down Expand Up @@ -5449,24 +5434,23 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vmv.s.x v10, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v9, 2
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 4
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
Expand Down
Loading

0 comments on commit 7db4887

Please sign in to comment.