From c98e41f8586bc43033d29ef3ec0f9a2f79b3ec32 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 7 Oct 2024 17:40:32 +0800 Subject: [PATCH] [LegalizeVectorTypes] Always widen fabs (#111298) fabs and fneg are similar nodes in that they can always be expanded to integer ops, but currently they diverge when widened. If the widened vector fabs is marked as expand (and the corresponding scalar type is too), LegalizeVectorTypes thinks that it may be turned into a libcall and so will unroll it to avoid the overhead on the undef elements. However, unlike the other ops in that list, such as fsin, fround and flog, an fabs marked as expand will never be legalized into a libcall. Like fneg, it can always be expanded into an integer op. This moves the fabs case below unrollExpandedOp to bring it in line with fneg, which fixes an issue on RISC-V with f16 fabs being unexpectedly scalarized when there's no zfhmin. --- .../SelectionDAG/LegalizeVectorTypes.cpp | 3 +- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 266 +----------------- 2 files changed, 12 insertions(+), 257 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index e7ae989fcc3494..ab734ffb25dbd0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4679,7 +4679,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_XROUND(N); break; - case ISD::FABS: case ISD::FACOS: case ISD::FASIN: case ISD::FATAN: @@ -4727,7 +4726,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ_ZERO_UNDEF: case ISD::VP_CTTZ_ZERO_UNDEF: case ISD::FNEG: case ISD::VP_FNEG: - case ISD::VP_FABS: + case ISD::FABS: case ISD::VP_FABS: case ISD::VP_SQRT: case ISD::VP_FCEIL: case ISD::VP_FFLOOR: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 
297afd9fc96f9d..7ecf8af54c8dc0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32-ZVFHMIN -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64-ZVFHMIN +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define void @fadd_v8f16(ptr %x, ptr %y) { ; ZVFH-LABEL: fadd_v8f16: @@ -484,259 +484,15 @@ define void @fabs_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; RV32-ZVFHMIN-LABEL: fabs_v6f16: -; RV32-ZVFHMIN: # %bb.0: -; RV32-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV32-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV32-ZVFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill -; RV32-ZVFHMIN-NEXT: .cfi_offset ra, -4 -; RV32-ZVFHMIN-NEXT: .cfi_offset s0, -8 -; RV32-ZVFHMIN-NEXT: .cfi_offset s1, -12 -; RV32-ZVFHMIN-NEXT: .cfi_offset fs0, -24 -; RV32-ZVFHMIN-NEXT: csrr a1, vlenb -; RV32-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV32-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV32-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 
0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV32-ZVFHMIN-NEXT: mv s0, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fmv.s fs0, fa0 -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fs0 -; RV32-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV32-ZVFHMIN-NEXT: fmv.s fa0, fa5 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vmv.v.x v8, a0 -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload 
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5 -; RV32-ZVFHMIN-NEXT: 
vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV32-ZVFHMIN-NEXT: vse16.v v8, (s0) -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: slli a0, a0, 1 -; RV32-ZVFHMIN-NEXT: add sp, sp, a0 -; RV32-ZVFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload -; RV32-ZVFHMIN-NEXT: addi sp, sp, 48 -; RV32-ZVFHMIN-NEXT: ret -; -; RV64-ZVFHMIN-LABEL: fabs_v6f16: -; RV64-ZVFHMIN: # %bb.0: -; RV64-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV64-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV64-ZVFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: .cfi_offset ra, -8 -; RV64-ZVFHMIN-NEXT: .cfi_offset s0, -16 -; RV64-ZVFHMIN-NEXT: .cfi_offset s1, -24 -; RV64-ZVFHMIN-NEXT: .cfi_offset fs0, -32 -; RV64-ZVFHMIN-NEXT: csrr a1, vlenb -; RV64-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV64-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV64-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV64-ZVFHMIN-NEXT: mv s0, a0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; 
RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fmv.s fs0, fa0 -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fs0 -; RV64-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV64-ZVFHMIN-NEXT: fmv.s fa0, fa5 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vmv.v.x v8, a0 -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add 
a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: 
vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64-ZVFHMIN-NEXT: vse16.v v8, (s0) -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: slli a0, a0, 1 -; RV64-ZVFHMIN-NEXT: add sp, sp, a0 -; RV64-ZVFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: addi sp, sp, 48 -; RV64-ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: fabs_v6f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x