|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
2 |
| -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT |
3 |
| -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT |
4 |
| -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT |
5 |
| -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT |
| 2 | +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT |
| 3 | +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT |
| 4 | +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT |
| 5 | +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT |
6 | 6 |
|
7 | 7 | ; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer
|
8 | 8 |
|
@@ -5069,3 +5069,63 @@ define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b,
|
5069 | 5069 | %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
|
5070 | 5070 | ret <vscale x 4 x float> %2
|
5071 | 5071 | }
|
| 5072 | + |
; vfsqrt: VL-optimizer test for vfsqrt.v.
; The IR widens %a with vfwcvt.f.f.v (VL operand 6), applies vfsqrt with a
; larger VL operand (7), then narrows the result with vfncvt.f.f.w (VL operand 6).
; Per the autogenerated checks below: with the VL optimizer disabled, vfsqrt.v
; runs under its own `vsetivli zero, 7`; with it enabled, the vsetvli in front
; of vfsqrt.v is `vsetvli zero, zero`, so only the element width/LMUL change and
; the VL of 6 set for the widening convert is retained.
; NOTE(review): the `iXLen 0` operand preceding the VL on vfsqrt and vfncvt is
; presumably the rounding-mode operand (the checks show `fsrmi a0, 0`) - confirm
; against the intrinsic definitions.
| 5073 | +define <vscale x 4 x half> @vfsqrt(<vscale x 4 x half> %a) { |
| 5074 | +; NOVLOPT-LABEL: vfsqrt: |
| 5075 | +; NOVLOPT: # %bb.0: |
| 5076 | +; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
| 5077 | +; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8 |
| 5078 | +; NOVLOPT-NEXT: fsrmi a0, 0 |
| 5079 | +; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma |
| 5080 | +; NOVLOPT-NEXT: vfsqrt.v v10, v10 |
| 5081 | +; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
| 5082 | +; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10 |
| 5083 | +; NOVLOPT-NEXT: fsrm a0 |
| 5084 | +; NOVLOPT-NEXT: ret |
| 5085 | +; |
| 5086 | +; VLOPT-LABEL: vfsqrt: |
| 5087 | +; VLOPT: # %bb.0: |
| 5088 | +; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
| 5089 | +; VLOPT-NEXT: vfwcvt.f.f.v v10, v8 |
| 5090 | +; VLOPT-NEXT: fsrmi a0, 0 |
| 5091 | +; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| 5092 | +; VLOPT-NEXT: vfsqrt.v v10, v10 |
| 5093 | +; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| 5094 | +; VLOPT-NEXT: vfncvt.f.f.w v8, v10 |
| 5095 | +; VLOPT-NEXT: fsrm a0 |
| 5096 | +; VLOPT-NEXT: ret |
| 5097 | + %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6) |
| 5098 | + %2 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 0, iXLen 7) |
| 5099 | + %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison, <vscale x 4 x float> %2, iXLen 0, iXLen 6) |
| 5100 | + ret <vscale x 4 x half> %3 |
| 5101 | +} |
| 5102 | + |
; vfrsqrt7: VL-optimizer test for vfrsqrt7.v.
; The IR widens %a with vfwcvt.f.f.v (VL operand 6), applies vfrsqrt7 with a
; larger VL operand (7), then narrows the result with vfncvt.f.f.w (VL operand 6).
; Unlike the vfncvt call, the vfrsqrt7 call carries no extra operand before its
; VL; in the checks the `fsrmi a0, 0` is emitted after vfrsqrt7.v, just ahead of
; the narrowing convert.
; Per the autogenerated checks below: with the VL optimizer disabled,
; vfrsqrt7.v runs under its own `vsetivli zero, 7`; with it enabled, the vsetvli
; in front of vfrsqrt7.v is `vsetvli zero, zero`, so only the element width/LMUL
; change and the VL of 6 set for the widening convert is retained.
| 5103 | +define <vscale x 4 x half> @vfrsqrt7(<vscale x 4 x half> %a) { |
| 5104 | +; NOVLOPT-LABEL: vfrsqrt7: |
| 5105 | +; NOVLOPT: # %bb.0: |
| 5106 | +; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
| 5107 | +; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8 |
| 5108 | +; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma |
| 5109 | +; NOVLOPT-NEXT: vfrsqrt7.v v10, v10 |
| 5110 | +; NOVLOPT-NEXT: fsrmi a0, 0 |
| 5111 | +; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
| 5112 | +; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10 |
| 5113 | +; NOVLOPT-NEXT: fsrm a0 |
| 5114 | +; NOVLOPT-NEXT: ret |
| 5115 | +; |
| 5116 | +; VLOPT-LABEL: vfrsqrt7: |
| 5117 | +; VLOPT: # %bb.0: |
| 5118 | +; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
| 5119 | +; VLOPT-NEXT: vfwcvt.f.f.v v10, v8 |
| 5120 | +; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| 5121 | +; VLOPT-NEXT: vfrsqrt7.v v10, v10 |
| 5122 | +; VLOPT-NEXT: fsrmi a0, 0 |
| 5123 | +; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| 5124 | +; VLOPT-NEXT: vfncvt.f.f.w v8, v10 |
| 5125 | +; VLOPT-NEXT: fsrm a0 |
| 5126 | +; VLOPT-NEXT: ret |
| 5127 | + %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6) |
| 5128 | + %2 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 7) |
| 5129 | + %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison,<vscale x 4 x float> %2, iXLen 0, iXLen 6) |
| 5130 | + ret <vscale x 4 x half> %3 |
| 5131 | +} |
0 commit comments