Skip to content

Commit 33b7481

Browse files
committed
address the comments and add the tests
1 parent c110e5b commit 33b7481

File tree

3 files changed

+92
-12
lines changed

3 files changed

+92
-12
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,10 @@ static bool isSupportedInstr(const MachineInstr &MI) {
10911091
case RISCV::VFWNMSAC_VF:
10921092
case RISCV::VFWMACCBF16_VV:
10931093
case RISCV::VFWMACCBF16_VF:
1094+
// Vector Floating-Point Square-Root Instruction
1095+
case RISCV::VFSQRT_V:
1096+
// Vector Floating-Point Reciprocal Square-Root Estimate Instruction
1097+
case RISCV::VFRSQRT7_V:
10941098
// Vector Floating-Point MIN/MAX Instructions
10951099
case RISCV::VFMIN_VF:
10961100
case RISCV::VFMIN_VV:
@@ -1140,10 +1144,6 @@ static bool isSupportedInstr(const MachineInstr &MI) {
11401144
case RISCV::VFNCVT_F_F_W:
11411145
case RISCV::VFNCVT_ROD_F_F_W:
11421146
case RISCV::VFNCVTBF16_F_F_W:
1143-
// Vector Floating-Point Square-Root Instruction
1144-
case RISCV::VFSQRT_V:
1145-
// Vector Floating-Point Reciprocal Square-Root Estimate Instruction
1146-
case RISCV::VFRSQRT7_V:
11471147
return true;
11481148
}
11491149

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
3-
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
4-
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
5-
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
2+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
3+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
4+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
5+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
66

77
; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer
88

@@ -5069,3 +5069,63 @@ define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b,
50695069
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
50705070
ret <vscale x 4 x float> %2
50715071
}
5072+
5073+
define <vscale x 4 x half> @vfsqrt(<vscale x 4 x half> %a) {
5074+
; NOVLOPT-LABEL: vfsqrt:
5075+
; NOVLOPT: # %bb.0:
5076+
; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5077+
; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8
5078+
; NOVLOPT-NEXT: fsrmi a0, 0
5079+
; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
5080+
; NOVLOPT-NEXT: vfsqrt.v v10, v10
5081+
; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5082+
; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10
5083+
; NOVLOPT-NEXT: fsrm a0
5084+
; NOVLOPT-NEXT: ret
5085+
;
5086+
; VLOPT-LABEL: vfsqrt:
5087+
; VLOPT: # %bb.0:
5088+
; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5089+
; VLOPT-NEXT: vfwcvt.f.f.v v10, v8
5090+
; VLOPT-NEXT: fsrmi a0, 0
5091+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5092+
; VLOPT-NEXT: vfsqrt.v v10, v10
5093+
; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5094+
; VLOPT-NEXT: vfncvt.f.f.w v8, v10
5095+
; VLOPT-NEXT: fsrm a0
5096+
; VLOPT-NEXT: ret
5097+
%1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6)
5098+
%2 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 0, iXLen 7)
5099+
%3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison, <vscale x 4 x float> %2, iXLen 0, iXLen 6)
5100+
ret <vscale x 4 x half> %3
5101+
}
5102+
5103+
define <vscale x 4 x half> @vfrsqrt7(<vscale x 4 x half> %a) {
5104+
; NOVLOPT-LABEL: vfrsqrt7:
5105+
; NOVLOPT: # %bb.0:
5106+
; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5107+
; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8
5108+
; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
5109+
; NOVLOPT-NEXT: vfrsqrt7.v v10, v10
5110+
; NOVLOPT-NEXT: fsrmi a0, 0
5111+
; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5112+
; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10
5113+
; NOVLOPT-NEXT: fsrm a0
5114+
; NOVLOPT-NEXT: ret
5115+
;
5116+
; VLOPT-LABEL: vfrsqrt7:
5117+
; VLOPT: # %bb.0:
5118+
; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5119+
; VLOPT-NEXT: vfwcvt.f.f.v v10, v8
5120+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5121+
; VLOPT-NEXT: vfrsqrt7.v v10, v10
5122+
; VLOPT-NEXT: fsrmi a0, 0
5123+
; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5124+
; VLOPT-NEXT: vfncvt.f.f.w v8, v10
5125+
; VLOPT-NEXT: fsrm a0
5126+
; VLOPT-NEXT: ret
5127+
%1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6)
5128+
%2 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 7)
5129+
%3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison,<vscale x 4 x float> %2, iXLen 0, iXLen 6)
5130+
ret <vscale x 4 x half> %3
5131+
}

llvm/test/CodeGen/RISCV/rvv/vl-opt.mir

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,26 +141,46 @@ body: |
141141
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
142142
...
143143
---
144-
name: vfsqr
144+
name: vfsqrt
145145
body: |
146146
bb.0:
147-
; CHECK-LABEL: name: vfsqr
147+
; CHECK-LABEL: name: vfsqrt
148148
; CHECK: %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 6, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
149149
; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
150150
%x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
151151
early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
152152
...
153153
---
154-
name: vfsqr_nofpexcept
154+
name: vfsqrt_nofpexcept
155155
body: |
156156
bb.0:
157-
; CHECK-LABEL: name: vfsqr_nofpexcept
157+
; CHECK-LABEL: name: vfsqrt_nofpexcept
158158
; CHECK: %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
159159
; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
160160
%x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
161161
early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
162162
...
163163
---
164+
name: vfrsqrt7
165+
body: |
166+
bb.0:
167+
; CHECK-LABEL: name: vfrsqrt7
168+
; CHECK: %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
169+
; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
170+
%x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
171+
%y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
172+
...
173+
---
174+
name: vfrsqrt7_nofpexcept
175+
body: |
176+
bb.0:
177+
; CHECK-LABEL: name: vfrsqrt7_nofpexcept
178+
; CHECK: %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5 /* e32 */, 0 /* tu, mu */
179+
; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
180+
%x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
181+
%y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
182+
...
183+
---
164184
name: vwadd_tied_vs1
165185
body: |
166186
bb.0:

0 commit comments

Comments
 (0)