Skip to content

Commit e649b38

Browse files
committed
[RISCV] Add tests for widening FP VP reductions. NFC
We're missing patterns for matching vfwred{u,o}sum.vs, both with VP and non-VP fpexts.
1 parent 077e0c1 commit e649b38

File tree

1 file changed

+128
-0
lines changed

llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,3 +465,131 @@ define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m
465465
%s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
466466
ret float %s
467467
}
468+
469+
; Widening unordered VP reduction: a VP fpext (nxv1f16 -> nxv1f32) feeding a
; reassoc (unordered) VP fadd reduction. Current codegen emits a masked
; vfwcvt.f.f.v followed by vfredusum.vs; the single widening reduction
; vfwredusum.vs is not matched yet (missing pattern — see commit message).
; NOTE(review): CHECK lines pin current (suboptimal) codegen, not the ideal form.
define float @vpreduce_fadd_fpext_vp_nxv1f16_nxv1f32(float %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_fpext_vp_nxv1f16_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfredusum.vs v8, v9, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  ; VP (masked, EVL-limited) extend half -> float
  %w = call <vscale x 1 x float> @llvm.vp.fpext(<vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 %evl)
  ; reassoc => unordered reduction (vfredusum)
  %r = call reassoc float @llvm.vp.reduce.fadd(float %s, <vscale x 1 x float> %w, <vscale x 1 x i1> %m, i32 %evl)
  ret float %r
}
484+
485+
; Widening ordered VP reduction: a VP fpext (nxv1f16 -> nxv1f32) feeding an
; ordered (no reassoc flag) VP fadd reduction. Current codegen emits a masked
; vfwcvt.f.f.v followed by vfredosum.vs; the single widening reduction
; vfwredosum.vs is not matched yet (missing pattern — see commit message).
define float @vpreduce_ord_fadd_fpext_vp_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_fpext_vp_fpext_nxv1f16_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfredosum.vs v8, v9, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  ; VP (masked, EVL-limited) extend half -> float
  %w = call <vscale x 1 x float> @llvm.vp.fpext(<vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 %evl)
  ; no reassoc => ordered reduction (vfredosum)
  %r = call float @llvm.vp.reduce.fadd(float %s, <vscale x 1 x float> %w, <vscale x 1 x i1> %m, i32 %evl)
  ret float %r
}
500+
501+
; Widening unordered VP reduction at the next size up: a VP fpext
; (nxv1f32 -> nxv1f64) feeding a reassoc VP fadd reduction. As with the f16
; case, codegen currently emits vfwcvt.f.f.v + vfredusum.vs instead of the
; widening vfwredusum.vs (missing pattern — see commit message).
define double @vpreduce_fadd_fpext_vp_nxv1f32_nxv1f64(double %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_fpext_vp_nxv1f32_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfredusum.vs v8, v9, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  ; VP (masked, EVL-limited) extend float -> double
  %w = call <vscale x 1 x double> @llvm.vp.fpext(<vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 %evl)
  ; reassoc => unordered reduction (vfredusum)
  %r = call reassoc double @llvm.vp.reduce.fadd(double %s, <vscale x 1 x double> %w, <vscale x 1 x i1> %m, i32 %evl)
  ret double %r
}
516+
517+
; Widening ordered VP reduction, f32 -> f64: a VP fpext feeding an ordered
; (no reassoc flag) VP fadd reduction. Codegen currently emits vfwcvt.f.f.v +
; vfredosum.vs instead of the widening vfwredosum.vs (missing pattern — see
; commit message).
define double @vpreduce_ord_fadd_fpext_vp_fpext_nxv1f32_nxv1f64(double %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_fpext_vp_fpext_nxv1f32_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfredosum.vs v8, v9, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  ; VP (masked, EVL-limited) extend float -> double
  %w = call <vscale x 1 x double> @llvm.vp.fpext(<vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 %evl)
  ; no reassoc => ordered reduction (vfredosum)
  %r = call double @llvm.vp.reduce.fadd(double %s, <vscale x 1 x double> %w, <vscale x 1 x i1> %m, i32 %evl)
  ret double %r
}
532+
533+
; Same as the VP-fpext variant above, but with a plain (non-VP) fpext feeding
; the reassoc VP fadd reduction — the commit covers both VP and non-VP fpexts.
; Note the vfwcvt.f.f.v here is unmasked (no v0.t), since the fpext itself
; carries no mask. Still lowered as convert + vfredusum.vs rather than
; vfwredusum.vs (missing pattern — see commit message).
define float @vpreduce_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_fpext_nxv1f16_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfredusum.vs v8, v9, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  ; plain IR fpext (unmasked) half -> float
  %w = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
  ; reassoc => unordered reduction (vfredusum)
  %r = call reassoc float @llvm.vp.reduce.fadd(float %s, <vscale x 1 x float> %w, <vscale x 1 x i1> %m, i32 %evl)
  ret float %r
}
548+
549+
; Non-VP fpext (half -> float) feeding an ordered (no reassoc flag) VP fadd
; reduction. Current codegen: unmasked vfwcvt.f.f.v + masked vfredosum.vs;
; the widening vfwredosum.vs is not matched yet (missing pattern — see commit
; message).
define float @vpreduce_ord_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_fpext_nxv1f16_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfredosum.vs v8, v9, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  ; plain IR fpext (unmasked) half -> float
  %w = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
  ; no reassoc => ordered reduction (vfredosum)
  %r = call float @llvm.vp.reduce.fadd(float %s, <vscale x 1 x float> %w, <vscale x 1 x i1> %m, i32 %evl)
  ret float %r
}
564+
565+
; Non-VP fpext (float -> double) feeding a reassoc (unordered) VP fadd
; reduction. Current codegen: unmasked vfwcvt.f.f.v + masked vfredusum.vs;
; the widening vfwredusum.vs is not matched yet (missing pattern — see commit
; message).
define double @vpreduce_fadd_fpext_nxv1f32_nxv1f64(double %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_fpext_nxv1f32_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfredusum.vs v8, v9, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  ; plain IR fpext (unmasked) float -> double
  %w = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
  ; reassoc => unordered reduction (vfredusum)
  %r = call reassoc double @llvm.vp.reduce.fadd(double %s, <vscale x 1 x double> %w, <vscale x 1 x i1> %m, i32 %evl)
  ret double %r
}
580+
581+
; Non-VP fpext (float -> double) feeding an ordered (no reassoc flag) VP fadd
; reduction. Current codegen: unmasked vfwcvt.f.f.v + masked vfredosum.vs;
; the widening vfwredosum.vs is not matched yet (missing pattern — see commit
; message).
define double @vpreduce_ord_fadd_fpext_nxv1f32_nxv1f64(double %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_fpext_nxv1f32_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfredosum.vs v8, v9, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  ; plain IR fpext (unmasked) float -> double
  %w = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
  ; no reassoc => ordered reduction (vfredosum)
  %r = call double @llvm.vp.reduce.fadd(double %s, <vscale x 1 x double> %w, <vscale x 1 x i1> %m, i32 %evl)
  ret double %r
}

0 commit comments

Comments (0)