-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[AArch64][SVE2] Add codegen patterns for SVE2 FAMINMAX #107284
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
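@llvm/pr-subscribers-backend-aarch64 Author: None (SpencerAbson) Changes: Tablegen patterns were previously added to lower the following sequences from generic IR to NEON FAMIN/FAMAX instructions: fminimum(abs(a), abs(b)) -> famin(a, b); fmaximum(abs(a), abs(b)) -> famax(a, b); fminnum[nnan](abs(a), abs(b)) -> famin(a, b); fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b).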
The same idea has been applied for the scalable vector variants of FAMIN/FAMAX.
Patch is 33.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107284.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4922fb280333bb..d2692c7bfc7da6 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -218,6 +218,16 @@ def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3),
return N->getFlags().hasAllowContract();
}]>;
+def AArch64fminnm_p_nnan : PatFrag<(ops node:$op1, node:$op2, node:$op3),
+ (AArch64fminnm_p node:$op1, node:$op2, node:$op3), [{
+ return N->getFlags().hasNoNaNs();
+}]>;
+
+def AArch64fmaxnm_p_nnan : PatFrag<(ops node:$op1, node:$op2, node:$op3),
+ (AArch64fmaxnm_p node:$op1, node:$op2, node:$op3), [{
+ return N->getFlags().hasNoNaNs();
+}]>;
+
def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
@@ -469,6 +479,24 @@ def AArch64fmls_m1 : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
(vselect node:$pg, (AArch64fsub_p_contract (SVEAllActive), node:$za, (AArch64fmul_p_oneuse (SVEAllActive), node:$zn, node:$zm)), node:$za),
(vselect node:$pg, (AArch64fma_p (SVEAllActive), (AArch64fneg_mt (SVEAllActive), node:$zn, (undef)), node:$zm, node:$za), node:$za)]>;
+def AArch64famin_p : PatFrags<(ops node:$pred, node:$op1, node:$op2),
+ [(int_aarch64_sve_famin_u node:$pred, node:$op1, node:$op2),
+ (AArch64fmin_p node:$pred,
+ (AArch64fabs_mt node:$pred, node:$op1, undef),
+ (AArch64fabs_mt node:$pred, node:$op2, undef)),
+ (AArch64fminnm_p_nnan node:$pred,
+ (AArch64fabs_mt node:$pred, node:$op1, undef),
+ (AArch64fabs_mt node:$pred, node:$op2, undef))]>;
+
+def AArch64famax_p : PatFrags<(ops node:$pred, node:$op1, node:$op2),
+ [(int_aarch64_sve_famax_u node:$pred, node:$op1, node:$op2),
+ (AArch64fmax_p node:$pred,
+ (AArch64fabs_mt node:$pred, node:$op1, undef),
+ (AArch64fabs_mt node:$pred, node:$op2, undef)),
+ (AArch64fmaxnm_p_nnan node:$pred,
+ (AArch64fabs_mt node:$pred, node:$op1, undef),
+ (AArch64fabs_mt node:$pred, node:$op2, undef))]>;
+
def AArch64add_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_add, add>;
def AArch64sub_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_sub, sub>;
def AArch64mul_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_mul, AArch64mul_p>;
@@ -4186,8 +4214,8 @@ let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", int_aarch64_sve_famin, DestructiveBinaryComm>;
defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", int_aarch64_sve_famax, DestructiveBinaryComm>;
-defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>;
-defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>;
+defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64famax_p>;
+defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64famin_p>;
} // End HasSVE2orSME2, HasFAMINMAX
let Predicates = [HasSSVE_FP8FMA] in {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sve2-faminmax.ll b/llvm/test/CodeGen/AArch64/aarch64-sve2-faminmax.ll
new file mode 100644
index 00000000000000..6dde9222abf2ef
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-sve2-faminmax.ll
@@ -0,0 +1,661 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -mattr=+faminmax -mattr=+sve2 -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+faminmax -mattr=+sme2 -force-streaming -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FAMINMAX
+
+; Replace pattern max(abs(a), abs(b)) by famax(a,b)
+; Replace pattern min(abs(a), abs(b)) by famin(a,b)
+
+; When the fastmath flag 'nnan' (no nan) is enabled, we may also replace:
+; minnm(abs(a), abs(b)) with famin(a, b)
+; maxnm(abs(a), abs(b)) with famax(a, b)
+
+define <vscale x 4 x half> @famax_max_nx4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: famax_max_nx4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famax_max_nx4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fmax z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %a)
+ %bb = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %b)
+ %r = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %aa, <vscale x 4 x half> %bb)
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @famax_maxnm_nnan_nx4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: famax_maxnm_nnan_nx4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famax_maxnm_nnan_nx4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %a)
+ %bb = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %b)
+ %r = call nnan <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %aa, <vscale x 4 x half> %bb)
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @famax_maxnm_noflag_nx4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: famax_maxnm_noflag_nx4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famax_maxnm_noflag_nx4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %a)
+ %bb = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %b)
+ %r = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %aa, <vscale x 4 x half> %bb)
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @famin_min_nx4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: famin_min_nx4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famin_min_nx4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fmin z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %a)
+ %bb = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %b)
+ %r = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> %aa, <vscale x 4 x half> %bb)
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @famin_minnm_nnan_nx4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: famin_minnm_nnan_nx4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famin_minnm_nnan_nx4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %a)
+ %bb = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %b)
+ %r = call nnan <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %aa, <vscale x 4 x half> %bb)
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @famin_minnm_noflag_nx4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: famin_minnm_noflag_nx4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famin_minnm_noflag_nx4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %a)
+ %bb = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %b)
+ %r = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %aa, <vscale x 4 x half> %bb)
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 8 x half> @famax_max_nx8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: famax_max_nx8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famax_max_nx8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fmax z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
+ %bb = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %b)
+ %r = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %aa, <vscale x 8 x half> %bb)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @famax_maxnm_nnan_nx8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: famax_maxnm_nnan_nx8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famax_maxnm_nnan_nx8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
+ %bb = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %b)
+ %r = call nnan <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %aa, <vscale x 8 x half> %bb)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @famax_maxnm_noflag_nx8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: famax_maxnm_noflag_nx8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famax_maxnm_noflag_nx8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
+ %bb = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %b)
+ %r = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %aa, <vscale x 8 x half> %bb)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @famin_min_nx8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: famin_min_nx8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famin_min_nx8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fmin z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
+ %bb = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %b)
+ %r = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> %aa, <vscale x 8 x half> %bb)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @famin_minnm_nnan_nx8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: famin_minnm_nnan_nx8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famin_minnm_nnan_nx8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
+ %bb = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %b)
+ %r = call nnan <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %aa, <vscale x 8 x half> %bb)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @famin_minnm_noflag_nx8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: famin_minnm_noflag_nx8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famin_minnm_noflag_nx8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.h, p0/m, z1.h
+; CHECK-NO-FAMINMAX-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
+ %bb = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %b)
+ %r = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %aa, <vscale x 8 x half> %bb)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 2 x float> @famax_max_nx2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: famax_max_nx2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famax_max_nx2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.d
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.s, p0/m, z0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.s, p0/m, z1.s
+; CHECK-NO-FAMINMAX-NEXT: fmax z0.s, p0/m, z0.s, z1.s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %a)
+ %bb = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %b)
+ %r = call <vscale x 2 x float> @llvm.maximum.nx2f32(<vscale x 2 x float> %aa, <vscale x 2 x float> %bb)
+ ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @famax_maxnm_nnan_nx2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: famax_maxnm_nnan_nx2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famax_maxnm_nnan_nx2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.d
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.s, p0/m, z0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.s, p0/m, z1.s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %a)
+ %bb = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %b)
+ %r = call nnan <vscale x 2 x float> @llvm.maxnum.nx2f32(<vscale x 2 x float> %aa, <vscale x 2 x float> %bb)
+ ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @famax_maxnm_noflag_nx2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: famax_maxnm_noflag_nx2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fabs z0.s, p0/m, z0.s
+; CHECK-NEXT: fabs z1.s, p0/m, z1.s
+; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famax_maxnm_noflag_nx2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.d
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.s, p0/m, z0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.s, p0/m, z1.s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %a)
+ %bb = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %b)
+ %r = call <vscale x 2 x float> @llvm.maxnum.nx2f32(<vscale x 2 x float> %aa, <vscale x 2 x float> %bb)
+ ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @famin_min_nx2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: famin_min_nx2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famin_min_nx2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.d
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.s, p0/m, z0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.s, p0/m, z1.s
+; CHECK-NO-FAMINMAX-NEXT: fmin z0.s, p0/m, z0.s, z1.s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %a)
+ %bb = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %b)
+ %r = call <vscale x 2 x float> @llvm.minimum.nx2f32(<vscale x 2 x float> %aa, <vscale x 2 x float> %bb)
+ ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @famin_minnm_nnan_nx2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: famin_minnm_nnan_nx2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: famin_minnm_nnan_nx2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: ptrue p0.d
+; CHECK-NO-FAMINMAX-NEXT: fabs z0.s, p0/m, z0.s
+; CHECK-NO-FAMINMAX-NEXT: fabs z1.s, p0/m, z1.s
+; CHECK-NO-FAMINMAX-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %a)
+ %bb = call <vscale x 2 x float> @llvm.fabs.nx2f32(<vscale x 2 x float> %b)
+ %r = call nnan <vscale x 2 x float> @llvm.minnum.nx2f32(<vscale x 2 x float> %aa, <vscale x 2 x float> %bb)
+ ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @famin_minnm_noflag_nx2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: famin_minnm_noflag_nx2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fabs z0.s, p0/m, z0.s
...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the patch. It looks fine to me.
Tablegen patterns were previously added to lower the following sequences from generic IR to NEON FAMIN/FAMAX instructions - fminimum(abs(a), abs(b)) -> famin(a, b) - fmaximum(abs(a), abs(b)) -> famax(a, b) - llvm#103027 - fminnum[nnan](abs(a), abs(b)) -> famin(a, b) - fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b) - llvm#104766 The same idea has been applied for the scalable vector variants of FAMIN/FAMAX. ('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags) - Changes to LLVM - lib/Target/AArch64/AArch64SVEInstrInfo.td - Add 'AArch64fminnm_p_nnan' and 'AArch64fmaxnm_p_nnan' patfrags (patterns predicated on the 'nnan' flag). - Add 'AArch64famax_p' and 'AArch64famin_p' - test/CodeGen/AArch64/aarch64-sve2-faminmax.ll - Add tests to verify the new patterns, including both positive and negative tests for 'nnan' predicated behavior.
b8d15b0
to
f7ad7b9
Compare
Tablegen patterns were previously added to lower the following sequences from generic IR to NEON FAMIN/FAMAX instructions - `fminimum(abs(a), abs(b)) -> famin(a, b)` - `fmaximum(abs(a), abs(b)) -> famax(a, b)` - llvm#103027 - `fminnum[nnan](abs(a), abs(b)) -> famin(a, b)` - `fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)` - llvm#104766 The same idea has been applied for the scalable vector variants of [FAMIN](https://developer.arm.com/documentation/ddi0602/2024-06/SVE-Instructions/FAMIN--Floating-point-absolute-minimum--predicated--)/[FAMAX](https://developer.arm.com/documentation/ddi0602/2024-06/SVE-Instructions/FAMAX--Floating-point-absolute-maximum--predicated--). ('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags). - Changes to LLVM - lib/Target/AArch64/AArch64SVEInstrInfo.td - Add 'AArch64fminnm_p_nnan' and 'AArch64fmaxnm_p_nnan' patfrags (patterns predicated on the 'nnan' flag). - Add 'AArch64famax_p' and 'AArch64famin_p' - test/CodeGen/AArch64/aarch64-sve2-faminmax.ll - Add tests to verify the new patterns, including both positive and negative tests for 'nnan' predicated behavior.
Tablegen patterns were previously added to lower the following sequences from generic IR to NEON FAMIN/FAMAX instructions
fminimum(abs(a), abs(b)) -> famin(a, b)
fmaximum(abs(a), abs(b)) -> famax(a, b)
fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
The same idea has been applied for the scalable vector variants of FAMIN/FAMAX.
('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags).