-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[AArch64][NEON] Add famax/famin codegen patterns #103027
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
- Replace min(abs(a), abs(b)) with famin(a, b) - Replace max(abs(a), abs(b)) with famax(a, b) - llvm/lib/Target/AArch64/AArch64InstrInfo.td - Add pattern for NEON types - llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll - Add tests with and without +faminmax flag.
@llvm/pr-subscribers-backend-aarch64 Author: None (SpencerAbson) Changes
Full diff: https://github.com/llvm/llvm-project/pull/103027.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1e5c5e2657e65d..2ca11310019122 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -10155,7 +10155,19 @@ let Predicates = [HasFP8] in {
let Predicates = [HasFAMINMAX] in {
defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
-} // End let Predicates = [HasFAMAXMIN]
+} // End let Predicates = [HasFAMINMAX]
+
+let Predicates = [HasNEON, HasFAMINMAX] in {
+ foreach Ty = [v4f16, v8f16, v2f32, v4f32, v2f64] in {
+ // Replace min(abs(a), abs(b)) with famin(a, b)
+ def : Pat<(Ty (fminimum (fabs Ty:$Rn), (fabs Ty:$Rm))),
+ (!cast<Instruction>("FAMIN"#Ty) Ty:$Rn, Ty:$Rm)>;
+
+ // Replace max(abs(a), abs(b)) with famax(a, b)
+ def : Pat<(Ty (fmaximum (fabs Ty:$Rn), (fabs Ty:$Rm))),
+ (!cast<Instruction>("FAMAX"#Ty) Ty:$Rn, Ty:$Rm)>;
+ }
+} // End let Predicates = [HasNEON, HasFAMINMAX]
let Predicates = [HasFP8FMA] in {
defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
new file mode 100644
index 00000000000000..a8ec25565f59dc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -mattr=+faminmax -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FAMINMAX
+
+; Replace min(abs(a), abs(b)) with famin(a, b)
+; Replace max(abs(a), abs(b)) with famax(a, b)
+
+define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_max_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.maximum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_min_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.minimum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_max_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_min_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_max_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.maximum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_min_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.minimum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_max_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_min_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.minimum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_max_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_min_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+
+declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+
+declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
+declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
+declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
+
+declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
+declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
+declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
+
+attributes #0 = { nounwind "target-features"="+fullfp16" }
|
- Change predicate of NEON famin/famax instructions to [HasNEON,HasFAMINMAX] - Use target triple string in faminmax llc test instead of 'mtriple' flag.
Thank you, Paul. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi - Matching from a fminimum (denormals aside) sounds like it should be correct, that sounds good. Unfortunately that does make it less useful. Did you consider adding patterns for fminnum with nnan too? They are much more likely to come up in practice if people are using fast-math.
Patterns were previously added to allow the following reductions - fminimum(abs(a), abs(b)) -> famin(a, b) - fmaximum(abs(a), abs(b)) -> famax(a, b) - (llvm#103027) It was suggested by @davemgreen that the following reductions are also possible - fminnum[nnan](abs(a), abs(b)) -> famin(a, b) - fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b) ('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags) The 'no NaNs' flag allows optimisations to assume that neither argument is a NaN, and so the differing NaN propagation semantics of llvm.maxnum/llvm.minnum and FAMAX/FAMIN can be ignored in this reduction. (llvm.maxnum/llvm.minnum: https://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic) - Changes to LLVM - lib/Target/AArch64/AArch64InstrInfo.td - add 'fminnm_nnan' and 'fmaxnm_nnan'; patfrags on fminnm/fmaxnm that are predicated on the intrinsic call having the 'nnan' flag. - add AArch64famin and AArch64famax patfrags, containing the new and existing reductions. - test/CodeGen/AArch64/aarch64-neon-faminmax.ll - add positive and negative tests for the new reduction, based on the presence of 'nnan' in the IR intrinsic call.
Patterns were previously added to allow the following reductions - fminimum(abs(a), abs(b)) -> famin(a, b) - fmaximum(abs(a), abs(b)) -> famax(a, b) - #103027 It was suggested by @davemgreen that the following reductions are also possible - fminnum[nnan](abs(a), abs(b)) -> famin(a, b) - fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b) ('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags) The 'no NaNs' flag allows optimisations to assume that neither argument is a NaN, and so the differing NaN propagation semantics of llvm.maxnum/llvm.minnum and FAMAX/FAMIN can be ignored in this reduction. (llvm.maxnum/llvm.minnum: https://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic) - Changes to LLVM - lib/Target/AArch64/AArch64InstrInfo.td - add 'fminnm_nnan' and 'fmaxnm_nnan'; patfrags on fminnm/fmaxnm that are predicated on the intrinsic call having the 'nnan' flag. - add AArch64famin and AArch64famax patfrags, containing the new and existing reductions. - test/CodeGen/AArch64/aarch64-neon-faminmax.ll - add positive and negative tests for the new reduction, based on the presence of 'nnan' in the IR intrinsic call.
Tablegen patterns were previously added to lower the following sequences from generic IR to NEON FAMIN/FAMAX instructions - fminimum(abs(a), abs(b)) -> famin(a, b) - fmaximum(abs(a), abs(b)) -> famax(a, b) - llvm#103027 - fminnum[nnan](abs(a), abs(b)) -> famin(a, b) - fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b) - llvm#104766 The same idea has been applied for the scalable vector variants of FAMIN/FAMAX. ('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags) - Changes to LLVM - lib/Target/AArch64/AArch64SVEInstrInfo.td - Add 'AArch64fminnm_p_nnan' and 'AArch64fmaxnm_p_nnan' patfrags (patterns predicated on the 'nnan' flag). - Add 'AArch64famax_p' and 'AArch64famin_p' - test/CodeGen/AArch64/aarch64-sve2-faminmax.ll - Add tests to verify the new patterns, including both positive and negative tests for 'nnan' predicated behavior.
Tablegen patterns were previously added to lower the following sequences from generic IR to NEON FAMIN/FAMAX instructions - fminimum(abs(a), abs(b)) -> famin(a, b) - fmaximum(abs(a), abs(b)) -> famax(a, b) - llvm#103027 - fminnum[nnan](abs(a), abs(b)) -> famin(a, b) - fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b) - llvm#104766 The same idea has been applied for the scalable vector variants of FAMIN/FAMAX. ('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags) - Changes to LLVM - lib/Target/AArch64/AArch64SVEInstrInfo.td - Add 'AArch64fminnm_p_nnan' and 'AArch64fmaxnm_p_nnan' patfrags (patterns predicated on the 'nnan' flag). - Add 'AArch64famax_p' and 'AArch64famin_p' - test/CodeGen/AArch64/aarch64-sve2-faminmax.ll - Add tests to verify the new patterns, including both positive and negative tests for 'nnan' predicated behavior.
Tablegen patterns were previously added to lower the following sequences from generic IR to NEON FAMIN/FAMAX instructions - `fminimum(abs(a), abs(b)) -> famin(a, b)` - `fmaximum(abs(a), abs(b)) -> famax(a, b)` - #103027 - `fminnum[nnan](abs(a), abs(b)) -> famin(a, b)` - `fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)` - #104766 The same idea has been applied for the scalable vector variants of [FAMIN](https://developer.arm.com/documentation/ddi0602/2024-06/SVE-Instructions/FAMIN--Floating-point-absolute-minimum--predicated--)/[FAMAX](https://developer.arm.com/documentation/ddi0602/2024-06/SVE-Instructions/FAMAX--Floating-point-absolute-maximum--predicated--). ('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags). - Changes to LLVM - lib/Target/AArch64/AArch64SVEInstrInfo.td - Add 'AArch64fminnm_p_nnan' and 'AArch64fmaxnm_p_nnan' patfrags (patterns predicated on the 'nnan' flag). - Add 'AArch64famax_p' and 'AArch64famin_p' - test/CodeGen/AArch64/aarch64-sve2-faminmax.ll - Add tests to verify the new patterns, including both positive and negative tests for 'nnan' predicated behavior.
Tablegen patterns were previously added to lower the following sequences from generic IR to NEON FAMIN/FAMAX instructions - `fminimum(abs(a), abs(b)) -> famin(a, b)` - `fmaximum(abs(a), abs(b)) -> famax(a, b)` - llvm#103027 - `fminnum[nnan](abs(a), abs(b)) -> famin(a, b)` - `fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)` - llvm#104766 The same idea has been applied for the scalable vector variants of [FAMIN](https://developer.arm.com/documentation/ddi0602/2024-06/SVE-Instructions/FAMIN--Floating-point-absolute-minimum--predicated--)/[FAMAX](https://developer.arm.com/documentation/ddi0602/2024-06/SVE-Instructions/FAMAX--Floating-point-absolute-maximum--predicated--). ('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags). - Changes to LLVM - lib/Target/AArch64/AArch64SVEInstrInfo.td - Add 'AArch64fminnm_p_nnan' and 'AArch64fmaxnm_p_nnan' patfrags (patterns predicated on the 'nnan' flag). - Add 'AArch64famax_p' and 'AArch64famin_p' - test/CodeGen/AArch64/aarch64-sve2-faminmax.ll - Add tests to verify the new patterns, including both positive and negative tests for 'nnan' predicated behavior.
min(abs(a), abs(b)) -> famin(a, b), max(abs(a), abs(b))-> famax(a, b)
Changes to LLVM