From 4dc194d1b0658831104ed26d2b68b957e42504b4 Mon Sep 17 00:00:00 2001 From: Elliott Mahler Date: Tue, 5 Jan 2021 18:59:17 -0500 Subject: [PATCH] Fixed AVX planner not using prime butterflies as bases (#46) * Fixed AVX planner not using prime butterflies as bases * rustfmt --- benches/bench_rustfft.rs | 1 + src/avx/avx_planner.rs | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/benches/bench_rustfft.rs b/benches/bench_rustfft.rs index 6dd09fc2..28af9a00 100644 --- a/benches/bench_rustfft.rs +++ b/benches/bench_rustfft.rs @@ -101,6 +101,7 @@ fn bench_planned_f32(b: &mut Bencher, len: usize) { #[bench] fn planned32_composite_024028(b: &mut Bencher) { bench_planned_f32(b, 24028); } // small mixed composites times a large prime +#[bench] fn planned32_composite_005472(b: &mut Bencher) { bench_planned_f32(b, 5472); } #[bench] fn planned32_composite_030270(b: &mut Bencher) { bench_planned_f32(b, 30270); } // small mixed composites diff --git a/src/avx/avx_planner.rs b/src/avx/avx_planner.rs index 51e2700c..d06516e9 100644 --- a/src/avx/avx_planner.rs +++ b/src/avx/avx_planner.rs @@ -259,6 +259,11 @@ impl AvxPlannerInternal { if factors.get_other_factors() > 1 { let other_factors = factors.get_other_factors(); + // First, if the "other factors" are a butterfly, use that as the butterfly + if self.is_butterfly(other_factors) { + return MixedRadixPlan::butterfly(other_factors, vec![]); + } + // We can only use rader's if `other_factors` is prime if miller_rabin(other_factors as u64) { // len is prime, so we can use Rader's Algorithm as a base. Whether or not that's a good idea is a different story @@ -267,7 +272,7 @@ impl AvxPlannerInternal { // We're intentionally being too conservative here. Otherwise we'd be recursively applying a heuristic, and repeated heuristic failures could stack to make a rader's chain significantly slower. // If we were writing a measuring planner, expanding this heuristic and measuring its effectiveness would be an opportunity for up to 2x performance gains. let inner_factors = PartialFactors::compute(other_factors - 1); - if inner_factors.get_other_factors() == 1 { + if self.is_butterfly(inner_factors.get_other_factors()) { // We only have factors of 2,3,5,7, and 11. If we don't have AVX2, we also have to exclude factors of 5 and 7 and 11, because avx2 gives us enough headroom for the overhead of those to not be a problem if is_x86_feature_detected!("avx2") || (inner_factors.product_power2power3() == len - 1) @@ -464,6 +469,11 @@ impl AvxPlannerInternal { if factors.get_other_factors() > 1 { let other_factors = factors.get_other_factors(); + // First, if the "other factors" are a butterfly, use that as the butterfly + if self.is_butterfly(other_factors) { + return MixedRadixPlan::butterfly(other_factors, vec![]); + } + // We can only use rader's if `other_factors` is prime if miller_rabin(other_factors as u64) { // len is prime, so we can use Rader's Algorithm as a base. Whether or not that's a good idea is a different story @@ -472,7 +482,7 @@ impl AvxPlannerInternal { // We're intentionally being too conservative here. Otherwise we'd be recursively applying a heuristic, and repeated heuristic failures could stack to make a rader's chain significantly slower. // If we were writing a measuring planner, expanding this heuristic and measuring its effectiveness would be an opportunity for up to 2x performance gains. let inner_factors = PartialFactors::compute(other_factors - 1); - if inner_factors.get_other_factors() == 1 { + if self.is_butterfly(inner_factors.get_other_factors()) { // We only have factors of 2,3,5,7, and 11. If we don't have AVX2, we also have to exclude factors of 5 and 7 and 11, because avx2 gives us enough headroom for the overhead of those to not be a problem if is_x86_feature_detected!("avx2") || (inner_factors.product_power2power3() == len - 1)