Commit

Fixed AVX planner not using prime butterflies as bases (#46)
* Fixed AVX planner not using prime butterflies as bases

* rustfmt
ejmahler authored Jan 5, 2021
1 parent 66bb2a9 commit 4dc194d
Showing 2 changed files with 13 additions and 2 deletions.
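
For context, this change only affects how the AVX planner picks its base for lengths whose leftover factor is a prime the library already has a butterfly for; the public API is unchanged. A minimal usage sketch against RustFFT's public FftPlanner: the length 5472 = 2^5 * 3^2 * 19 matches the benchmark added below, and the example assumes 19 is among the AVX planner's butterfly sizes.

use rustfft::{num_complex::Complex, FftPlanner};

fn main() {
    // Plan a length whose leftover factor (19) is a small prime.
    // On x86_64 with AVX, the planner touched by this commit can now pick the
    // matching prime butterfly as the base instead of falling through to
    // the Rader's/Bluestein's heuristics.
    let mut planner = FftPlanner::<f32>::new();
    let fft = planner.plan_fft_forward(5472);

    let mut buffer = vec![Complex { re: 0.0f32, im: 0.0f32 }; 5472];
    fft.process(&mut buffer);
}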
1 change: 1 addition & 0 deletions benches/bench_rustfft.rs
@@ -101,6 +101,7 @@ fn bench_planned_f32(b: &mut Bencher, len: usize) {
 #[bench] fn planned32_composite_024028(b: &mut Bencher) { bench_planned_f32(b, 24028); }
 
 // small mixed composites times a large prime
+#[bench] fn planned32_composite_005472(b: &mut Bencher) { bench_planned_f32(b, 5472); }
 #[bench] fn planned32_composite_030270(b: &mut Bencher) { bench_planned_f32(b, 30270); }
 
 // small mixed composites
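The added case can be run in isolation with `cargo +nightly bench planned32_composite_005472` (a nightly toolchain is needed because the `#[bench]` harness is unstable); the filter string is just the new function's name.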
14 changes: 12 additions & 2 deletions src/avx/avx_planner.rs
@@ -259,6 +259,11 @@ impl<T: FftNum> AvxPlannerInternal<f32, T> {
         if factors.get_other_factors() > 1 {
             let other_factors = factors.get_other_factors();
 
+            // First, if the "other factors" are a butterfly, use that as the butterfly
+            if self.is_butterfly(other_factors) {
+                return MixedRadixPlan::butterfly(other_factors, vec![]);
+            }
+
             // We can only use rader's if `other_factors` is prime
             if miller_rabin(other_factors as u64) {
                 // len is prime, so we can use Rader's Algorithm as a base. Whether or not that's a good idea is a different story
@@ -267,7 +272,7 @@ impl<T: FftNum> AvxPlannerInternal<f32, T> {
             // We're intentionally being too conservative here. Otherwise we'd be recursively applying a heuristic, and repeated heuristic failures could stack to make a rader's chain significantly slower.
             // If we were writing a measuring planner, expanding this heuristic and measuring its effectiveness would be an opportunity for up to 2x performance gains.
             let inner_factors = PartialFactors::compute(other_factors - 1);
-            if inner_factors.get_other_factors() == 1 {
+            if self.is_butterfly(inner_factors.get_other_factors()) {
                 // We only have factors of 2,3,5,7, and 11. If we don't have AVX2, we also have to exclude factors of 5 and 7 and 11, because avx2 gives us enough headroom for the overhead of those to not be a problem
                 if is_x86_feature_detected!("avx2")
                     || (inner_factors.product_power2power3() == len - 1)
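
The two edits above change the base-selection order: a leftover factor that is itself a butterfly is now taken as the base directly, and the Rader's heuristic accepts an inner leftover that is a butterfly rather than requiring it to be exactly 1. A stand-alone sketch of the outer decision order follows; `is_butterfly_size` and `is_prime` are hypothetical stand-ins, not RustFFT's real `is_butterfly` or `miller_rabin`.

// Hypothetical, self-contained sketch of the base-selection order implemented above.

// Stand-in butterfly list: a few primes above 11 (the real list lives in the AVX planner).
fn is_butterfly_size(len: usize) -> bool {
    matches!(len, 13 | 17 | 19 | 23 | 29 | 31)
}

// Stand-in trial-division primality test (the planner uses miller_rabin instead).
fn is_prime(n: usize) -> bool {
    n > 1 && (2..).take_while(|d| d * d <= n).all(|d| n % d != 0)
}

// Pick a base for the "other factors" left after dividing out powers of 2, 3, 5, 7, and 11.
fn choose_base(other_factors: usize) -> &'static str {
    // New in this commit: an "other factor" that is itself a butterfly size becomes
    // the base directly, skipping the Rader's/Bluestein's heuristics entirely.
    if is_butterfly_size(other_factors) {
        return "butterfly base";
    }
    // Otherwise fall back to Rader's (only valid for primes), else Bluestein's.
    if is_prime(other_factors) {
        "raders base"
    } else {
        "bluesteins base"
    }
}

fn main() {
    // 5472 = 2^5 * 3^2 * 19, the new benchmark: its leftover factor 19 is a butterfly here.
    assert_eq!(choose_base(19), "butterfly base");
    // 30270 = 2 * 3 * 5 * 1009, the existing benchmark: 1009 is prime but not a butterfly.
    assert_eq!(choose_base(1009), "raders base");
    println!("base selection behaves as expected");
}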
@@ -464,6 +469,11 @@ impl<T: FftNum> AvxPlannerInternal<f64, T> {
         if factors.get_other_factors() > 1 {
             let other_factors = factors.get_other_factors();
 
+            // First, if the "other factors" are a butterfly, use that as the butterfly
+            if self.is_butterfly(other_factors) {
+                return MixedRadixPlan::butterfly(other_factors, vec![]);
+            }
+
             // We can only use rader's if `other_factors` is prime
             if miller_rabin(other_factors as u64) {
                 // len is prime, so we can use Rader's Algorithm as a base. Whether or not that's a good idea is a different story
@@ -472,7 +482,7 @@ impl<T: FftNum> AvxPlannerInternal<f64, T> {
             // We're intentionally being too conservative here. Otherwise we'd be recursively applying a heuristic, and repeated heuristic failures could stack to make a rader's chain significantly slower.
             // If we were writing a measuring planner, expanding this heuristic and measuring its effectiveness would be an opportunity for up to 2x performance gains.
             let inner_factors = PartialFactors::compute(other_factors - 1);
-            if inner_factors.get_other_factors() == 1 {
+            if self.is_butterfly(inner_factors.get_other_factors()) {
                 // We only have factors of 2,3,5,7, and 11. If we don't have AVX2, we also have to exclude factors of 5 and 7 and 11, because avx2 gives us enough headroom for the overhead of those to not be a problem
                 if is_x86_feature_detected!("avx2")
                     || (inner_factors.product_power2power3() == len - 1)
