Skip to content

Commit

Permalink
Merge pull request pyFFTW#159 from grlee77/next_fast_len
Browse files Browse the repository at this point in the history
add next_fast_len as introduced in scipy 0.18
  • Loading branch information
hgomersall authored Apr 4, 2017
2 parents d592834 + d636475 commit 2d310cc
Show file tree
Hide file tree
Showing 6 changed files with 265 additions and 3 deletions.
2 changes: 1 addition & 1 deletion conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.coverage',
'sphinx.ext.intersphinx']
'sphinx.ext.intersphinx', 'sphinx.ext.napoleon']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
Expand Down
1 change: 1 addition & 0 deletions pyfftw/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
empty_aligned,
ones_aligned,
zeros_aligned,
next_fast_len,
)

from . import builders
Expand Down
6 changes: 5 additions & 1 deletion pyfftw/interfaces/scipy_fftpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,18 @@
shift, fftshift, ifftshift, fftfreq, rfftfreq,
convolve, _fftpack)

# a next_fast_len specific to pyFFTW is used in place of the scipy.fftpack one
from ..pyfftw import next_fast_len

try:
from scipy.fftpack import dst, idst
except ImportError:
pass

__all__ = ['fft','ifft','fftn','ifftn','rfft','irfft', 'fft2','ifft2',
'diff', 'tilbert','itilbert','hilbert','ihilbert', 'sc_diff',
'cs_diff','cc_diff','ss_diff', 'shift', 'rfftfreq']
'cs_diff','cc_diff','ss_diff', 'shift', 'rfftfreq',
'next_fast_len']

def fft(x, n=None, axis=-1, overwrite_x=False,
planner_effort='FFTW_ESTIMATE', threads=1,
Expand Down
2 changes: 2 additions & 0 deletions pyfftw/pyfftw.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,5 @@ Utility Functions
.. autofunction:: pyfftw.n_byte_align_empty

.. autofunction:: pyfftw.is_n_byte_aligned

.. autofunction:: pyfftw.next_fast_len
178 changes: 178 additions & 0 deletions pyfftw/utils.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
# POSSIBILITY OF SUCH DAMAGE.
#

from bisect import bisect_left
cimport numpy as np
cimport cpu
from libc.stdint cimport intptr_t
Expand Down Expand Up @@ -243,3 +244,180 @@ cpdef ones_aligned(shape, dtype='float64', order='C', n=None):
array = empty_aligned(shape, dtype=dtype, order=order, n=n)
array.fill(1)
return array


cpdef next_fast_len(target):
'''next_fast_len(target)
Find the next fast transform length for FFTW.
FFTW has efficient functions for transforms of length
2**a * 3**b * 5**c * 7**d * 11**e * 13**f, where e + f is either 0 or 1.
Parameters
----------
target : int
Length to start searching from. Must be a positive integer.
Returns
-------
out : int
The first fast length greater than or equal to `target`.
Examples
--------
On a particular machine, an FFT of prime length takes 2.1 ms:
>>> from pyfftw.interfaces import scipy_fftpack
>>> min_len = 10007 # prime length is worst case for speed
>>> a = numpy.random.randn(min_len)
>>> b = scipy_fftpack.fft(a)
Zero-padding to the next fast length reduces computation time to
406 us, a speedup of ~5 times:
>>> next_fast_len(min_len)
10080
>>> b = scipy_fftpack.fft(a, 10080)
Rounding up to the next power of 2 is not optimal, taking 598 us to
compute, 1.5 times as long as the size selected by next_fast_len.
>>> b = fftpack.fft(a, 16384)
Similar speedups will occur for pre-planned FFTs as generated via
pyfftw.builders.
'''
lpre = (18, 20, 21, 22, 24, 25, 26, 27, 28, 30,
32, 33, 35, 36, 39, 40, 42, 44, 45, 48,
49, 50, 52, 54, 55, 56, 60, 63, 64,
65, 66, 70, 72, 75, 77, 78, 80, 81,
84, 88, 90, 91, 96, 98, 99, 100, 104,
105, 108, 110, 112, 117, 120, 125, 126, 128,
130, 132, 135, 140, 144, 147, 150, 154, 156,
160, 162, 165, 168, 175, 176, 180, 182, 189,
192, 195, 196, 198, 200, 208, 210, 216, 220,
224, 225, 231, 234, 240, 243, 245, 250, 252,
256, 260, 264, 270, 273, 275, 280, 288, 294,
297, 300, 308, 312, 315, 320, 324, 325, 330,
336, 343, 350, 351, 352, 360, 364, 375, 378,
384, 385, 390, 392, 396, 400, 405, 416, 420,
432, 440, 441, 448, 450, 455, 462, 468, 480,
486, 490, 495, 500, 504, 512, 520, 525, 528,
539, 540, 546, 550, 560, 567, 576, 585, 588,
594, 600, 616, 624, 625, 630, 637, 640, 648,
650, 660, 672, 675, 686, 693, 700, 702, 704,
720, 728, 729, 735, 750, 756, 768, 770, 780,
784, 792, 800, 810, 819, 825, 832, 840, 864,
875, 880, 882, 891, 896, 900, 910, 924, 936,
945, 960, 972, 975, 980, 990, 1000, 1008, 1024,
1029, 1040, 1050, 1053, 1056, 1078, 1080, 1092, 1100,
1120, 1125, 1134, 1152, 1155, 1170, 1176, 1188, 1200,
1215, 1225, 1232, 1248, 1250, 1260, 1274, 1280, 1296,
1300, 1320, 1323, 1344, 1350, 1365, 1372, 1375, 1386,
1400, 1404, 1408, 1440, 1456, 1458, 1470, 1485, 1500,
1512, 1536, 1540, 1560, 1568, 1575, 1584, 1600, 1617,
1620, 1625, 1638, 1650, 1664, 1680, 1701, 1715, 1728,
1750, 1755, 1760, 1764, 1782, 1792, 1800, 1820, 1848,
1872, 1875, 1890, 1911, 1920, 1925, 1944, 1950, 1960,
1980, 2000, 2016, 2025, 2048, 2058, 2079, 2080, 2100,
2106, 2112, 2156, 2160, 2184, 2187, 2200, 2205, 2240,
2250, 2268, 2275, 2304, 2310, 2340, 2352, 2376, 2400,
2401, 2430, 2450, 2457, 2464, 2475, 2496, 2500, 2520,
2548, 2560, 2592, 2600, 2625, 2640, 2646, 2673, 2688,
2695, 2700, 2730, 2744, 2750, 2772, 2800, 2808, 2816,
2835, 2880, 2912, 2916, 2925, 2940, 2970, 3000, 3024,
3072, 3080, 3087, 3120, 3125, 3136, 3150, 3159, 3168,
3185, 3200, 3234, 3240, 3250, 3276, 3300, 3328, 3360,
3375, 3402, 3430, 3456, 3465, 3500, 3510, 3520, 3528,
3564, 3584, 3600, 3640, 3645, 3675, 3696, 3744, 3750,
3773, 3780, 3822, 3840, 3850, 3888, 3900, 3920, 3960,
3969, 4000, 4032, 4050, 4095, 4096, 4116, 4125, 4158,
4160, 4200, 4212, 4224, 4312, 4320, 4368, 4374, 4375,
4400, 4410, 4455, 4459, 4480, 4500, 4536, 4550, 4608,
4620, 4680, 4704, 4725, 4752, 4800, 4802, 4851, 4860,
4875, 4900, 4914, 4928, 4950, 4992, 5000, 5040, 5096,
5103, 5120, 5145, 5184, 5200, 5250, 5265, 5280, 5292,
5346, 5376, 5390, 5400, 5460, 5488, 5500, 5544, 5600,
5616, 5625, 5632, 5670, 5733, 5760, 5775, 5824, 5832,
5850, 5880, 5940, 6000, 6048, 6075, 6125, 6144, 6160,
6174, 6237, 6240, 6250, 6272, 6300, 6318, 6336, 6370,
6400, 6468, 6480, 6500, 6552, 6561, 6600, 6615, 6656,
6720, 6750, 6804, 6825, 6860, 6875, 6912, 6930, 7000,
7020, 7040, 7056, 7128, 7168, 7200, 7203, 7280, 7290,
7350, 7371, 7392, 7425, 7488, 7500, 7546, 7560, 7644,
7680, 7700, 7776, 7800, 7840, 7875, 7920, 7938, 8000,
8019, 8064, 8085, 8100, 8125, 8190, 8192, 8232, 8250,
8316, 8320, 8400, 8424, 8448, 8505, 8575, 8624, 8640,
8736, 8748, 8750, 8775, 8800, 8820, 8910, 8918, 8960,
9000, 9072, 9100, 9216, 9240, 9261, 9360, 9375, 9408,
9450, 9477, 9504, 9555, 9600, 9604, 9625, 9702, 9720,
9750, 9800, 9828, 9856, 9900, 9984, 10000)

if target <= 16:
return target

# Quickly check if it's already a power of 2
if not (target & (target-1)):
return target

# Get result quickly for small sizes, since FFT itself is similarly fast.
if target <= lpre[-1]:
return lpre[bisect_left(lpre, target)]

# check if 13 or 11 is a factor first
if target % 13 == 0:
p11_13 = 13
e_f_cases = [13, ] # e=0, f=1
elif target % 11 == 0:
p11_13 = 11
e_f_cases = [11, ] # e=1, f=0
else:
p11_13 = 1
# try all three cases where e + f <= 1 (see docstring)
e_f_cases = [13, 11, 1]

best_match = float('inf') # Anything found will be smaller

# outer loop is for the cases where e + f <= 1 (see docstring)
for p11_13 in e_f_cases:
match = float('inf')
# allow any integer powers of 2, 3, 5 or 7
p7_11_13 = p11_13
while p7_11_13 < target:
p5_7_11_13 = p7_11_13
while p5_7_11_13 < target:
p3_5_7_11_13 = p5_7_11_13
while p3_5_7_11_13 < target:
# Ceiling integer division, avoiding conversion to
# float.
# (quotient = ceil(target / p35))
quotient = -(-target // p3_5_7_11_13)

# Quickly find next power of 2 >= quotient
p2 = 2**((quotient - 1).bit_length())

N = p2 * p3_5_7_11_13
if N == target:
return N
elif N < match:
match = N
p3_5_7_11_13 *= 3
if p3_5_7_11_13 == target:
return p3_5_7_11_13
if p3_5_7_11_13 < match:
match = p3_5_7_11_13
p5_7_11_13 *= 5
if p5_7_11_13 == target:
return p5_7_11_13
if p5_7_11_13 < match:
match = p5_7_11_13
p7_11_13 *= 7
if p7_11_13 == target:
return p7_11_13
if p7_11_13 < match:
match = p7_11_13
if match < best_match:
best_match = match
return best_match
79 changes: 78 additions & 1 deletion test/test_pyfftw_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import pyfftw
import platform
import os
from numpy.testing import assert_, assert_equal

def get_cpus_info():

Expand Down Expand Up @@ -88,8 +89,84 @@ def test_get_alignment(self):
else:
self.assertTrue(pyfftw.simd_alignment == 1)


class NextFastLenTest(unittest.TestCase):

def __init__(self, *args, **kwargs):

super(NextFastLenTest, self).__init__(*args, **kwargs)

if not hasattr(self, 'assertRaisesRegex'):
self.assertRaisesRegex = self.assertRaisesRegexp

def test_next_fast_len(self):

def nums():
for j in range(1, 1000):
yield j
yield 2**5 * 3**5 * 4**5 + 1

for n in nums():
m = pyfftw.next_fast_len(n)
msg = "n=%d, m=%d" % (n, m)

assert_(m >= n, msg)

# check regularity
k = m
num11 = num13 = 0
# These factors come from the description in the FFTW3 docs:
# http://fftw.org/fftw3_doc/Complex-DFTs.html#Complex-DFTs
for d in [2, 3, 5, 7, 11, 13]:
while True:
a, b = divmod(k, d)
if b == 0:
k = a
if d in [11, 13]:
# only allowed to match 11 or 13 once
if num11 > 0 or num13 > 0:
break
if d == 11:
num11 += 1
else:
num13 += 1
else:
break
assert_equal(k, 1, err_msg=msg)

def test_next_fast_len_strict(self):
strict_test_cases = {
1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 11: 11, 13: 13,
14: 14, 15: 15, 16: 16, 17: 18, 1021: 1024,

# 2 * 3 * 5 * 7 * 11
2310: 2310,
2310 - 1: 2310,
# 2 * 3 * 5 * 7 * 13
2730: 2730,
2730 - 1: 2730,
# 2**2 * 3**2 * 5**2 * 7**2 * 11
485100: 485100,
485100-1: 485100,
# 2**2 * 3**2 * 5**2 * 7**2 * 13
573300: 573300,
573300-1: 573300,

# more than one multiple of 11 or 13 is not accepted
# 2 * 3 * 5 * 7 * 11**2
25410: 25872,
# 2 * 3 * 5 * 7 * 13**2
35490: 35672,
# 2 * 3 * 5 * 7 * 11 * 13
30030: 30576,

}
for x, y in strict_test_cases.items():
assert_equal(pyfftw.next_fast_len(x), y)

test_cases = (
UtilsTest,)
UtilsTest,
NextFastLenTest)

test_set = None

Expand Down

0 comments on commit 2d310cc

Please sign in to comment.