5
5
6
6
import numpy as np
7
7
import scipy .fftpack as fft
8
+ from warnings import warn
8
9
9
10
from . import audio
10
11
from .time_frequency import cqt_frequencies , note_to_hz
20
21
21
22
@cache
22
23
def cqt (y , sr = 22050 , hop_length = 512 , fmin = None , n_bins = 84 ,
23
- bins_per_octave = 12 , tuning = None , resolution = 2 ,
24
- aggregate = None , norm = 1 , sparsity = 0.01 ):
24
+ bins_per_octave = 12 , tuning = None , filter_scale = 2 ,
25
+ aggregate = None , norm = 1 , sparsity = 0.01 , real = True ,
26
+ resolution = util .Deprecated ()):
25
27
'''Compute the constant-Q transform of an audio signal.
26
28
27
29
This implementation is based on the recursive sub-sampling method
@@ -56,8 +58,9 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
56
58
57
59
If `None`, tuning will be automatically estimated.
58
60
59
- resolution : float > 0
60
- Filter resolution factor. Larger values use longer windows.
61
+ filter_scale : float > 0
62
+ Filter scale factor. Small values (<1) use shorter windows
63
+ for improved time resolution.
61
64
62
65
aggregate : None or function
63
66
Aggregation function for time-oversampling energy aggregation.
@@ -73,15 +76,25 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
73
76
74
77
Set `sparsity=0` to disable sparsification.
75
78
79
+ real : bool
80
+ If true, return only the magnitude of the CQT.
81
+
82
+ resolution : float
83
+ .. warning:: This parameter name was deprecated in librosa 0.4.2
84
+ Use the `filter_scale` parameter instead.
85
+ The `resolution` parameter will be removed in librosa 0.5.0.
86
+
87
+
76
88
Returns
77
89
-------
78
- CQT : np.ndarray [shape=(n_bins, t), dtype=np.float]
79
- Constant-Q energy for each frequency at each time.
90
+ CQT : np.ndarray [shape=(n_bins, t), dtype=np.complex or np.float]
91
+ Constant-Q value for each frequency at each time.
80
92
81
93
Raises
82
94
------
83
95
ParameterError
84
- If `hop_length` is not an integer multiple of `2**(n_bins / bins_per_octave)`
96
+ If `hop_length` is not an integer multiple of
97
+ `2**(n_bins / bins_per_octave)`
85
98
86
99
See Also
87
100
--------
@@ -115,7 +128,7 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
115
128
[ 2.363e-07, 5.329e-07, ..., 1.294e-07, 1.611e-07]])
116
129
117
130
118
- Using a higher resolution
131
+ Using a higher frequency resolution
119
132
120
133
>>> C = librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'),
121
134
... n_bins=60 * 2, bins_per_octave=12 * 2)
@@ -127,10 +140,20 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
127
140
[ 4.896e-08, 5.407e-07, ..., 9.176e-08, 1.051e-07]])
128
141
'''
129
142
143
+ filter_scale = util .rename_kw ('resolution' , resolution ,
144
+ 'filter_scale' , filter_scale ,
145
+ '0.4.2' , '0.5.0' )
146
+
147
+ if real :
148
+ warn ('Real-valued CQT (real=True) is deprecated in 0.4.2. '
149
+ 'Complex-valued CQT will become the default in 0.5.0. '
150
+ 'Consider using np.abs(librosa.cqt(..., real=False)) '
151
+ 'instead of real=True to maintain forward compatibility.' ,
152
+ DeprecationWarning )
153
+
130
154
# How many octaves are we dealing with?
131
155
n_octaves = int (np .ceil (float (n_bins ) / bins_per_octave ))
132
156
133
-
134
157
if fmin is None :
135
158
# C1 by default
136
159
fmin = note_to_hz ('C1' )
@@ -146,7 +169,7 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
146
169
fmax_t = np .max (freqs )
147
170
148
171
# Determine required resampling quality
149
- Q = float (resolution ) / (2.0 ** (1. / bins_per_octave ) - 1 )
172
+ Q = float (filter_scale ) / (2.0 ** (1. / bins_per_octave ) - 1 )
150
173
151
174
filter_cutoff = fmax_t * (1 + filters .window_bandwidth ('hann' ) / Q )
152
175
@@ -176,7 +199,7 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
176
199
n_filters ,
177
200
bins_per_octave ,
178
201
tuning ,
179
- resolution ,
202
+ filter_scale ,
180
203
norm ,
181
204
sparsity )
182
205
min_filter_length = np .min (filter_lengths )
@@ -203,15 +226,16 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
203
226
# Make sure our hop is long enough to support the bottom octave
204
227
num_twos = __num_two_factors (hop_length )
205
228
if num_twos < n_octaves - 1 :
206
- raise ParameterError ('hop_length must be a positive integer multiple of 2^{0:d} '
207
- 'for {1:d}-octave CQT' .format (n_octaves - 1 , n_octaves ))
229
+ raise ParameterError ('hop_length must be a positive integer '
230
+ 'multiple of 2^{0:d} for {1:d}-octave CQT'
231
+ .format (n_octaves - 1 , n_octaves ))
208
232
209
233
# Now do the recursive bit
210
234
fft_basis , n_fft , filter_lengths = __fft_filters (sr , fmin_t ,
211
235
n_filters ,
212
236
bins_per_octave ,
213
237
tuning ,
214
- resolution ,
238
+ filter_scale ,
215
239
norm ,
216
240
sparsity )
217
241
@@ -239,13 +263,14 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
239
263
# Convolve
240
264
cqt_resp .append (my_cqt )
241
265
242
- return __trim_stack (cqt_resp , n_bins )
266
+ return __trim_stack (cqt_resp , n_bins , real )
243
267
244
268
245
269
@cache
246
270
def hybrid_cqt (y , sr = 22050 , hop_length = 512 , fmin = None , n_bins = 84 ,
247
- bins_per_octave = 12 , tuning = None , resolution = 2 ,
248
- norm = 1 , sparsity = 0.01 ):
271
+ bins_per_octave = 12 , tuning = None , filter_scale = 2 ,
272
+ norm = 1 , sparsity = 0.01 ,
273
+ resolution = util .Deprecated ()):
249
274
'''Compute the hybrid constant-Q transform of an audio signal.
250
275
251
276
Here, the hybrid CQT uses the pseudo CQT for higher frequencies where
@@ -277,15 +302,21 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
277
302
278
303
If `None`, tuning will be automatically estimated.
279
304
280
- resolution : float > 0
281
- Filter resolution factor. Larger values use longer windows.
305
+ filter_scale : float > 0
306
+ Filter scale factor. Larger values use longer windows.
282
307
283
308
sparsity : float in [0, 1)
284
309
Sparsify the CQT basis by discarding up to `sparsity`
285
310
fraction of the energy in each basis.
286
311
287
312
Set `sparsity=0` to disable sparsification.
288
313
314
+ resolution : float
315
+ .. warning:: This parameter name was deprecated in librosa 0.4.2
316
+ Use the `filter_scale` parameter instead.
317
+ The `resolution` parameter will be removed in librosa 0.5.0.
318
+
319
+
289
320
Returns
290
321
-------
291
322
CQT : np.ndarray [shape=(n_bins, t), dtype=np.float]
@@ -294,16 +325,18 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
294
325
Raises
295
326
------
296
327
ParameterError
297
- If `hop_length` is not an integer multiple of `2**(n_bins / bins_per_octave)`
328
+ If `hop_length` is not an integer multiple of
329
+ `2**(n_bins / bins_per_octave)`
298
330
299
331
See Also
300
332
--------
301
333
cqt
302
334
pseudo_cqt
303
335
'''
304
336
305
- # How many octaves are we dealing with?
306
- n_octaves = int (np .ceil (float (n_bins ) / bins_per_octave ))
337
+ filter_scale = util .rename_kw ('resolution' , resolution ,
338
+ 'filter_scale' , filter_scale ,
339
+ '0.4.2' , '0.5.0' )
307
340
308
341
if fmin is None :
309
342
# C1 by default
@@ -322,7 +355,7 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
322
355
n_bins = n_bins ,
323
356
bins_per_octave = bins_per_octave ,
324
357
tuning = tuning ,
325
- resolution = resolution )
358
+ filter_scale = filter_scale )
326
359
327
360
# Determine which filters to use with Pseudo CQT
328
361
pseudo_filters = lengths < 2 * hop_length
@@ -338,7 +371,7 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
338
371
n_bins = n_bins_pseudo ,
339
372
bins_per_octave = bins_per_octave ,
340
373
tuning = tuning ,
341
- resolution = resolution ,
374
+ filter_scale = filter_scale ,
342
375
norm = norm ,
343
376
sparsity = sparsity )
344
377
cqt_resp .append (my_pseudo_cqt )
@@ -349,25 +382,27 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
349
382
350
383
fmin_full = np .min (freqs [~ pseudo_filters ])
351
384
352
- my_cqt = cqt (y , sr ,
353
- hop_length = hop_length ,
354
- fmin = fmin_full ,
355
- n_bins = n_bins_full ,
356
- bins_per_octave = bins_per_octave ,
357
- tuning = tuning ,
358
- resolution = resolution ,
359
- norm = norm ,
360
- sparsity = sparsity )
385
+ my_cqt = np .abs (cqt (y , sr ,
386
+ hop_length = hop_length ,
387
+ fmin = fmin_full ,
388
+ n_bins = n_bins_full ,
389
+ bins_per_octave = bins_per_octave ,
390
+ tuning = tuning ,
391
+ filter_scale = filter_scale ,
392
+ norm = norm ,
393
+ sparsity = sparsity ,
394
+ real = False ))
361
395
362
396
cqt_resp .append (my_cqt )
363
397
364
- return __trim_stack (cqt_resp , n_bins )
398
+ return __trim_stack (cqt_resp , n_bins , True )
365
399
366
400
367
401
@cache
368
402
def pseudo_cqt (y , sr = 22050 , hop_length = 512 , fmin = None , n_bins = 84 ,
369
- bins_per_octave = 12 , tuning = None , resolution = 2 ,
370
- norm = 1 , sparsity = 0.01 ):
403
+ bins_per_octave = 12 , tuning = None , filter_scale = 2 ,
404
+ norm = 1 , sparsity = 0.01 ,
405
+ resolution = util .Deprecated ()):
371
406
'''Compute the pseudo constant-Q transform of an audio signal.
372
407
373
408
This uses a single fft size that is the smallest power of 2 that is greater
@@ -401,15 +436,21 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
401
436
402
437
If `None`, tuning will be automatically estimated.
403
438
404
- resolution : float > 0
405
- Filter resolution factor. Larger values use longer windows.
439
+ filter_scale : float > 0
440
+ Filter scale factor. Larger values use longer windows.
406
441
407
442
sparsity : float in [0, 1)
408
443
Sparsify the CQT basis by discarding up to `sparsity`
409
444
fraction of the energy in each basis.
410
445
411
446
Set `sparsity=0` to disable sparsification.
412
447
448
+ resolution : float
449
+ .. warning:: This parameter name was deprecated in librosa 0.4.2
450
+ Use the `filter_scale` parameter instead.
451
+ The `resolution` parameter will be removed in librosa 0.5.0.
452
+
453
+
413
454
Returns
414
455
-------
415
456
CQT : np.ndarray [shape=(n_bins, t), dtype=np.float]
@@ -418,10 +459,15 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
418
459
Raises
419
460
------
420
461
ParameterError
421
- If `hop_length` is not an integer multiple of `2**(n_bins / bins_per_octave)`
462
+ If `hop_length` is not an integer multiple of
463
+ `2**(n_bins / bins_per_octave)`
422
464
423
465
'''
424
466
467
+ filter_scale = util .rename_kw ('resolution' , resolution ,
468
+ 'filter_scale' , filter_scale ,
469
+ '0.4.2' , '0.5.0' )
470
+
425
471
if fmin is None :
426
472
# C1 by default
427
473
fmin = note_to_hz ('C1' )
@@ -434,12 +480,11 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
434
480
n_bins ,
435
481
bins_per_octave ,
436
482
tuning ,
437
- resolution ,
483
+ filter_scale ,
438
484
norm ,
439
485
sparsity ,
440
486
hop_length = hop_length )
441
487
442
- # Remove phase for Pseudo CQT
443
488
fft_basis = np .abs (fft_basis )
444
489
445
490
# Compute the magnitude STFT with Hann window
@@ -450,15 +495,15 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
450
495
451
496
452
497
def __fft_filters (sr , fmin , n_bins , bins_per_octave , tuning ,
453
- resolution , norm , sparsity , hop_length = None ):
498
+ filter_scale , norm , sparsity , hop_length = None ):
454
499
'''Generate the frequency domain constant-Q filter basis.'''
455
500
456
501
basis , lengths = filters .constant_q (sr ,
457
502
fmin = fmin ,
458
503
n_bins = n_bins ,
459
504
bins_per_octave = bins_per_octave ,
460
505
tuning = tuning ,
461
- resolution = resolution ,
506
+ filter_scale = filter_scale ,
462
507
norm = norm ,
463
508
pad_fft = True )
464
509
@@ -480,7 +525,7 @@ def __fft_filters(sr, fmin, n_bins, bins_per_octave, tuning,
480
525
return fft_basis , n_fft , lengths
481
526
482
527
483
- def __trim_stack (cqt_resp , n_bins ):
528
+ def __trim_stack (cqt_resp , n_bins , real ):
484
529
'''Helper function to trim and stack a collection of CQT responses'''
485
530
486
531
# cleanup any framing errors at the boundaries
@@ -490,7 +535,11 @@ def __trim_stack(cqt_resp, n_bins):
490
535
491
536
# Finally, clip out any bottom frequencies that we don't really want
492
537
# Transpose magic here to ensure column-contiguity
493
- return np .ascontiguousarray (cqt_resp [- n_bins :].T ).T
538
+
539
+ C = np .ascontiguousarray (cqt_resp [- n_bins :].T ).T
540
+ if real :
541
+ C = np .abs (C )
542
+ return C
494
543
495
544
496
545
def __variable_hop_response (y , n_fft , hop_length , min_filter_length ,
@@ -515,7 +564,7 @@ def __variable_hop_response(y, n_fft, hop_length, min_filter_length,
515
564
window = np .ones )
516
565
517
566
# And the (complex-valued) filter response
518
- my_cqt = np . abs ( fft_basis .dot (D ) )
567
+ my_cqt = fft_basis .dot (D )
519
568
520
569
if zoom_factor > 1 :
521
570
# We need to aggregate. Generate the boundary frames
@@ -532,9 +581,8 @@ def __early_downsample(y, sr, hop_length, res_type, n_octaves,
532
581
if not (res_type == 'sinc_fastest' and audio ._HAS_SAMPLERATE ):
533
582
return y , sr , hop_length
534
583
535
-
536
- downsample_count1 = int (np .ceil (np .log2 (audio .BW_FASTEST * nyquist
537
- / filter_cutoff )) - 1 )
584
+ downsample_count1 = int (np .ceil (np .log2 (audio .BW_FASTEST * nyquist /
585
+ filter_cutoff )) - 1 )
538
586
num_twos = __num_two_factors (hop_length )
539
587
downsample_count2 = max (0 , num_twos - n_octaves + 1 )
540
588
downsample_count = min (downsample_count1 , downsample_count2 )
@@ -565,4 +613,3 @@ def __num_two_factors(x):
565
613
x //= 2
566
614
567
615
return num_twos
568
-
0 commit comments