-
Notifications
You must be signed in to change notification settings - Fork 14
/
FeaturesExtractor.py
71 lines (60 loc) · 2.81 KB
/
FeaturesExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy as np
from sklearn import preprocessing
from python_speech_features import mfcc
from python_speech_features import delta
class FeaturesExtractor:
    """Compute MFCC-based voice features from raw audio samples."""

    def __init__(self):
        # Stateless: every parameter of the extraction is fixed in
        # extract_features.
        pass

    def extract_features(self, audio, rate):
        """
        Extract voice features from an audio signal.

        Computes the Mel Frequency Cepstral Coefficients (MFCC) with the
        python_speech_features module, normalizes them with
        ``sklearn.preprocessing.scale``, and stacks the result horizontally
        with the MFCC deltas and the MFCC double deltas.

        Args:
            audio (ndarray) : Audio signal samples (presumably with silent
                              moments already removed -- confirm with caller).
            rate (int)      : Sample rate of ``audio``.

        Returns:
            (array) : Extracted features matrix; the normalized MFCCs, their
                      deltas and their double deltas side by side.
        """
        # NOTE(review): nfft is fixed at 512 while the window is 25 ms; for
        # high sample rates the window may hold more than 512 samples --
        # confirm this is intended.
        mfcc_feature = mfcc(
            # The audio signal from which to compute features.
            audio,
            # The samplerate of the signal we are working with.
            rate,
            # The length of the analysis window in seconds.
            # Default is 0.025s (25 milliseconds)
            winlen=0.025,
            # The step between successive windows in seconds.
            # Default is 0.01s (10 milliseconds)
            winstep=0.01,
            # The number of cepstrum to return.
            # Default 13.
            numcep=20,
            # The number of filters in the filterbank.
            # Default is 26.
            nfilt=30,
            # The FFT size. Default is 512.
            nfft=512,
            # If true, the zeroth cepstral coefficient is replaced
            # with the log of the total frame energy.
            appendEnergy=True,
        )

        # Normalize the coefficients before deriving the deltas, so the
        # deltas are computed on the scaled values.
        mfcc_feature = preprocessing.scale(mfcc_feature)
        deltas = delta(mfcc_feature, 2)
        double_deltas = delta(deltas, 2)

        return np.hstack((mfcc_feature, deltas, double_deltas))

    def accelerated_get_features_vector(self, input_wave_file, audio, sound_rate):
        """
        Get voice features from already-loaded wave data, best effort.

        Args:
            input_wave_file (str) : Path to input wave file; only used to name
                                    the file in the failure message.
            audio (ndarray)       : Array representing the wave data.
            sound_rate (int)      : Rate of the audio.

        Returns:
            (array) with the voice features if the extraction was successful
            else an empty array.
        """
        try:
            return self.extract_features(audio, sound_rate)
        except Exception:
            # Deliberately best-effort: report the failing file and hand back
            # an empty array. Catching Exception (rather than a bare except)
            # lets KeyboardInterrupt and SystemExit propagate.
            print("Cannot extract features from", input_wave_file.split('/')[-1])
            return np.array([])