-
Notifications
You must be signed in to change notification settings - Fork 269
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2f131af
commit a7bbf15
Showing
1 changed file
with
76 additions
and
0 deletions.
There are no files selected for viewing
76 changes: 76 additions & 0 deletions
76
12- Music genre classification: Preparing the dataset/code/extract_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import json | ||
import os | ||
import math | ||
import librosa | ||
|
||
DATASET_PATH = "path/to/marsyas/dataset" | ||
JSON_PATH = "data_10.json" | ||
SAMPLE_RATE = 22050 | ||
TRACK_DURATION = 30 # measured in seconds | ||
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION | ||
|
||
|
||
def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5): | ||
"""Extracts MFCCs from music dataset and saves them into a json file along witgh genre labels. | ||
:param dataset_path (str): Path to dataset | ||
:param json_path (str): Path to json file used to save MFCCs | ||
:param num_mfcc (int): Number of coefficients to extract | ||
:param n_fft (int): Interval we consider to apply FFT. Measured in # of samples | ||
:param hop_length (int): Sliding window for FFT. Measured in # of samples | ||
:param: num_segments (int): Number of segments we want to divide sample tracks into | ||
:return: | ||
""" | ||
|
||
# dictionary to store mapping, labels, and MFCCs | ||
data = { | ||
"mapping": [], | ||
"labels": [], | ||
"mfcc": [] | ||
} | ||
|
||
samples_per_segment = int(SAMPLES_PER_TRACK / num_segments) | ||
num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length) | ||
|
||
# loop through all genre sub-folder | ||
for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)): | ||
|
||
# ensure we're processing a genre sub-folder level | ||
if dirpath is not dataset_path: | ||
|
||
# save genre label (i.e., sub-folder name) in the mapping | ||
semantic_label = dirpath.split("/")[-1] | ||
data["mapping"].append(semantic_label) | ||
print("\nProcessing: {}".format(semantic_label)) | ||
|
||
# process all audio files in genre sub-dir | ||
for f in filenames: | ||
|
||
# load audio file | ||
file_path = os.path.join(dirpath, f) | ||
signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE) | ||
|
||
# process all segments of audio file | ||
for d in range(num_segments): | ||
|
||
# calculate start and finish sample for current segment | ||
start = samples_per_segment * d | ||
finish = start + samples_per_segment | ||
|
||
# extract mfcc | ||
mfcc = librosa.feature.mfcc(signal[start:finish], sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length) | ||
mfcc = mfcc.T | ||
|
||
# store only mfcc feature with expected number of vectors | ||
if len(mfcc) == num_mfcc_vectors_per_segment: | ||
data["mfcc"].append(mfcc.tolist()) | ||
data["labels"].append(i-1) | ||
print("{}, segment:{}".format(file_path, d+1)) | ||
|
||
# save MFCCs to json file | ||
with open(json_path, "w") as fp: | ||
json.dump(data, fp, indent=4) | ||
|
||
|
||
if __name__ == "__main__": | ||
save_mfcc(DATASET_PATH, JSON_PATH, num_segments=10) |