medley_voice_label.py
'''
Annotate the vocal-containing songs in MedleyDB using the provided instrument
activation annotations. Labels are at sample level with SR = 22050.
'''
import os

import numpy as np
import librosa
import yaml
from scipy.signal import medfilt

base_dir = '/media/bach1/dataset/MedleyDB/voice_only/'  # path to MedleyDB vocal stem files
activation_dir = '/media/bach3/dataset/MedleyDB/Annotations/Instrument_Activations/ACTIVATION_CONF/'  # path to MedleyDB 'ACTIVATION_CONF' directory
metadata_dir = '/media/bach3/dataset/MedleyDB/Audio/'  # path to MedleyDB 'Audio' directory
label_dir = './mdb_voc_label/'

if not os.path.exists(label_dir):
    os.mkdir(label_dir)

vocal_list = open('./medleydb_vocal_songs.txt', 'r').readlines()
vocals = ['vocalists', 'female singer', 'male singer']

for voice_file in vocal_list:
    song_name = voice_file.strip('\n').replace('_RAW.wav', '')
    print(song_name)

    # Find which stem number is the vocal stem.
    yaml_file = os.path.join(metadata_dir, song_name, song_name + '_METADATA.yaml')
    vocal_stem_num = ''
    with open(yaml_file, 'r') as stream:
        metadata = yaml.safe_load(stream)
    print(metadata['mix_filename'])
    # First pass: look for a vocal stem whose component is marked as 'melody'.
    for s in list(metadata['stems']):
        stem = metadata['stems'][s]
        if stem['component'] == 'melody' and stem['instrument'] in vocals:
            vocal_stem_num = s
    # Fallback: accept a vocal stem with an empty component field.
    if vocal_stem_num == '':
        for s in list(metadata['stems']):
            stem = metadata['stems'][s]
            if stem['component'] == '' and stem['instrument'] in vocals:
                vocal_stem_num = s
    assert vocal_stem_num != ''
    print(vocal_stem_num)
    vocal_stem = int(vocal_stem_num.replace('S', ''))

    activation_file = os.path.join(activation_dir, song_name + '_ACTIVATION_CONF.lab')
    # Get the length of the audio file; the label vector has one entry per sample.
    sig, sr = librosa.load(os.path.join(base_dir, song_name + '_RAW.wav'), sr=22050)
    label = np.zeros((len(sig),))

    # Read the instrument activation file and get the times and activation confidences.
    f = open(activation_file, 'r').readlines()[1:]  # skip the header line
    for i, line in enumerate(f[:-1]):
        line = line.strip('\n').split(',')
        start_time = float(line[0])
        end_time = float(f[i + 1].strip('\n').split(',')[0])
        start_sample = librosa.time_to_samples(start_time, sr=22050)
        end_sample = librosa.time_to_samples(end_time, sr=22050)
        # Binarize the vocal stem's activation confidence at 0.5.
        activation = 1 if float(line[vocal_stem]) >= 0.5 else 0
        label[int(start_sample):int(end_sample)] = activation

    # Fill the label from the last annotated time to the end of the audio.
    last_line = f[-1].strip('\n').split(',')
    last_start_sample = librosa.time_to_samples(float(last_line[0]), sr=22050)
    last_activation = 1 if float(last_line[vocal_stem]) >= 0.5 else 0
    label[int(last_start_sample):] = last_activation

    np.save(os.path.join(label_dir, song_name + '_RAW.npy'), label)
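
# Quick sanity check (hypothetical usage, not part of the original script): a saved
# label can be compared against its source audio; both should have one entry per
# sample at 22050 Hz. 'SongName' below is a placeholder for an actual track name.
#
#   lbl = np.load('./mdb_voc_label/SongName_RAW.npy')
#   aud, _ = librosa.load('/media/bach1/dataset/MedleyDB/voice_only/SongName_RAW.wav', sr=22050)
#   assert len(lbl) == len(aud)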
''' Alternative approach, kept for reference: use the energy level of the RAW vocal
tracks to create the annotation. It relies on voice_file and label_dir from the loop above.
base_dir = '/media/bach1/dataset/MedleyDB/voice_only/'  # path to MedleyDB vocal stem files
threshold = 0.005  # signal amplitude threshold
sig, sr = librosa.load(os.path.join(base_dir, voice_file.strip()))
label = np.abs(sig)
label[label <= threshold] = 0
label[label > threshold] = 1
label = medfilt(label, 15)  # median filter to smooth out spurious single-sample flips
print(label.shape, np.count_nonzero(label))
np.save(os.path.join(label_dir, voice_file.strip().split('.')[-2] + '.npy'), label)
'''
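
# A possible downstream step (an assumption, not part of this script): pooling the
# sample-level labels to frame level, e.g. one label per analysis frame with a
# hypothetical hop of 512 samples at sr = 22050. A frame counts as vocal if any
# sample inside it is labelled vocal.
#
#   hop = 512
#   n_frames = int(np.ceil(len(label) / hop))
#   frame_label = np.array([label[i * hop:(i + 1) * hop].max() for i in range(n_frames)])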