-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
95936e4
commit 86736cd
Showing
4 changed files
with
501 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
import os | ||
import json | ||
import glob | ||
import numpy as np | ||
from scipy.io import wavfile | ||
import os | ||
import sys | ||
import random | ||
import librosa | ||
import librosa.display | ||
|
||
def extract_max(pitches, magnitudes, shape):
    """Reduce two 2-D matrices to one maximum value per column.

    For every column index below ``shape[1]`` the largest entry of that
    column is taken from ``pitches`` and, independently, from ``magnitudes``.

    Args:
        pitches: 2-D array indexed as ``[row, column]``.
        magnitudes: 2-D array indexed as ``[row, column]``.
        shape: Sequence whose second element is the number of columns to scan.

    Returns:
        A ``(new_pitches, new_magnitudes)`` tuple of Python lists holding one
        column maximum per scanned column.
    """
    columns = range(shape[1])
    peak_pitches = [np.max(pitches[:, col]) for col in columns]
    peak_magnitudes = [np.max(magnitudes[:, col]) for col in columns]
    return (peak_pitches, peak_magnitudes)
|
||
def smooth(x, window_len=11, window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is padded on both sides with copies of itself mirrored about
    the first and last sample values, convolved with the window (scaled to
    unit sum), and trimmed back so the result has as many samples as ``x``.

    Args:
        x: 1-D sequence of samples (list or NumPy array).
        window_len: Number of taps in the smoothing window. Values below 3
            disable smoothing and ``x`` is returned unchanged.
        window: One of ``'flat'`` (moving average), ``'hanning'``,
            ``'hamming'``, ``'bartlett'`` or ``'blackman'``.

    Returns:
        The smoothed signal as a NumPy array with ``len(x)`` samples, or ``x``
        itself when ``window_len < 3``.

    Raises:
        ValueError: If ``window`` is not a supported name, or if ``x`` has
            fewer than ``window_len`` samples (the mirrored padding cannot be
            built and the trimmed result would have the wrong length).
    """
    if window_len < 3:
        return x
    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError(
            "Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
        )

    x = np.asarray(x)
    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    # Mirror the head and tail about the end-point values so the convolution
    # does not see an artificial jump to zero at either edge.
    s = np.r_[2 * x[0] - x[window_len - 1::-1], x, 2 * x[-1] - x[-1:-window_len:-1]]

    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # Look the window function up by name instead of evaluating a string.
        w = getattr(np, window)(window_len)

    y = np.convolve(w / w.sum(), s, mode='same')
    # Drop the padding: window_len samples at the head, window_len - 1 at the tail.
    return y[window_len:-window_len + 1]
|
||
def corrCalc(pitches, pitches_2):
    """Return the peak normalised cross-correlation of two sequences.

    Each input is standardised (zero mean, unit population standard
    deviation), the two are cross-correlated in ``'same'`` mode, and the
    result is divided by the longer input's length. The maximum over all
    evaluated lags is returned, so identical sequences score 1.0.

    Args:
        pitches: 1-D sequence of numbers.
        pitches_2: 1-D sequence of numbers.

    Returns:
        The largest normalised correlation value, or NaN when either input
        is constant (its standard deviation is zero, so the correlation is
        undefined).

    Raises:
        ValueError: If either input is empty.
    """
    pitches = np.asarray(pitches, dtype=float)
    pitches_2 = np.asarray(pitches_2, dtype=float)

    # A constant sequence cannot be standardised; report "undefined" directly
    # instead of dividing 0 by 0 and propagating NaN with runtime warnings.
    if pitches.min() == pitches.max() or pitches_2.min() == pitches_2.max():
        return np.nan

    a = (pitches - np.mean(pitches)) / np.std(pitches)
    b = (pitches_2 - np.mean(pitches_2)) / np.std(pitches_2)
    c = np.correlate(a, b, 'same') / max(len(a), len(b))

    return np.max(c)
|
||
def _pitch_contour(wave_data, samplerate, window_len=20):
    """Return the smoothed per-frame peak-pitch track of a mono signal."""
    pitches, magnitudes = librosa.piptrack(y=wave_data, sr=samplerate)
    # Keep only the largest pitch candidate of every frame.
    pitches, _ = extract_max(pitches, magnitudes, np.shape(pitches))
    return np.asarray(smooth(np.asarray(pitches), window_len=window_len))


def compare(original, translated):
    """Score how closely the pitch contour of two audio files match.

    Both files are loaded as mono, converted to smoothed peak-pitch contours
    (``piptrack`` -> per-frame maximum -> ``smooth`` with a 20-tap window)
    and compared with ``corrCalc``. When the contours have different numbers
    of frames they are first aligned with dynamic time warping. The score is
    also printed.

    Args:
        original: Path of the reference audio file.
        translated: Path of the audio file to compare against the reference.

    Returns:
        The value returned by ``corrCalc`` for the two (aligned) contours.
    """
    # Get the original and translated samples at the same sample rate.
    wave_data_orig, samplerate = librosa.load(original, mono=True)
    wave_data_translated, _ = librosa.load(translated, sr=samplerate, mono=True)

    # Calculate their pitch contours using piptrack.
    pitches_orig = _pitch_contour(wave_data_orig, samplerate)
    pitches_trans = _pitch_contour(wave_data_translated, samplerate)

    # DTW on the two contours when their lengths differ.
    if pitches_trans.shape[0] != pitches_orig.shape[0]:
        _, wp_pitches = librosa.sequence.dtw(pitches_orig, pitches_trans, subseq=True)

        # librosa documents the warping path as ordered from end to start;
        # flip it so the warped contours stay in chronological order.
        # NOTE(review): confirm against the installed librosa version.
        wp_pitches = np.asarray(wp_pitches)[::-1]

        # Warp both 1-D contours along the path (the previous code indexed
        # undefined names here and raised NameError).
        pitches_orig = pitches_orig[wp_pitches[:, 0]]
        pitches_trans = pitches_trans[wp_pitches[:, 1]]

    # Calculate the correlation for the best shift.
    corr = corrCalc(pitches_orig, pitches_trans)
    print(corr)

    return corr
|
||
def main(directory='C:/Users/shizh/Documents/GitHub/Wavenet-CPC-Music-Translation/paired-5_new/',
         output_path='pitch_5_new/outputAll.txt'):
    """Score every translated sample against its source and save the results.

    Every sub-directory found under ``directory`` is scanned for ``*.wav``
    files. Files whose name contains ``umt`` are treated as translations;
    the others are originals. A translated ``keyboard_acoustic`` file is
    compared with the directory's original ``string_acoustic`` file and vice
    versa. Each comparison is printed and finally written to ``output_path``
    as ``original, translated, score`` lines.

    Args:
        directory: Root folder whose sub-directories hold the paired samples.
        output_path: Text file that receives one line per comparison; its
            parent directory is created when missing.
    """
    keyboard, strings = 'keyboard_acoustic', 'string_acoustic'
    sample_dirs = [entry[0] for entry in os.walk(directory)][1:]
    correlations = []

    for path in sample_dirs:
        originals = {}
        translations = []

        # First pass: classify files by name only, in a deterministic order,
        # so the pairing does not depend on the order glob returns them in.
        for filename in sorted(glob.glob(os.path.join(path, '*.wav'))):
            name = os.path.basename(filename)
            if keyboard in name:
                family = keyboard
            elif strings in name:
                family = strings
            else:
                continue
            if 'umt' in name:
                translations.append((family, filename))
            else:
                originals[family] = filename

        # Second pass: a translation *into* one instrument is scored against
        # the original recording of the *other* instrument.
        for family, translated in translations:
            source_family = strings if family == keyboard else keyboard
            original = originals.get(source_family)
            translated_name = os.path.basename(translated)
            if original is None:
                print('Skipping %s: no original %s file in %s'
                      % (translated_name, source_family, path), file=sys.stderr)
                continue
            original_name = os.path.basename(original)
            print(original_name, translated_name)
            corr = compare(original, translated)
            correlations.append([original_name, translated_name, str(corr)])

    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, 'w') as f:
        f.writelines('%s, %s, %s\n' % tuple(row) for row in correlations)
|
||
# Run the batch comparison only when this file is executed as a script.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
string_acoustic_080-029-025.wav, umtcpc-gru_keyboard_acoustic_004-029-050_0.wav, 0.47669136840783166 | ||
string_acoustic_080-029-025.wav, umtcpc-gru_keyboard_acoustic_004-029-050_1.wav, 0.32308011848923035 | ||
string_acoustic_080-029-025.wav, umtcpc-gru_keyboard_acoustic_004-029-050_2.wav, 0.627623309319683 | ||
keyboard_acoustic_004-029-050.wav, umtcpc-gru_string_acoustic_080-029-025_0.wav, 0.8701267302001969 | ||
keyboard_acoustic_004-029-050.wav, umtcpc-gru_string_acoustic_080-029-025_1.wav, 0.81225610410218 | ||
keyboard_acoustic_004-029-050.wav, umtcpc-gru_string_acoustic_080-029-025_2.wav, 0.775821672030709 | ||
string_acoustic_012-035-127.wav, umtcpc-gru_keyboard_acoustic_004-035-050_0.wav, nan | ||
string_acoustic_012-035-127.wav, umtcpc-gru_keyboard_acoustic_004-035-050_1.wav, nan | ||
string_acoustic_012-035-127.wav, umtcpc-gru_keyboard_acoustic_004-035-050_2.wav, nan | ||
keyboard_acoustic_004-035-050.wav, umtcpc-gru_string_acoustic_012-035-127_0.wav, 0.7987055343430346 | ||
keyboard_acoustic_004-035-050.wav, umtcpc-gru_string_acoustic_012-035-127_1.wav, 0.30634859359392846 | ||
keyboard_acoustic_004-035-050.wav, umtcpc-gru_string_acoustic_012-035-127_2.wav, 0.3260149097957912 | ||
string_acoustic_012-043-127.wav, umtcpc-gru_keyboard_acoustic_004-043-127_0.wav, 0.17337729630505502 | ||
string_acoustic_012-043-127.wav, umtcpc-gru_keyboard_acoustic_004-043-127_1.wav, 0.5967675642267608 | ||
string_acoustic_012-043-127.wav, umtcpc-gru_keyboard_acoustic_004-043-127_2.wav, 0.49574102983321616 | ||
keyboard_acoustic_004-043-127.wav, umtcpc-gru_string_acoustic_012-043-127_0.wav, 0.6649963837119002 | ||
keyboard_acoustic_004-043-127.wav, umtcpc-gru_string_acoustic_012-043-127_1.wav, 0.541459462196636 | ||
keyboard_acoustic_004-043-127.wav, umtcpc-gru_string_acoustic_012-043-127_2.wav, 0.3407155888542435 | ||
string_acoustic_014-059-050.wav, umtcpc-gru_keyboard_acoustic_004-059-127_0.wav, 0.3542439866718514 | ||
string_acoustic_014-059-050.wav, umtcpc-gru_keyboard_acoustic_004-059-127_1.wav, 0.8768117714565221 | ||
string_acoustic_014-059-050.wav, umtcpc-gru_keyboard_acoustic_004-059-127_2.wav, 0.5555510984072101 | ||
keyboard_acoustic_004-059-127.wav, umtcpc-gru_string_acoustic_014-059-050_0.wav, 0.7400007707112999 | ||
keyboard_acoustic_004-059-127.wav, umtcpc-gru_string_acoustic_014-059-050_1.wav, 0.411378465747302 | ||
keyboard_acoustic_004-059-127.wav, umtcpc-gru_string_acoustic_014-059-050_2.wav, 0.554729962292461 | ||
string_acoustic_056-063-127.wav, umtcpc-gru_keyboard_acoustic_004-063-025_0.wav, 0.23584577679098312 | ||
string_acoustic_056-063-127.wav, umtcpc-gru_keyboard_acoustic_004-063-025_1.wav, 0.25363857266191175 | ||
string_acoustic_056-063-127.wav, umtcpc-gru_keyboard_acoustic_004-063-025_2.wav, 0.22334330222741142 | ||
keyboard_acoustic_004-063-025.wav, umtcpc-gru_string_acoustic_056-063-127_0.wav, 0.16707008851694516 | ||
keyboard_acoustic_004-063-025.wav, umtcpc-gru_string_acoustic_056-063-127_1.wav, 0.23405087526792362 | ||
keyboard_acoustic_004-063-025.wav, umtcpc-gru_string_acoustic_056-063-127_2.wav, 0.2043789286840358 |
Oops, something went wrong.