update with text files outputs

ChesterHuynh · May 7, 2021 · 86736cd · 86736cd
1 parent 95936e4
commit 86736cd
Show file tree

Hide file tree

Showing 4 changed files with 501 additions and 0 deletions.
diff --git a/src/data/pitchExp.py b/src/data/pitchExp.py
@@ -0,0 +1,111 @@
+import os
+import json
+import glob
+import numpy as np
+from scipy.io import wavfile
+import os
+import sys
+import random
+import librosa
+import librosa.display
+
+def extract_max(pitches,magnitudes, shape):
+    """Collect the column-wise maxima of two 2-D arrays.
+
+    For every column index below ``shape[1]`` the largest entry of that
+    column is taken from ``pitches`` and from ``magnitudes``.
+
+    Returns:
+        A tuple ``(pitch_maxima, magnitude_maxima)`` of Python lists, each
+        holding one value per column.
+    """
+    columns = range(shape[1])
+    peak_pitches = [np.max(pitches[:, col]) for col in columns]
+    peak_magnitudes = [np.max(magnitudes[:, col]) for col in columns]
+    return (peak_pitches, peak_magnitudes)
+
+def smooth(x,window_len=11,window='hanning'):
+    """Smooth a 1-D signal by convolving it with a unit-sum window.
+
+    The signal is padded on both sides with copies of its edge samples
+    reflected about the first/last value, convolved with the chosen window
+    (scaled so its weights sum to one), and the padding is trimmed again.
+
+    Args:
+        x: 1-D numpy array to smooth.
+        window_len: Number of samples in the window. Values below 3 return
+            ``x`` unchanged.
+        window: One of 'flat' (moving average), 'hanning', 'hamming',
+            'bartlett' or 'blackman'.
+
+    Returns:
+        The smoothed signal as a numpy array (or ``x`` itself when
+        ``window_len < 3``).
+
+    Raises:
+        ValueError: If ``window`` is not one of the supported names.
+
+    NOTE(review): inputs shorter than ``window_len`` are not validated here.
+    """
+    if window_len < 3:
+        return x
+    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
+        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
+    # Pad with reflected edge samples so the convolution has data to work
+    # with at both ends of the signal.
+    s = np.r_[2*x[0]-x[window_len-1::-1], x, 2*x[-1]-x[-1:-window_len:-1]]
+    if window == 'flat':  # moving average
+        w = np.ones(window_len, 'd')
+    else:
+        # Look the numpy window function up by name; the name was validated
+        # above, so no eval() of a constructed string is needed.
+        w = getattr(np, window)(window_len)
+    y = np.convolve(w/w.sum(), s, mode='same')
+    return y[window_len:-window_len+1]
+
+def corrCalc(pitches, pitches_2):
+    """Return the peak normalised cross-correlation of two 1-D sequences.
+
+    Both inputs are z-scored (mean removed, divided by their standard
+    deviation), cross-correlated in 'same' mode, and the result is divided
+    by the longer input's length. The largest value over all lags is
+    returned.
+
+    Args:
+        pitches: First sequence as a numpy array.
+        pitches_2: Second sequence as a numpy array.
+
+    Returns:
+        The maximum of the normalised cross-correlation, or NaN when either
+        sequence has zero standard deviation.
+    """
+    std_1 = np.std(pitches)
+    std_2 = np.std(pitches_2)
+    if std_1 == 0 or std_2 == 0:
+        # A constant sequence cannot be z-scored. Dividing by its zero
+        # standard deviation used to emit RuntimeWarnings and propagate NaN;
+        # return that NaN explicitly so the degenerate case is visible here.
+        return np.nan
+
+    a = (pitches - np.mean(pitches)) / std_1
+    b = (pitches_2 - np.mean(pitches_2)) / std_2
+    c = np.correlate(a, b, 'same') / max(len(a), len(b))
+
+    return np.max(c)
+
+def _pitch_contour(wave_data, samplerate):
+    """Return the smoothed per-column maximum pitch of a waveform."""
+    # Pass the signal by keyword: recent librosa releases no longer accept
+    # it positionally.
+    pitches, magnitudes = librosa.piptrack(y=wave_data, sr=samplerate)
+    pitches, _ = extract_max(pitches, magnitudes, np.shape(pitches))
+    return np.asarray(smooth(np.asarray(pitches), window_len=20))
+
+def compare(original, translated):
+    """Correlate the pitch contours of two audio files.
+
+    Both files are loaded as mono audio, a smoothed pitch contour is
+    extracted from each, and, when the contours differ in length, they are
+    aligned with subsequence DTW before being correlated.
+
+    Args:
+        original: Path of the reference audio file.
+        translated: Path of the audio file to compare against it.
+
+    Returns:
+        The peak normalised cross-correlation reported by ``corrCalc``. The
+        value is also printed.
+    """
+    # Get the original and translated samples
+    wave_data_orig, samplerate = librosa.load(original, mono=True)
+    wave_data_translated, _ = librosa.load(translated, mono=True)
+
+    # Calculate their pitch contours using piptrack
+    pitches_orig = _pitch_contour(wave_data_orig, samplerate)
+    pitches_trans = _pitch_contour(wave_data_translated, samplerate)
+
+    # DTW on the two contours when they do not already line up
+    if pitches_trans.shape[0] != pitches_orig.shape[0]:
+        _, wp_pitches = librosa.sequence.dtw(pitches_orig, pitches_trans, subseq=True)
+
+        # Warp both contours along the DTW path. The contours are 1-D, so
+        # they are indexed directly with the path's index columns.
+        wp_pitches = np.asarray(wp_pitches)
+        pitches_orig = pitches_orig[wp_pitches[:, 0]]
+        pitches_trans = pitches_trans[wp_pitches[:, 1]]
+
+    # Calculate the correlation for the best shift
+    corr = corrCalc(pitches_orig, pitches_trans)
+    print(corr)
+
+    return corr
+
+def main(directory='C:/Users/shizh/Documents/GitHub/Wavenet-CPC-Music-Translation/paired-5_new/',
+         output_path='pitch_5_new/outputAll.txt'):
+    """Compare every translated recording with its source and save the scores.
+
+    Each sub-directory of ``directory`` is scanned for ``*.wav`` files. A
+    file whose name contains 'umt' is treated as a translation; a translated
+    'keyboard_acoustic' file is compared with the directory's untranslated
+    'string_acoustic' file and vice versa. One line
+    ``<original>, <translated>, <correlation>`` per comparison is written to
+    ``output_path``.
+
+    Args:
+        directory: Root folder whose sub-directories hold the recordings.
+        output_path: Text file that receives the results; its parent
+            directory is created when missing.
+    """
+    # A translation *into* one instrument is scored against the original
+    # recording of the *other* instrument.
+    source_of = {
+        'keyboard_acoustic': 'string_acoustic',
+        'string_acoustic': 'keyboard_acoustic',
+    }
+    subdirs = [x[0] for x in os.walk(directory)][1:]
+    correlations = []
+
+    for path in subdirs:
+        # First pass: classify the files. Collecting the originals before
+        # any comparison keeps the pairing independent of listing order.
+        originals = {}
+        translations = []
+        for filename in sorted(glob.glob(os.path.join(path, '*.wav'))):
+            name = os.path.basename(filename)
+            for instrument in ('keyboard_acoustic', 'string_acoustic'):
+                if instrument not in name:
+                    continue
+                if 'umt' in name:
+                    translations.append((instrument, filename))
+                else:
+                    originals[instrument] = filename
+                break
+
+        # Second pass: score each translation against its source recording.
+        for instrument, translated in translations:
+            original = originals.get(source_of[instrument])
+            if original is None:
+                print('Skipping %s: no original %s file in %s'
+                      % (os.path.basename(translated), source_of[instrument], path))
+                continue
+            original_name = os.path.basename(original)
+            translated_name = os.path.basename(translated)
+            print(original_name, translated_name)
+            corr = compare(original, translated)
+            correlations.append([original_name, translated_name, str(corr)])
+
+    out_dir = os.path.dirname(output_path)
+    if out_dir:
+        os.makedirs(out_dir, exist_ok=True)
+    with open(output_path, 'w') as f:
+        for row in correlations:
+            f.write(', '.join(row) + '\n')
+
+# Script entry point: run the batch comparison only when this file is
+# executed directly, not when it is imported.
+if __name__ == "__main__":
+    main()
diff --git a/src/data/pitch_5/outputAll.txt b/src/data/pitch_5/outputAll.txt
@@ -0,0 +1,30 @@
+string_acoustic_080-029-025.wav, umtcpc-gru_keyboard_acoustic_004-029-050_0.wav, 0.47669136840783166
+string_acoustic_080-029-025.wav, umtcpc-gru_keyboard_acoustic_004-029-050_1.wav, 0.32308011848923035
+string_acoustic_080-029-025.wav, umtcpc-gru_keyboard_acoustic_004-029-050_2.wav, 0.627623309319683
+keyboard_acoustic_004-029-050.wav, umtcpc-gru_string_acoustic_080-029-025_0.wav, 0.8701267302001969
+keyboard_acoustic_004-029-050.wav, umtcpc-gru_string_acoustic_080-029-025_1.wav, 0.81225610410218
+keyboard_acoustic_004-029-050.wav, umtcpc-gru_string_acoustic_080-029-025_2.wav, 0.775821672030709
+string_acoustic_012-035-127.wav, umtcpc-gru_keyboard_acoustic_004-035-050_0.wav, nan
+string_acoustic_012-035-127.wav, umtcpc-gru_keyboard_acoustic_004-035-050_1.wav, nan
+string_acoustic_012-035-127.wav, umtcpc-gru_keyboard_acoustic_004-035-050_2.wav, nan
+keyboard_acoustic_004-035-050.wav, umtcpc-gru_string_acoustic_012-035-127_0.wav, 0.7987055343430346
+keyboard_acoustic_004-035-050.wav, umtcpc-gru_string_acoustic_012-035-127_1.wav, 0.30634859359392846
+keyboard_acoustic_004-035-050.wav, umtcpc-gru_string_acoustic_012-035-127_2.wav, 0.3260149097957912
+string_acoustic_012-043-127.wav, umtcpc-gru_keyboard_acoustic_004-043-127_0.wav, 0.17337729630505502
+string_acoustic_012-043-127.wav, umtcpc-gru_keyboard_acoustic_004-043-127_1.wav, 0.5967675642267608
+string_acoustic_012-043-127.wav, umtcpc-gru_keyboard_acoustic_004-043-127_2.wav, 0.49574102983321616
+keyboard_acoustic_004-043-127.wav, umtcpc-gru_string_acoustic_012-043-127_0.wav, 0.6649963837119002
+keyboard_acoustic_004-043-127.wav, umtcpc-gru_string_acoustic_012-043-127_1.wav, 0.541459462196636
+keyboard_acoustic_004-043-127.wav, umtcpc-gru_string_acoustic_012-043-127_2.wav, 0.3407155888542435
+string_acoustic_014-059-050.wav, umtcpc-gru_keyboard_acoustic_004-059-127_0.wav, 0.3542439866718514
+string_acoustic_014-059-050.wav, umtcpc-gru_keyboard_acoustic_004-059-127_1.wav, 0.8768117714565221
+string_acoustic_014-059-050.wav, umtcpc-gru_keyboard_acoustic_004-059-127_2.wav, 0.5555510984072101
+keyboard_acoustic_004-059-127.wav, umtcpc-gru_string_acoustic_014-059-050_0.wav, 0.7400007707112999
+keyboard_acoustic_004-059-127.wav, umtcpc-gru_string_acoustic_014-059-050_1.wav, 0.411378465747302
+keyboard_acoustic_004-059-127.wav, umtcpc-gru_string_acoustic_014-059-050_2.wav, 0.554729962292461
+string_acoustic_056-063-127.wav, umtcpc-gru_keyboard_acoustic_004-063-025_0.wav, 0.23584577679098312
+string_acoustic_056-063-127.wav, umtcpc-gru_keyboard_acoustic_004-063-025_1.wav, 0.25363857266191175
+string_acoustic_056-063-127.wav, umtcpc-gru_keyboard_acoustic_004-063-025_2.wav, 0.22334330222741142
+keyboard_acoustic_004-063-025.wav, umtcpc-gru_string_acoustic_056-063-127_0.wav, 0.16707008851694516
+keyboard_acoustic_004-063-025.wav, umtcpc-gru_string_acoustic_056-063-127_1.wav, 0.23405087526792362
+keyboard_acoustic_004-063-025.wav, umtcpc-gru_string_acoustic_056-063-127_2.wav, 0.2043789286840358