# MixMe: main script for structure analysis
# 31-7-2017 jjburred for Phonotonic
3+
4+ import os
5+ import numpy as np
6+ import scipy .io .wavfile as wav
7+ import scipy as sp
8+ from scipy import signal
9+ import matplotlib .pyplot as plt
10+ from matplotlib import gridspec
11+ from MM_stft import stft
12+ from MM_features import *
13+ from MM_structure import *
14+ from MM_beat import *
15+ import time
16+ from shutil import copyfile
17+ from sklearn .preprocessing import StandardScaler
18+
19+
# ---------------------------------------------------------------------------
# Input / output locations (machine-specific absolute paths).
# ---------------------------------------------------------------------------

# Folder scanned for the .wav tracks to analyze.
# inputFolder = "/Users/jjb/Documents/research/phonotonic/db/test"
inputFolder = "/Users/Phtc-LD/Desktop/Dev/Music/phonoFitTracks/GTTrack"  # training set (GTTrack)
# inputFolder = "/Users/Phtc-LD/Desktop/Dev/Music/phonoFitTracks/trackAtester/"  # tracks to test

# Folder holding the ground-truth segmentation files. The track name and
# '.txt' are appended directly to this string, so the trailing slash matters.
inputSegmentFolder = "/Users/Phtc-LD/Desktop/Applications/MusicAndSport/mix.me_research/python/GT/Segmentation/"

# Parent folder in which one timestamped experiment folder is created per run.
outputFolder = "/Users/Phtc-LD/Desktop/Applications/MusicAndSport/mix.me_research/python/out"
29+
# Top-level tuning knobs; the per-stage parameter dicts are derived from these.
main_pars = {
    'resolution': 0.04,    # analysis resolution in s
    'mfccWeight': 0.8,     # MFCC weight for clustering (for segmentation, MFCC is always used)
    'chromaWeight': 0.2,   # chroma weight for clustering
    'context': 16,         # segmentation context in beats (how far around it looks)
    'beatsPerSeg': 16,     # minimum beats per segment
    'partsDetail': 0.6,    # description level for parts clustering
    'debitsDetail': 0.7,   # description level for debits clustering
}
39+
40+ # =======================================================
41+
# STFT parameters.
# Keys ending in '_' (winSize_, hopSize_, ...) are added later, per file,
# once the sample rate is known.
stft_pars = {
    'winLength': main_pars['resolution'],  # in s. was 0.04
    'overlapFactor': 0.75,
}

# Beat detection parameters (tempo search range).
beat_pars = {
    'minBPM': 70,
    'maxBPM': 155,
}

# MFCC parameters
mfcc_pars = {
    'numMFCC': 13,
    'numFilt': 40,
    'includeEnergy': 0,
    'maxFreq': 16000,
    'plot': 0,
}

# Self-similarity matrix / segmentation / clustering parameters
struct_pars = {
    'dist': 'eucl',                             # distance: 'eucl', 'exp_cosine', 'cosine', 'corr'
    'cbSize': main_pars['context'],             # checkerboard size
    'plot': 0,
    'partsDetail': main_pars['partsDetail'],
    'debitsDetail': main_pars['debitsDetail'],
    'beatsPerSeg': main_pars['beatsPerSeg'],
    'clustFeat': 'mean_std',                    # mean_std, stack, seq_dist
}
73+
# Create a timestamped experiment folder so successive runs never overwrite
# each other's results.
expName = time.strftime("MMout_%Y-%m-%d_%H-%M-%S")
expFolder = os.path.join(outputFolder, expName)
if not os.path.exists(expFolder):
    os.makedirs(expFolder)

# Keep a copy of this script (it holds the parameters) next to the results.
# NOTE(review): the source path is relative to the current working directory,
# so the script must be launched from the folder containing MM_analyze.py.
copyfile('./MM_analyze.py', os.path.join(expFolder, 'MM_analyze.py'))
82+
# Half-width (in seconds) of the window around a detected boundary inside
# which a ground-truth boundary counts as matched.
GT_TOLERANCE = 0.1


def _beat_sync_mean(featMat, beatInd):
    """Average the columns of ``featMat`` between consecutive beat indices.

    Column ``i`` of the result is the mean of
    ``featMat[:, beatInd[i]:beatInd[i + 1]]``, so the result has
    ``len(beatInd) - 1`` columns (zero columns when fewer than two beats
    are given) and as many rows as ``featMat``.
    """
    numSegs = max(len(beatInd) - 1, 0)
    quantized = np.zeros((featMat.shape[0], numSegs))
    for i in range(numSegs):
        quantized[:, i] = np.mean(featMat[:, beatInd[i]:beatInd[i + 1]], axis=1)
    return quantized


# Analyze every .wav file of the input folder: beat tracking, feature
# extraction, SSM-based segmentation, then scoring against the ground truth.
for f in os.listdir(inputFolder):
    name = os.path.splitext(f)[0]
    print("F {}".format(name))

    str_currFileName = os.path.join(inputFolder, f)
    if not str_currFileName.lower().endswith('.wav'):
        continue

    # create current file subfolder
    currFolder = os.path.join(expFolder, f)
    os.makedirs(currFolder)

    # load wave file
    print("Loading wave file: " + str_currFileName)
    (fs, waveformOri) = wav.read(str_currFileName)

    # mix to mono
    if len(waveformOri.shape) > 1:
        waveform = np.sum(waveformOri, axis=1)
    else:
        waveform = waveformOri

    # normalize to floats
    # Integer PCM is converted first: np.abs() on the most negative value of a
    # fixed-width integer type overflows and would corrupt the peak estimate.
    if np.issubdtype(waveform.dtype, np.integer):
        waveform = waveform.astype(np.float64)
    peak = np.max(np.abs(waveform))
    if peak > 0:  # an all-zero (silent) file is left as is instead of becoming NaN
        waveform = waveform / peak

    # compute STFT
    stft_pars['winSize_'] = int(2 ** np.ceil(np.log2(stft_pars['winLength'] * fs)))  # nextpow2
    stft_pars['hopSize_'] = int(np.round(stft_pars['winSize_'] * (1 - stft_pars['overlapFactor'])))
    stft_pars['realHopLength_'] = stft_pars['hopSize_'] / fs  # hop length in s

    win = np.hamming(stft_pars['winSize_'])
    currSpec = stft(waveform, win, stft_pars['winSize_'], stft_pars['hopSize_'], stft_pars['overlapFactor'])

    currSpec = np.squeeze(currSpec)  # remove singleton dimension for mono STFTs
    # keep the magnitude of the first winSize/2 + 1 bins
    currSpec = np.absolute(currSpec[:stft_pars['winSize_'] // 2 + 1, :])

    print('spectrogram size: {} x {} (bins x frames)'.format(currSpec.shape[0], currSpec.shape[1]))

    # beatInd is used below as frame indices into the spectrogram columns
    beatInd, beatConf = beatDetect(currSpec, stft_pars, beat_pars, currFolder)

    # compute temporal vector: time (s) of every frame, then of every beat
    stft_pars['numFrames_'] = currSpec.shape[1]
    tempVec = np.arange(0, stft_pars['numFrames_']) * stft_pars['realHopLength_']
    tempVec = tempVec[beatInd]

    exportBeats(beatInd, beatConf, tempVec, currFolder)

    # compute MFCC
    stft_pars['numBins_'] = currSpec.shape[0]
    melFB = init_mfcc(stft_pars, fs, mfcc_pars)
    currMfcc = mfcc(currSpec, melFB, mfcc_pars)

    # compute chroma
    chromaInd = init_chroma(stft_pars, fs)
    currChroma = chroma(currSpec, chromaInd)

    # compute energy
    currEnergy = energy(currSpec)

    # feature matrix for clustering: weighted stack of MFCC and chroma,
    # or a single feature when the other one has zero weight
    if main_pars['mfccWeight'] == 0:
        clustFeatMat = currChroma
    elif main_pars['chromaWeight'] == 0:
        clustFeatMat = currMfcc
    else:
        currChromaW = currChroma * main_pars['chromaWeight']
        currMfccW = currMfcc * main_pars['mfccWeight']
        clustFeatMat = np.concatenate((currMfccW, currChromaW), axis=0)

    segFeatMat = currMfcc  # feature matrix for segmentation

    # normalize feature matrices (StandardScaler works per column, hence the
    # transposes: each feature dimension is standardized over time)
    segFeatMat = np.transpose(StandardScaler().fit_transform(np.transpose(segFeatMat)))
    clustFeatMat = np.transpose(StandardScaler().fit_transform(np.transpose(clustFeatMat)))

    # quantize feature matrices to beats (one column per inter-beat interval)
    segFeatMatQ = _beat_sync_mean(segFeatMat, beatInd)
    clustFeatMatQ = _beat_sync_mean(clustFeatMat, beatInd)  # only needed by the clustering stage

    # quantize energy to beats, as a column vector
    energyQ = np.array(
        [np.mean(currEnergy[beatInd[i]:beatInd[i + 1]]) for i in range(segFeatMatQ.shape[1])]
    ).reshape(-1, 1)

    print('seg. feat. matr. size: {} x {} (dim x beats)'.format(segFeatMatQ.shape[0], segFeatMatQ.shape[1]))

    # self-similarity matrix
    currSSM = SSM(segFeatMatQ, struct_pars, currFolder)

    # segmentation based on SSM
    boundaries = SSM_segment(currSSM, struct_pars, currFolder, energyQ)

    # boundary times in s
    temp = tempVec[boundaries]

    # export segments
    # NOTE(review): the separators below contain a space after the tab/newline
    # escapes; kept exactly as found - confirm this is the intended format.
    with open(os.path.join(currFolder, 'segments.txt'), 'w') as segFile:
        for i, b in enumerate(temp):
            segFile.write("{}\t {}\t {}\n ".format(b, b, i))

    # NOTE: the clustering stage is currently disabled, so `secIDs` and
    # `clusterKmeansPartie` are never computed. The debug stem plots that read
    # them raised NameError on the first file and are disabled together with
    # it. To re-enable, restore the calls below as a whole:
    # secIDs, subsecIDs, boundaries, clustVar, clusterKmeansPartie, clusterKmeansDebit, clustFeat = \
    #     segCluster(clustFeatMatQ, boundaries, struct_pars, currFolder)
    # newSubSecIDs, globalIntensity = sortDebits(currSpec, beatInd, boundaries, clusterKmeansPartie, clusterKmeansDebit)
    # export(boundaries, clusterKmeansPartie, newSubSecIDs, tempVec, currFolder, waveformOri, fs, globalIntensity, clustVar)
    # plt.figure(); plt.stem(secIDs); plt.show(); plt.close()
    # plt.figure(); plt.stem(clusterKmeansPartie); plt.show(); plt.close()

    # load the ground-truth boundaries (first field of each line, in s)
    segmentPath = inputSegmentFolder + name + '.txt'
    if not os.path.isfile(segmentPath):
        # a track without annotation must not abort the whole batch
        print("No ground-truth segmentation found, skipping evaluation: " + segmentPath)
        continue

    maSegmentsGT = []
    with open(segmentPath) as fl:
        for line in fl:
            fields = line.split()  # any whitespace separator; blank lines give []
            if fields:
                maSegmentsGT.append(float(fields[0]))

    # count the ground-truth boundaries that have a detected boundary within
    # +/- GT_TOLERANCE seconds
    compteurGTSegment = 0
    for gtTime in maSegmentsGT:
        if any(b - GT_TOLERANCE < gtTime < b + GT_TOLERANCE for b in temp):
            compteurGTSegment += 1

    # percentage relative to the number of detected boundaries
    # (0.0 when nothing was detected, instead of dividing by zero)
    if len(temp) > 0:
        score = 100 * compteurGTSegment / len(temp)
    else:
        score = 0.0

    # append the score of this track to the global results file
    with open("/Users/Phtc-LD/Desktop/Dev/Music/phonoFitTracks/segmentsGT.txt", "a") as resultFile:
        resultFile.write("{}\t {}\n ".format(name, score))
273+
274+
275+
0 commit comments