diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ee9a45a --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/cache +/data +/data_test +/model diff --git a/__pycache__/dataset.cpython-36.pyc b/__pycache__/dataset.cpython-36.pyc index edefce0..e82a826 100644 Binary files a/__pycache__/dataset.cpython-36.pyc and b/__pycache__/dataset.cpython-36.pyc differ diff --git a/dataset.py b/dataset.py index ab61879..f6baf31 100644 --- a/dataset.py +++ b/dataset.py @@ -14,7 +14,7 @@ import librosa import librosa.display -import tqdm +from tqdm import tqdm from scipy.fftpack import fft from scipy import signal @@ -81,13 +81,46 @@ def targetToTensor(self, target): def music_load(filename) : + ''' y, sr = librosa.load(filename, sr=44100) y_ = np.zeros(int(44100*0.04)-int(len(y)%int(44100*0.04))) y = np.hstack([y,y_]) y = np.reshape(y, (-1, int(sr/100)*4 )) + y = torch.from_numpy(y) + print(a.shape) + ''' + y = np.load(filename) + y = torch.from_numpy(y) + + return y + + def music_cache_make(filelist) : + #f = codecs.open("music_cache_data","w") + + for filename in tqdm(filelist): + y, sr = librosa.load("./data_test/songs/" + filename+"/nofx.ogg", sr=44100) + y_ = np.zeros(int(44100*0.04)-int(len(y)%int(44100*0.04))) + y = np.hstack([y,y_]) + + y = np.reshape(y, (-1, int(sr/100)*4 )) + #print(y.shape) + + a = [] + for i in range(0, y.shape[0]) : + y1 = np.abs(librosa.stft(y[i], n_fft = 1764, hop_length=2048, win_length = 441)) + y2 = np.abs(librosa.stft(y[i], n_fft = 1764, hop_length=2048, win_length = 882)) + y3 = np.abs(librosa.stft(y[i], n_fft = 1764, hop_length=2048, win_length = 1764)) + + a.append(np.array([y1,y2,y3]).tolist()) + #if i == 200 : print(np.array([y1,y2,y3])) + + a = np.array(a) + #print(a.shape) + np.save("./cache/"+filename,a) + return y def timeStamp(filename, term) : @@ -220,6 +253,8 @@ def timeStamp(filename, term) : return return_timestamp if __name__ == "__main__": - y, sr = librosa.load("./data/songs/rootsphere_lastnote/nofx.ogg", sr=44100) - KshDataset.timeStamp("./data/songs/rootsphere_lastnote/exh.ksh", y.shape[0]) + filenames = os.listdir("./data_test/songs/") + KshDataset.music_cache_make(filenames) + #y, sr = librosa.load("./data/songs/rootsphere_lastnote/nofx.ogg", sr=44100) + #KshDataset.timeStamp("./data/songs/rootsphere_lastnote/exh.ksh", y.shape[0]) #print(y.shape[0]//441) \ No newline at end of file diff --git a/net/__pycache__/model.cpython-36.pyc b/net/__pycache__/model.cpython-36.pyc index 8870c40..6536f10 100644 Binary files a/net/__pycache__/model.cpython-36.pyc and b/net/__pycache__/model.cpython-36.pyc differ diff --git a/net/model.py b/net/model.py index 34b19a8..2b65862 100644 --- a/net/model.py +++ b/net/model.py @@ -10,7 +10,7 @@ def __init__(self): super(voltexNet, self).__init__() self.conv1 = nn.Sequential( - nn.Conv1d(1, 128, kernel_size=3, stride=1, padding=1), + nn.Conv1d(3, 128, kernel_size=3, stride=1, padding=1), nn.BatchNorm1d(128), nn.ReLU()) self.conv2 = nn.Sequential( @@ -39,17 +39,12 @@ def __init__(self): nn.ReLU(), nn.MaxPool1d(3,stride=3)) self.conv7 = nn.Sequential( - nn.Conv1d(256, 512, kernel_size=3, stride=1, padding=1), - nn.BatchNorm1d(512), + nn.Conv1d(256, 256, kernel_size=3, stride=1, padding=1), + nn.BatchNorm1d(256), nn.ReLU(), nn.MaxPool1d(3,stride=3)) - self.conv8 = nn.Sequential( - nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1), - nn.BatchNorm1d(512), - nn.ReLU(), - nn.Dropout(0.5)) - self.fc = nn.Linear(1024, 4) + self.fc = nn.Linear(256, 4) #self.LSTM = nn.LSTM(input_size = 4, hidden_size = 2, bidirectional=True) #self.fc3 = nn.Linear(hidden_size*2,output_size) @@ -57,6 +52,7 @@ def __init__(self): def forward(self, x): + x = x.squeeze() out = self.conv1(x) out = self.conv2(out) @@ -65,7 +61,6 @@ def forward(self, x): out = self.conv5(out) out = self.conv6(out) out = self.conv7(out) - out = self.conv8(out) out = out.reshape(out.size(0), out.size(1)* out.size(2)) #print(out.shape) @@ -83,7 +78,7 @@ def forward(self, x): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0 model = voltexNet().to(device) - summary(model, [(1, 1764)]) + summary(model, [(3, 883, 1)]) ''' class VoltexLSTM diff --git a/train.py b/train.py index 28e41a8..d127251 100644 --- a/train.py +++ b/train.py @@ -71,7 +71,7 @@ def infer(model, device, batch, filename, savename) : def main(): model = voltexNet() - model.load_state_dict(torch.load("./model_99_.pth")) + #model.load_state_dict(torch.load("./model_99_.pth")) #print ("load model") device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -97,7 +97,7 @@ def main(): filenames = os.listdir(dirname) valnames = os.listdir(valname) - batch = 256 + batch = 128 song_index = 0 best_Acc = 0 @@ -114,9 +114,9 @@ def main(): full_filename = os.path.join(dirname, filename) #print(full_filename) - input = KshDataset.music_load(full_filename + '/nofx.ogg') + input = KshDataset.music_load("./cache/" + filename+'.npy') #print(input.shape) - input = input.reshape(input.shape[0], 1, -1) + #input = input.reshape(input.shape[0], 1, -1) target = KshDataset.timeStamp(full_filename + '/exh.ksh', input.shape[0]) try : if target == None: @@ -148,7 +148,7 @@ def main(): optimizer.zero_grad() tmp_batch = input.shape[0] - i if tmp_batch > 1 : - pred = model(input[i:i+tmp_batch-1],tmp_batch-1) + pred = model(input[i:i+tmp_batch-1]) loss = criterion(pred.squeeze(), target[i:i+tmp_batch-1].squeeze()) loss.backward() @@ -165,8 +165,7 @@ def main(): #model.to(torch.device("cpu")) for filename in tqdm(valnames): full_filename = os.path.join(valname, filename) - input = KshDataset.music_load(full_filename + '/nofx.ogg') - input = input.reshape(input.shape[0], 1, -1) + input = KshDataset.music_load("./cache/" + filename+'.npy') input = input.to(device, dtype=torch.float) target = KshDataset.timeStamp(full_filename + '/exh.ksh', input.shape[0])