Commit
save x y groups as npz
Beici Liang committed Oct 19, 2018
1 parent dcad0b9 commit 8bf45ed
Showing 1 changed file with 201 additions and 1 deletion.
4. test on real audio data.ipynb (202 changes: 201 additions & 1 deletion)
@@ -47,6 +47,7 @@
"from keras.layers.merge import Concatenate\n",
"from keras import backend as K\n",
"from keras.backend.tensorflow_backend import set_session\n",
"from keras.layers import concatenate as concat\n",
"\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\" # the number of the GPU\n",
"config = tf.ConfigProto()\n",
@@ -59,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -799,6 +800,205 @@
"# plt.savefig(\"testfig.png\", bbox_inches=\"tight\", dpi=300)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use transfer learning"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"__________________________________________________________________________________________________\n",
"Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
"input_2 (InputLayer) (None, 1, 88200) 0 \n",
"__________________________________________________________________________________________________\n",
"melspectrogram_2 (Melspectrogra (None, 128, 200, 1) 1116288 input_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_2 (BatchNor (None, 128, 200, 1) 4 melspectrogram_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_1 (Conv2D) (None, 128, 200, 7) 427 batch_normalization_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_2 (Conv2D) (None, 128, 200, 7) 70 batch_normalization_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_3 (Conv2D) (None, 128, 200, 7) 427 batch_normalization_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"concatenate_1 (Concatenate) (None, 128, 200, 21) 0 conv2d_1[0][0] \n",
" conv2d_2[0][0] \n",
" conv2d_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_3 (BatchNor (None, 128, 200, 21) 84 concatenate_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_1 (Activation) (None, 128, 200, 21) 0 batch_normalization_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"max_pooling2d_1 (MaxPooling2D) (None, 64, 100, 21) 0 activation_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"dropout_1 (Dropout) (None, 64, 100, 21) 0 max_pooling2d_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_4 (Conv2D) (None, 64, 100, 21) 3990 dropout_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_4 (BatchNor (None, 64, 100, 21) 84 conv2d_4[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_2 (Activation) (None, 64, 100, 21) 0 batch_normalization_4[0][0] \n",
"__________________________________________________________________________________________________\n",
"max_pooling2d_2 (MaxPooling2D) (None, 32, 50, 21) 0 activation_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"dropout_2 (Dropout) (None, 32, 50, 21) 0 max_pooling2d_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_5 (Conv2D) (None, 32, 50, 21) 3990 dropout_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_5 (BatchNor (None, 32, 50, 21) 84 conv2d_5[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_3 (Activation) (None, 32, 50, 21) 0 batch_normalization_5[0][0] \n",
"__________________________________________________________________________________________________\n",
"max_pooling2d_3 (MaxPooling2D) (None, 16, 25, 21) 0 activation_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"dropout_3 (Dropout) (None, 16, 25, 21) 0 max_pooling2d_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_6 (Conv2D) (None, 16, 25, 21) 3990 dropout_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_6 (BatchNor (None, 16, 25, 21) 84 conv2d_6[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_4 (Activation) (None, 16, 25, 21) 0 batch_normalization_6[0][0] \n",
"__________________________________________________________________________________________________\n",
"max_pooling2d_4 (MaxPooling2D) (None, 4, 7, 21) 0 activation_4[0][0] \n",
"__________________________________________________________________________________________________\n",
"dropout_4 (Dropout) (None, 4, 7, 21) 0 max_pooling2d_4[0][0] \n",
"__________________________________________________________________________________________________\n",
"global_average_pooling2d_1 (Glo (None, 21) 0 dropout_4[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_1 (Dense) (None, 2) 44 global_average_pooling2d_1[0][0] \n",
"==================================================================================================\n",
"Total params: 1,129,566\n",
"Trainable params: 13,108\n",
"Non-trainable params: 1,116,458\n",
"__________________________________________________________________________________________________\n"
]
}
],
"source": [
"npz_dir = os.path.join(DIR_REAL_DATA, 'reference') \n",
"dataset_name = 'pedal-times_realaudio.npz'\n",
"npz_path = os.path.join(npz_dir, dataset_name)\n",
"\n",
"tracks = np.load(npz_path)\n",
"filenames = tracks['filename']\n",
"pedal_offset_gt_tracks = tracks['pedal_offset']\n",
"pedal_onset_gt_tracks = tracks['pedal_onset']\n",
"\n",
"# get model\n",
"batch_size = 1\n",
"reg_w = 1e-4\n",
"model_name = 'multi_kernel'\n",
"segment_exp_name = 'segment_{}'.format(model_name)\n",
"segment_model = model_multi_kernel_shape(n_out=2,input_shape=SEGMENT_INPUT_SHAPE)\n",
"segment_model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])\n",
"# load weights\n",
"segment_model.load_weights(os.path.join(DIR_SAVE_MODEL,\"{}_best_weights.h5\".format(segment_exp_name)))\n",
"# get model summary\n",
"segment_model.summary()"
]
},
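A sanity check one could run at this point (my addition, not part of this commit) is to confirm where the 1,116,458 non-trainable parameters reported in the summary live; it should show that almost all of them sit in the frozen Melspectrogram front-end, with only the convolutional and dense layers contributing trainable weights.

# Hedged sketch, not in the original notebook; uses only objects defined above (segment_model, np, K).
for layer in segment_model.layers:
    n_train = int(np.sum([K.count_params(w) for w in layer.trainable_weights]))
    n_fixed = int(np.sum([K.count_params(w) for w in layer.non_trainable_weights]))
    print('{:32s} trainable={:>8d} frozen={:>9d}'.format(layer.name, n_train, n_fixed))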
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ChopinOp66...\n",
"ChopinOp28No20...\n",
"ChopinOp10No3...\n",
"ChopinB49...\n",
"ChopinOp28No15...\n",
"ChopinOp28No4...\n",
"ChopinOp28No7...\n",
"ChopinOp23No1...\n",
"ChopinOp28No6...\n",
"ChopinOp69No2...\n"
]
}
],
"source": [
"# use as feature extractor\n",
"feat_layer1 = GlobalAveragePooling2D()(segment_model.get_layer('activation_1').output)\n",
"feat_layer2 = GlobalAveragePooling2D()(segment_model.get_layer('activation_2').output)\n",
"feat_layer3 = GlobalAveragePooling2D()(segment_model.get_layer('activation_3').output)\n",
"feat_layer4 = GlobalAveragePooling2D()(segment_model.get_layer('activation_4').output)\n",
"segment_feat_all = concat([feat_layer1,feat_layer2,feat_layer3,feat_layer4])\n",
"segment_feat_extractor = Model(inputs = segment_model.input, outputs = segment_feat_all)\n",
"\n",
"for filename_idx, filename in enumerate(filenames): \n",
" pedal_offset_gt = np.array(pedal_offset_gt_tracks[filename_idx])\n",
" pedal_onset_gt = np.array(pedal_onset_gt_tracks[filename_idx])\n",
"\n",
" paudio_dir = os.path.join(DIR_REAL_DATA, '{}'.format(filename)) \n",
" paudio_path = os.path.join(paudio_dir, '{}.wav'.format(filename))\n",
"\n",
" paudio, sr = librosa.load(paudio_path, sr=SR) \n",
" print(\"{}...\".format(filename))\n",
" len_segment_shape = int(SR * MIN_SRC)\n",
" seghop_length = HOP_LENGTH*10\n",
" seghop_duration = seghop_length/SR\n",
" n_psegment = int(np.ceil((len(paudio)-len_segment_shape)/seghop_length))\n",
" gen_psegment = data_gen(paudio, n_psegment, len_segment_shape, 'segment', hop_length=seghop_length)\n",
" segment_feat = segment_feat_extractor.predict_generator(gen_psegment, n_psegment// batch_size)\n",
"\n",
" # set the ground truth frame by frame\n",
" paudio_duration = librosa.get_duration(y=paudio, sr=SR)\n",
" n_frames = int(np.ceil(paudio_duration/seghop_duration))\n",
" segframes_gt = np.zeros(n_frames)\n",
" segframes_est = np.zeros(n_frames)\n",
"\n",
" pedal_offset_gt = np.array(tracks['pedal_offset'][filename_idx])\n",
" pedal_onset_gt = np.array(tracks['pedal_onset'][filename_idx])\n",
" longpseg_idx = np.where((pedal_offset_gt-pedal_onset_gt)>seghop_duration)[0]\n",
" longseg_onset_gt = pedal_onset_gt[longpseg_idx]\n",
" longseg_offset_gt = pedal_offset_gt[longpseg_idx]\n",
" segintervals_gt = np.stack((longseg_onset_gt,longseg_offset_gt), axis=-1)\n",
"\n",
" for idx, onset_t in enumerate(longseg_onset_gt):\n",
" offset_t = longseg_offset_gt[idx]\n",
" onset_frm = int(onset_t//seghop_duration)\n",
" offset_frm = int(offset_t//seghop_duration)\n",
" segframes_gt[onset_frm:offset_frm] = 1 \n",
" \n",
" # align the segframes_gt to the features from transfer learning\n",
" segframes_gt_transfer = segframes_gt[1:1+segment_feat.shape[0]]\n",
" group = np.array([filename] * segment_feat.shape[0])\n",
" \n",
" # concat\n",
" if filename_idx == 0:\n",
" segment_feats = np.copy(segment_feat)\n",
" segframes_gt_transfers = np.copy(segframes_gt_transfer)\n",
" groups = np.copy(group)\n",
" else:\n",
" segment_feats = np.concatenate((segment_feats, segment_feat),axis=0)\n",
" segframes_gt_transfers = np.concatenate((segframes_gt_transfers, segframes_gt_transfer),axis=0)\n",
" groups = np.concatenate((groups, group),axis=0)"
]
},
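Since each of the four pooled activation maps has 21 channels, the extractor returns an 84-dimensional vector per segment. A minimal shape check (sketch only, not part of the commit; it assumes SEGMENT_INPUT_SHAPE is the (1, 88200) shape implied by the model summary above):

# Hypothetical silent segment, used only to probe the output shape of the feature extractor.
dummy_segment = np.zeros((batch_size,) + tuple(SEGMENT_INPUT_SHAPE))
dummy_feat = segment_feat_extractor.predict(dummy_segment)
print(dummy_feat.shape)  # expected (1, 84): 4 pooled activation layers x 21 channels each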
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"save_dir = os.path.join(DIR_REAL_DATA, 'reference') \n",
"np.savez(os.path.join(save_dir, 'transfer-learning-xyg_segment.npz'), \n",
" X=segment_feats, y=segframes_gt_transfers, groups=groups)"
]
},
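The saved arrays can later be read back with plain numpy, e.g. before a group-aware train/test split over tracks (sketch, not part of the commit):

# Reload the features (X), frame-level pedal labels (y) and per-track group labels saved above.
data = np.load(os.path.join(save_dir, 'transfer-learning-xyg_segment.npz'))
X, y, groups = data['X'], data['y'], data['groups']
print(X.shape, y.shape, len(np.unique(groups)))  # frames x features, one label per frame, number of tracks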
{
"cell_type": "code",
"execution_count": null,