-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
78bde29
commit 084ed3b
Showing
10 changed files
with
319 additions
and
189 deletions.
There are no files selected for viewing
25 changes: 1 addition & 24 deletions
25
training/.ipynb_checkpoints/ResNet_classifier_from_scratch-checkpoint.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
25 changes: 1 addition & 24 deletions
25
training/.ipynb_checkpoints/ResNet_classifier_transfer_learning-checkpoint.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
286 changes: 286 additions & 0 deletions
286
training/.ipynb_checkpoints/standard_cnn_model-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,286 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"import os\n", | ||
"import librosa\n", | ||
"import librosa.display\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"from sklearn.preprocessing import LabelEncoder\n", | ||
"from keras.utils import to_categorical\n", | ||
"import h5py\n", | ||
"import math" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import struct\n", | ||
"\n", | ||
"class WavFileHelper():\n", | ||
" \n", | ||
" def read_file_properties(self, filename):\n", | ||
"\n", | ||
" wave_file = open(filename,\"rb\")\n", | ||
" \n", | ||
" riff = wave_file.read(12)\n", | ||
" fmt = wave_file.read(36)\n", | ||
" \n", | ||
" num_channels_string = fmt[10:12]\n", | ||
" num_channels = struct.unpack('<H', num_channels_string)[0]\n", | ||
"\n", | ||
" sample_rate_string = fmt[12:16]\n", | ||
" sample_rate = struct.unpack(\"<I\",sample_rate_string)[0]\n", | ||
" \n", | ||
" bit_depth_string = fmt[22:24]\n", | ||
" bit_depth = struct.unpack(\"<H\",bit_depth_string)[0]\n", | ||
"\n", | ||
" return (num_channels, sample_rate, bit_depth)\n", | ||
"\n", | ||
"wavfilehelper = WavFileHelper()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def extract_features(file_name):\n", | ||
" \n", | ||
" try:\n", | ||
" audio_y, sample_rate = librosa.load(file_name, res_type='kaiser_best') \n", | ||
" D = (librosa.stft(audio_y, center=False, n_fft=1024))\n", | ||
" #print(D)\n", | ||
" real = np.real(D)\n", | ||
" imag = np.imag(D)\n", | ||
" D_true = np.stack([real, imag], -1)\n", | ||
" \n", | ||
" #print(D_true.shape)\n", | ||
" if(D_true.shape != (513, 341, 2)):\n", | ||
" #print(\"ALERTTT\")\n", | ||
" #print(D_true)\n", | ||
" num_of_times_to_repeat = math.ceil(341 / D_true.shape[1])\n", | ||
" D_true = np.repeat(D_true, num_of_times_to_repeat, axis=1)\n", | ||
" D_true = D_true[:, 0:341, :]\n", | ||
" #print(D_true.shape)\n", | ||
" #print(D_true)\n", | ||
" \n", | ||
" #result = np.zeros(b.shape)\n", | ||
"\n", | ||
" \n", | ||
" #print(D_true.shape)\n", | ||
" \n", | ||
" #librosa.display.specshow(librosa.amplitude_to_db(D,\n", | ||
" # ref=np.max),\n", | ||
" # y_axis='log', x_axis='time')\n", | ||
" #plt.title('Power spectrogram')\n", | ||
" #plt.colorbar(format='%+2.0f dB')\n", | ||
" #plt.tight_layout()\n", | ||
" #plt.show()\n", | ||
" \n", | ||
" \n", | ||
" except Exception as e:\n", | ||
" print(\"Error encountered while parsing file: \", file)\n", | ||
" return None \n", | ||
" \n", | ||
" return (D_true)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"metadata = pd.read_csv('./UrbanSound8k/metadata/UrbanSound8K.csv')\n", | ||
"\n", | ||
"audiodata = []\n", | ||
"for index, row in metadata.iterrows():\n", | ||
" \n", | ||
" file_name = os.path.join(os.path.abspath('UrbanSound8k/audio/'),'fold'+str(row[\"fold\"])+'/',str(row[\"slice_file_name\"]))\n", | ||
" data = wavfilehelper.read_file_properties(file_name)\n", | ||
" audiodata.append(data)\n", | ||
"\n", | ||
"# Convert into a Panda dataframe\n", | ||
"audiodf = pd.DataFrame(audiodata, columns=['num_channels','sample_rate','bit_depth'])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"features = np.load(\"features.npy\", allow_pickle=True)\n", | ||
"labels = np.load(\"labels.npy\", allow_pickle=True)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"metadata = pd.read_csv('./UrbanSound8k/metadata/UrbanSound8K.csv')\n", | ||
"features = []\n", | ||
"labels = []\n", | ||
"i = 0\n", | ||
"\n", | ||
"# Iterate through each sound file and extract the features \n", | ||
"for index, row in metadata.iterrows():\n", | ||
" \n", | ||
" i = i +1\n", | ||
" file_name = os.path.join(os.path.abspath('UrbanSound8k/audio/'),'fold'+str(row[\"fold\"])+'/',str(row[\"slice_file_name\"]))\n", | ||
" \n", | ||
" class_label = row[\"class\"]\n", | ||
" print(f'{i}: Processing {file_name}')\n", | ||
" data = extract_features(file_name)\n", | ||
" \n", | ||
" features.append(data)\n", | ||
" labels.append(class_label)\n", | ||
"\n", | ||
"# Convert into a Panda dataframe \n", | ||
"#featuresdf = pd.DataFrame(features, columns=['feature','class_label'])\n", | ||
"np.save('features.npy', features)\n", | ||
"np.save('labels.npy', labels)\n", | ||
"\n", | ||
"\n", | ||
"print('Finished feature extraction from ', len(features), ' files')\n", | ||
"\n", | ||
"X = np.asarray(features)\n", | ||
"y = np.asarray(labels)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"X = (features)\n", | ||
"y = (labels)\n", | ||
"\n", | ||
"# Encode the classification labels\n", | ||
"le = LabelEncoder()\n", | ||
"yy = to_categorical(le.fit_transform(y)) \n", | ||
"\n", | ||
"# split the dataset \n", | ||
"from sklearn.model_selection import train_test_split \n", | ||
"\n", | ||
"x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state = 42)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from keras.models import Sequential\n", | ||
"from keras.layers import Dense, Dropout, Activation, Flatten\n", | ||
"from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D\n", | ||
"from keras.optimizers import Adam\n", | ||
"from keras.utils import np_utils\n", | ||
"from sklearn import metrics \n", | ||
"\n", | ||
"\n", | ||
"num_labels = yy.shape[1]\n", | ||
"\n", | ||
"# Construct model \n", | ||
"model = Sequential()\n", | ||
"model.add(Conv2D(filters=16, kernel_size=2, input_shape=(513, 341, 2), activation='relu'))\n", | ||
"model.add(MaxPooling2D(pool_size=2))\n", | ||
"model.add(Dropout(0.2))\n", | ||
"\n", | ||
"model.add(Conv2D(filters=32, kernel_size=2, activation='relu'))\n", | ||
"model.add(MaxPooling2D(pool_size=2))\n", | ||
"model.add(Dropout(0.2))\n", | ||
"\n", | ||
"model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))\n", | ||
"model.add(MaxPooling2D(pool_size=2))\n", | ||
"model.add(Dropout(0.2))\n", | ||
"\n", | ||
"model.add(GlobalAveragePooling2D())\n", | ||
"\n", | ||
"model.add(Dense(num_labels, activation='softmax'))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')\n", | ||
"model.summary()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"score = model.evaluate(x_test, y_test, verbose=1)\n", | ||
"accuracy = 100*score[1]\n", | ||
"\n", | ||
"print(\"Pre-training accuracy: %.4f%%\" % accuracy) " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from keras.callbacks import ModelCheckpoint, TensorBoard\n", | ||
"from datetime import datetime \n", | ||
"\n", | ||
"num_epochs = 72\n", | ||
"num_batch_size = 256\n", | ||
"\n", | ||
"checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_cnn.hdf5', \n", | ||
" verbose=1, save_best_only=True)\n", | ||
"start = datetime.now()\n", | ||
"\n", | ||
"tbCallBack = TensorBoard(log_dir='./tensorboard_graphs', histogram_freq=0, write_graph=True, write_images=True)\n", | ||
"model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer, tbCallBack], verbose=1)\n", | ||
"\n", | ||
"\n", | ||
"duration = datetime.now() - start\n", | ||
"print(\"Training completed in time: \", duration)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
36 changes: 13 additions & 23 deletions
36
training/.ipynb_checkpoints/standard_tcn-checkpoint.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.