Skip to content

Commit

Permalink
Removed long code error
Browse files Browse the repository at this point in the history
  • Loading branch information
AnanthVivekanand committed Dec 11, 2020
1 parent 78bde29 commit 084ed3b
Show file tree
Hide file tree
Showing 10 changed files with 319 additions and 189 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

24 changes: 1 addition & 23 deletions training/.ipynb_checkpoints/optimized_TCN-checkpoint.ipynb

Large diffs are not rendered by default.

286 changes: 286 additions & 0 deletions training/.ipynb_checkpoints/standard_cnn_model-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import librosa\n",
"import librosa.display\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from keras.utils import to_categorical\n",
"import h5py\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import struct\n",
"\n",
"class WavFileHelper():\n",
" \n",
" def read_file_properties(self, filename):\n",
"\n",
" wave_file = open(filename,\"rb\")\n",
" \n",
" riff = wave_file.read(12)\n",
" fmt = wave_file.read(36)\n",
" \n",
" num_channels_string = fmt[10:12]\n",
" num_channels = struct.unpack('<H', num_channels_string)[0]\n",
"\n",
" sample_rate_string = fmt[12:16]\n",
" sample_rate = struct.unpack(\"<I\",sample_rate_string)[0]\n",
" \n",
" bit_depth_string = fmt[22:24]\n",
" bit_depth = struct.unpack(\"<H\",bit_depth_string)[0]\n",
"\n",
" return (num_channels, sample_rate, bit_depth)\n",
"\n",
"wavfilehelper = WavFileHelper()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def extract_features(file_name):\n",
" \n",
" try:\n",
" audio_y, sample_rate = librosa.load(file_name, res_type='kaiser_best') \n",
" D = (librosa.stft(audio_y, center=False, n_fft=1024))\n",
" #print(D)\n",
" real = np.real(D)\n",
" imag = np.imag(D)\n",
" D_true = np.stack([real, imag], -1)\n",
" \n",
" #print(D_true.shape)\n",
" if(D_true.shape != (513, 341, 2)):\n",
" #print(\"ALERTTT\")\n",
" #print(D_true)\n",
" num_of_times_to_repeat = math.ceil(341 / D_true.shape[1])\n",
" D_true = np.repeat(D_true, num_of_times_to_repeat, axis=1)\n",
" D_true = D_true[:, 0:341, :]\n",
" #print(D_true.shape)\n",
" #print(D_true)\n",
" \n",
" #result = np.zeros(b.shape)\n",
"\n",
" \n",
" #print(D_true.shape)\n",
" \n",
" #librosa.display.specshow(librosa.amplitude_to_db(D,\n",
" # ref=np.max),\n",
" # y_axis='log', x_axis='time')\n",
" #plt.title('Power spectrogram')\n",
" #plt.colorbar(format='%+2.0f dB')\n",
" #plt.tight_layout()\n",
" #plt.show()\n",
" \n",
" \n",
" except Exception as e:\n",
" print(\"Error encountered while parsing file: \", file)\n",
" return None \n",
" \n",
" return (D_true)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"metadata = pd.read_csv('./UrbanSound8k/metadata/UrbanSound8K.csv')\n",
"\n",
"audiodata = []\n",
"for index, row in metadata.iterrows():\n",
" \n",
" file_name = os.path.join(os.path.abspath('UrbanSound8k/audio/'),'fold'+str(row[\"fold\"])+'/',str(row[\"slice_file_name\"]))\n",
" data = wavfilehelper.read_file_properties(file_name)\n",
" audiodata.append(data)\n",
"\n",
"# Convert into a Panda dataframe\n",
"audiodf = pd.DataFrame(audiodata, columns=['num_channels','sample_rate','bit_depth'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"features = np.load(\"features.npy\", allow_pickle=True)\n",
"labels = np.load(\"labels.npy\", allow_pickle=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"metadata = pd.read_csv('./UrbanSound8k/metadata/UrbanSound8K.csv')\n",
"features = []\n",
"labels = []\n",
"i = 0\n",
"\n",
"# Iterate through each sound file and extract the features \n",
"for index, row in metadata.iterrows():\n",
" \n",
" i = i +1\n",
" file_name = os.path.join(os.path.abspath('UrbanSound8k/audio/'),'fold'+str(row[\"fold\"])+'/',str(row[\"slice_file_name\"]))\n",
" \n",
" class_label = row[\"class\"]\n",
" print(f'{i}: Processing {file_name}')\n",
" data = extract_features(file_name)\n",
" \n",
" features.append(data)\n",
" labels.append(class_label)\n",
"\n",
"# Convert into a Panda dataframe \n",
"#featuresdf = pd.DataFrame(features, columns=['feature','class_label'])\n",
"np.save('features.npy', features)\n",
"np.save('labels.npy', labels)\n",
"\n",
"\n",
"print('Finished feature extraction from ', len(features), ' files')\n",
"\n",
"X = np.asarray(features)\n",
"y = np.asarray(labels)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X = (features)\n",
"y = (labels)\n",
"\n",
"# Encode the classification labels\n",
"le = LabelEncoder()\n",
"yy = to_categorical(le.fit_transform(y)) \n",
"\n",
"# split the dataset \n",
"from sklearn.model_selection import train_test_split \n",
"\n",
"x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state = 42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, Activation, Flatten\n",
"from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D\n",
"from keras.optimizers import Adam\n",
"from keras.utils import np_utils\n",
"from sklearn import metrics \n",
"\n",
"\n",
"num_labels = yy.shape[1]\n",
"\n",
"# Construct model \n",
"model = Sequential()\n",
"model.add(Conv2D(filters=16, kernel_size=2, input_shape=(513, 341, 2), activation='relu'))\n",
"model.add(MaxPooling2D(pool_size=2))\n",
"model.add(Dropout(0.2))\n",
"\n",
"model.add(Conv2D(filters=32, kernel_size=2, activation='relu'))\n",
"model.add(MaxPooling2D(pool_size=2))\n",
"model.add(Dropout(0.2))\n",
"\n",
"model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))\n",
"model.add(MaxPooling2D(pool_size=2))\n",
"model.add(Dropout(0.2))\n",
"\n",
"model.add(GlobalAveragePooling2D())\n",
"\n",
"model.add(Dense(num_labels, activation='softmax'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"score = model.evaluate(x_test, y_test, verbose=1)\n",
"accuracy = 100*score[1]\n",
"\n",
"print(\"Pre-training accuracy: %.4f%%\" % accuracy) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from keras.callbacks import ModelCheckpoint, TensorBoard\n",
"from datetime import datetime \n",
"\n",
"num_epochs = 72\n",
"num_batch_size = 256\n",
"\n",
"checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_cnn.hdf5', \n",
" verbose=1, save_best_only=True)\n",
"start = datetime.now()\n",
"\n",
"tbCallBack = TensorBoard(log_dir='./tensorboard_graphs', histogram_freq=0, write_graph=True, write_images=True)\n",
"model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer, tbCallBack], verbose=1)\n",
"\n",
"\n",
"duration = datetime.now() - start\n",
"print(\"Training completed in time: \", duration)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
36 changes: 13 additions & 23 deletions training/.ipynb_checkpoints/standard_tcn-checkpoint.ipynb

Large diffs are not rendered by default.

25 changes: 1 addition & 24 deletions training/ResNet_classifier_from_scratch.ipynb

Large diffs are not rendered by default.

25 changes: 1 addition & 24 deletions training/ResNet_classifier_transfer_learning.ipynb

Large diffs are not rendered by default.

24 changes: 1 addition & 23 deletions training/optimized_TCN.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion training/standard_cnn_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
"version": "3.8.6"
}
},
"nbformat": 4,
Expand Down
36 changes: 13 additions & 23 deletions training/standard_tcn.ipynb

Large diffs are not rendered by default.

0 comments on commit 084ed3b

Please sign in to comment.