def extract_features(file_name, *, n_fft=1024, target_frames=341):
    """Load an audio file and return its STFT as a fixed-width real/imag array.

    The file is loaded with ``librosa.load`` (``res_type='kaiser_best'``), an
    un-centered STFT is computed, and the real and imaginary parts of the
    complex spectrum are stacked along a new trailing axis.  The frame axis
    (axis 1) is then forced to exactly ``target_frames`` entries: shorter
    inputs have their frames repeated, longer inputs are truncated.

    Args:
        file_name: Path of the audio file to load.
        n_fft: FFT window size passed to ``librosa.stft``.
        target_frames: Number of STFT frames the result must have.

    Returns:
        A NumPy array whose axis 1 has length ``target_frames`` and whose
        last axis holds ``[real, imag]``; with the default arguments the
        expected shape is ``(513, 341, 2)``.  Returns ``None`` if loading or
        transforming the file fails for any reason (best-effort: the error
        is printed, not raised).
    """
    try:
        # librosa.load also returns the sample rate, which is not needed here.
        audio_y, _ = librosa.load(file_name, res_type='kaiser_best')
        spectrum = librosa.stft(audio_y, center=False, n_fft=n_fft)

        # Split the complex spectrum into two real-valued channels.
        features = np.stack([np.real(spectrum), np.imag(spectrum)], axis=-1)

        n_frames = features.shape[1]
        if n_frames != target_frames:
            # NOTE(review): np.repeat duplicates each frame consecutively
            # (a, a, b, b, ...) instead of looping the clip (a, b, a, b, ...)
            # as np.tile would.  Kept as-is so existing features do not
            # change -- confirm this stretching is what is intended.
            repeats = math.ceil(target_frames / n_frames)
            features = np.repeat(features, repeats, axis=1)
            features = features[:, :target_frames, :]
    except Exception as e:
        # Deliberate best-effort: report the failing file and let the caller
        # skip it.  (The original printed the undefined name `file`, which
        # raised NameError from inside this handler.)
        print("Error encountered while parsing file: ", file_name, e)
        return None

    return features