|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 1, |
| 5 | + "execution_count": 2, |
6 | 6 | "metadata": {},
|
7 | 7 | "outputs": [],
|
8 | 8 | "source": [
|
|
16 | 16 | },
|
17 | 17 | {
|
18 | 18 | "cell_type": "code",
|
19 |
| - "execution_count": 52, |
| 19 | + "execution_count": 20, |
20 | 20 | "metadata": {},
|
21 | 21 | "outputs": [],
|
22 | 22 | "source": [
|
|
26 | 26 | " return a[p], b[p]\n",
|
27 | 27 | " \n",
|
28 | 28 | "def get_data_set(count, speakers):\n",
|
29 |
| - " third = int(count/speakers)\n", |
| 29 | + " nn_input_chunks_per_speaker = int(count/speakers)\n", |
30 | 30 | " X = np.zeros((count, 12*20))\n",
|
31 | 31 | " y = np.zeros(count, dtype='uint8')\n",
|
32 | 32 | " \n",
|
33 |
| - " coefficients_per_speaker = third * 20\n", |
| 33 | + " frames_per_speaker = nn_input_chunks_per_speaker * 20\n", |
34 | 34 | " \n",
|
35 | 35 | " all_speakers = []\n",
|
| 36 | + " \n", |
36 | 37 | " for i in range(0, speakers):\n",
|
37 | 38 | " all_speakers.append([])\n",
|
38 | 39 | " index = 0\n",
|
39 |
| - " while (len(all_speakers[i]) < coefficients_per_speaker):\n", |
| 40 | + " while (len(all_speakers[i]) < frames_per_speaker):\n", |
40 | 41 | " print(index, end=\"\\r\")\n",
|
41 |
| - " y_, sr = Utils.load_file(f\"C:\\\\Users\\\\SCU8BH\\\\Documents\\\\T3000\\\\Studienarbeit\\\\Data\\\\50_speakers_audio_data\\\\Speaker{i+30:04}\\\\Speaker{i+30:02}_{index:03}.wav\")\n", |
| 42 | + " y_, sr = Utils.load_file(f\"/home/henry/Downloads/archive/50_speakers_audio_data/Speaker_{10+i:04}/Speaker_{10+i:04}_{index:05}.wav\")\n", |
42 | 43 | " \n",
|
43 | 44 | " y_ = AudioPreprocessor.remove_noise(y=y_, sr=sr)\n",
|
44 | 45 | " y_ = AudioPreprocessor.remove_silence(y=y_)\n",
|
|
54 | 55 | " print()\n",
|
55 | 56 | " \n",
|
56 | 57 | " for i in range(0, speakers):\n",
|
57 |
| - " for j in range(0, third):\n", |
58 |
| - " X[i*third + j] = np.concatenate((all_speakers[i][20*j][1:13], \n", |
| 58 | + " for j in range(0, nn_input_chunks_per_speaker):\n", |
| 59 | + " X[i*nn_input_chunks_per_speaker + j] = np.concatenate((all_speakers[i][20*j][1:13], \n", |
59 | 60 | " all_speakers[i][20*j+1][1:13], \n",
|
60 | 61 | " all_speakers[i][20*j+2][1:13],\n",
|
61 | 62 | " all_speakers[i][20*j+3][1:13],\n",
|
|
76 | 77 | " all_speakers[i][20*j+18][1:13],\n",
|
77 | 78 | " all_speakers[i][20*j+19][1:13]\n",
|
78 | 79 | " ))\n",
|
79 |
| - " y[i*third + j] = i\n", |
| 80 | + " y[i*nn_input_chunks_per_speaker + j] = i\n", |
80 | 81 | " \n",
|
81 | 82 | " return X, y"
|
82 | 83 | ]
|
83 | 84 | },
|
84 | 85 | {
|
85 | 86 | "cell_type": "code",
|
86 |
| - "execution_count": 53, |
| 87 | + "execution_count": 21, |
87 | 88 | "metadata": {},
|
88 | 89 | "outputs": [
|
89 | 90 | {
|
90 | 91 | "name": "stdout",
|
91 | 92 | "output_type": "stream",
|
92 | 93 | "text": [
|
93 | 94 | "8\n",
|
94 |
| - "11\n", |
| 95 | + "9\n", |
95 | 96 | "9\n",
|
96 | 97 | "10\n",
|
97 |
| - "10\n" |
| 98 | + "9\n" |
98 | 99 | ]
|
99 | 100 | }
|
100 | 101 | ],
|
|
106 | 107 | },
|
107 | 108 | {
|
108 | 109 | "cell_type": "code",
|
109 |
| - "execution_count": 63, |
| 110 | + "execution_count": 28, |
110 | 111 | "metadata": {},
|
111 | 112 | "outputs": [
|
112 | 113 | {
|
113 | 114 | "name": "stdout",
|
114 | 115 | "output_type": "stream",
|
115 | 116 | "text": [
|
116 | 117 | "[0 0 0 ... 4 4 4]\n",
|
117 |
| - "[4 2 3 ... 2 2 4]\n", |
118 |
| - "29/29 [==============================] - 0s 1ms/step - loss: 2.1533e-05 - accuracy: 1.0000\n", |
119 |
| - "Test accuracy: 1.0\n", |
120 |
| - "Test loss: 2.1533451217692345e-05\n", |
121 |
| - "4/4 [==============================] - 0s 1ms/step\n", |
122 |
| - "[4 2 2 2 2 2 2 2 2 2 2 2 1 4 2 2 1 1 1 1 2 2 4 2 1 2 2 2 2 2 2 4 2 2 2 2 2\n", |
123 |
| - " 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 4 1 2 2 2 2 2 1 2 3 1 1 2 2 2 4 2 4 2 2 2\n", |
124 |
| - " 2 2 2 2 4 1 0 2 4 2 4 2 4 2 1 2 4 2 3 3 2 2 2 2 2 2 2 4 3 4 1 0 2 1 2 2 4\n", |
125 |
| - " 2 2 4 2 2 0 0 0]\n", |
126 |
| - "6\n", |
127 |
| - "14\n", |
128 |
| - "79\n", |
129 |
| - "4\n", |
130 |
| - "16\n" |
| 118 | + "[2 3 2 ... 0 4 2]\n", |
| 119 | + "29/29 [==============================] - 0s 1ms/step - loss: 0.8353 - accuracy: 0.6485\n", |
| 120 | + "Test accuracy: 0.6484715938568115\n", |
| 121 | + "Test loss: 0.8353310227394104\n", |
| 122 | + "4/4 [==============================] - 0s 2ms/step\n", |
| 123 | + "[2 4 2 4 4 4 4 4 2 4 0 2 2 2 1 2 2 4 2 3 4 4 3 4 2 2 3 3 2 2 2 4 4 2 3 4 4\n", |
| 124 | + " 0 4 0 2 4 2 4 4 4 4 4 2 3 0 2 2 4 2 2 4 2 0 2 4 2 2 4 4 2 0 4 2 2 4 4 2 2\n", |
| 125 | + " 3 0 2 4 3 2 2 2 4 2 2 0 4 0 3 4 3 2 2 0 4 2 0 2 2 4 3 2 2 4 2 0 2 2 2 4 4\n", |
| 126 | + " 2 2 2 2 2 2 4]\n", |
| 127 | + "12\n", |
| 128 | + "1\n", |
| 129 | + "54\n", |
| 130 | + "11\n", |
| 131 | + "40\n" |
131 | 132 | ]
|
132 | 133 | }
|
133 | 134 | ],
|
|
154 | 155 | " print(f\"Test loss: {test_loss}\")\n",
|
155 | 156 | " \n",
|
156 | 157 | " \n",
|
157 |
| - " y_, sr = Utils.load_file(f\"C:\\\\Users\\\\SCU8BH\\\\Documents\\\\T3000\\\\Studienarbeit\\\\Data\\\\50_speakers_audio_data\\\\Speaker0032\\\\Speaker32_012.wav\")\n", |
| 158 | + " y_, sr = Utils.load_file(f\"/home/henry/Downloads/archive/50_speakers_audio_data/Speaker_0014/Speaker_0014_00020.wav\")\n", |
158 | 159 | " \n",
|
159 | 160 | " y_ = AudioPreprocessor.remove_noise(y=y_, sr=sr)\n",
|
160 | 161 | " y_ = AudioPreprocessor.remove_silence(y=y_)\n",
|
|
207 | 208 | ],
|
208 | 209 | "metadata": {
|
209 | 210 | "kernelspec": {
|
210 |
| - "display_name": "Python 3.10.4 64-bit", |
| 211 | + "display_name": "Python 3", |
211 | 212 | "language": "python",
|
212 | 213 | "name": "python3"
|
213 | 214 | },
|
|
221 | 222 | "name": "python",
|
222 | 223 | "nbconvert_exporter": "python",
|
223 | 224 | "pygments_lexer": "ipython3",
|
224 |
| - "version": "3.10.4" |
| 225 | + "version": "3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0]" |
225 | 226 | },
|
226 | 227 | "orig_nbformat": 4,
|
227 | 228 | "vscode": {
|
228 | 229 | "interpreter": {
|
229 |
| - "hash": "2fc4d7ba6602d69fe52dcf13f0361bb9556610661c910f56182baab83bdef03f" |
| 230 | + "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" |
230 | 231 | }
|
231 | 232 | }
|
232 | 233 | },
|
|
0 commit comments