diff --git a/Makefile b/Makefile index 2987856..2e05160 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,7 @@ +features: + cd src/features; \ + python3 build_audio_features.py + data: cd src/data; \ python3 make_dataset.py diff --git a/dataset.csv b/dataset.csv index 376bf37..1b70f26 100644 --- a/dataset.csv +++ b/dataset.csv @@ -1,21 +1,21 @@ -audio_name,clap_start,clap_end,music_start,music_end,end_clap_start,audio_end -audio0,0,11,32,2551,2553,2607.16 -audio1,0,22,34,2418,2419,2451.75 -audio2,0,17,41,2423,2424,2488.09 -audio3,0,15,35,2017,2018,2096.62 -audio4,0,5,13,2128,2129,2191.2 -audio5,34,49,60,2480,2481,2554.71 -audio6,5,40,49,2180,2182,2190.18 -audio7,0,16,20,1712,1714,1767.36 -audio8,0,16,31,1947,1948,2048.23 -audio9,15,21,32,1665,1667,1847.66 -audio10,0,30,64,1992,1994,2062.28 -audio11,0,32,55,2390,2391,2432.45 -audio12,0,14,30,2262,2264,2288.93 -audio13,3,10,12,3970,3974,4048.86 -audio14,0,35,38,384,386,398.8 -audio15,0,19,30,1557,1558,1579.77 -audio16,16,48,57,1984,1985,2080.37 -audio17,0,31,41,2399,2401,2432.85 -audio18,0,20,41,1990,1994,2072.06 -audio19,0,26,44,1660,1662,1681.57 \ No newline at end of file +audio_name,clap_start,clap_end,music_start,music_end,end_clap_start,end_clap_end,audio_end +audio0,0,11,32,2551,2553,2602,2607.16 +audio1,0,22,34,2418,2419,2451.75,2451.75 +audio2,0,17,41,2423,2424,2488.09,2488.09 +audio3,0,15,35,2017,2018,2096.62,2096.62 +audio4,0,5,13,2128,2129,2191.2,2191.2 +audio5,34,49,60,2480,2481,2547,2554.71 +audio6,5,40,49,2180,2182,2189.5,2190.18 +audio7,0,16,20,1712,1714,1767.36,1767.36 +audio8,0,16,31,1947,1948,2048.23,2048.23 +audio9,15,21,32,1665,1667,1847.66,1847.66 +audio10,0,30,64,1992,1994,2061,2062.28 +audio11,0,32,55,2390,2391,2431,2432.45 +audio12,0,14,30,2262,2264,2288.93,2288.93 +audio13,3,10,12,3970,3974,4043,4048.86 +audio14,0,35,38,384,386,398.8,398.8 +audio15,0,19,30,1557,1558,1579.77,1579.77 +audio16,16,48,57,1984,1985,2059,2080.37 +audio17,0,31,41,2399,2401,2432.85,2432.85 +audio18,0,20,41,1990,1994,2072.06,2072.06 +audio19,0,26,44,1660,1662,1681.57,1681.57 \ No newline at end of file diff --git a/notebooks/data-display.ipynb b/notebooks/data-display.ipynb index 9ea410a..43df679 100644 --- a/notebooks/data-display.ipynb +++ b/notebooks/data-display.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -41,6 +41,7 @@ " music_start\n", " music_end\n", " end_clap_start\n", + " end_clap_end\n", " audio_end\n", " \n", " \n", @@ -53,6 +54,7 @@ " 32\n", " 2551\n", " 2553\n", + " 2602.00\n", " 2607.16\n", " \n", " \n", @@ -64,6 +66,7 @@ " 2418\n", " 2419\n", " 2451.75\n", + " 2451.75\n", " \n", " \n", " 2\n", @@ -74,6 +77,7 @@ " 2423\n", " 2424\n", " 2488.09\n", + " 2488.09\n", " \n", " \n", " 3\n", @@ -84,6 +88,7 @@ " 2017\n", " 2018\n", " 2096.62\n", + " 2096.62\n", " \n", " \n", " 4\n", @@ -94,6 +99,7 @@ " 2128\n", " 2129\n", " 2191.20\n", + " 2191.20\n", " \n", " \n", " 5\n", @@ -103,6 +109,7 @@ " 60\n", " 2480\n", " 2481\n", + " 2547.00\n", " 2554.71\n", " \n", " \n", @@ -113,6 +120,7 @@ " 49\n", " 2180\n", " 2182\n", + " 2189.50\n", " 2190.18\n", " \n", " \n", @@ -124,6 +132,7 @@ " 1712\n", " 1714\n", " 1767.36\n", + " 1767.36\n", " \n", " \n", " 8\n", @@ -134,6 +143,7 @@ " 1947\n", " 1948\n", " 2048.23\n", + " 2048.23\n", " \n", " \n", " 9\n", @@ -144,6 +154,7 @@ " 1665\n", " 1667\n", " 1847.66\n", + " 1847.66\n", " \n", " \n", " 10\n", @@ -153,6 +164,7 @@ " 64\n", " 1992\n", " 1994\n", + " 2061.00\n", " 2062.28\n", " \n", " \n", @@ -163,6 +175,7 @@ " 55\n", " 2390\n", " 2391\n", + " 2431.00\n", " 2432.45\n", " \n", " \n", @@ -174,6 +187,7 @@ " 2262\n", " 2264\n", " 2288.93\n", + " 2288.93\n", " \n", " \n", " 13\n", @@ -183,6 +197,7 @@ " 12\n", " 3970\n", " 3974\n", + " 4043.00\n", " 4048.86\n", " \n", " \n", @@ -194,6 +209,7 @@ " 384\n", " 386\n", " 398.80\n", + " 398.80\n", " \n", " \n", " 15\n", @@ -204,6 +220,7 @@ " 1557\n", " 1558\n", " 1579.77\n", + " 1579.77\n", " \n", " \n", " 16\n", @@ -213,6 +230,7 @@ " 57\n", " 1984\n", " 1985\n", + " 2059.00\n", " 2080.37\n", " \n", " \n", @@ -224,6 +242,7 @@ " 2399\n", " 2401\n", " 2432.85\n", + " 2432.85\n", " \n", " \n", " 18\n", @@ -234,6 +253,7 @@ " 1990\n", " 1994\n", " 2072.06\n", + " 2072.06\n", " \n", " \n", " 19\n", @@ -244,6 +264,7 @@ " 1660\n", " 1662\n", " 1681.57\n", + " 1681.57\n", " \n", " \n", "\n", @@ -272,30 +293,30 @@ "18 audio18 0 20 41 1990 1994 \n", "19 audio19 0 26 44 1660 1662 \n", "\n", - " audio_end \n", - "0 2607.16 \n", - "1 2451.75 \n", - "2 2488.09 \n", - "3 2096.62 \n", - "4 2191.20 \n", - "5 2554.71 \n", - "6 2190.18 \n", - "7 1767.36 \n", - "8 2048.23 \n", - "9 1847.66 \n", - "10 2062.28 \n", - "11 2432.45 \n", - "12 2288.93 \n", - "13 4048.86 \n", - "14 398.80 \n", - "15 1579.77 \n", - "16 2080.37 \n", - "17 2432.85 \n", - "18 2072.06 \n", - "19 1681.57 " + " end_clap_end audio_end \n", + "0 2602.00 2607.16 \n", + "1 2451.75 2451.75 \n", + "2 2488.09 2488.09 \n", + "3 2096.62 2096.62 \n", + "4 2191.20 2191.20 \n", + "5 2547.00 2554.71 \n", + "6 2189.50 2190.18 \n", + "7 1767.36 1767.36 \n", + "8 2048.23 2048.23 \n", + "9 1847.66 1847.66 \n", + "10 2061.00 2062.28 \n", + "11 2431.00 2432.45 \n", + "12 2288.93 2288.93 \n", + "13 4043.00 4048.86 \n", + "14 398.80 398.80 \n", + "15 1579.77 1579.77 \n", + "16 2059.00 2080.37 \n", + "17 2432.85 2432.85 \n", + "18 2072.06 2072.06 \n", + "19 1681.57 1681.57 " ] }, - "execution_count": 3, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -308,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -487,7 +508,7 @@ "19 https://www.youtube.com/watch?v=hXTyxNccdjM " ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } diff --git a/notebooks/display.ipynb b/notebooks/display.ipynb index 83f3f3c..e7cf887 100644 --- a/notebooks/display.ipynb +++ b/notebooks/display.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -38,13 +38,13 @@ "text/html": [ "\n", " \n", " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -52,9 +52,10 @@ } ], "source": [ - "audio_path = \"../audio/mp3/audio19.mp3\"\n", + "audio_no = 19\n", + "audio_path = f\"../audio/mp3/audio{audio_no}.mp3\"\n", "\n", - "start = 1662\n", + "start = 1680\n", "# end = 44\n", "\n", "# slice the performance\n", diff --git a/notebooks/features.ipynb b/notebooks/features.ipynb new file mode 100644 index 0000000..e69de29 diff --git a/notebooks/test.csv b/notebooks/test.csv new file mode 100644 index 0000000..f06df28 --- /dev/null +++ b/notebooks/test.csv @@ -0,0 +1,2 @@ +rms +2 diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb new file mode 100644 index 0000000..f72d278 --- /dev/null +++ b/notebooks/test.ipynb @@ -0,0 +1,45 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.DataFrame({\n", + " 'rms': []\n", + "})\n", + "\n", + "df.loc[len(df)] = 2\n", + "\n", + "df.reset_index(drop=True)\n", + "\n", + "df.to_csv(\"./test.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/test.wav b/notebooks/test.wav index b0b6989..28b689a 100644 Binary files a/notebooks/test.wav and b/notebooks/test.wav differ diff --git a/src/data/make_dataset.py b/src/data/make_dataset.py index 9cbef92..df20492 100644 --- a/src/data/make_dataset.py +++ b/src/data/make_dataset.py @@ -127,6 +127,8 @@ def split(start: int, end: int, audio_name="/path/to/sample", audio_result_name= clap_end_time = data.loc[i, 'clap_end'] * 1000 while (clap_start_time + duration < clap_end_time): + # print(f"{j}.wav is from {audio}.mp3") + if (os.path.exists("data/yes/" + f"{j}.wav")): clap_start_time += duration j += 1 @@ -146,9 +148,11 @@ def split(start: int, end: int, audio_name="/path/to/sample", audio_result_name= # clapping after performance end_clap_time = data.loc[i, 'end_clap_start'] * 1000 - audio_end_time = data.loc[i, 'audio_end'] * 1000 + audio_end_time = data.loc[i, 'end_clap_end'] * 1000 while (end_clap_time + duration < audio_end_time): + # print(f"{j}.wav is from {audio}.mp3") + if (os.path.exists("data/yes/" + f"{j}.wav")): end_clap_time += duration j += 1 diff --git a/src/features/build_audio_features.py b/src/features/build_audio_features.py index e69de29..4e92884 100644 --- a/src/features/build_audio_features.py +++ b/src/features/build_audio_features.py @@ -0,0 +1,102 @@ +import librosa +import pandas as pd +import numpy as np +import os + +# constants +FRAME_LENGTH = 2048 +HOP_LENGTH = 512 + +# create a DataFrame to store the audio features in a CSV file later +df = pd.DataFrame({ + 'audio_name': [], + 'rms': [], + 'zcr': [], + 'spectral_centroid': [], + 'spectral_bandwidth': [], + 'spectral_flatness': [], + 'class': [] +}) + +print("Building Audio Features for 'yes' class") + +# loop through each audio in the "yes" category +for audio_name in os.listdir("../../data/yes"): + + # load the audio using librosa + audio_path = "../../data/yes/" + audio_name + audio, sr = librosa.load(audio_path) + + # calculate the features + audio_rms = librosa.feature.rms(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH) + audio_zcr = librosa.feature.zero_crossing_rate(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH) + audio_spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH) + audio_spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH) + audio_spectral_flatness = librosa.feature.spectral_flatness(y=audio, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH) + + # perform aggregation for Machine Learning + audio_rms = np.mean(audio_rms) + audio_zcr = np.mean(audio_zcr) + audio_spectral_centroid = np.mean(audio_spectral_centroid) + audio_spectral_bandwidth = np.mean(audio_spectral_bandwidth) + audio_spectral_flatness = np.mean(audio_spectral_flatness) + + # create a new array to be stored + features = [ + audio_name, + audio_rms, + audio_zcr, + audio_spectral_centroid, + audio_spectral_bandwidth, + audio_spectral_flatness, + "yes" + ] + + # add to the DataFrame + df.loc[len(df)] = features + + + +print("Building Audio Features for 'no' class") + +# loop through each audio in the "no" category +for audio_name in os.listdir("../../data/no"): + + # load the audio using librosa + audio_path = "../../data/no/" + audio_name + audio, sr = librosa.load(audio_path) + + # calculate the features + audio_rms = librosa.feature.rms(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH) + audio_zcr = librosa.feature.zero_crossing_rate(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH) + audio_spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH) + audio_spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH) + audio_spectral_flatness = librosa.feature.spectral_flatness(y=audio, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH) + + # perform aggregation for Machine Learning + audio_rms = np.mean(audio_rms) + audio_zcr = np.mean(audio_zcr) + audio_spectral_centroid = np.mean(audio_spectral_centroid) + audio_spectral_bandwidth = np.mean(audio_spectral_bandwidth) + audio_spectral_flatness = np.mean(audio_spectral_flatness) + + # create a new array to be stored + features = [ + audio_name, + audio_rms, + audio_zcr, + audio_spectral_centroid, + audio_spectral_bandwidth, + audio_spectral_flatness, + "no" + ] + + # add to the DataFrame + df.loc[len(df)] = features + + + +print("Exporting to csv") + +# export to csv +df.to_csv("../../data/data.csv", index=False)