diff --git a/Makefile b/Makefile
index 2987856..2e05160 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,7 @@
+features:  # run the audio feature extraction script from its own directory
+	cd src/features && \
+	python3 build_audio_features.py
+
data:
cd src/data; \
python3 make_dataset.py
diff --git a/dataset.csv b/dataset.csv
index 376bf37..1b70f26 100644
--- a/dataset.csv
+++ b/dataset.csv
@@ -1,21 +1,21 @@
-audio_name,clap_start,clap_end,music_start,music_end,end_clap_start,audio_end
-audio0,0,11,32,2551,2553,2607.16
-audio1,0,22,34,2418,2419,2451.75
-audio2,0,17,41,2423,2424,2488.09
-audio3,0,15,35,2017,2018,2096.62
-audio4,0,5,13,2128,2129,2191.2
-audio5,34,49,60,2480,2481,2554.71
-audio6,5,40,49,2180,2182,2190.18
-audio7,0,16,20,1712,1714,1767.36
-audio8,0,16,31,1947,1948,2048.23
-audio9,15,21,32,1665,1667,1847.66
-audio10,0,30,64,1992,1994,2062.28
-audio11,0,32,55,2390,2391,2432.45
-audio12,0,14,30,2262,2264,2288.93
-audio13,3,10,12,3970,3974,4048.86
-audio14,0,35,38,384,386,398.8
-audio15,0,19,30,1557,1558,1579.77
-audio16,16,48,57,1984,1985,2080.37
-audio17,0,31,41,2399,2401,2432.85
-audio18,0,20,41,1990,1994,2072.06
-audio19,0,26,44,1660,1662,1681.57
\ No newline at end of file
+audio_name,clap_start,clap_end,music_start,music_end,end_clap_start,end_clap_end,audio_end
+audio0,0,11,32,2551,2553,2602,2607.16
+audio1,0,22,34,2418,2419,2451.75,2451.75
+audio2,0,17,41,2423,2424,2488.09,2488.09
+audio3,0,15,35,2017,2018,2096.62,2096.62
+audio4,0,5,13,2128,2129,2191.2,2191.2
+audio5,34,49,60,2480,2481,2547,2554.71
+audio6,5,40,49,2180,2182,2189.5,2190.18
+audio7,0,16,20,1712,1714,1767.36,1767.36
+audio8,0,16,31,1947,1948,2048.23,2048.23
+audio9,15,21,32,1665,1667,1847.66,1847.66
+audio10,0,30,64,1992,1994,2061,2062.28
+audio11,0,32,55,2390,2391,2431,2432.45
+audio12,0,14,30,2262,2264,2288.93,2288.93
+audio13,3,10,12,3970,3974,4043,4048.86
+audio14,0,35,38,384,386,398.8,398.8
+audio15,0,19,30,1557,1558,1579.77,1579.77
+audio16,16,48,57,1984,1985,2059,2080.37
+audio17,0,31,41,2399,2401,2432.85,2432.85
+audio18,0,20,41,1990,1994,2072.06,2072.06
+audio19,0,26,44,1660,1662,1681.57,1681.57
\ No newline at end of file
diff --git a/notebooks/data-display.ipynb b/notebooks/data-display.ipynb
index 9ea410a..43df679 100644
--- a/notebooks/data-display.ipynb
+++ b/notebooks/data-display.ipynb
@@ -11,7 +11,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -41,6 +41,7 @@
"
music_start | \n",
" music_end | \n",
" end_clap_start | \n",
+ " end_clap_end | \n",
" audio_end | \n",
" \n",
" \n",
@@ -53,6 +54,7 @@
" 32 | \n",
" 2551 | \n",
" 2553 | \n",
+ " 2602.00 | \n",
" 2607.16 | \n",
" \n",
" \n",
@@ -64,6 +66,7 @@
" 2418 | \n",
" 2419 | \n",
" 2451.75 | \n",
+ " 2451.75 | \n",
"
\n",
" \n",
" 2 | \n",
@@ -74,6 +77,7 @@
" 2423 | \n",
" 2424 | \n",
" 2488.09 | \n",
+ " 2488.09 | \n",
"
\n",
" \n",
" 3 | \n",
@@ -84,6 +88,7 @@
" 2017 | \n",
" 2018 | \n",
" 2096.62 | \n",
+ " 2096.62 | \n",
"
\n",
" \n",
" 4 | \n",
@@ -94,6 +99,7 @@
" 2128 | \n",
" 2129 | \n",
" 2191.20 | \n",
+ " 2191.20 | \n",
"
\n",
" \n",
" 5 | \n",
@@ -103,6 +109,7 @@
" 60 | \n",
" 2480 | \n",
" 2481 | \n",
+ " 2547.00 | \n",
" 2554.71 | \n",
"
\n",
" \n",
@@ -113,6 +120,7 @@
" 49 | \n",
" 2180 | \n",
" 2182 | \n",
+ " 2189.50 | \n",
" 2190.18 | \n",
"
\n",
" \n",
@@ -124,6 +132,7 @@
" 1712 | \n",
" 1714 | \n",
" 1767.36 | \n",
+ " 1767.36 | \n",
"
\n",
" \n",
" 8 | \n",
@@ -134,6 +143,7 @@
" 1947 | \n",
" 1948 | \n",
" 2048.23 | \n",
+ " 2048.23 | \n",
"
\n",
" \n",
" 9 | \n",
@@ -144,6 +154,7 @@
" 1665 | \n",
" 1667 | \n",
" 1847.66 | \n",
+ " 1847.66 | \n",
"
\n",
" \n",
" 10 | \n",
@@ -153,6 +164,7 @@
" 64 | \n",
" 1992 | \n",
" 1994 | \n",
+ " 2061.00 | \n",
" 2062.28 | \n",
"
\n",
" \n",
@@ -163,6 +175,7 @@
" 55 | \n",
" 2390 | \n",
" 2391 | \n",
+ " 2431.00 | \n",
" 2432.45 | \n",
"
\n",
" \n",
@@ -174,6 +187,7 @@
" 2262 | \n",
" 2264 | \n",
" 2288.93 | \n",
+ " 2288.93 | \n",
"
\n",
" \n",
" 13 | \n",
@@ -183,6 +197,7 @@
" 12 | \n",
" 3970 | \n",
" 3974 | \n",
+ " 4043.00 | \n",
" 4048.86 | \n",
"
\n",
" \n",
@@ -194,6 +209,7 @@
" 384 | \n",
" 386 | \n",
" 398.80 | \n",
+ " 398.80 | \n",
"
\n",
" \n",
" 15 | \n",
@@ -204,6 +220,7 @@
" 1557 | \n",
" 1558 | \n",
" 1579.77 | \n",
+ " 1579.77 | \n",
"
\n",
" \n",
" 16 | \n",
@@ -213,6 +230,7 @@
" 57 | \n",
" 1984 | \n",
" 1985 | \n",
+ " 2059.00 | \n",
" 2080.37 | \n",
"
\n",
" \n",
@@ -224,6 +242,7 @@
" 2399 | \n",
" 2401 | \n",
" 2432.85 | \n",
+ " 2432.85 | \n",
"
\n",
" \n",
" 18 | \n",
@@ -234,6 +253,7 @@
" 1990 | \n",
" 1994 | \n",
" 2072.06 | \n",
+ " 2072.06 | \n",
"
\n",
" \n",
" 19 | \n",
@@ -244,6 +264,7 @@
" 1660 | \n",
" 1662 | \n",
" 1681.57 | \n",
+ " 1681.57 | \n",
"
\n",
" \n",
"\n",
@@ -272,30 +293,30 @@
"18 audio18 0 20 41 1990 1994 \n",
"19 audio19 0 26 44 1660 1662 \n",
"\n",
- " audio_end \n",
- "0 2607.16 \n",
- "1 2451.75 \n",
- "2 2488.09 \n",
- "3 2096.62 \n",
- "4 2191.20 \n",
- "5 2554.71 \n",
- "6 2190.18 \n",
- "7 1767.36 \n",
- "8 2048.23 \n",
- "9 1847.66 \n",
- "10 2062.28 \n",
- "11 2432.45 \n",
- "12 2288.93 \n",
- "13 4048.86 \n",
- "14 398.80 \n",
- "15 1579.77 \n",
- "16 2080.37 \n",
- "17 2432.85 \n",
- "18 2072.06 \n",
- "19 1681.57 "
+ " end_clap_end audio_end \n",
+ "0 2602.00 2607.16 \n",
+ "1 2451.75 2451.75 \n",
+ "2 2488.09 2488.09 \n",
+ "3 2096.62 2096.62 \n",
+ "4 2191.20 2191.20 \n",
+ "5 2547.00 2554.71 \n",
+ "6 2189.50 2190.18 \n",
+ "7 1767.36 1767.36 \n",
+ "8 2048.23 2048.23 \n",
+ "9 1847.66 1847.66 \n",
+ "10 2061.00 2062.28 \n",
+ "11 2431.00 2432.45 \n",
+ "12 2288.93 2288.93 \n",
+ "13 4043.00 4048.86 \n",
+ "14 398.80 398.80 \n",
+ "15 1579.77 1579.77 \n",
+ "16 2059.00 2080.37 \n",
+ "17 2432.85 2432.85 \n",
+ "18 2072.06 2072.06 \n",
+ "19 1681.57 1681.57 "
]
},
- "execution_count": 3,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -308,7 +329,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -487,7 +508,7 @@
"19 https://www.youtube.com/watch?v=hXTyxNccdjM "
]
},
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
diff --git a/notebooks/display.ipynb b/notebooks/display.ipynb
index 83f3f3c..e7cf887 100644
--- a/notebooks/display.ipynb
+++ b/notebooks/display.ipynb
@@ -9,7 +9,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 41,
"metadata": {},
"outputs": [
{
@@ -38,13 +38,13 @@
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
"metadata": {},
@@ -52,9 +52,10 @@
}
],
"source": [
- "audio_path = \"../audio/mp3/audio19.mp3\"\n",
+ "audio_no = 19\n",
+ "audio_path = f\"../audio/mp3/audio{audio_no}.mp3\"\n",
"\n",
- "start = 1662\n",
+ "start = 1680\n",
"# end = 44\n",
"\n",
"# slice the performance\n",
diff --git a/notebooks/features.ipynb b/notebooks/features.ipynb
new file mode 100644
index 0000000..e69de29
diff --git a/notebooks/test.csv b/notebooks/test.csv
new file mode 100644
index 0000000..f06df28
--- /dev/null
+++ b/notebooks/test.csv
@@ -0,0 +1,2 @@
+rms
+2
diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb
new file mode 100644
index 0000000..f72d278
--- /dev/null
+++ b/notebooks/test.ipynb
@@ -0,0 +1,45 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.DataFrame({\n",
+ " 'rms': []\n",
+ "})\n",
+ "\n",
+ "df.loc[len(df)] = 2\n",
+ "\n",
+ "df.reset_index(drop=True)\n",
+ "\n",
+ "df.to_csv(\"./test.csv\", index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/test.wav b/notebooks/test.wav
index b0b6989..28b689a 100644
Binary files a/notebooks/test.wav and b/notebooks/test.wav differ
diff --git a/src/data/make_dataset.py b/src/data/make_dataset.py
index 9cbef92..df20492 100644
--- a/src/data/make_dataset.py
+++ b/src/data/make_dataset.py
@@ -127,6 +127,8 @@ def split(start: int, end: int, audio_name="/path/to/sample", audio_result_name=
clap_end_time = data.loc[i, 'clap_end'] * 1000
while (clap_start_time + duration < clap_end_time):
+ # print(f"{j}.wav is from {audio}.mp3")
+
if (os.path.exists("data/yes/" + f"{j}.wav")):
clap_start_time += duration
j += 1
@@ -146,9 +148,11 @@ def split(start: int, end: int, audio_name="/path/to/sample", audio_result_name=
# clapping after performance
end_clap_time = data.loc[i, 'end_clap_start'] * 1000
- audio_end_time = data.loc[i, 'audio_end'] * 1000
+ audio_end_time = data.loc[i, 'end_clap_end'] * 1000
while (end_clap_time + duration < audio_end_time):
+ # print(f"{j}.wav is from {audio}.mp3")
+
if (os.path.exists("data/yes/" + f"{j}.wav")):
end_clap_time += duration
j += 1
diff --git a/src/features/build_audio_features.py b/src/features/build_audio_features.py
index e69de29..4e92884 100644
--- a/src/features/build_audio_features.py
+++ b/src/features/build_audio_features.py
@@ -0,0 +1,86 @@
+"""Build a CSV of clip-level audio features for the 'yes' and 'no' classes.
+
+Paths are relative to the working directory; the Makefile ``features`` target
+runs this script from ``src/features``.
+"""
+
+import os
+
+import librosa
+import numpy as np
+import pandas as pd
+
+# frame/FFT window and hop sizes passed to every librosa feature call
+FRAME_LENGTH = 2048
+HOP_LENGTH = 512
+
+# dataset root holding the "yes" and "no" folders and the exported CSV
+DATA_DIR = "../../data"
+
+# column order of the exported CSV
+COLUMNS = [
+    'audio_name',
+    'rms',
+    'zcr',
+    'spectral_centroid',
+    'spectral_bandwidth',
+    'spectral_flatness',
+    'class',
+]
+
+
+def extract_features(audio_path):
+    """Return the clip-level mean of each frame-wise feature of one audio file.
+
+    The result is ordered like the feature columns of COLUMNS: rms, zcr,
+    spectral_centroid, spectral_bandwidth, spectral_flatness.
+    """
+    audio, sr = librosa.load(audio_path)
+
+    frame_features = [
+        librosa.feature.rms(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH),
+        librosa.feature.zero_crossing_rate(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH),
+        librosa.feature.spectral_centroid(y=audio, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH),
+        librosa.feature.spectral_bandwidth(y=audio, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH),
+        librosa.feature.spectral_flatness(y=audio, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH),
+    ]
+
+    # aggregate every frame-wise series into a single value per clip
+    return [np.mean(values) for values in frame_features]
+
+
+def build_class_rows(label):
+    """Return one CSV row (name, features..., label) per file in DATA_DIR/<label>."""
+    class_dir = os.path.join(DATA_DIR, label)
+    rows = []
+
+    # sorted() keeps the row order independent of the file system's listing order
+    for audio_name in sorted(os.listdir(class_dir)):
+        audio_path = os.path.join(class_dir, audio_name)
+
+        # skip hidden files and sub-directories instead of handing them to librosa
+        if audio_name.startswith(".") or not os.path.isfile(audio_path):
+            continue
+
+        rows.append([audio_name, *extract_features(audio_path), label])
+
+    return rows
+
+
+def main():
+    """Extract the features of both classes and export them to DATA_DIR/data.csv."""
+    rows = []
+
+    for label in ("yes", "no"):
+        print(f"Building Audio Features for '{label}' class")
+        rows.extend(build_class_rows(label))
+
+    print("Exporting to csv")
+
+    # build the frame once instead of enlarging it one row at a time
+    df = pd.DataFrame(rows, columns=COLUMNS)
+    df.to_csv(os.path.join(DATA_DIR, "data.csv"), index=False)
+
+
+if __name__ == "__main__":
+    main()