Add files via upload

yst3147 · Jan 25, 2022 · 70a37b4 · 70a37b4
1 parent 3c2aacd
commit 70a37b4
Showing 1 changed file with 203 additions and 0 deletions.
diff --git a/ER_audio/Audio_preprocess.ipynb b/ER_audio/Audio_preprocess.ipynb
@@ -0,0 +1,203 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a3732a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import librosa\n",
+    "import glob\n",
+    "import json\n",
+    "from pathlib import Path\n",
+    "import numpy as np\n",
+    "import wave\n",
+    "import pandas as pd\n",
+    "from pydub import AudioSegment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "981a45be",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def find_values(id, json_repr):\n",
+    "    results = []\n",
+    "    def _decode_dict(a_dict):\n",
+    "        try:\n",
+    "            results.append(a_dict[id])\n",
+    "        except KeyError:\n",
+    "            pass\n",
+    "        return a_dict\n",
+    "    json.loads(json_repr, object_hook=_decode_dict) # Return value ignored.\n",
+    "    return results\n",
+    "\n",
+    "emotion_list = []\n",
+    "paths = list(Path('/audio/').rglob('*.wav'))\n",
+    "\n",
+    "for k in range(len(paths)):\n",
+    "    with open('clip_'+str(3601+k)+'.json', 'r',encoding=\"UTF-8\") as f:\n",
+    "        json_data = json.load(f)\n",
+    "\n",
+    "    json_repr = json.dumps(json_data['data'])\n",
+    "\n",
+    "    start = find_values('script_start', json_repr)\n",
+    "    index = [start.index(x) for x in dict.fromkeys(start)]\n",
+    "    start_list = list(dict.fromkeys(start))\n",
+    "\n",
+    "    end = find_values('script_end', json_repr)\n",
+    "    end_list = list(dict.fromkeys(end))\n",
+    "\n",
+    "    sound = find_values('sound', json_repr)\n",
+    "    sound_list = []\n",
+    "    for m in range(len(index)):\n",
+    "        sound_list.append(sound[m])\n",
+    "\n",
+    "    for j in range(len(sound_list)):\n",
+    "        emotion = list((sound_list[j].values()))[0]\n",
+    "        emotion_list.append(emotion)\n",
+    "    \n",
+    "    for i in range(len(start_list)):\n",
+    "        start = int(start_list[i]/30*1000) # Works in milliseconds\n",
+    "        end = int(end_list[i]/30*1000)\n",
+    "        Audio = AudioSegment.from_wav('clip_'+str(3601+k)+'.wav')\n",
+    "        newAudio = Audio[start:end]\n",
+    "        newAudio.export('clip_' + str(3601+k) + '_' + str(i) + '.wav', format=\"wav\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6032307c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "a = []\n",
+    "for i in range(len(emotion_list)):\n",
+    "    if emotion_list[i] == 'happy':\n",
+    "        a.append(int(0))\n",
+    "    if emotion_list[i] == 'surprise':\n",
+    "        a.append(int(1))\n",
+    "    if emotion_list[i] == 'angry':\n",
+    "        a.append(int(2))\n",
+    "    if emotion_list[i] == 'sad':\n",
+    "        a.append(int(3))\n",
+    "    if emotion_list[i] == 'dislike':\n",
+    "        a.append(int(4))\n",
+    "    if emotion_list[i] == 'fear':\n",
+    "        a.append(int(5))\n",
+    "    if emotion_list[i] == 'contempt':\n",
+    "        a.append(int(6))\n",
+    "    if emotion_list[i] == 'neutral':\n",
+    "        a.append(int(7))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4916f7ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "label = np.array(a)\n",
+    "label.reshape(-1,1)\n",
+    "n_values = np.max(a) + 1\n",
+    "oh = np.eye(n_values)[a] # one-hot\n",
+    "np.save('C:/Users/User/Desktop/audio/label', oh)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7afdda7e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_audio_file(file_path):\n",
+    "    input_length = 44100*4\n",
+    "    data = librosa.core.load(file_path,sr=44100)[0] #, sr=16000\n",
+    "    if len(data)>input_length:\n",
+    "        data = data[:input_length]\n",
+    "    else:\n",
+    "        data = np.pad(data, (0, max(0, input_length - len(data))), \"constant\")\n",
+    "    return data\n",
+    "def plot_time_series(data):\n",
+    "    fig = plt.figure(figsize=(14, 8))\n",
+    "    plt.title('Raw wave ')\n",
+    "    plt.ylabel('Amplitude')\n",
+    "    plt.plot(np.linspace(0, 4, len(data)), data)\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11d7524a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### Augmentation\n",
+    "# changing pitch\n",
+    "def pitch_change(data, sampling_rate):\n",
+    "    pitch_factor = random.uniform(0, 5)\n",
+    "    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)\n",
+    "\n",
+    "# shifting\n",
+    "def shifting(data):\n",
+    "    sft = random.randrange(5000, 10000, 1000)\n",
+    "    data_roll = np.roll(data, sft)\n",
+    "    return data_roll\n",
+    "\n",
+    "# changing speed\n",
+    "def speed_change(data):\n",
+    "    speed_factor = random.uniform(0.5, 1.5)\n",
+    "    return librosa.effects.time_stretch(data, speed_factor)\n",
+    "\n",
+    "# white noise\n",
+    "def white_noise(data):\n",
+    "    wn = np.random.randn(len(data))\n",
+    "    data_wn = data + 0.005*wn\n",
+    "    return data_wn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b72c278c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def features_extractor_aug(file_name): # mfcc feature extract\n",
+    "    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')\n",
+    "    mfccs_audio = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)\n",
+    "    mfccs_scaled_audio = np.mean(mfccs_audio.T,axis=0)\n",
+    "    return mfccs_scaled_audio"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}