Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
min-ku authored Jan 25, 2022
1 parent 3c2aacd commit 70a37b4
Showing 1 changed file with 203 additions and 0 deletions.
203 changes: 203 additions & 0 deletions ER_audio/Audio_preprocess.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0a3732a1",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import librosa\n",
"import glob\n",
"import json\n",
"from pathlib import Path\n",
"import numpy as np\n",
"import wave\n",
"import pandas as pd\n",
"from pydub import AudioSegment"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "981a45be",
"metadata": {},
"outputs": [],
"source": [
"def find_values(id, json_repr):\n",
" results = []\n",
" def _decode_dict(a_dict):\n",
" try:\n",
" results.append(a_dict[id])\n",
" except KeyError:\n",
" pass\n",
" return a_dict\n",
" json.loads(json_repr, object_hook=_decode_dict) # Return value ignored.\n",
" return results\n",
"\n",
"emotion_list = []\n",
"paths = list(Path('/audio/').rglob('*.wav'))\n",
"\n",
"for k in range(len(paths)):\n",
" with open('clip_'+str(3601+k)+'.json', 'r',encoding=\"UTF-8\") as f:\n",
" json_data = json.load(f)\n",
"\n",
" json_repr = json.dumps(json_data['data'])\n",
"\n",
" start = find_values('script_start', json_repr)\n",
" index = [start.index(x) for x in dict.fromkeys(start)]\n",
" start_list = list(dict.fromkeys(start))\n",
"\n",
" end = find_values('script_end', json_repr)\n",
" end_list = list(dict.fromkeys(end))\n",
"\n",
" sound = find_values('sound', json_repr)\n",
" sound_list = []\n",
" for m in range(len(index)):\n",
" sound_list.append(sound[m])\n",
"\n",
" for j in range(len(sound_list)):\n",
" emotion = list((sound_list[j].values()))[0]\n",
" emotion_list.append(emotion)\n",
" \n",
" for i in range(len(start_list)):\n",
" start = int(start_list[i]/30*1000) # Works in milliseconds\n",
" end = int(end_list[i]/30*1000)\n",
" Audio = AudioSegment.from_wav('clip_'+str(3601+k)+'.wav')\n",
" newAudio = Audio[start:end]\n",
" newAudio.export('clip_' + str(3601+k) + '_' + str(i) + '.wav', format=\"wav\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6032307c",
"metadata": {},
"outputs": [],
"source": [
"a = []\n",
"for i in range(len(emotion_list)):\n",
" if emotion_list[i] == 'happy':\n",
" a.append(int(0))\n",
" if emotion_list[i] == 'surprise':\n",
" a.append(int(1))\n",
" if emotion_list[i] == 'angry':\n",
" a.append(int(2))\n",
" if emotion_list[i] == 'sad':\n",
" a.append(int(3))\n",
" if emotion_list[i] == 'dislike':\n",
" a.append(int(4))\n",
" if emotion_list[i] == 'fear':\n",
" a.append(int(5))\n",
" if emotion_list[i] == 'contempt':\n",
" a.append(int(6))\n",
" if emotion_list[i] == 'neutral':\n",
" a.append(int(7))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4916f7ea",
"metadata": {},
"outputs": [],
"source": [
"label = np.array(a)\n",
"label.reshape(-1,1)\n",
"n_values = np.max(a) + 1\n",
"oh = np.eye(n_values)[a] # one-hot\n",
"np.save('C:/Users/User/Desktop/audio/label', oh)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7afdda7e",
"metadata": {},
"outputs": [],
"source": [
"def load_audio_file(file_path):\n",
" input_length = 44100*4\n",
" data = librosa.core.load(file_path,sr=44100)[0] #, sr=16000\n",
" if len(data)>input_length:\n",
" data = data[:input_length]\n",
" else:\n",
" data = np.pad(data, (0, max(0, input_length - len(data))), \"constant\")\n",
" return data\n",
"def plot_time_series(data):\n",
" fig = plt.figure(figsize=(14, 8))\n",
" plt.title('Raw wave ')\n",
" plt.ylabel('Amplitude')\n",
" plt.plot(np.linspace(0, 4, len(data)), data)\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11d7524a",
"metadata": {},
"outputs": [],
"source": [
"### Augmentation\n",
"# changing pitch\n",
"def pitch_change(data, sampling_rate):\n",
" pitch_factor = random.uniform(0, 5)\n",
" return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)\n",
"\n",
"# shifting\n",
"def shifting(data):\n",
" sft = random.randrange(5000, 10000, 1000)\n",
" data_roll = np.roll(data, sft)\n",
" return data_roll\n",
"\n",
"# changing speed\n",
"def speed_change(data):\n",
" speed_factor = random.uniform(0.5, 1.5)\n",
" return librosa.effects.time_stretch(data, speed_factor)\n",
"\n",
"# white noise\n",
"def white_noise(data):\n",
" wn = np.random.randn(len(data))\n",
" data_wn = data + 0.005*wn\n",
" return data_wn"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b72c278c",
"metadata": {},
"outputs": [],
"source": [
"def features_extractor_aug(file_name): # mfcc feature extract\n",
" audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')\n",
" mfccs_audio = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)\n",
" mfccs_scaled_audio = np.mean(mfccs_audio.T,axis=0)\n",
" return mfccs_scaled_audio"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 70a37b4

Please sign in to comment.