forked from zzw922cn/Automatic_Speech_Recognition
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Former-commit-id: bd48e5f
- Loading branch information
Showing
13 changed files
with
414 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
250 changes: 250 additions & 0 deletions
250
src/analysis/.ipynb_checkpoints/Untitled0-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,250 @@ | ||
{ | ||
"metadata": { | ||
"name": "", | ||
"signature": "sha256:1a0b483ab25223e35ebd5a1511ca1cde6f8502cdaf2ae905b41700029add44a1" | ||
}, | ||
"nbformat": 3, | ||
"nbformat_minor": 0, | ||
"worksheets": [ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"collapsed": false, | ||
"input": [ | ||
"import pandas as pd\n", | ||
"from pandas import Series,DataFrame\n", | ||
"import numpy as np\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import seaborn as sns\n", | ||
"sns.set_style('whitegrid')\n", | ||
"%matplotlib inline\n", | ||
"\n", | ||
"from sklearn.linear_model import LogisticRegression\n", | ||
"from sklearn.svm import SVC,LinearSVC\n", | ||
"from sklearn.ensemble import RandomForestClassifier\n", | ||
"from sklearn.neighbors import KNeighborsClassifier\n", | ||
"from sklearn.naive_bayes import GaussianNB\n", | ||
"\n", | ||
"titanic_df = pd.read_csv(\"~/Downloads/train.csv\", dtype={\"Age\": np.float64}, )\n", | ||
"test_df = pd.read_csv(\"~/Downloads/test.csv\", dtype={\"Age\": np.float64}, )\n", | ||
"\n", | ||
"titanic_df.head()" | ||
], | ||
"language": "python", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"stream": "stderr", | ||
"text": [ | ||
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module dateutil was already imported from /home/pony/.local/lib/python2.7/site-packages/dateutil/__init__.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n", | ||
" from pkg_resources import resource_stream\n", | ||
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module decorator was already imported from /home/pony/.local/lib/python2.7/site-packages/decorator.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n", | ||
" from pkg_resources import resource_stream\n", | ||
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module pytz was already imported from /home/pony/.local/lib/python2.7/site-packages/pytz/__init__.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n", | ||
" from pkg_resources import resource_stream\n", | ||
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module six was already imported from /home/pony/.local/lib/python2.7/site-packages/six.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n", | ||
" from pkg_resources import resource_stream\n" | ||
] | ||
}, | ||
{ | ||
"html": [ | ||
"<div style=\"max-width:1500px;overflow:auto;\">\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>PassengerId</th>\n", | ||
" <th>Survived</th>\n", | ||
" <th>Pclass</th>\n", | ||
" <th>Name</th>\n", | ||
" <th>Sex</th>\n", | ||
" <th>Age</th>\n", | ||
" <th>SibSp</th>\n", | ||
" <th>Parch</th>\n", | ||
" <th>Ticket</th>\n", | ||
" <th>Fare</th>\n", | ||
" <th>Cabin</th>\n", | ||
" <th>Embarked</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>1</td>\n", | ||
" <td>0</td>\n", | ||
" <td>3</td>\n", | ||
" <td>Braund, Mr. Owen Harris</td>\n", | ||
" <td>male</td>\n", | ||
" <td>22</td>\n", | ||
" <td>1</td>\n", | ||
" <td>0</td>\n", | ||
" <td>A/5 21171</td>\n", | ||
" <td>7.2500</td>\n", | ||
" <td>NaN</td>\n", | ||
" <td>S</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>2</td>\n", | ||
" <td>1</td>\n", | ||
" <td>1</td>\n", | ||
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", | ||
" <td>female</td>\n", | ||
" <td>38</td>\n", | ||
" <td>1</td>\n", | ||
" <td>0</td>\n", | ||
" <td>PC 17599</td>\n", | ||
" <td>71.2833</td>\n", | ||
" <td>C85</td>\n", | ||
" <td>C</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>3</td>\n", | ||
" <td>1</td>\n", | ||
" <td>3</td>\n", | ||
" <td>Heikkinen, Miss. Laina</td>\n", | ||
" <td>female</td>\n", | ||
" <td>26</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>STON/O2. 3101282</td>\n", | ||
" <td>7.9250</td>\n", | ||
" <td>NaN</td>\n", | ||
" <td>S</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>4</td>\n", | ||
" <td>1</td>\n", | ||
" <td>1</td>\n", | ||
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", | ||
" <td>female</td>\n", | ||
" <td>35</td>\n", | ||
" <td>1</td>\n", | ||
" <td>0</td>\n", | ||
" <td>113803</td>\n", | ||
" <td>53.1000</td>\n", | ||
" <td>C123</td>\n", | ||
" <td>S</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>5</td>\n", | ||
" <td>0</td>\n", | ||
" <td>3</td>\n", | ||
" <td>Allen, Mr. William Henry</td>\n", | ||
" <td>male</td>\n", | ||
" <td>35</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>373450</td>\n", | ||
" <td>8.0500</td>\n", | ||
" <td>NaN</td>\n", | ||
" <td>S</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"metadata": {}, | ||
"output_type": "pyout", | ||
"prompt_number": 1, | ||
"text": [ | ||
" PassengerId Survived Pclass \\\n", | ||
"0 1 0 3 \n", | ||
"1 2 1 1 \n", | ||
"2 3 1 3 \n", | ||
"3 4 1 1 \n", | ||
"4 5 0 3 \n", | ||
"\n", | ||
" Name Sex Age SibSp \\\n", | ||
"0 Braund, Mr. Owen Harris male 22 1 \n", | ||
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38 1 \n", | ||
"2 Heikkinen, Miss. Laina female 26 0 \n", | ||
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 \n", | ||
"4 Allen, Mr. William Henry male 35 0 \n", | ||
"\n", | ||
" Parch Ticket Fare Cabin Embarked \n", | ||
"0 0 A/5 21171 7.2500 NaN S \n", | ||
"1 0 PC 17599 71.2833 C85 C \n", | ||
"2 0 STON/O2. 3101282 7.9250 NaN S \n", | ||
"3 0 113803 53.1000 C123 S \n", | ||
"4 0 373450 8.0500 NaN S " | ||
] | ||
} | ||
], | ||
"prompt_number": 1 | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"collapsed": false, | ||
"input": [ | ||
"titanic_df.info()\n", | ||
"print '--------------------------------------------------------'\n", | ||
"test_df.info()" | ||
], | ||
"language": "python", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"stream": "stdout", | ||
"text": [ | ||
"<class 'pandas.core.frame.DataFrame'>\n", | ||
"Int64Index: 891 entries, 0 to 890\n", | ||
"Data columns (total 12 columns):\n", | ||
"PassengerId 891 non-null int64\n", | ||
"Survived 891 non-null int64\n", | ||
"Pclass 891 non-null int64\n", | ||
"Name 891 non-null object\n", | ||
"Sex 891 non-null object\n", | ||
"Age 714 non-null float64\n", | ||
"SibSp 891 non-null int64\n", | ||
"Parch 891 non-null int64\n", | ||
"Ticket 891 non-null object\n", | ||
"Fare 891 non-null float64\n", | ||
"Cabin 204 non-null object\n", | ||
"Embarked 889 non-null object\n", | ||
"dtypes: float64(2), int64(5), object(5)\n", | ||
"memory usage: 90.5+ KB\n", | ||
"--------------------------------------------------------\n", | ||
"<class 'pandas.core.frame.DataFrame'>\n", | ||
"Int64Index: 418 entries, 0 to 417\n", | ||
"Data columns (total 11 columns):\n", | ||
"PassengerId 418 non-null int64\n", | ||
"Pclass 418 non-null int64\n", | ||
"Name 418 non-null object\n", | ||
"Sex 418 non-null object\n", | ||
"Age 332 non-null float64\n", | ||
"SibSp 418 non-null int64\n", | ||
"Parch 418 non-null int64\n", | ||
"Ticket 418 non-null object\n", | ||
"Fare 417 non-null float64\n", | ||
"Cabin 91 non-null object\n", | ||
"Embarked 418 non-null object\n", | ||
"dtypes: float64(2), int64(4), object(5)\n", | ||
"memory usage: 39.2+ KB\n" | ||
] | ||
} | ||
], | ||
"prompt_number": 2 | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"collapsed": false, | ||
"input": [ | ||
"titanic_df=titanic_df.drop(['PassengerId','Name','Ticket'],axis=1)\n", | ||
"test_df = test_df.drop(['Name','Ticket'],axis=1)\n" | ||
], | ||
"language": "python", | ||
"metadata": {}, | ||
"outputs": [] | ||
} | ||
], | ||
"metadata": {} | ||
} | ||
] | ||
} |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# -*- coding:utf-8 -*- | ||
''' data visualization for automatic speech recognition | ||
author: | ||
iiiiiiiiiiii iiiiiiiiiiii !!!!!!! !!!!!! | ||
# ### # ### ### I# #: | ||
# ### # I##; ##; ## ## | ||
### ### !## #### # | ||
### ### ### ## ### #' | ||
!##; `##% ##; ## ### ## | ||
### ### $## `# ## # | ||
### # ### # #### ####; | ||
`### -# ### `# ### ### | ||
############## ############## `# # | ||
date:2017-3-14 | ||
''' | ||
|
||
import os | ||
import time | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
|
||
def readlogs(rootdir):
    '''Collect train and test error rates from the ASR log files under rootdir.

    Every file found by walking ``rootdir`` is visited in ascending order of
    ``os.path.getctime``. Two kinds of file are parsed, all others are skipped:

    * names ending in ``.txt``: each line containing ``'train error rate'``
      contributes the third ``':'``-separated field, parsed as a float;
    * names ending in ``TEST``: each line containing ``'test error rate'``
      contributes the second ``':'``-separated field, parsed as a float.

    The sorted list of discovered file names is printed to stdout.

    Args:
        rootdir: directory that is walked recursively for log files.

    Returns:
        A ``(trainERs, testERs)`` tuple of two lists of floats, in the order
        the values were encountered.

    Raises:
        IndexError: a matching line has fewer ``':'``-separated fields than
            the index that is read.
        ValueError: the selected field cannot be converted to float.
    '''
    trainERs = []
    testERs = []

    fullFilenames = []
    for subdir, _, files in os.walk(rootdir):
        for name in files:
            fullFilenames.append(os.path.join(subdir, name))
    fullFilenames.sort(key=os.path.getctime)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print(fullFilenames)

    for fullFilename in fullFilenames:
        # Pick the marker text, the field index and the destination list for
        # this kind of log; the two formats put the value in different fields.
        if fullFilename.endswith('.txt'):
            marker, field, rates = 'train error rate', 2, trainERs
        elif fullFilename.endswith('TEST'):
            marker, field, rates = 'test error rate', 1, testERs
        else:
            continue

        with open(fullFilename, 'r') as log_file:
            # Iterate lazily instead of loading the whole log into memory.
            for line in log_file:
                if marker in line:
                    rates.append(float(line.split(':')[field]))

    return trainERs, testERs
|
||
def visualize(trainERs, testERs):
    '''Plot the train and test error-rate curves in two panels and show them.

    Args:
        trainERs: sequence of train error rates, drawn in the first panel.
        testERs: sequence of test error rates, drawn in the second panel.
    '''
    # One row, two columns; both panels are created with sharey=True.
    _, axes = plt.subplots(1, 2, sharey=True)
    panels = (
        (axes[0], trainERs, 'train phoneme error rate'),
        (axes[1], testERs, 'test phoneme error rate'),
    )
    for axis, rates, caption in panels:
        axis.plot(rates, label=caption)
        axis.legend()
        axis.grid()
    plt.suptitle('dynamic bidirectional LSTM for Automatic Speech Recognition')
    plt.show()
# Script body: parse the logs under the hard-coded directory, then plot the
# resulting train/test error-rate curves.
rootdir = '/home/pony/github/data/ASR/log/'
train, test = readlogs(rootdir=rootdir)
visualize(trainERs=train, testERs=test)
Empty file.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Oops, something went wrong.