
Commit

update
Former-commit-id: bd48e5f
nq555222 committed Mar 14, 2017
1 parent c6819f0 commit c324671
Showing 13 changed files with 414 additions and 51 deletions.
Binary file added PER.png
9 changes: 9 additions & 0 deletions README.md
@@ -1,8 +1,17 @@
# Automatic-Speech-Recognition
End-to-end automatic speech recognition system implemented in TensorFlow.

## Test Updates
- [x] **Test version 1.0 released** (2017-02-26)

## Recent Updates
- [x] **Support TensorFlow r1.0** (2017-02-24)
- [x] **Support dropout for dynamic rnn** (2017-03-11) (see the sketch below)
- [x] **Support running from a shell script** (2017-03-11)
- [x] **Support automatic evaluation every few training epochs** (2017-03-11)
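
A minimal sketch of dropout applied to a dynamic RNN under the TensorFlow r1.0 API (not code from this commit; the cell size, feature dimension, and placeholder names are illustrative assumptions):

```python
import tensorflow as tf

# Illustrative shapes: (batch, time, 39) MFCC-like features.
inputs = tf.placeholder(tf.float32, [None, None, 39])
seq_len = tf.placeholder(tf.int32, [None])      # true length of each utterance
keep_prob = tf.placeholder(tf.float32)          # feed 1.0 at evaluation time

cell = tf.contrib.rnn.LSTMCell(256)
cell = tf.contrib.rnn.DropoutWrapper(cell,
                                     input_keep_prob=keep_prob,
                                     output_keep_prob=keep_prob)
outputs, state = tf.nn.dynamic_rnn(cell, inputs,
                                   sequence_length=seq_len,
                                   dtype=tf.float32)
```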

## PER of a dynamic BLSTM on the TIMIT database (with only casual tuning due to limited time)
![image](https://github.com/zzw922cn/Automatic_Speech_Recognition/blob/master/PER.png)
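
PER here is the phoneme error rate: the edit distance between the decoded phoneme sequence and the reference transcription, normalized by the reference length. A generic sketch, not code from this repository:

```python
def edit_distance(ref, hyp):
    # classic dynamic-programming Levenshtein distance
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i
    for j in range(len(hyp) + 1):
        d[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            d[i][j] = min(d[i - 1][j] + 1,                               # deletion
                          d[i][j - 1] + 1,                               # insertion
                          d[i - 1][j - 1] + (ref[i - 1] != hyp[j - 1]))  # substitution
    return d[len(ref)][len(hyp)]

def per(reference, decoded):
    ''' phoneme error rate: edit distance normalized by reference length '''
    return float(edit_distance(reference, decoded)) / len(reference)

# e.g. per(['sil','ah','b','aw','t'], ['sil','ah','b','aw']) == 0.2
```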

## Content
This is a powerful library for **automatic speech recognition**, implemented in TensorFlow with support for training on CPU/GPU. The library contains the following models that you can choose from to train your own model:
250 changes: 250 additions & 0 deletions src/analysis/.ipynb_checkpoints/Untitled0-checkpoint.ipynb
@@ -0,0 +1,250 @@
{
"metadata": {
"name": "",
"signature": "sha256:1a0b483ab25223e35ebd5a1511ca1cde6f8502cdaf2ae905b41700029add44a1"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"from pandas import Series,DataFrame\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"sns.set_style('whitegrid')\n",
"%matplotlib inline\n",
"\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.svm import SVC,LinearSVC\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.naive_bayes import GaussianNB\n",
"\n",
"titanic_df = pd.read_csv(\"~/Downloads/train.csv\", dtype={\"Age\": np.float64}, )\n",
"test_df = pd.read_csv(\"~/Downloads/test.csv\", dtype={\"Age\": np.float64}, )\n",
"\n",
"titanic_df.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module dateutil was already imported from /home/pony/.local/lib/python2.7/site-packages/dateutil/__init__.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n",
" from pkg_resources import resource_stream\n",
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module decorator was already imported from /home/pony/.local/lib/python2.7/site-packages/decorator.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n",
" from pkg_resources import resource_stream\n",
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module pytz was already imported from /home/pony/.local/lib/python2.7/site-packages/pytz/__init__.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n",
" from pkg_resources import resource_stream\n",
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module six was already imported from /home/pony/.local/lib/python2.7/site-packages/six.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n",
" from pkg_resources import resource_stream\n"
]
},
{
"html": [
"<div style=\"max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 1,
"text": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38 1 \n",
"2 Heikkinen, Miss. Laina female 26 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 \n",
"4 Allen, Mr. William Henry male 35 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"titanic_df.info()\n",
"print '--------------------------------------------------------'\n",
"test_df.info()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 891 entries, 0 to 890\n",
"Data columns (total 12 columns):\n",
"PassengerId 891 non-null int64\n",
"Survived 891 non-null int64\n",
"Pclass 891 non-null int64\n",
"Name 891 non-null object\n",
"Sex 891 non-null object\n",
"Age 714 non-null float64\n",
"SibSp 891 non-null int64\n",
"Parch 891 non-null int64\n",
"Ticket 891 non-null object\n",
"Fare 891 non-null float64\n",
"Cabin 204 non-null object\n",
"Embarked 889 non-null object\n",
"dtypes: float64(2), int64(5), object(5)\n",
"memory usage: 90.5+ KB\n",
"--------------------------------------------------------\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 418 entries, 0 to 417\n",
"Data columns (total 11 columns):\n",
"PassengerId 418 non-null int64\n",
"Pclass 418 non-null int64\n",
"Name 418 non-null object\n",
"Sex 418 non-null object\n",
"Age 332 non-null float64\n",
"SibSp 418 non-null int64\n",
"Parch 418 non-null int64\n",
"Ticket 418 non-null object\n",
"Fare 417 non-null float64\n",
"Cabin 91 non-null object\n",
"Embarked 418 non-null object\n",
"dtypes: float64(2), int64(4), object(5)\n",
"memory usage: 39.2+ KB\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"titanic_df=titanic_df.drop(['PassengerId','Name','Ticket'],axis=1)\n",
"test_df = test_df.drop(['Name','Ticket'],axis=1)\n"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Empty file modified src/analysis/extractdata.py
100755 → 100644
Empty file.
69 changes: 69 additions & 0 deletions src/analysis/parseLog.py
@@ -0,0 +1,69 @@
# -*- coding:utf-8 -*-
''' data visualization for automatic speech recognition
author:
iiiiiiiiiiii iiiiiiiiiiii !!!!!!! !!!!!!
# ### # ### ### I# #:
# ### # I##; ##; ## ##
### ### !## #### #
### ### ### ## ### #'
!##; `##% ##; ## ### ##
### ### $## `# ## #
### # ### # #### ####;
`### -# ### `# ### ###
############## ############## `# #
date:2017-3-14
'''

import os
import time
import numpy as np
import matplotlib.pyplot as plt

def readlogs(rootdir):
    ''' function for reading asr logs to visualize '''
    trainERs = []
    testERs = []
    fullFilenames = []
    for subdir, dirs, files in os.walk(rootdir):
        for f in files:
            fullFilename = os.path.join(subdir, f)
            fullFilenames.append(fullFilename)
    fullFilenames.sort(key=lambda x: os.path.getctime(x))
    print fullFilenames
    epoch = 0
    if True:
        for fullFilename in fullFilenames:
            if fullFilename.endswith('.txt'):
                with open(fullFilename, 'r') as train_file:
                    content = train_file.readlines()
                    for line in content:
                        if 'train error rate' in line:
                            trainER = line.split(':')[2]
                            trainERs.append(float(trainER))
                            epoch += 1

            elif fullFilename.endswith('TEST'):
                with open(fullFilename, 'r') as test_file:
                    content = test_file.readlines()
                    for line in content:
                        if 'test error rate' in line:
                            testER = line.split(':')[1]
                            testERs.append(float(testER))

    return trainERs, testERs

def visualize(trainERs, testERs):
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    ax1.plot(trainERs, label='train phoneme error rate')
    ax2.plot(testERs, label='test phoneme error rate')
    ax1.legend()
    ax2.legend()
    ax1.grid()
    ax2.grid()
    plt.suptitle('dynamic bidirectional LSTM for Automatic Speech Recognition')
    plt.show()

rootdir = '/home/pony/github/data/ASR/log/'
train, test = readlogs(rootdir)
visualize(train, test)
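
The log format that `readlogs` expects is not included in this commit; the lines below are hypothetical examples, shaped only to be consistent with the positional `split(':')` lookups used above.

```python
# Hypothetical log lines (not from this repository):
sample_train_line = "INFO:train error rate:0.512"   # split(':')[2] -> '0.512'
sample_test_line = "test error rate:0.438"          # split(':')[1] -> '0.438'
```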
Empty file modified src/analysis/pplot.py
100755 → 100644
Empty file.
Binary file added src/feature/calcmfcc.pyc
Binary file not shown.
24 changes: 17 additions & 7 deletions src/feature/libri_preprocess.py
@@ -31,15 +31,22 @@
import glob
import sys

rootdir = '/home/pony/ASR/datasets/LibriSpeech/train-other-500/'


count = 0
subset = 0
labels=[]
#subset = 0
#labels=[]

keywords = ['dev-clean', 'dev-other', 'test-clean', 'test-other', 'train-clean-100', 'train-clean-360', 'train-other-500']

keyword = keywords[6]
label_dir = '/home/pony/github/data/libri/cha-level/'+keyword+'/label/'
mfcc_dir = '/home/pony/github/data/libri/cha-level/'+keyword+'/mfcc/'
if not os.path.exists(label_dir):
os.makedirs(label_dir)
if not os.path.exists(mfcc_dir):
os.makedirs(mfcc_dir)

label_dir = '/home/pony/github/ASR_libri/libri/cha-level/mfcc_and_label/label/'
mfcc_dir = '/home/pony/github/ASR_libri/libri/cha-level/mfcc_and_label/mfcc/'
rootdir = '/media/pony/Seagate Expansion Drive/学习/语音识别/ASR数据库/LibriSpeech/'+keyword

if True:
for subdir, dirs, files in os.walk(rootdir):
@@ -50,7 +57,7 @@
if f.endswith('.wav'):
print fullFilename
(rate,sig)= wav.read(fullFilename)
mfcc = calcMFCC_delta_delta(sig,rate,win_length=0.020,win_step=0.020)
mfcc = calcMFCC_delta_delta(sig,rate,win_length=0.020,win_step=0.010)
# transpose mfcc to array of (39,time_length)
mfcc = np.transpose(mfcc)
print mfcc.shape
@@ -63,9 +70,12 @@
characters = f.readline().strip()
print characters
targets = []
## 28 output symbols in total: space (0), a-z (1-26), apostrophe (27)
for c in characters:
if c == ' ':
targets.append(0)
elif c == "'":
targets.append(27)
else:
targets.append(ord(c)-96) # letters start from 1
targets = np.array(targets)
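
For clarity (and not as part of the diff), the 28-symbol mapping implied by the hunk above, together with a hypothetical decoder for mapping label sequences back to text, could look like this:

```python
# Sketch of the character-to-target mapping used above:
# space -> 0, 'a'..'z' -> 1..26, apostrophe -> 27 (illustrative, not in the commit).
def encode(text):
    targets = []
    for c in text:
        if c == ' ':
            targets.append(0)
        elif c == "'":
            targets.append(27)
        else:
            targets.append(ord(c) - 96)   # 'a' is 97, so letters start at 1
    return targets

def decode(targets):
    chars = []
    for t in targets:
        if t == 0:
            chars.append(' ')
        elif t == 27:
            chars.append("'")
        else:
            chars.append(chr(t + 96))
    return ''.join(chars)

# decode(encode("it's a test")) == "it's a test"
```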
Binary file added src/feature/sigprocess.pyc
Binary file not shown.