improve reusable support
Former-commit-id: 870d68a
nq555222 committed Mar 15, 2017
1 parent c324671 commit a8990ae
Showing 9 changed files with 4,702 additions and 90 deletions.
10 changes: 6 additions & 4 deletions README.md
@@ -1,16 +1,18 @@
# Automatic-Speech-Recognition
End-to-end automatic speech recognition system implemented in TensorFlow.

## Test Updates
- [x] **Test 1.0 version has been released** (2017-02-26)

## Recent Updates
- [x] **Support TensorFlow r1.0** (2017-02-24)
- [x] **Support dropout for dynamic RNN** (2017-03-11)
- [x] **Support running from a shell script** (2017-03-11)
- [x] **Support automatic evaluation every few training epochs** (2017-03-11)
- [x] **Fix bugs for character-level automatic speech recognition** (2017-03-14)
- [x] **Improve some function APIs for reusability** (2017-03-14)
- [x] **Add scaling for data preprocessing** (2017-03-15)
- [x] **Add reusable support for LibriSpeech training** (2017-03-15)

## PER-based dynamic BLSTM on TIMIT database, with casual tuning because time is limited
## Performance
### PER-based dynamic BLSTM on TIMIT database, with casual tuning because time is limited
![image](https://github.com/zzw922cn/Automatic_Speech_Recognition/blob/master/PER.png)

## Content
7 changes: 4 additions & 3 deletions src/analysis/parseLog.py
@@ -64,6 +64,7 @@ def visualize(trainERs, testERs):
    ax2.grid()
    plt.suptitle('dynamic bidirectional LSTM for Automatic Speech Recognition')
    plt.show()
rootdir = '/home/pony/github/data/ASR/log/'
train, test = readlogs(rootdir)
visualize(train, test)
if __name__ == '__main__':
    rootdir = '/home/pony/github/data/ASR/log/'
    train, test = readlogs(rootdir)
    visualize(train, test)
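Wrapping the driver code in an `if __name__ == '__main__':` guard is what makes `parseLog.py` reusable: the plotting no longer runs on import. A minimal usage sketch, assuming the package layout allows this import and that `readlogs`/`visualize` keep the signatures shown above (the log directory is a placeholder path):

```python
# Hypothetical caller that reuses the helpers without triggering the
# module-level plotting that previously ran on import.
from src.analysis.parseLog import readlogs, visualize

train_errors, test_errors = readlogs('/path/to/ASR/log/')  # placeholder log directory
visualize(train_errors, test_errors)  # plot train vs. test error rates per epoch
```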
9 changes: 5 additions & 4 deletions src/feature/libri_preprocess.py
@@ -30,6 +30,8 @@
import cPickle
import glob
import sys
import sklearn
from sklearn import preprocessing


count = 0
@@ -38,7 +40,7 @@

keywords = ['dev-clean', 'dev-other', 'test-clean', 'test-other', 'train-clean-100', 'train-clean-360', 'train-other-500']

keyword = keywords[6]
keyword = keywords[0]
label_dir = '/home/pony/github/data/libri/cha-level/'+keyword+'/label/'
mfcc_dir = '/home/pony/github/data/libri/cha-level/'+keyword+'/mfcc/'
if not os.path.exists(label_dir):
@@ -58,17 +60,16 @@
print fullFilename
(rate,sig)= wav.read(fullFilename)
mfcc = calcMFCC_delta_delta(sig,rate,win_length=0.020,win_step=0.010)
mfcc = preprocessing.scale(mfcc)
# transpose mfcc to array of (39,time_length)
mfcc = np.transpose(mfcc)
print mfcc.shape
# save mfcc to file
m_f = mfcc_dir + filenameNoSuffix.split('/')[-1] +'.npy'
np.save(m_f,mfcc)

labelFilename = filenameNoSuffix + '.label'
with open(labelFilename,'r') as f:
    characters = f.readline().strip()
    print characters
    characters = f.readline().strip().lower()
targets = []
## totally 28 real characters
for c in characters:
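The newly added `preprocessing.scale(mfcc)` call standardizes each of the 39 MFCC(+delta+delta) coefficients to roughly zero mean and unit variance over the utterance before the array is transposed and saved. A minimal sketch of the effect, with random data standing in for real features:

```python
import numpy as np
from sklearn import preprocessing

# Stand-in for calcMFCC_delta_delta output: a (time_length, 39) feature matrix.
mfcc = np.random.randn(200, 39) * 5.0 + 3.0

scaled = preprocessing.scale(mfcc)  # column-wise standardization
print(scaled.mean(axis=0))          # ~0 for every coefficient
print(scaled.std(axis=0))           # ~1 for every coefficient

# As in the preprocessing scripts, features are stored as (39, time_length).
np.save('example_mfcc.npy', np.transpose(scaled))
```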
53 changes: 40 additions & 13 deletions src/feature/timit_preprocess.py
@@ -29,15 +29,23 @@
import numpy as np
import glob
import sys
import sklearn
from sklearn import preprocessing


## keywords can be set to either 'train' or 'test'
keywords = 'test'
level = 'cha'
keywords = 'train'

mfcc_dir = '/home/pony/github/data/timit/'+keywords+'/mfcc/'
label_dir = '/home/pony/github/data/timit/'+keywords+'/label/'
mfcc_dir = '/home/pony/github/data/timit/'+level+'/'+keywords+'/mfcc/'
label_dir = '/home/pony/github/data/timit/'+level+'/'+keywords+'/label/'

rootdir = '/home/pony/ASR/datasets/TIMIT/'+keywords
if not os.path.exists(label_dir):
    os.makedirs(label_dir)
if not os.path.exists(mfcc_dir):
    os.makedirs(mfcc_dir)

rootdir = '/media/pony/Seagate Expansion Drive/学习/语音识别/ASR数据库/TIMIT/'+keywords

count = 0
## original phonemes
@@ -54,20 +62,39 @@
if file.endswith('.WAV'):
    (rate,sig)= wav.read(fullFilename)
    mfcc = calcMFCC_delta_delta(sig,rate,win_length=0.020,win_step=0.010)
    mfcc = preprocessing.scale(mfcc)
    mfcc = np.transpose(mfcc)
    print mfcc.shape
    m_f = mfcc_dir + filenameNoSuffix.split('/')[-2]+'-'+filenameNoSuffix.split('/')[-1]+'.npy'
    np.save(m_f,mfcc)
    if level == 'phn':
        labelFilename = filenameNoSuffix + '.PHN'
        phenome = []
        with open(labelFilename,'r') as f:
            for line in f.read().splitlines():
                s=line.split(' ')[2]
                p_index = phn.index(s)
                phenome.append(p_index)
        print phenome
        phenome = np.array(phenome)
    elif level == 'cha':
        labelFilename = filenameNoSuffix + '.WRD'
        phenome = []
        sentence = ''
        with open(labelFilename,'r') as f:
            for line in f.read().splitlines():
                s=line.split(' ')[2]
                sentence += s+' '
                for c in s:
                    if c=="'":
                        phenome.append(27)
                    else:
                        phenome.append(ord(c)-96)
                phenome.append(0)
        phenome = phenome[:-1]
        print phenome
        print sentence

    labelFilename = filenameNoSuffix + '.PHN'
    phenome = []
    with open(labelFilename,'r') as f:
        for line in f.read().splitlines():
            s=line.split(' ')[2]
            p_index = phn.index(s)
            phenome.append(p_index)
    print phenome
    phenome = np.array(phenome)
    t_f = label_dir + filenameNoSuffix.split('/')[-2]+'-'+filenameNoSuffix.split('/')[-1]+'.npy'
    print t_f
    np.save(t_f,phenome)
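The new `cha` branch builds character-level labels from the `.WRD` transcription: `a`-`z` map to 1-26 via `ord(c)-96`, an apostrophe maps to 27, a 0 is appended after each word as the space symbol, and the trailing 0 is stripped, giving 28 symbols in total. A small self-contained sketch of that mapping and its inverse, using a made-up word list rather than a real TIMIT file:

```python
def encode_words(words):
    """Mirror of the cha-level branch above: 0 = space, 1-26 = a-z, 27 = apostrophe."""
    labels = []
    for w in words:
        for c in w:
            labels.append(27 if c == "'" else ord(c) - 96)
        labels.append(0)   # word boundary (space)
    return labels[:-1]     # drop the trailing space, as phenome[:-1] does

def decode_labels(labels):
    """Inverse mapping, handy for sanity-checking saved .npy label files."""
    return ''.join(' ' if i == 0 else ("'" if i == 27 else chr(i + 96)) for i in labels)

words = ['she', "hadn't"]        # made-up example
labels = encode_words(words)
print(labels)                    # [19, 8, 5, 0, 8, 1, 4, 14, 27, 20]
print(decode_labels(labels))     # she hadn't
```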
14 changes: 7 additions & 7 deletions src/models/dynamic_brnn.py
@@ -30,7 +30,6 @@

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import ctc_ops as ctc
from tensorflow.contrib.rnn.python.ops import rnn_cell
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn
@@ -43,6 +42,7 @@
from src.utils.utils import build_conv_layer
from src.utils.utils import list_to_sparse_tensor
from src.utils.utils import dropout
from src.utils.utils import get_edit_distance

def build_multi_dynamic_brnn(args,
                             maxTimeSteps,
@@ -119,7 +119,8 @@ def build_graph(self, args, maxTimeSteps):
'activation':args.activation,
'optimizer':args.optimizer,
'learning rate':args.learning_rate,
'keep prob':args.keep_prob
'keep prob':args.keep_prob,
'batch size':args.batch_size
}

fbHrs = build_multi_dynamic_brnn(self.args,maxTimeSteps,self.inputX,self.cell_fn,self.seqLengths)
@@ -129,8 +130,7 @@ def build_graph(self, args, maxTimeSteps):
biasesClasses = tf.Variable(tf.zeros([args.num_class]),name='biasesClasses')
logits = [tf.matmul(t, weightsClasses) + biasesClasses for t in fbHrs]
logits3d = tf.stack(logits)
self.loss = tf.reduce_mean(ctc.ctc_loss(self.targetY, logits3d, self.seqLengths))

self.loss = tf.reduce_mean(tf.nn.ctc_loss(self.targetY, logits3d, self.seqLengths))
self.var_op = tf.global_variables()
self.var_trainable_op = tf.trainable_variables()

@@ -142,9 +142,9 @@ def build_graph(self, args, maxTimeSteps):
grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, self.var_trainable_op),args.grad_clip)
opti = tf.train.AdamOptimizer(args.learning_rate)
self.optimizer = opti.apply_gradients(zip(grads, self.var_trainable_op))
self.logitsMaxTest = tf.slice(tf.argmax(logits3d, 2), [0, 0], [self.seqLengths[0], 1])
self.predictions = tf.to_int32(ctc.ctc_beam_search_decoder(logits3d, self.seqLengths)[0][0])
self.errorRate = tf.reduce_sum(tf.edit_distance(self.predictions, self.targetY, normalize=False))/tf.to_float(tf.size(self.targetY.values))
self.predictions = tf.to_int32(tf.nn.ctc_beam_search_decoder(logits3d, self.seqLengths, merge_repeated=False)[0][0])
if args.level == 'cha':
    self.errorRate = tf.reduce_sum(tf.edit_distance(self.predictions, self.targetY, normalize=True))
self.initial_op = tf.global_variables_initializer()
self.saver = tf.train.Saver(tf.global_variables(),max_to_keep=5,keep_checkpoint_every_n_hours=1)
self.logfile = args.log_dir+str(datetime.datetime.strftime(datetime.datetime.now(),'%Y-%m-%d %H:%M:%S')+'.txt').replace(' ','').replace('/','')
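With TensorFlow r1.0, the graph now calls `tf.nn.ctc_loss` and `tf.nn.ctc_beam_search_decoder` directly instead of the old `ctc_ops` module, and reports a normalized edit distance as the character error rate when `args.level == 'cha'`. A minimal sketch of how these ops fit together, with made-up shapes and placeholder-fed tensors rather than the repository's full graph:

```python
import tensorflow as tf

max_time, batch_size, num_class = 100, 16, 29   # made-up sizes (28 characters + CTC blank)

# Time-major logits, as produced by stacking the per-timestep projections.
logits3d = tf.placeholder(tf.float32, [max_time, batch_size, num_class])
seqLengths = tf.placeholder(tf.int32, [batch_size])
targetY = tf.sparse_placeholder(tf.int32)       # sparse character labels

# In r1.0 the argument order is (labels, inputs, sequence_length).
loss = tf.reduce_mean(tf.nn.ctc_loss(targetY, logits3d, seqLengths))

# Beam-search decoding; merge_repeated=False keeps repeated characters, as in the diff.
decoded, _ = tf.nn.ctc_beam_search_decoder(logits3d, seqLengths, merge_repeated=False)
predictions = tf.to_int32(decoded[0])

# Summed normalized edit distance between predictions and targets (character error rate).
errorRate = tf.reduce_sum(tf.edit_distance(predictions, targetY, normalize=True))
```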