
Commit

update
Former-commit-id: bd48e5f
nq555222 committed Mar 14, 2017
1 parent c6819f0 commit c324671
Showing 13 changed files with 414 additions and 51 deletions.
Binary file added PER.png
9 changes: 9 additions & 0 deletions README.md
@@ -1,8 +1,17 @@
# Automatic-Speech-Recognition
End-to-end automatic speech recognition system implemented in TensorFlow.

## Test Updates
- [x] **Test version 1.0 released** (2017-02-26)

## Recent Updates
- [x] **Support TensorFlow r1.0** (2017-02-24)
- [x] **Support dropout for dynamic rnn** (2017-03-11) (see the sketch below)
- [x] **Support running from a shell script** (2017-03-11)
- [x] **Support automatic evaluation every few training epochs** (2017-03-11)
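
A minimal sketch of dropout applied to a dynamic RNN under the TensorFlow r1.0 API (not code from this commit; the cell size, feature dimension, and placeholder names are illustrative assumptions):

```python
import tensorflow as tf

# Illustrative shapes: (batch, time, 39) MFCC-like features.
inputs = tf.placeholder(tf.float32, [None, None, 39])
seq_len = tf.placeholder(tf.int32, [None])      # true length of each utterance
keep_prob = tf.placeholder(tf.float32)          # feed 1.0 at evaluation time

cell = tf.contrib.rnn.LSTMCell(256)
cell = tf.contrib.rnn.DropoutWrapper(cell,
                                     input_keep_prob=keep_prob,
                                     output_keep_prob=keep_prob)
outputs, state = tf.nn.dynamic_rnn(cell, inputs,
                                   sequence_length=seq_len,
                                   dtype=tf.float32)
```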

## PER of a dynamic BLSTM on the TIMIT database (with only casual tuning due to limited time)
![image](https://github.com/zzw922cn/Automatic_Speech_Recognition/blob/master/PER.png)
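
PER here is the phoneme error rate: the edit distance between the decoded phoneme sequence and the reference transcription, normalized by the reference length. A generic sketch, not code from this repository:

```python
def edit_distance(ref, hyp):
    # classic dynamic-programming Levenshtein distance
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i
    for j in range(len(hyp) + 1):
        d[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            d[i][j] = min(d[i - 1][j] + 1,                               # deletion
                          d[i][j - 1] + 1,                               # insertion
                          d[i - 1][j - 1] + (ref[i - 1] != hyp[j - 1]))  # substitution
    return d[len(ref)][len(hyp)]

def per(reference, decoded):
    ''' phoneme error rate: edit distance normalized by reference length '''
    return float(edit_distance(reference, decoded)) / len(reference)

# e.g. per(['sil','ah','b','aw','t'], ['sil','ah','b','aw']) == 0.2
```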

## Content
This is a powerful library for **automatic speech recognition**, implemented in TensorFlow with support for training on CPU/GPU. The library contains the following models that you can choose from to train your own model:
250 changes: 250 additions & 0 deletions src/analysis/.ipynb_checkpoints/Untitled0-checkpoint.ipynb
@@ -0,0 +1,250 @@
{
"metadata": {
"name": "",
"signature": "sha256:1a0b483ab25223e35ebd5a1511ca1cde6f8502cdaf2ae905b41700029add44a1"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"from pandas import Series,DataFrame\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"sns.set_style('whitegrid')\n",
"%matplotlib inline\n",
"\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.svm import SVC,LinearSVC\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.naive_bayes import GaussianNB\n",
"\n",
"titanic_df = pd.read_csv(\"~/Downloads/train.csv\", dtype={\"Age\": np.float64}, )\n",
"test_df = pd.read_csv(\"~/Downloads/test.csv\", dtype={\"Age\": np.float64}, )\n",
"\n",
"titanic_df.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module dateutil was already imported from /home/pony/.local/lib/python2.7/site-packages/dateutil/__init__.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n",
" from pkg_resources import resource_stream\n",
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module decorator was already imported from /home/pony/.local/lib/python2.7/site-packages/decorator.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n",
" from pkg_resources import resource_stream\n",
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module pytz was already imported from /home/pony/.local/lib/python2.7/site-packages/pytz/__init__.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n",
" from pkg_resources import resource_stream\n",
"/home/pony/.local/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module six was already imported from /home/pony/.local/lib/python2.7/site-packages/six.pyc, but /usr/lib/python2.7/dist-packages is being added to sys.path\n",
" from pkg_resources import resource_stream\n"
]
},
{
"html": [
"<div style=\"max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 1,
"text": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38 1 \n",
"2 Heikkinen, Miss. Laina female 26 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 \n",
"4 Allen, Mr. William Henry male 35 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"titanic_df.info()\n",
"print '--------------------------------------------------------'\n",
"test_df.info()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 891 entries, 0 to 890\n",
"Data columns (total 12 columns):\n",
"PassengerId 891 non-null int64\n",
"Survived 891 non-null int64\n",
"Pclass 891 non-null int64\n",
"Name 891 non-null object\n",
"Sex 891 non-null object\n",
"Age 714 non-null float64\n",
"SibSp 891 non-null int64\n",
"Parch 891 non-null int64\n",
"Ticket 891 non-null object\n",
"Fare 891 non-null float64\n",
"Cabin 204 non-null object\n",
"Embarked 889 non-null object\n",
"dtypes: float64(2), int64(5), object(5)\n",
"memory usage: 90.5+ KB\n",
"--------------------------------------------------------\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 418 entries, 0 to 417\n",
"Data columns (total 11 columns):\n",
"PassengerId 418 non-null int64\n",
"Pclass 418 non-null int64\n",
"Name 418 non-null object\n",
"Sex 418 non-null object\n",
"Age 332 non-null float64\n",
"SibSp 418 non-null int64\n",
"Parch 418 non-null int64\n",
"Ticket 418 non-null object\n",
"Fare 417 non-null float64\n",
"Cabin 91 non-null object\n",
"Embarked 418 non-null object\n",
"dtypes: float64(2), int64(4), object(5)\n",
"memory usage: 39.2+ KB\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"titanic_df=titanic_df.drop(['PassengerId','Name','Ticket'],axis=1)\n",
"test_df = test_df.drop(['Name','Ticket'],axis=1)\n"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Empty file modified src/analysis/extractdata.py
100755 → 100644
Empty file.
69 changes: 69 additions & 0 deletions src/analysis/parseLog.py
@@ -0,0 +1,69 @@
# -*- coding:utf-8 -*-
''' data visualization for automatic speech recognition
author:
iiiiiiiiiiii iiiiiiiiiiii !!!!!!! !!!!!!
# ### # ### ### I# #:
# ### # I##; ##; ## ##
### ### !## #### #
### ### ### ## ### #'
!##; `##% ##; ## ### ##
### ### $## `# ## #
### # ### # #### ####;
`### -# ### `# ### ###
############## ############## `# #
date:2017-3-14
'''

import os
import time
import numpy as np
import matplotlib.pyplot as plt

def readlogs(rootdir):
    ''' function for reading asr logs to visualize '''
    trainERs = []
    testERs = []
    fullFilenames = []
    for subdir, dirs, files in os.walk(rootdir):
        for f in files:
            fullFilename = os.path.join(subdir, f)
            fullFilenames.append(fullFilename)
    fullFilenames.sort(key=lambda x: os.path.getctime(x))
    print fullFilenames
    epoch = 0
    if True:
        for fullFilename in fullFilenames:
            if fullFilename.endswith('.txt'):
                with open(fullFilename, 'r') as train_file:
                    content = train_file.readlines()
                    for line in content:
                        if 'train error rate' in line:
                            trainER = line.split(':')[2]
                            trainERs.append(float(trainER))
                            epoch += 1

            elif fullFilename.endswith('TEST'):
                with open(fullFilename, 'r') as test_file:
                    content = test_file.readlines()
                    for line in content:
                        if 'test error rate' in line:
                            testER = line.split(':')[1]
                            testERs.append(float(testER))

    return trainERs, testERs

def visualize(trainERs, testERs):
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    ax1.plot(trainERs, label='train phoneme error rate')
    ax2.plot(testERs, label='test phoneme error rate')
    ax1.legend()
    ax2.legend()
    ax1.grid()
    ax2.grid()
    plt.suptitle('dynamic bidirectional LSTM for Automatic Speech Recognition')
    plt.show()

rootdir = '/home/pony/github/data/ASR/log/'
train, test = readlogs(rootdir)
visualize(train, test)
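
The log format that `readlogs` expects is not included in this commit; the lines below are hypothetical examples, shaped only to be consistent with the positional `split(':')` lookups used above.

```python
# Hypothetical log lines (not from this repository):
sample_train_line = "INFO:train error rate:0.512"   # split(':')[2] -> '0.512'
sample_test_line = "test error rate:0.438"          # split(':')[1] -> '0.438'
```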
Empty file modified src/analysis/pplot.py
100755 → 100644
Empty file.
Binary file added src/feature/calcmfcc.pyc
Binary file not shown.
24 changes: 17 additions & 7 deletions src/feature/libri_preprocess.py
@@ -31,15 +31,22 @@
import glob
import sys

rootdir = '/home/pony/ASR/datasets/LibriSpeech/train-other-500/'


count = 0
subset = 0
labels=[]
#subset = 0
#labels=[]

keywords = ['dev-clean', 'dev-other', 'test-clean', 'test-other', 'train-clean-100', 'train-clean-360', 'train-other-500']

keyword = keywords[6]
label_dir = '/home/pony/github/data/libri/cha-level/'+keyword+'/label/'
mfcc_dir = '/home/pony/github/data/libri/cha-level/'+keyword+'/mfcc/'
if not os.path.exists(label_dir):
os.makedirs(label_dir)
if not os.path.exists(mfcc_dir):
os.makedirs(mfcc_dir)

label_dir = '/home/pony/github/ASR_libri/libri/cha-level/mfcc_and_label/label/'
mfcc_dir = '/home/pony/github/ASR_libri/libri/cha-level/mfcc_and_label/mfcc/'
rootdir = '/media/pony/Seagate Expansion Drive/学习/语音识别/ASR数据库/LibriSpeech/'+keyword

if True:
for subdir, dirs, files in os.walk(rootdir):
@@ -50,7 +57,7 @@
if f.endswith('.wav'):
print fullFilename
(rate,sig)= wav.read(fullFilename)
mfcc = calcMFCC_delta_delta(sig,rate,win_length=0.020,win_step=0.020)
mfcc = calcMFCC_delta_delta(sig,rate,win_length=0.020,win_step=0.010)
# transpose mfcc to array of (39,time_length)
mfcc = np.transpose(mfcc)
print mfcc.shape
@@ -63,9 +70,12 @@
characters = f.readline().strip()
print characters
targets = []
## 28 output symbols in total: space (0), a-z (1-26), apostrophe (27)
for c in characters:
if c == ' ':
targets.append(0)
elif c == "'":
targets.append(27)
else:
targets.append(ord(c)-96) # letters start from 1
targets = np.array(targets)
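
For clarity (and not as part of the diff), the 28-symbol mapping implied by the hunk above, together with a hypothetical decoder for mapping label sequences back to text, could look like this:

```python
# Sketch of the character-to-target mapping used above:
# space -> 0, 'a'..'z' -> 1..26, apostrophe -> 27 (illustrative, not in the commit).
def encode(text):
    targets = []
    for c in text:
        if c == ' ':
            targets.append(0)
        elif c == "'":
            targets.append(27)
        else:
            targets.append(ord(c) - 96)   # 'a' is 97, so letters start at 1
    return targets

def decode(targets):
    chars = []
    for t in targets:
        if t == 0:
            chars.append(' ')
        elif t == 27:
            chars.append("'")
        else:
            chars.append(chr(t + 96))
    return ''.join(chars)

# decode(encode("it's a test")) == "it's a test"
```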
Binary file added src/feature/sigprocess.pyc
Binary file not shown.