Skip to content

Commit

Permalink
#1. Support mini batch for training
Browse files Browse the repository at this point in the history
#2. Improve training performance (~300% speedup)
#3. Fix learning rate update bug
#4. Apply SIMD instruction to update error in layers
#5. Code refactoring
  • Loading branch information
zhongkaifu committed May 3, 2017
1 parent 7934898 commit 5043eb3
Show file tree
Hide file tree
Showing 22 changed files with 833 additions and 508 deletions.
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,18 +261,19 @@ RNNSharpConsole.exe is a console tool for recurrent neural network encoding and
In this mode, the console tool can encode a RNN model by given feature set and training/validated corpus. The usage as follows:

RNNSharpConsole.exe -mode train <parameters>
Parameters for training RNN based model
Parameters for training RNN based model.
-trainfile <string>: Training corpus file
-validfile <string>: Validated corpus for training
-cfgfile <string>: Configuration file
-tagfile <string>: Output tag or vocabulary file
-inctrain <boolean>: Incremental training. Starting from output model specified in configuration file. Default is false
-alpha <float>: Learning rate, default is 0.1
-maxiter <int>: Maximum iteration for training. 0 is no limition, default is 20
-savestep <int>: Save temporary model after every <int> sentence, default is 0
-vq <int> : Model vector quantization, 0 is disable, 1 is enable. default is 0
-alpha <float>: Learning rate, Default is 0.1
-maxiter <int>: Maximum iteration for training. 0 means no limit, Default is 20
-savestep <int>: Save temporary model after every <int> sentence, Default is 0
-vq <int> : Model vector quantization, 0 is disable, 1 is enable. Default is 0
-minibatch <int> : Updating weights every <int> sequence. Default is 1

Example: RNNSharpConsole.exe -mode train -trainfile train.txt -validfile valid.txt -cfgfile config.txt -tagfile tags.txt -alpha 0.1 -maxiter 20 -savestep 200K -vq 0 -grad 15.0
Example: RNNSharpConsole.exe -mode train -trainfile train.txt -validfile valid.txt -cfgfile config.txt -tagfile tags.txt -alpha 0.1 -maxiter 20 -savestep 200K -vq 0 -grad 15.0 -minibatch 128

### Decode Model

Expand Down Expand Up @@ -369,3 +370,4 @@ The RNNSharp also provides some APIs for developers to leverage it into their pr
## RNNSharp referenced by the following published papers
1. [Project-Team IntuiDoc: Intuitive user interaction for document](https://www.irisa.fr/intuidoc/data/ra/intuidoc2015.pdf)
2. [A New Pre-training Method for Training Deep Learning Models with Application to Spoken Language Understanding](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/07/IS2016.CameraReady-1.pdf)
3. [Long Short-Term Memory](http://pages.cs.wisc.edu/~shavlik/cs638/lectureNotes/Long%20Short-Term%20Memory%20Networks.pdf)
10 changes: 1 addition & 9 deletions RNNSharp/Config.cs
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,6 @@ public void LoadFeatureConfigFromFile(string configFilePath)
{
NetworkType = NETWORKTYPE.BiDirectionalAverage;
}
else if (networkType.Equals(NETWORKTYPE.ForwardSeq2SeqLabeling.ToString(), StringComparison.InvariantCultureIgnoreCase))
{
NetworkType = NETWORKTYPE.ForwardSeq2SeqLabeling;
}
else
{
throw new ArgumentException($"Invalidated network type: {networkType}");
Expand All @@ -216,7 +212,7 @@ public void LoadFeatureConfigFromFile(string configFilePath)
SetTFeatures();

//Load auto-encoder model for sequence-to-sequence. This model is used to encode source sequence
if (NetworkType == NETWORKTYPE.ForwardSeq2Seq || NetworkType == NETWORKTYPE.ForwardSeq2SeqLabeling)
if (NetworkType == NETWORKTYPE.ForwardSeq2Seq)
{
var seqAutoEncoderConfigFilePath = GetFilePath(currentDirectory,
config.GetValueRequired(SEQ2SEQ_AUTOENCODER_CONFIG));
Expand Down Expand Up @@ -364,10 +360,6 @@ var type in
OutputLayerConfig = new SimpleLayerConfig();
Logger.WriteLine("Initialize configuration for simple layer.");
break;

default:
Logger.WriteLine($"Invalidated output layer type {sLayerType}");
throw new ArgumentException($"Invalidated output layer type {sLayerType}");
}
}

Expand Down
1 change: 0 additions & 1 deletion RNNSharp/Enums.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ public enum NETWORKTYPE
{
Forward = 0,
ForwardSeq2Seq,
ForwardSeq2SeqLabeling,
BiDirectional,
BiDirectionalAverage
}
Expand Down
22 changes: 13 additions & 9 deletions RNNSharp/Layers/DropoutLayer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System;
using System.IO;
using System.Numerics;
using System.Threading.Tasks;

namespace RNNSharp
{
Expand Down Expand Up @@ -62,16 +63,10 @@ public override void PreUpdateWeights(Neuron neuron, float[] errs)

public override void InitializeWeights(int sparseFeatureSize, int denseFeatureSize)
{
SparseFeatureSize = sparseFeatureSize;
DenseFeatureSize = denseFeatureSize;
if (DenseFeatureSize % Vector<float>.Count != 0)
{
DenseFeatureSize += (Vector<float>.Count - (DenseFeatureSize % Vector<float>.Count));
}

if (denseFeatureSize > 0)
{
Logger.WriteLine("Initializing dense feature matrix. layer size = {0}, feature size = {1}", LayerSize, denseFeatureSize);
DenseFeatureSize = denseFeatureSize;
DenseWeights = new Matrix<float>(LayerSize, denseFeatureSize);
for (var i = 0; i < DenseWeights.Height; i++)
{
Expand All @@ -85,6 +80,7 @@ public override void InitializeWeights(int sparseFeatureSize, int denseFeatureSi
if (sparseFeatureSize > 0)
{
Logger.WriteLine("Initializing sparse feature matrix. layer size = {0}, feature size = {1}", LayerSize, sparseFeatureSize);
SparseFeatureSize = sparseFeatureSize;
SparseWeights = new Matrix<float>(LayerSize, SparseFeatureSize);
for (var i = 0; i < SparseWeights.Height; i++)
{
Expand All @@ -97,6 +93,12 @@ public override void InitializeWeights(int sparseFeatureSize, int denseFeatureSi

InitializeInternalTrainingParameters();
}

// NOTE(review): intentionally left empty — this override appears to opt the
// dropout layer out of the per-batch weight update performed by other layers
// (dropout only masks activations in ForwardPass). Confirm against the base
// class contract that a no-op here is the intended behavior.
public override void UpdateWeights()
{

}

public override void ForwardPass(SparseVector sparseFeature, float[] denseFeature)
{
if (LayerSize != denseFeature.Length)
Expand All @@ -106,7 +108,8 @@ public override void ForwardPass(SparseVector sparseFeature, float[] denseFeatur

if (runningMode == RunningMode.Training)
{
for (var i = 0; i < LayerSize; i++)

for (var i = 0; i < LayerSize; i++)
{
var val = (float)rnd.NextDouble();
if (val < dropoutRatio)
Expand All @@ -123,7 +126,8 @@ public override void ForwardPass(SparseVector sparseFeature, float[] denseFeatur
}
else
{
for (var i = 0; i < LayerSize; i++)

for (var i = 0; i < LayerSize; i++)
{
Cells[i] = (float)(1.0 - dropoutRatio) * denseFeature[i];
}
Expand Down
Loading

0 comments on commit 5043eb3

Please sign in to comment.