Skip to content

Commit

Permalink
zhongkaifu#1. Support dropout for SimpleRNN (both forward and bi-directional)
Browse files Browse the repository at this point in the history

zhongkaifu#2. Speed up training performance
  • Loading branch information
zhongkaifu committed Dec 3, 2015
1 parent a4dcfcf commit debffdb
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 74 deletions.
Binary file modified RNNSharp.v12.suo
Binary file not shown.
10 changes: 5 additions & 5 deletions RNNSharp/BiRNN.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ public override void SetGradientCutoff(double newGradient)
backwardRNN.SetGradientCutoff(newGradient);
}

public override void SetRegularization(double newBeta)
public override void SetDropout(double newDropout)
{
beta = newBeta;
dropout = newDropout;

forwardRNN.SetRegularization(newBeta);
backwardRNN.SetRegularization(newBeta);
forwardRNN.SetDropout(newDropout);
backwardRNN.SetDropout(newDropout);
}

public override void SetHiddenLayerSize(int newsize)
Expand Down Expand Up @@ -453,7 +453,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false)

}

public override void computeNet(State state, double[] doutput)
public override void computeNet(State state, double[] doutput, bool isTrain = true)
{

}
Expand Down
2 changes: 1 addition & 1 deletion RNNSharp/LSTMRNN.cs
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false)


// forward process. output layer consists of tag value
public override void computeNet(State state, double[] doutput)
public override void computeNet(State state, double[] doutput, bool isTrain = true)
{
//inputs(t) -> hidden(t)
//Get sparse feature and apply it into hidden layer
Expand Down
8 changes: 4 additions & 4 deletions RNNSharp/ModelSetting.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ public class ModelSetting
public double GetLearningRate(){ return m_LearningRate; }
public void SetLearningRate(double r) { m_LearningRate = r; }

public double GetRegularization() { return m_Regularization; }
public void SetRegularization(double r) { m_Regularization = r; }
public double GetDropout() { return m_Dropout; }
public void SetDropout(double r) { m_Dropout = r; }

public double GetTagTransitionWeight(){ return m_tagTransitionWeight; }
public void SetTagTransitionWeight(double r) { m_tagTransitionWeight = r; }
Expand Down Expand Up @@ -60,7 +60,7 @@ public long GetSaveStep()
int m_NumHidden;
double m_LearningRate;
double m_tagTransitionWeight;
double m_Regularization;
double m_Dropout;
int m_Bptt;
int m_MaxIteration;
bool m_bCRFTraining;
Expand Down Expand Up @@ -91,7 +91,7 @@ public void DumpSetting()
}

Console.WriteLine("Learning rate: {0}", m_LearningRate);
Console.WriteLine("Regularization: {0}", m_Regularization);
Console.WriteLine("Dropout: {0}", m_Dropout);
Console.WriteLine("Max Iteration: {0}", m_MaxIteration);
Console.WriteLine("Hidden layer size: {0}", m_NumHidden);
Console.WriteLine("RNN-CRF: {0}", m_bCRFTraining);
Expand Down
12 changes: 7 additions & 5 deletions RNNSharp/RNN.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ abstract public class RNN
protected double minTknErrRatio;
protected double lastTknErrRatio;
protected long counter;
protected double beta;
protected double dropout;
protected ParallelOptions parallelOption = new ParallelOptions();
protected double gradient_cutoff;
protected bool m_bCRFTraining = false;
Expand Down Expand Up @@ -175,7 +175,7 @@ public RNN()
gradient_cutoff = 15;

alpha = 0.1;
beta = 0.0000001;
dropout = 0;
logp = 0;
llogp = -100000000;
minTknErrRatio = 1000000;
Expand Down Expand Up @@ -214,7 +214,7 @@ public bool ShouldTrainingStop()
public virtual void SetValidationSet(DataSet validation) { m_ValidationSet = validation; }
public virtual void SetGradientCutoff(double newGradient) { gradient_cutoff = newGradient; }
public virtual void SetLearningRate(double newAlpha) { alpha = newAlpha; }
public virtual void SetRegularization(double newBeta) { beta = newBeta; }
public virtual void SetDropout(double newDropout) { dropout = newDropout; }
public virtual void SetHiddenLayerSize(int newsize) { L1 = newsize;}
public virtual void SetModelFile(string strModelFile) { m_strModelFile = strModelFile; }

Expand All @@ -226,7 +226,7 @@ public bool IsCRFModel()
public double exp_10(double num) { return Math.Exp(num * 2.302585093); }

public abstract void netReset(bool updateNet = false);
public abstract void computeNet(State state, double[] doutput);
public abstract void computeNet(State state, double[] doutput, bool isTrain = true);


public virtual int[] PredictSentence(Sequence pSequence)
Expand Down Expand Up @@ -589,6 +589,7 @@ public void matrixXvectorADD(neuron[] dest, neuron[] srcvec, Matrix<double> srcm
//ac mod
Parallel.For(0, (to - from), parallelOption, i =>
{
dest[i + from].cellOutput = 0;
for (int j = 0; j < to2 - from2; j++)
{
dest[i + from].cellOutput += srcvec[j + from2].cellOutput * srcmatrix[i][j];
Expand All @@ -600,6 +601,7 @@ public void matrixXvectorADD(neuron[] dest, neuron[] srcvec, Matrix<double> srcm
{
Parallel.For(0, (to - from), parallelOption, i =>
{
dest[i + from].er = 0;
for (int j = 0; j < to2 - from2; j++)
{
dest[i + from].er += srcvec[j + from2].er * srcmatrix[j][i];
Expand Down Expand Up @@ -801,7 +803,7 @@ public virtual Matrix<double> InnerDecode(Sequence pSequence)
{
State state = pSequence.Get(curState);
setInputLayer(state, curState, numStates, predicted);
computeNet(state, m[curState]); //compute probability distribution
computeNet(state, m[curState], false); //compute probability distribution

predicted[curState] = GetBestOutputIndex();
}
Expand Down
2 changes: 1 addition & 1 deletion RNNSharp/RNNEncoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public void Train()
rnn.SetCRFTraining(m_modelSetting.IsCRFTraining());
rnn.SetLearningRate(m_modelSetting.GetLearningRate());
rnn.SetGradientCutoff(15.0);
rnn.SetRegularization(m_modelSetting.GetRegularization());
rnn.SetDropout(m_modelSetting.GetDropout());
rnn.SetHiddenLayerSize(m_modelSetting.GetNumHidden());
rnn.SetTagBigramTransitionWeight(m_modelSetting.GetTagTransitionWeight());

Expand Down
103 changes: 52 additions & 51 deletions RNNSharp/SimpleRNN.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class SimpleRNN : RNN
protected int bptt;
protected int bptt_block;
protected neuron[] bptt_hidden;
protected neuron[] bptt_fea;
protected double[] bptt_fea;
protected SparseVector[] bptt_inputs = new SparseVector[MAX_RNN_HIST]; // TODO: add const constraint

protected Matrix<double> mat_bptt_syn0_w = new Matrix<double>();
Expand All @@ -30,7 +30,7 @@ public SimpleRNN()
{
m_modeltype = MODELTYPE.SIMPLE;
gradient_cutoff = 15;
beta = 0.0000001;
dropout = 0;
llogp = -100000000;
iter = 0;

Expand Down Expand Up @@ -99,18 +99,29 @@ public override void GetHiddenLayer(Matrix<double> m, int curStatus)
}
}

public void computeHiddenActivity()
public void computeHiddenActivity(bool isTrain)
{
for (int a = 0; a < L1; a++)
{
if (neuHidden[a].mask == true)
{
neuHidden[a].cellOutput = 0;
continue;
}

if (isTrain == false)
{
neuHidden[a].cellOutput = neuHidden[a].cellOutput * (1.0 - dropout);
}

if (neuHidden[a].cellOutput > 50) neuHidden[a].cellOutput = 50; //for numerical stability
if (neuHidden[a].cellOutput < -50) neuHidden[a].cellOutput = -50; //for numerical stability
neuHidden[a].cellOutput = 1.0 / (1.0 + Math.Exp(-neuHidden[a].cellOutput));
}
}

// forward process. output layer consists of tag value
public override void computeNet(State state, double[] doutput)
public override void computeNet(State state, double[] doutput, bool isTrain = true)
{
//keep last hidden layer and erase activations
neuLastHidden = new neuron[L1];
Expand Down Expand Up @@ -144,13 +155,7 @@ public override void computeNet(State state, double[] doutput)
}

//activate 1 --sigmoid
computeHiddenActivity();

//initialize output nodes
for (int c = 0; c < L2; c++)
{
neuOutput[c].cellOutput = 0;
}
computeHiddenActivity(isTrain);

matrixXvectorADD(neuOutput, neuHidden, mat_hidden2output, 0, L2, 0, L1, 0);
if (doutput != null)
Expand All @@ -174,11 +179,15 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
CalculateOutputLayerError(state, timeat);
}

matrixXvectorADD(neuHidden, neuOutput, mat_hidden2output, 0, L1, 0, L2, 1); //error output->hidden for words from specific class

for (int a = 0; a < L1; a++)
{
neuHidden[a].er = 0;
if (neuHidden[a].mask == true)
{
neuHidden[a].er = 0;
}
}
matrixXvectorADD(neuHidden, neuOutput, mat_hidden2output, 0, L1, 0, L2, 1); //error output->hidden for words from specific class

for (int a = 0; a < L1; a++)
{
Expand Down Expand Up @@ -209,7 +218,7 @@ void learnBptt(State state)
{
for (int a = 0; a < fea_size; a++)
{
mat_bptt_synf[b][a] += neuHidden[b].er * bptt_fea[a + step * fea_size].cellOutput;
mat_bptt_synf[b][a] += neuHidden[b].er * bptt_fea[a + step * fea_size];
}
});
}
Expand All @@ -225,11 +234,6 @@ void learnBptt(State state)
}
});

for (int a = 0; a < L1; a++)
{
neuLastHidden[a].er = 0;
}

matrixXvectorADD(neuLastHidden, neuHidden, mat_hiddenBpttWeight, 0, L1, 0, L1, 1); //propagates errors hidden->input to the recurrent part

Parallel.For(0, L1, parallelOption, b =>
Expand Down Expand Up @@ -311,16 +315,7 @@ public void resetBpttMem()
}

bptt_hidden = new neuron[(bptt + bptt_block + 1) * L1];
for (int a = 0; a < (bptt + bptt_block) * L1; a++)
{
bptt_hidden[a].cellOutput = 0;
bptt_hidden[a].er = 0;
}

bptt_fea = new neuron[(bptt + bptt_block + 2) * fea_size];
for (int a = 0; a < (bptt + bptt_block) * fea_size; a++)
bptt_fea[a].cellOutput = 0;

bptt_fea = new double[(bptt + bptt_block + 2) * fea_size];
mat_bptt_syn0_w = new Matrix<double>(L1, L0);
mat_bptt_syn0_ph = new Matrix<double>(L1, L1);
mat_bptt_synf = new Matrix<double>(L1, fea_size);
Expand Down Expand Up @@ -360,13 +355,28 @@ public override void initMem()
public override void netReset(bool updateNet = false) //cleans hidden layer activation + bptt history
{
for (int a = 0; a < L1; a++)
{
neuHidden[a].cellOutput = 0.1;
neuHidden[a].mask = false;
}

if (updateNet == true)
{
//Train mode
for (int a = 0; a < L1; a++)
{
if (rand.NextDouble() < dropout)
{
neuHidden[a].mask = true;
}
}
}

if (bptt > 0)
{
bptt_inputs = new SparseVector[MAX_RNN_HIST];
bptt_hidden = new neuron[(bptt + bptt_block + 1) * L1];
bptt_fea = new neuron[(bptt + bptt_block + 2) * fea_size];
bptt_fea = new double[(bptt + bptt_block + 2) * fea_size];
}
}

Expand All @@ -375,37 +385,28 @@ public override void LearnBackTime(State state, int numStates, int curState)
{
if (bptt > 0)
{
//shift memory needed for bptt to next time step
for (int a = bptt + bptt_block - 1; a > 0; a--)
bptt_inputs[a] = bptt_inputs[a - 1];
bptt_inputs[0] = state.GetSparseData();

for (int a = bptt + bptt_block - 1; a > 0; a--)
int maxBptt = 0;
for (maxBptt = 0; maxBptt < bptt + bptt_block - 1; maxBptt++)
{
for (int b = 0; b < L1; b++)
if (bptt_inputs[maxBptt] == null)
{
bptt_hidden[a * L1 + b] = bptt_hidden[(a - 1) * L1 + b];
break;
}
}

for (int a = bptt + bptt_block - 1; a > 0; a--)
//shift memory needed for bptt to next time step
for (int a = maxBptt; a > 0; a--)
{
for (int b = 0; b < fea_size; b++)
{
bptt_fea[a * fea_size + b].cellOutput = bptt_fea[(a - 1) * fea_size + b].cellOutput;
}
bptt_inputs[a] = bptt_inputs[a - 1];
Array.Copy(bptt_hidden, (a - 1) * L1, bptt_hidden, a * L1, L1);
Array.Copy(bptt_fea, (a - 1) * fea_size, bptt_fea, a * fea_size, fea_size);
}
bptt_inputs[0] = state.GetSparseData();
}

//Save hidden and feature layer nodes values for bptt
for (int b = 0; b < L1; b++)
{
bptt_hidden[b] = neuHidden[b];
}
for (int b = 0; b < fea_size; b++)
{
bptt_fea[b].cellOutput = neuFeatures[b];
}
Array.Copy(neuHidden, 0, bptt_hidden, 0, L1);
Array.Copy(neuFeatures, 0, bptt_fea, 0, fea_size);

// time to learn bptt
if (((curState % bptt_block) == 0) || (curState == numStates - 1))
Expand Down
5 changes: 3 additions & 2 deletions RNNSharp/neuron.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ namespace RNNSharp
{
public struct neuron
{
public double cellOutput; //actual value stored in neuron
public double er; //error value in neuron, used by learning algorithm
public double cellOutput; //actual value stored in neuron
public double er; //error value in neuron, used by learning algorithm
public bool mask;
}
}
10 changes: 5 additions & 5 deletions RNNSharpConsole/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Program
static int iCRF = 0;
static long savestep = 0;
static double alpha = 0.1;
static double beta = 0.0000001;
static double dropout = 0;
static int bptt = 4;
static int modelType = 0;
static int nBest = 1;
Expand Down Expand Up @@ -70,8 +70,8 @@ static void UsageTrain()
Console.WriteLine(" -alpha <float>");
Console.WriteLine("\tLearning rate, default is 0.1");

Console.WriteLine(" -beta <float>");
Console.WriteLine("\tRegularization parameter, default is 1e-7");
Console.WriteLine(" -dropout <float>");
Console.WriteLine("\tDropout parameter [0, 1.0), default is 0");

Console.WriteLine(" -layersize <int>");
Console.WriteLine("\tHidden layer size for training, default is 200");
Expand Down Expand Up @@ -136,7 +136,7 @@ static void InitParameters(string[] args)
if ((i = ArgPos("-crf", args)) >= 0) iCRF = int.Parse(args[i + 1]);
if ((i = ArgPos("-maxiter", args)) >= 0) maxIter = int.Parse(args[i + 1]);
if ((i = ArgPos("-alpha", args)) >= 0) alpha = double.Parse(args[i + 1]);
if ((i = ArgPos("-beta", args)) >= 0) beta = double.Parse(args[i + 1]);
if ((i = ArgPos("-dropout", args)) >= 0) dropout = double.Parse(args[i + 1]);
if ((i = ArgPos("-bptt", args)) >= 0) bptt = int.Parse(args[i + 1]);
if ((i = ArgPos("-nbest", args)) >= 0) nBest = int.Parse(args[i + 1]);
if ((i = ArgPos("-dir", args)) >= 0) iDir = int.Parse(args[i + 1]);
Expand Down Expand Up @@ -440,7 +440,7 @@ private static void Train()
RNNConfig.SetMaxIteration(maxIter);
RNNConfig.SetSaveStep(savestep);
RNNConfig.SetLearningRate(alpha);
RNNConfig.SetRegularization(beta);
RNNConfig.SetDropout(dropout);
RNNConfig.SetBptt(bptt);

//Dump RNN setting on console
Expand Down

0 comments on commit debffdb

Please sign in to comment.