Skip to content

Commit

Permalink
#1. Code refactoring
Browse files Browse the repository at this point in the history
#2. Normalize LSTM cell values during weight updates
  • Loading branch information
zhongkaifu committed Feb 28, 2016
1 parent e699d62 commit 2d58c7a
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 25 deletions.
30 changes: 24 additions & 6 deletions RNNSharp/LSTMRNN.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ public class LSTMRNN : RNN
private new Vector4 vecMaxGrad;
private new Vector4 vecMinGrad;

private new Vector3 vecMaxGrad3;
private new Vector3 vecMinGrad3;

public LSTMRNN()
{
ModelType = MODELTYPE.LSTM;
Expand Down Expand Up @@ -502,6 +505,10 @@ public override void CleanStatus()
vecNormalLearningRate3 = new Vector3(LearningRate, LearningRate, LearningRate);
vecMaxGrad = new Vector4((float)GradientCutoff, (float)GradientCutoff, (float)GradientCutoff, (float)GradientCutoff);
vecMinGrad = new Vector4((float)(-GradientCutoff), (float)(-GradientCutoff), (float)(-GradientCutoff), (float)(-GradientCutoff));

vecMaxGrad3 = new Vector3((float)GradientCutoff, (float)GradientCutoff, (float)GradientCutoff);
vecMinGrad3 = new Vector3((float)(-GradientCutoff), (float)(-GradientCutoff), (float)(-GradientCutoff));

}

public override void InitMem()
Expand Down Expand Up @@ -530,7 +537,7 @@ public override void InitMem()

private void CreateCell(BinaryReader br)
{
neuFeatures = new SingleVector(DenseFeatureSize);
neuFeatures = null;
OutputLayer = new SimpleLayer(L2);

neuHidden = new LSTMCell[L1];
Expand Down Expand Up @@ -598,6 +605,13 @@ public override void LearnOutputWeight()
});
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private Vector3 ComputeLearningRate(Vector3 vecDelta, ref Vector3 vecWeightLearningRate)
{
    // AdaGrad-style update: accumulate the squared gradient into the
    // per-weight learning-rate state, then scale the base rate by
    // 1 / (1 + sqrt(accumulated)) component-wise.
    Vector3 squaredDelta = vecDelta * vecDelta;
    vecWeightLearningRate += squaredDelta;

    Vector3 denominator = Vector3.One + Vector3.SquareRoot(vecWeightLearningRate);
    return vecNormalLearningRate3 / denominator;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private Vector4 ComputeLearningRate(Vector4 vecDelta, ref Vector4 vecWeightLearningRate)
{
Expand Down Expand Up @@ -651,9 +665,11 @@ public override void LearnNet(State state, int numStates, int curState)
}
wd_i[entry.Key] = wd;
//Computing final err delta
Vector4 vecDelta = new Vector4(wd, entry.Value);
vecDelta = vecErr * vecDelta;
//Computing actual learning rate
Vector4 vecLearningRate = ComputeLearningRate(vecDelta, ref wlr_i[entry.Key]);
w_i[entry.Key] += vecLearningRate * vecDelta;
}
Expand All @@ -678,6 +694,7 @@ public override void LearnNet(State state, int numStates, int curState)
Vector4 vecDelta = new Vector4(wd, feature);
vecDelta = vecErr * vecDelta;
//Computing actual learning rate
Vector4 vecLearningRate = ComputeLearningRate(vecDelta, ref wlr_i[j]);
w_i[j] += vecLearningRate * vecDelta;
}
Expand All @@ -692,14 +709,15 @@ public override void LearnNet(State state, int numStates, int curState)
//update internal weights
Vector3 vecCellDelta = new Vector3((float)c.dSWCellIn, (float)c.dSWCellForget, (float)c.cellState);
Vector3 vecCellErr = new Vector3(cellStateError, cellStateError, gradientOutputGate);
Vector3 vecCellLearningRate = CellLearningRate[i];
//Normalize err by gradient cut-off
vecCellErr = Vector3.Clamp(vecCellErr, vecMinGrad3, vecMaxGrad3);
vecCellDelta = vecCellErr * vecCellDelta;
vecCellLearningRate += (vecCellDelta * vecCellDelta);
CellLearningRate[i] = vecCellLearningRate;
//LearningRate / (1.0 + Math.Sqrt(dg));
vecCellLearningRate = vecNormalLearningRate3 / (Vector3.One + Vector3.SquareRoot(vecCellLearningRate));
//Computing actual learning rate
Vector3 vecCellLearningRate = ComputeLearningRate(vecCellDelta, ref CellLearningRate[i]);
vecCellDelta = vecCellLearningRate * vecCellDelta;
c.wCellIn += vecCellDelta.X;
Expand Down
3 changes: 3 additions & 0 deletions RNNSharp/ModelSetting.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class ModelSetting
public int ModelType { get; set; }
public int ModelDirection { get; set; }
public int VQ { get; set; }
public float GradientCutoff { get; set; }

public void DumpSetting()
{
Expand Down Expand Up @@ -49,6 +50,7 @@ public void DumpSetting()
Logger.WriteLine("RNN-CRF: {0}", IsCRFTraining);
Logger.WriteLine("SIMD: {0}, Size: {1}bits", System.Numerics.Vector.IsHardwareAccelerated,
Vector<double>.Count * sizeof(double) * 8);
Logger.WriteLine("Gradient cut-off: {0}", GradientCutoff);
if (SaveStep > 0)
{
Logger.WriteLine("Save temporary model after every {0} sentences", SaveStep);
Expand All @@ -60,6 +62,7 @@ public ModelSetting()
MaxIteration = 20;
Bptt = 4;
LearningRate = 0.1f;
GradientCutoff = 15.0f;
NumHidden = 200;
IsCRFTraining = true;
}
Expand Down
24 changes: 19 additions & 5 deletions RNNSharp/RNNEncoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void Train()
rnn.MaxIter = m_modelSetting.MaxIteration;
rnn.IsCRFTraining = m_modelSetting.IsCRFTraining;
rnn.LearningRate = m_modelSetting.LearningRate;
rnn.GradientCutoff = 15.0;
rnn.GradientCutoff = m_modelSetting.GradientCutoff;
rnn.Dropout = m_modelSetting.Dropout;
rnn.L1 = m_modelSetting.NumHidden;

Expand Down Expand Up @@ -116,18 +116,32 @@ public void Train()
betterValidateNet = rnn.ValidateNet(ValidationSet, iter);
}

if ((ValidationSet != null && betterValidateNet == false) ||
(ValidationSet == null && ppl >= lastPPL))
if (ppl >= lastPPL)
{
//We cannot get a better result on training corpus, so reduce learning rate
rnn.LearningRate = rnn.LearningRate / 2.0f;
}
else

if (betterValidateNet == true)
{
//If current model is better than before, save it into file
//We got better result on validated corpus, save this model
Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
rnn.SaveModel(m_modelSetting.ModelFile);
}


//if ((ValidationSet != null && betterValidateNet == false) ||
// (ValidationSet == null && ppl >= lastPPL))
//{
// rnn.LearningRate = rnn.LearningRate / 2.0f;
//}
//else
//{
// //If current model is better than before, save it into file
// Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
// rnn.SaveModel(m_modelSetting.ModelFile);
//}

lastPPL = ppl;

iter++;
Expand Down
26 changes: 13 additions & 13 deletions RNNSharp/SimpleRNN.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ public class SimpleRNN : RNN
protected double[][] bptt_fea;
protected SparseVector[] bptt_inputs = new SparseVector[MAX_RNN_HIST];

protected Matrix<double> mat_bptt_syn0_w;
protected Matrix<double> mat_bptt_syn0_ph;
protected Matrix<double> mat_bptt_synf;
protected Matrix<double> Input2HiddenWeightsDelta;
protected Matrix<double> HiddenBpttWeightsDelta;
protected Matrix<double> Feature2HiddenWeightsDelta;

//Last hidden layer status
protected SimpleLayer neuLastHidden;
Expand Down Expand Up @@ -263,7 +263,7 @@ private void learnBptt(State state)
int i = 0;
if (DenseFeatureSize > 0)
{
vector_a = mat_bptt_synf[a];
vector_a = Feature2HiddenWeightsDelta[a];
i = 0;
while (i < DenseFeatureSize - Vector<double>.Count)
{
Expand All @@ -283,15 +283,15 @@ private void learnBptt(State state)
}
//sparse weight update hidden->input
vector_a = mat_bptt_syn0_w[a];
vector_a = Input2HiddenWeightsDelta[a];
for (i = 0; i < sparse.Count; i++)
{
var entry = sparse.GetEntry(i);
vector_a[entry.Key] += er * entry.Value;
}
//bptt weight update
vector_a = mat_bptt_syn0_ph[a];
vector_a = HiddenBpttWeightsDelta[a];
i = 0;
while (i < L1 - Vector<double>.Count)
{
Expand Down Expand Up @@ -340,7 +340,7 @@ private void learnBptt(State state)
//Update bptt feature weights
vector_b = HiddenBpttWeights[b];
vector_bf = mat_bptt_syn0_ph[b];
vector_bf = HiddenBpttWeightsDelta[b];
vector_lr = HiddenBpttWeightsLearningRate[b];
int i = 0;
Expand Down Expand Up @@ -383,7 +383,7 @@ private void learnBptt(State state)
if (DenseFeatureSize > 0)
{
vector_b = Feature2HiddenWeights[b];
vector_bf = mat_bptt_synf[b];
vector_bf = Feature2HiddenWeightsDelta[b];
vector_lr = Feature2HiddenWeightsLearningRate[b];
i = 0;
Expand Down Expand Up @@ -426,7 +426,7 @@ private void learnBptt(State state)
//Update sparse feature weights
vector_b = Input2HiddenWeights[b];
vector_bf = mat_bptt_syn0_w[b];
vector_bf = Input2HiddenWeightsDelta[b];
for (int step = 0; step < bptt + bptt_block - 2; step++)
{
var sparse = bptt_inputs[step];
Expand Down Expand Up @@ -466,9 +466,9 @@ public void resetBpttMem()
bptt_fea[i] = new double[DenseFeatureSize];
}

mat_bptt_syn0_w = new Matrix<double>(L1, L0);
mat_bptt_syn0_ph = new Matrix<double>(L1, L1);
mat_bptt_synf = new Matrix<double>(L1, DenseFeatureSize);
Input2HiddenWeightsDelta = new Matrix<double>(L1, L0);
HiddenBpttWeightsDelta = new Matrix<double>(L1, L1);
Feature2HiddenWeightsDelta = new Matrix<double>(L1, DenseFeatureSize);
}

public override void CleanStatus()
Expand Down Expand Up @@ -641,7 +641,7 @@ public override void LoadModel(string filename)

private void CreateCells()
{
    // Allocate the network layers. Dense features are supplied externally
    // per sequence, so no buffer is allocated here; the first assignment in
    // the original (new SingleVector(DenseFeatureSize)) was a dead store
    // immediately overwritten and has been removed.
    neuFeatures = null;
    OutputLayer = new SimpleLayer(L2);
    neuHidden = new SimpleLayer(L1);
}
Expand Down
8 changes: 7 additions & 1 deletion RNNSharpConsole/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class Program
static int nBest = 1;
static int iDir = 0;
static int iVQ = 0;
static float gradientCutoff = 15.0f;

static void UsageTitle()
{
Expand Down Expand Up @@ -94,8 +95,11 @@ static void UsageTrain()
Console.WriteLine(" -vq <int>");
Console.WriteLine("\tModel vector quantization, 0 is disable, 1 is enable. default is 0");

Console.WriteLine(" -grad <float>");
Console.WriteLine("\tGradient cut-off. Default is 15.0f");

Console.WriteLine();
Console.WriteLine("Example: RNNSharpConsole.exe -mode train -trainfile train.txt -validfile valid.txt -modelfile model.bin -ftrfile features.txt -tagfile tags.txt -modeltype 0 -layersize 200 -alpha 0.1 -crf 1 -maxiter 20 -savestep 200K -dir 0 -vq 0");
Console.WriteLine("Example: RNNSharpConsole.exe -mode train -trainfile train.txt -validfile valid.txt -modelfile model.bin -ftrfile features.txt -tagfile tags.txt -modeltype 0 -layersize 200 -alpha 0.1 -crf 1 -maxiter 20 -savestep 200K -dir 0 -vq 0 -grad 15.0");

}

Expand Down Expand Up @@ -147,6 +151,7 @@ static void InitParameters(string[] args)
if ((i = ArgPos("-nbest", args)) >= 0) nBest = int.Parse(args[i + 1]);
if ((i = ArgPos("-dir", args)) >= 0) iDir = int.Parse(args[i + 1]);
if ((i = ArgPos("-vq", args)) >= 0) iVQ = int.Parse(args[i + 1]);
if ((i = ArgPos("-grad", args)) >= 0) gradientCutoff = float.Parse(args[i + 1]);

if ((i = ArgPos("-savestep", args)) >= 0)
{
Expand Down Expand Up @@ -433,6 +438,7 @@ private static void Train()
RNNConfig.LearningRate = alpha;
RNNConfig.Dropout = dropout;
RNNConfig.Bptt = bptt;
RNNConfig.GradientCutoff = gradientCutoff;

//Dump RNN setting on console
RNNConfig.DumpSetting();
Expand Down

0 comments on commit 2d58c7a

Please sign in to comment.