Skip to content

Commit

Permalink
Revert "#1. Fix Forward-LSTM crash bug #2. Improve encoding performance by SIMD instructions"

Browse files Browse the repository at this point in the history

This reverts commit 1a3070c.
  • Loading branch information
zhongkaifu committed Feb 25, 2016
1 parent 4fad1b6 commit 513cb0c
Show file tree
Hide file tree
Showing 11 changed files with 198 additions and 262 deletions.
39 changes: 17 additions & 22 deletions RNNSharp/BiRNN.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
using System.Threading.Tasks;
using AdvUtils;
using System.Collections.Generic;
using System.Numerics;

/// <summary>
/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
Expand All @@ -14,7 +13,6 @@ class BiRNN : RNN
{
private RNN forwardRNN;
private RNN backwardRNN;
private Vector<float> vecConst2 = new Vector<float>(2.0f);

public BiRNN(RNN s_forwardRNN, RNN s_backwardRNN)
{
Expand Down Expand Up @@ -131,7 +129,7 @@ public override float LearningRate
}
}

public override float GradientCutoff
public override double GradientCutoff
{
get
{
Expand Down Expand Up @@ -211,7 +209,7 @@ public override void InitMem()
backwardRNN.InitMem();

//Create and initialise the weights from hidden to output layer, these are just normal weights
Hidden2OutputWeight = new Matrix<float>(L2, L1);
Hidden2OutputWeight = new Matrix<double>(L2, L1);

for (int i = 0; i < Hidden2OutputWeight.Height; i++)
{
Expand All @@ -224,7 +222,7 @@ public override void InitMem()
Hidden2OutputWeightLearningRate = new Matrix<float>(L2, L1);
}

public SimpleLayer[] InnerDecode(Sequence pSequence, out SimpleLayer[] outputHiddenLayer, out Matrix<float> rawOutputLayer)
public SimpleLayer[] InnerDecode(Sequence pSequence, out SimpleLayer[] outputHiddenLayer, out Matrix<double> rawOutputLayer)
{
int numStates = pSequence.States.Length;
SimpleLayer[] mForward = null;
Expand Down Expand Up @@ -268,18 +266,14 @@ public SimpleLayer[] InnerDecode(Sequence pSequence, out SimpleLayer[] outputHid
SimpleLayer forwardCells = mForward[curState];
SimpleLayer backwardCells = mBackward[curState];
for (int i = 0; i < forwardRNN.L1; i+=Vector<float>.Count)
for (int i = 0; i < forwardRNN.L1; i++)
{
Vector<float> v1 = new Vector<float>(forwardCells.cellOutput, i);
Vector<float> v2 = new Vector<float>(backwardCells.cellOutput, i);
Vector<float> v = (v1 + v2) / vecConst2;
v.CopyTo(cells.cellOutput, i);
cells.cellOutput[i] = (forwardCells.cellOutput[i] + backwardCells.cellOutput[i]) / 2.0;
}
});

//Calculate output layer
Matrix<float> tmp_rawOutputLayer = new Matrix<float>(numStates, L2);
Matrix<double> tmp_rawOutputLayer = new Matrix<double>(numStates, L2);
SimpleLayer[] seqOutput = new SimpleLayer[numStates];
Parallel.For(0, numStates, parallelOption, curState =>
{
Expand All @@ -288,7 +282,7 @@ public SimpleLayer[] InnerDecode(Sequence pSequence, out SimpleLayer[] outputHid
matrixXvectorADD(outputCells, mergedHiddenLayer[curState], Hidden2OutputWeight, L2, L1, 0);
float[] tmp_vector = tmp_rawOutputLayer[curState];
double[] tmp_vector = tmp_rawOutputLayer[curState];
outputCells.cellOutput.CopyTo(tmp_vector, 0);
//Activation on output layer
Expand All @@ -307,7 +301,7 @@ public override int[] PredictSentenceCRF(Sequence pSequence, RunningMode running
int numStates = pSequence.States.Length;
//Predict output
SimpleLayer[] mergedHiddenLayer = null;
Matrix<float> rawOutputLayer = null;
Matrix<double> rawOutputLayer = null;
SimpleLayer[] seqOutput = InnerDecode(pSequence, out mergedHiddenLayer, out rawOutputLayer);

ForwardBackward(numStates, rawOutputLayer);
Expand All @@ -332,7 +326,7 @@ public override int[] PredictSentenceCRF(Sequence pSequence, RunningMode running
{
int label = pSequence.States[curState].Label;
SimpleLayer layer = seqOutput[curState];
float[] CRFOutputLayer = CRFSeqOutput[curState];
double[] CRFOutputLayer = CRFSeqOutput[curState];

//For standard RNN
for (int c = 0; c < L2; c++)
Expand All @@ -348,14 +342,14 @@ public override int[] PredictSentenceCRF(Sequence pSequence, RunningMode running
return predict;
}

public override Matrix<float> PredictSentence(Sequence pSequence, RunningMode runningMode)
public override Matrix<double> PredictSentence(Sequence pSequence, RunningMode runningMode)
{
//Reset the network
int numStates = pSequence.States.Length;

//Predict output
SimpleLayer[] mergedHiddenLayer = null;
Matrix<float> rawOutputLayer = null;
Matrix<double> rawOutputLayer = null;
SimpleLayer[] seqOutput = InnerDecode(pSequence, out mergedHiddenLayer, out rawOutputLayer);

if (runningMode != RunningMode.Test)
Expand All @@ -380,7 +374,7 @@ public override Matrix<float> PredictSentence(Sequence pSequence, RunningMode ru
{
layer.er[c] = -layer.cellOutput[c];
}
layer.er[label] = 1.0f - layer.cellOutput[label];
layer.er[label] = 1.0 - layer.cellOutput[label];
}

LearnTwoRNN(pSequence, mergedHiddenLayer, seqOutput);
Expand Down Expand Up @@ -413,17 +407,18 @@ private void LearnTwoRNN(Sequence pSequence, SimpleLayer[] mergedHiddenLayer, Si
for (int i = 0; i < Hidden2OutputWeight.Height; i++)
{
//update weights for hidden to output layer
float er = outputCells.er[i];
float[] vector_i = Hidden2OutputWeight[i];
double er = outputCells.er[i];
double[] vector_i = Hidden2OutputWeight[i];
for (int k = 0; k < Hidden2OutputWeight.Width; k++)
{
double delta = NormalizeGradient(mergedHiddenCells.cellOutput[k] * er);
double newLearningRate = UpdateLearningRate(Hidden2OutputWeightLearningRate, i, k, delta);
vector_i[k] += (float)(newLearningRate * delta);
vector_i[k] += newLearningRate * delta;
}
}
}
},
()=>
{
Expand Down Expand Up @@ -490,7 +485,7 @@ public override void computeHiddenLayer(State state, bool isTrain = true)
throw new NotImplementedException("computeHiddenLayer is not implemented in BiRNN");
}

public override void computeOutput(float[] doutput)
public override void computeOutput(double[] doutput)
{
throw new NotImplementedException("computeOutput is not implemented in BiRNN");
}
Expand Down
91 changes: 58 additions & 33 deletions RNNSharp/LSTMRNN.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ public class LSTMCell : SimpleCell
public double wCellForget;
public double wCellOut;

public float dCellInLearningRate;
public float dCellForgetLearningRate;
public float dCellOutLearningRate;

//partial derivatives
public double dSWCellIn;
public double dSWCellForget;
Expand All @@ -48,6 +52,22 @@ public struct LSTMWeight
public float wInputOutputGate;
}

//public struct LSTMWeightLearningRate
//{
// public float dInputCellLearningRate;
// public float dInputInputGateLearningRate;
// public float dInputForgetGateLearningRate;
// public float dInputOutputGateLearningRate;
//}

//public struct LSTMWeightDerivative
//{
// //partial derivatives. dont need partial derivative for output gate as it uses BP not RTRL
// public double dSInputCell;
// public double dSInputInputGate;
// public double dSInputForgetGate;
//}

public class LSTMRNN : RNN
{
public LSTMCell[] neuHidden; //neurons in hidden layer
Expand All @@ -56,15 +76,10 @@ public class LSTMRNN : RNN

protected Vector4[][] Input2HiddenLearningRate;
protected Vector4[][] Feature2HiddenLearningRate;
protected Vector3[] CellLearningRate;

protected Vector3[][] input2hiddenDeri;
protected Vector3[][] feature2hiddenDeri;

private Vector4 vecLearningRate;
private Vector3 vecLearningRate3;


public LSTMRNN()
{
ModelType = MODELTYPE.LSTM;
Expand Down Expand Up @@ -353,7 +368,7 @@ public override void SaveModel(string filename)
//weight input->hidden
Logger.WriteLine("Saving input2hidden weights...");
saveLSTMWeight(input2hidden, fo);

if (DenseFeatureSize > 0)
{
//weight fea->hidden
Expand Down Expand Up @@ -438,7 +453,7 @@ public override void initWeights()
}

//Create and initialise the weights from hidden to output layer, these are just normal weights
Hidden2OutputWeight = new Matrix<float>(L2, L1);
Hidden2OutputWeight = new Matrix<double>(L2, L1);

for (int i = 0; i < Hidden2OutputWeight.Height; i++)
{
Expand Down Expand Up @@ -484,9 +499,12 @@ public override void CleanStatus()
Feature2HiddenLearningRate = new Vector4[L1][];
}

CellLearningRate = new Vector3[L1];
Parallel.For(0, L1, parallelOption, i =>
{
neuHidden[i].dCellForgetLearningRate = 0;
neuHidden[i].dCellInLearningRate = 0;
neuHidden[i].dCellOutLearningRate = 0;
Input2HiddenLearningRate[i] = new Vector4[L0];
if (DenseFeatureSize > 0)
Expand All @@ -497,8 +515,6 @@ public override void CleanStatus()
});

Hidden2OutputWeightLearningRate = new Matrix<float>(L2, L1);
vecLearningRate = new Vector4(LearningRate, LearningRate, LearningRate, LearningRate);
vecLearningRate3 = new Vector3(LearningRate, LearningRate, LearningRate);
}

public override void InitMem()
Expand Down Expand Up @@ -567,7 +583,7 @@ public override void ComputeHiddenLayerErr()
//find the error by find the product of the output errors and their weight connection.
SimpleCell cell = neuHidden[i];
cell.er = 0.0f;
cell.er = 0.0;
if (cell.mask == false)
{
Expand All @@ -584,22 +600,30 @@ public override void LearnOutputWeight()
//update weights for hidden to output layer
Parallel.For(0, L1, parallelOption, i =>
{
float cellOutput = neuHidden[i].cellOutput;
double cellOutput = neuHidden[i].cellOutput;
for (int k = 0; k < L2; k++)
{
float delta = NormalizeGradient(cellOutput * OutputLayer.er[k]);
double newLearningRate = UpdateLearningRate(Hidden2OutputWeightLearningRate, k, i, delta);
double delta = NormalizeGradient(cellOutput * OutputLayer.er[k]);
double newLearningRate = UpdateLearningRate(Hidden2OutputWeightLearningRate, i, k, delta);
Hidden2OutputWeight[k][i] += (float)(newLearningRate * delta);
Hidden2OutputWeight[k][i] += newLearningRate * delta;
}
});
}

/// <summary>
/// AdaGrad-like per-weight learning-rate update: folds the squared gradient
/// into the running accumulator <paramref name="mg"/> (stored as float, so
/// precision is truncated on write-back) and returns the damped step size.
/// </summary>
/// <param name="mg">Running sum of squared gradients for this weight; updated in place.</param>
/// <param name="delta">Current gradient contribution for this weight.</param>
/// <returns>Effective learning rate: LearningRate / (1 + sqrt(accumulated squared gradient)).</returns>
public double UpdateLearningRate(ref float mg, double delta)
{
double dg = mg + delta * delta;
mg = (float)dg;
return LearningRate / (1.0 + Math.Sqrt(dg));
}

public override void LearnNet(State state, int numStates, int curState)
{
//Get sparse feature and apply it into hidden layer
var sparse = state.SparseData;
int sparseFeatureSize = sparse.Count;
Vector4 vecLearningRate = new Vector4(LearningRate, LearningRate, LearningRate, LearningRate);

//put variables for derivatives in weight class and cell class
Parallel.For(0, L1, parallelOption, i =>
Expand All @@ -626,6 +650,8 @@ public override void LearnNet(State state, int numStates, int curState)
(float)Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn,
(float)ci_previousCellState_mul_SigmoidDerivative_ci_netForget);
double delta = 0;
double newLearningRate = 0;
for (int k = 0; k < sparseFeatureSize; k++)
{
var entry = sparse.GetEntry(k);
Expand All @@ -647,7 +673,9 @@ public override void LearnNet(State state, int numStates, int curState)
vecAlpha = wlr + vecAlpha;
wlr_i[entry.Key] = vecAlpha;
vecAlpha = vecLearningRate / (Vector4.SquareRoot(vecAlpha) + Vector4.One);
vecAlpha = Vector4.SquareRoot(vecAlpha) + Vector4.One;
vecAlpha = vecLearningRate / vecAlpha;
vecDelta = vecAlpha * vecDelta;
w.wInputCell += vecDelta.X;
Expand Down Expand Up @@ -685,7 +713,9 @@ public override void LearnNet(State state, int numStates, int curState)
vecAlpha = wlr + vecAlpha;
wlr_i[j] = vecAlpha;
vecAlpha = vecLearningRate / (Vector4.SquareRoot(vecAlpha) + Vector4.One);
vecAlpha = Vector4.SquareRoot(vecAlpha) + Vector4.One;
vecAlpha = vecLearningRate / vecAlpha;
vecDelta = vecAlpha * vecDelta;
w.wInputCell += vecDelta.X;
Expand All @@ -706,22 +736,17 @@ public override void LearnNet(State state, int numStates, int curState)
//update internal weights
Vector3 vecCellDelta = new Vector3((float)c.dSWCellIn, (float)c.dSWCellForget, (float)c.cellState);
Vector3 vecCellErr = new Vector3(cellStateError, cellStateError, gradientOutputGate);
Vector3 vecCellLearningRate = CellLearningRate[i];
vecCellDelta = vecCellErr * vecCellDelta;
vecCellLearningRate += (vecCellDelta * vecCellDelta);
CellLearningRate[i] = vecCellLearningRate;
//LearningRate / (1.0 + Math.Sqrt(dg));
vecCellLearningRate = vecLearningRate3 / (Vector3.One + Vector3.SquareRoot(vecCellLearningRate));
vecCellDelta = vecCellLearningRate * vecCellDelta;
delta = cellStateError * c.dSWCellIn;
newLearningRate = UpdateLearningRate(ref c.dCellInLearningRate, delta);
c.wCellIn += newLearningRate * delta;
c.wCellIn += vecCellDelta.X;
c.wCellForget += vecCellDelta.Y;
c.wCellOut += vecCellDelta.Z;
delta = cellStateError * c.dSWCellForget;
newLearningRate = UpdateLearningRate(ref c.dCellForgetLearningRate, delta);
c.wCellForget += newLearningRate * delta;
delta = gradientOutputGate * c.cellState;
newLearningRate = UpdateLearningRate(ref c.dCellOutLearningRate, delta);
c.wCellOut += newLearningRate * delta;
neuHidden[i] = c;
});
Expand Down Expand Up @@ -808,15 +833,15 @@ public override void computeHiddenLayer(State state, bool isTrain = true)
//squash output gate
cell_j.yOut = Sigmoid(cell_j.netOut);
cell_j.cellOutput = (float)(cell_j.cellState * cell_j.yOut);
cell_j.cellOutput = cell_j.cellState * cell_j.yOut;
neuHidden[j] = cell_j;
});
}


public override void computeOutput(float[] doutput)
public override void computeOutput(double[] doutput)
{
matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, L2, L1, 0);
if (doutput != null)
Expand Down
2 changes: 1 addition & 1 deletion RNNSharp/MathUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ namespace RNNSharp
{
class MathUtil
{
public static int GetMaxProbIndex(float [] array)
public static int GetMaxProbIndex(double [] array)
{
int dim = array.Length;
double maxValue = array[0];
Expand Down
Loading

0 comments on commit 513cb0c

Please sign in to comment.