From 513cb0cd4d71393ee5b5c7e62fc6466ae80ba4a3 Mon Sep 17 00:00:00 2001 From: Zhongkai Fu Date: Thu, 25 Feb 2016 08:40:15 -0800 Subject: [PATCH] Revert "#1. Fix Forward-LSTM crash bug #2. Improve encoding performance by SIMD instructions" This reverts commit 1a3070ce9cf5189b66b912782e50d6a78644b50c. --- RNNSharp/BiRNN.cs | 39 ++++------ RNNSharp/LSTMRNN.cs | 91 ++++++++++++++-------- RNNSharp/MathUtil.cs | 2 +- RNNSharp/Matrix.cs | 13 +--- RNNSharp/RNN.cs | 119 ++++++++++++---------------- RNNSharp/RNNEncoder.cs | 4 +- RNNSharp/RNNSharp.csproj | 10 +-- RNNSharp/SimpleRNN.cs | 162 ++++++++++++++------------------------- RNNSharp/neuron.cs | 12 +-- RNNSharp/packages.config | 8 -- dll/txt2vec.dll | Bin 23552 -> 23552 bytes 11 files changed, 198 insertions(+), 262 deletions(-) delete mode 100644 RNNSharp/packages.config diff --git a/RNNSharp/BiRNN.cs b/RNNSharp/BiRNN.cs index 84ccd8c..6e9aea7 100644 --- a/RNNSharp/BiRNN.cs +++ b/RNNSharp/BiRNN.cs @@ -3,7 +3,6 @@ using System.Threading.Tasks; using AdvUtils; using System.Collections.Generic; -using System.Numerics; /// /// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) @@ -14,7 +13,6 @@ class BiRNN : RNN { private RNN forwardRNN; private RNN backwardRNN; - private Vector vecConst2 = new Vector(2.0f); public BiRNN(RNN s_forwardRNN, RNN s_backwardRNN) { @@ -131,7 +129,7 @@ public override float LearningRate } } - public override float GradientCutoff + public override double GradientCutoff { get { @@ -211,7 +209,7 @@ public override void InitMem() backwardRNN.InitMem(); //Create and intialise the weights from hidden to output layer, these are just normal weights - Hidden2OutputWeight = new Matrix(L2, L1); + Hidden2OutputWeight = new Matrix(L2, L1); for (int i = 0; i < Hidden2OutputWeight.Height; i++) { @@ -224,7 +222,7 @@ public override void InitMem() Hidden2OutputWeightLearningRate = new Matrix(L2, L1); } - public SimpleLayer[] InnerDecode(Sequence pSequence, out SimpleLayer[] outputHiddenLayer, out Matrix rawOutputLayer) + public SimpleLayer[] InnerDecode(Sequence pSequence, out SimpleLayer[] outputHiddenLayer, out Matrix rawOutputLayer) { int numStates = pSequence.States.Length; SimpleLayer[] mForward = null; @@ -268,18 +266,14 @@ public SimpleLayer[] InnerDecode(Sequence pSequence, out SimpleLayer[] outputHid SimpleLayer forwardCells = mForward[curState]; SimpleLayer backwardCells = mBackward[curState]; - for (int i = 0; i < forwardRNN.L1; i+=Vector.Count) + for (int i = 0; i < forwardRNN.L1; i++) { - Vector v1 = new Vector(forwardCells.cellOutput, i); - Vector v2 = new Vector(backwardCells.cellOutput, i); - Vector v = (v1 + v2) / vecConst2; - - v.CopyTo(cells.cellOutput, i); + cells.cellOutput[i] = (forwardCells.cellOutput[i] + backwardCells.cellOutput[i]) / 2.0; } }); //Calculate output layer - Matrix tmp_rawOutputLayer = new Matrix(numStates, L2); + Matrix tmp_rawOutputLayer = new Matrix(numStates, L2); SimpleLayer[] seqOutput = new SimpleLayer[numStates]; Parallel.For(0, numStates, parallelOption, curState => { @@ -288,7 +282,7 @@ public SimpleLayer[] InnerDecode(Sequence pSequence, out SimpleLayer[] outputHid matrixXvectorADD(outputCells, mergedHiddenLayer[curState], Hidden2OutputWeight, L2, L1, 0); - float[] tmp_vector = tmp_rawOutputLayer[curState]; + double[] tmp_vector = tmp_rawOutputLayer[curState]; outputCells.cellOutput.CopyTo(tmp_vector, 0); //Activation on output layer @@ -307,7 +301,7 @@ public override int[] PredictSentenceCRF(Sequence pSequence, RunningMode running int numStates = pSequence.States.Length; //Predict 
output SimpleLayer[] mergedHiddenLayer = null; - Matrix rawOutputLayer = null; + Matrix rawOutputLayer = null; SimpleLayer[] seqOutput = InnerDecode(pSequence, out mergedHiddenLayer, out rawOutputLayer); ForwardBackward(numStates, rawOutputLayer); @@ -332,7 +326,7 @@ public override int[] PredictSentenceCRF(Sequence pSequence, RunningMode running { int label = pSequence.States[curState].Label; SimpleLayer layer = seqOutput[curState]; - float[] CRFOutputLayer = CRFSeqOutput[curState]; + double[] CRFOutputLayer = CRFSeqOutput[curState]; //For standard RNN for (int c = 0; c < L2; c++) @@ -348,14 +342,14 @@ public override int[] PredictSentenceCRF(Sequence pSequence, RunningMode running return predict; } - public override Matrix PredictSentence(Sequence pSequence, RunningMode runningMode) + public override Matrix PredictSentence(Sequence pSequence, RunningMode runningMode) { //Reset the network int numStates = pSequence.States.Length; //Predict output SimpleLayer[] mergedHiddenLayer = null; - Matrix rawOutputLayer = null; + Matrix rawOutputLayer = null; SimpleLayer[] seqOutput = InnerDecode(pSequence, out mergedHiddenLayer, out rawOutputLayer); if (runningMode != RunningMode.Test) @@ -380,7 +374,7 @@ public override Matrix PredictSentence(Sequence pSequence, RunningMode ru { layer.er[c] = -layer.cellOutput[c]; } - layer.er[label] = 1.0f - layer.cellOutput[label]; + layer.er[label] = 1.0 - layer.cellOutput[label]; } LearnTwoRNN(pSequence, mergedHiddenLayer, seqOutput); @@ -413,17 +407,18 @@ private void LearnTwoRNN(Sequence pSequence, SimpleLayer[] mergedHiddenLayer, Si for (int i = 0; i < Hidden2OutputWeight.Height; i++) { //update weights for hidden to output layer - float er = outputCells.er[i]; - float[] vector_i = Hidden2OutputWeight[i]; + double er = outputCells.er[i]; + double[] vector_i = Hidden2OutputWeight[i]; for (int k = 0; k < Hidden2OutputWeight.Width; k++) { double delta = NormalizeGradient(mergedHiddenCells.cellOutput[k] * er); double newLearningRate = UpdateLearningRate(Hidden2OutputWeightLearningRate, i, k, delta); - vector_i[k] += (float)(newLearningRate * delta); + vector_i[k] += newLearningRate * delta; } } } + }, ()=> { @@ -490,7 +485,7 @@ public override void computeHiddenLayer(State state, bool isTrain = true) throw new NotImplementedException("computeHiddenLayer is not implemented in BiRNN"); } - public override void computeOutput(float[] doutput) + public override void computeOutput(double[] doutput) { throw new NotImplementedException("computeOutput is not implemented in BiRNN"); } diff --git a/RNNSharp/LSTMRNN.cs b/RNNSharp/LSTMRNN.cs index c569f85..db52eb4 100644 --- a/RNNSharp/LSTMRNN.cs +++ b/RNNSharp/LSTMRNN.cs @@ -30,6 +30,10 @@ public class LSTMCell : SimpleCell public double wCellForget; public double wCellOut; + public float dCellInLearningRate; + public float dCellForgetLearningRate; + public float dCellOutLearningRate; + //partial derivatives public double dSWCellIn; public double dSWCellForget; @@ -48,6 +52,22 @@ public struct LSTMWeight public float wInputOutputGate; } + //public struct LSTMWeightLearningRate + //{ + // public float dInputCellLearningRate; + // public float dInputInputGateLearningRate; + // public float dInputForgetGateLearningRate; + // public float dInputOutputGateLearningRate; + //} + + //public struct LSTMWeightDerivative + //{ + // //partial derivatives. 
dont need partial derivative for output gate as it uses BP not RTRL + // public double dSInputCell; + // public double dSInputInputGate; + // public double dSInputForgetGate; + //} + public class LSTMRNN : RNN { public LSTMCell[] neuHidden; //neurons in hidden layer @@ -56,15 +76,10 @@ public class LSTMRNN : RNN protected Vector4[][] Input2HiddenLearningRate; protected Vector4[][] Feature2HiddenLearningRate; - protected Vector3[] CellLearningRate; protected Vector3[][] input2hiddenDeri; protected Vector3[][] feature2hiddenDeri; - private Vector4 vecLearningRate; - private Vector3 vecLearningRate3; - - public LSTMRNN() { ModelType = MODELTYPE.LSTM; @@ -353,7 +368,7 @@ public override void SaveModel(string filename) //weight input->hidden Logger.WriteLine("Saving input2hidden weights..."); saveLSTMWeight(input2hidden, fo); - + if (DenseFeatureSize > 0) { //weight fea->hidden @@ -438,7 +453,7 @@ public override void initWeights() } //Create and intialise the weights from hidden to output layer, these are just normal weights - Hidden2OutputWeight = new Matrix(L2, L1); + Hidden2OutputWeight = new Matrix(L2, L1); for (int i = 0; i < Hidden2OutputWeight.Height; i++) { @@ -484,9 +499,12 @@ public override void CleanStatus() Feature2HiddenLearningRate = new Vector4[L1][]; } - CellLearningRate = new Vector3[L1]; Parallel.For(0, L1, parallelOption, i => { + neuHidden[i].dCellForgetLearningRate = 0; + neuHidden[i].dCellInLearningRate = 0; + neuHidden[i].dCellOutLearningRate = 0; + Input2HiddenLearningRate[i] = new Vector4[L0]; if (DenseFeatureSize > 0) @@ -497,8 +515,6 @@ public override void CleanStatus() }); Hidden2OutputWeightLearningRate = new Matrix(L2, L1); - vecLearningRate = new Vector4(LearningRate, LearningRate, LearningRate, LearningRate); - vecLearningRate3 = new Vector3(LearningRate, LearningRate, LearningRate); } public override void InitMem() @@ -567,7 +583,7 @@ public override void ComputeHiddenLayerErr() //find the error by find the product of the output errors and their weight connection. 
SimpleCell cell = neuHidden[i]; - cell.er = 0.0f; + cell.er = 0.0; if (cell.mask == false) { @@ -584,22 +600,30 @@ public override void LearnOutputWeight() //update weights for hidden to output layer Parallel.For(0, L1, parallelOption, i => { - float cellOutput = neuHidden[i].cellOutput; + double cellOutput = neuHidden[i].cellOutput; for (int k = 0; k < L2; k++) { - float delta = NormalizeGradient(cellOutput * OutputLayer.er[k]); - double newLearningRate = UpdateLearningRate(Hidden2OutputWeightLearningRate, k, i, delta); + double delta = NormalizeGradient(cellOutput * OutputLayer.er[k]); + double newLearningRate = UpdateLearningRate(Hidden2OutputWeightLearningRate, i, k, delta); - Hidden2OutputWeight[k][i] += (float)(newLearningRate * delta); + Hidden2OutputWeight[k][i] += newLearningRate * delta; } }); } + public double UpdateLearningRate(ref float mg, double delta) + { + double dg = mg + delta * delta; + mg = (float)dg; + return LearningRate / (1.0 + Math.Sqrt(dg)); + } + public override void LearnNet(State state, int numStates, int curState) { //Get sparse feature and apply it into hidden layer var sparse = state.SparseData; int sparseFeatureSize = sparse.Count; + Vector4 vecLearningRate = new Vector4(LearningRate, LearningRate, LearningRate, LearningRate); //put variables for derivaties in weight class and cell class Parallel.For(0, L1, parallelOption, i => @@ -626,6 +650,8 @@ public override void LearnNet(State state, int numStates, int curState) (float)Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn, (float)ci_previousCellState_mul_SigmoidDerivative_ci_netForget); + double delta = 0; + double newLearningRate = 0; for (int k = 0; k < sparseFeatureSize; k++) { var entry = sparse.GetEntry(k); @@ -647,7 +673,9 @@ public override void LearnNet(State state, int numStates, int curState) vecAlpha = wlr + vecAlpha; wlr_i[entry.Key] = vecAlpha; - vecAlpha = vecLearningRate / (Vector4.SquareRoot(vecAlpha) + Vector4.One); + vecAlpha = Vector4.SquareRoot(vecAlpha) + Vector4.One; + vecAlpha = vecLearningRate / vecAlpha; + vecDelta = vecAlpha * vecDelta; w.wInputCell += vecDelta.X; @@ -685,7 +713,9 @@ public override void LearnNet(State state, int numStates, int curState) vecAlpha = wlr + vecAlpha; wlr_i[j] = vecAlpha; - vecAlpha = vecLearningRate / (Vector4.SquareRoot(vecAlpha) + Vector4.One); + vecAlpha = Vector4.SquareRoot(vecAlpha) + Vector4.One; + vecAlpha = vecLearningRate / vecAlpha; + vecDelta = vecAlpha * vecDelta; w.wInputCell += vecDelta.X; @@ -706,22 +736,17 @@ public override void LearnNet(State state, int numStates, int curState) //update internal weights - Vector3 vecCellDelta = new Vector3((float)c.dSWCellIn, (float)c.dSWCellForget, (float)c.cellState); - Vector3 vecCellErr = new Vector3(cellStateError, cellStateError, gradientOutputGate); - Vector3 vecCellLearningRate = CellLearningRate[i]; - - vecCellDelta = vecCellErr * vecCellDelta; - vecCellLearningRate += (vecCellDelta * vecCellDelta); - CellLearningRate[i] = vecCellLearningRate; - - //LearningRate / (1.0 + Math.Sqrt(dg)); - vecCellLearningRate = vecLearningRate3 / (Vector3.One + Vector3.SquareRoot(vecCellLearningRate)); - vecCellDelta = vecCellLearningRate * vecCellDelta; + delta = cellStateError * c.dSWCellIn; + newLearningRate = UpdateLearningRate(ref c.dCellInLearningRate, delta); + c.wCellIn += newLearningRate * delta; - c.wCellIn += vecCellDelta.X; - c.wCellForget += vecCellDelta.Y; - c.wCellOut += vecCellDelta.Z; + delta = cellStateError * c.dSWCellForget; + newLearningRate = UpdateLearningRate(ref 
c.dCellForgetLearningRate, delta); + c.wCellForget += newLearningRate * delta; + delta = gradientOutputGate * c.cellState; + newLearningRate = UpdateLearningRate(ref c.dCellOutLearningRate, delta); + c.wCellOut += newLearningRate * delta; neuHidden[i] = c; }); @@ -808,7 +833,7 @@ public override void computeHiddenLayer(State state, bool isTrain = true) //squash output gate cell_j.yOut = Sigmoid(cell_j.netOut); - cell_j.cellOutput = (float)(cell_j.cellState * cell_j.yOut); + cell_j.cellOutput = cell_j.cellState * cell_j.yOut; neuHidden[j] = cell_j; @@ -816,7 +841,7 @@ public override void computeHiddenLayer(State state, bool isTrain = true) } - public override void computeOutput(float[] doutput) + public override void computeOutput(double[] doutput) { matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, L2, L1, 0); if (doutput != null) diff --git a/RNNSharp/MathUtil.cs b/RNNSharp/MathUtil.cs index 5494139..fbb09a2 100644 --- a/RNNSharp/MathUtil.cs +++ b/RNNSharp/MathUtil.cs @@ -7,7 +7,7 @@ namespace RNNSharp { class MathUtil { - public static int GetMaxProbIndex(float [] array) + public static int GetMaxProbIndex(double [] array) { int dim = array.Length; double maxValue = array[0]; diff --git a/RNNSharp/Matrix.cs b/RNNSharp/Matrix.cs index 3af8635..2794bce 100644 --- a/RNNSharp/Matrix.cs +++ b/RNNSharp/Matrix.cs @@ -1,11 +1,10 @@ -using System.Numerics; - + /// /// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) /// namespace RNNSharp { - public class Matrix where T : struct + public class Matrix { public int Height { get; set; } // the number of rows @@ -42,13 +41,7 @@ public Matrix CopyTo() for (int i = 0; i < Height; i++) { - T[] m_i = m[i]; - T[] m_saData_i = m_saData[i]; - for (int j = 0; j < Width; j += Vector.Count) - { - Vector v1 = new Vector(m_saData_i, j); - v1.CopyTo(m_i, j); - } + m_saData[i].CopyTo(m[i], 0); } return m; diff --git a/RNNSharp/RNN.cs b/RNNSharp/RNN.cs index 87052f3..86f5302 100644 --- a/RNNSharp/RNN.cs +++ b/RNNSharp/RNN.cs @@ -3,7 +3,6 @@ using System.Threading.Tasks; using System.IO; using AdvUtils; -using System.Numerics; /// /// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) @@ -48,7 +47,7 @@ abstract public class RNN public string ModelTempFile { get { return ModelFile + ".tmp"; } } public virtual MODELDIRECTION ModelDirection { get; set; } public virtual bool bVQ { get; set; } - public virtual float GradientCutoff { get; set; } + public virtual double GradientCutoff { get; set; } public virtual float Dropout { get; set; } public virtual float LearningRate { get; set; } public virtual int MaxIter { get; set; } @@ -59,13 +58,13 @@ abstract public class RNN public virtual int L2 { get; set; } public MODELTYPE ModelType { get; set; } - public Matrix CRFTagTransWeights { get; set; } + public Matrix CRFTagTransWeights { get; set; } public SimpleLayer OutputLayer { get; set; } - public Matrix Hidden2OutputWeight; + public Matrix Hidden2OutputWeight; public Matrix Hidden2OutputWeightLearningRate; // CRF result output - protected Matrix CRFSeqOutput; + protected Matrix CRFSeqOutput; protected double logp; protected double minTknErrRatio = double.MaxValue; protected ParallelOptions parallelOption = new ParallelOptions(); @@ -76,7 +75,7 @@ abstract public class RNN public virtual void setTagBigramTransition(List> m) { - CRFTagTransWeights = new Matrix(L2, L2); + CRFTagTransWeights = new Matrix(L2, L2); for (int i = 0; i < L2; i++) { for (int j = 0; j < L2; j++) @@ -106,7 +105,7 @@ public double UpdateLearningRate(Matrix m, int i, int j, 
double delta) } //Save matrix into file as binary format - protected void saveMatrixBin(Matrix mat, BinaryWriter fo) + protected void saveMatrixBin(Matrix mat, BinaryWriter fo) { //Save the width and height of the matrix fo.Write(mat.Width); @@ -169,14 +168,14 @@ protected void saveMatrixBin(Matrix mat, BinaryWriter fo) } } - protected Matrix loadMatrixBin(BinaryReader br) + protected Matrix loadMatrixBin(BinaryReader br) { int width = br.ReadInt32(); int height = br.ReadInt32(); int vqSize = br.ReadInt32(); Logger.WriteLine("Loading matrix. width: {0}, height: {1}, vqSize: {2}", width, height, vqSize); - Matrix m = new Matrix(height, width); + Matrix m = new Matrix(height, width); if (vqSize == 0) { for (int r = 0; r < height; r++) @@ -202,7 +201,7 @@ protected Matrix loadMatrixBin(BinaryReader br) for (int c = 0; c < width; c++) { int vqIndex = br.ReadByte(); - m[r][c] = (float)codeBook[vqIndex]; + m[r][c] = codeBook[vqIndex]; } } } @@ -239,13 +238,13 @@ public void setInputLayer(State state, int curState, int numStates, int[] predic public abstract void netReset(bool updateNet = false); public abstract void computeHiddenLayer(State state, bool isTrain = true); - public abstract void computeOutput(float[] doutput); + public abstract void computeOutput(double[] doutput); - public virtual Matrix PredictSentence(Sequence pSequence, RunningMode runningMode) + public virtual Matrix PredictSentence(Sequence pSequence, RunningMode runningMode) { int numStates = pSequence.States.Length; - Matrix m = new Matrix(numStates, L2); + Matrix m = new Matrix(numStates, L2); int[] predicted = new int[numStates]; bool isTraining = true; if (runningMode == RunningMode.Train) @@ -307,13 +306,11 @@ public void SoftmaxLayer(SimpleLayer layer) if (cellOutput < -50) cellOutput = -50; //for numerical stability double val = Math.Exp(cellOutput); sum += val; - layer.cellOutput[c] = (float)val; + layer.cellOutput[c] = val; } - - float sumf = (float)sum; for (int c = 0; c < L2; c++) { - layer.cellOutput[c] /= sumf; + layer.cellOutput[c] /= sum; } } @@ -336,7 +333,7 @@ public virtual int[] PredictSentenceCRF(Sequence pSequence, RunningMode runningM { int numStates = pSequence.States.Length; - Matrix nnOutput = PredictSentence(pSequence, RunningMode.Test); + Matrix nnOutput = PredictSentence(pSequence, RunningMode.Test); ForwardBackward(numStates, nnOutput); if (runningMode != RunningMode.Test) @@ -403,16 +400,16 @@ public void UpdateBigramTransition(Sequence seq) //Update tag Bigram LM for (int b = 0;b < L2;b++) { - float[] vector_b = CRFTagTransWeights[b]; + double[] vector_b = CRFTagTransWeights[b]; double[] vector_delta_b = m_DeltaBigramLM[b]; for (int a = 0; a < L2; a++) { - vector_b[a] += (float)(LearningRate * NormalizeGradient(vector_delta_b[a])); + vector_b[a] += LearningRate * NormalizeGradient(vector_delta_b[a]); } } } - public void ForwardBackward(int numStates, Matrix m_RawOutput) + public void ForwardBackward(int numStates, Matrix m_RawOutput) { //forward double[][] alphaSet = new double[numStates][]; @@ -472,12 +469,12 @@ public void ForwardBackward(int numStates, Matrix m_RawOutput) } //Calculate the output probability of each node - CRFSeqOutput = new Matrix(numStates, L2); + CRFSeqOutput = new Matrix(numStates, L2); for (int i = 0; i < numStates; i++) { for (int j = 0; j < L2; j++) { - CRFSeqOutput[i][j] = (float)Math.Exp(alphaSet[i][j] + betaSet[i][j] - m_RawOutput[i][j] - Z_); + CRFSeqOutput[i][j] = Math.Exp(alphaSet[i][j] + betaSet[i][j] - m_RawOutput[i][j] - Z_); } } @@ -507,7 +504,7 @@ public 
float RandInitWeight() public virtual double TrainNet(DataSet trainingSet, int iter) { DateTime start = DateTime.Now; - Logger.WriteLine("Iter " + iter + " begins with learning rate alpha = " + LearningRate + " ..."); + Logger.WriteLine("[TRACE] Iter " + iter + " begins with learning rate alpha = " + LearningRate + " ..."); //Initialize varibles logp = 0; @@ -519,7 +516,7 @@ public virtual double TrainNet(DataSet trainingSet, int iter) int wordCnt = 0; int tknErrCnt = 0; int sentErrCnt = 0; - Logger.WriteLine("Progress = 0/" + numSequence / 1000.0 + "K\r"); + Logger.WriteLine("[TRACE] Progress = 0/" + numSequence / 1000.0 + "K\r"); for (int curSequence = 0; curSequence < numSequence; curSequence++) { Sequence pSequence = trainingSet.SequenceList[curSequence]; @@ -532,7 +529,7 @@ public virtual double TrainNet(DataSet trainingSet, int iter) } else { - Matrix m; + Matrix m; m = PredictSentence(pSequence, RunningMode.Train); predicted = GetBestResult(m); } @@ -546,10 +543,10 @@ public virtual double TrainNet(DataSet trainingSet, int iter) if ((curSequence + 1) % 1000 == 0) { - Logger.WriteLine("Progress = {0} ", (curSequence + 1) / 1000 + "K/" + numSequence / 1000.0 + "K"); - Logger.WriteLine("Training cross-entropy = {0} ", -logp / Math.Log10(2.0) / wordCnt); - Logger.WriteLine("Error token ratio = {0}%", (double)tknErrCnt / (double)wordCnt * 100.0); - Logger.WriteLine("Error sentence ratio = {0}%", (double)sentErrCnt / (double)curSequence * 100.0); + Logger.WriteLine("[TRACE] Progress = {0} ", (curSequence + 1) / 1000 + "K/" + numSequence / 1000.0 + "K"); + Logger.WriteLine(" train cross-entropy = {0} ", -logp / Math.Log10(2.0) / wordCnt); + Logger.WriteLine(" Error token ratio = {0}%", (double)tknErrCnt / (double)wordCnt * 100.0); + Logger.WriteLine(" Error sentence ratio = {0}%", (double)sentErrCnt / (double)curSequence * 100.0); } if (SaveStep > 0 && (curSequence + 1) % SaveStep == 0) @@ -565,9 +562,9 @@ public virtual double TrainNet(DataSet trainingSet, int iter) double entropy = -logp / Math.Log10(2.0) / wordCnt; double ppl = exp_10(-logp / wordCnt); - Logger.WriteLine("Iter " + iter + " completed"); - Logger.WriteLine("Sentences = " + numSequence + ", time escape = " + duration + "s, speed = " + numSequence / duration.TotalSeconds); - Logger.WriteLine("In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl); + Logger.WriteLine("[TRACE] Iter " + iter + " completed"); + Logger.WriteLine("[TRACE] Sentences = " + numSequence + ", time escape = " + duration + "s, speed = " + numSequence / duration.TotalSeconds); + Logger.WriteLine("[TRACE] In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl); return ppl; } @@ -592,19 +589,6 @@ public static void CheckModelFileType(string filename, out MODELTYPE modelType, } - protected float NormalizeGradient(float err) - { - if (err > GradientCutoff) - { - err = GradientCutoff; - } - else if (err < -GradientCutoff) - { - err = -GradientCutoff; - } - return err; - } - protected double NormalizeGradient(double err) { if (err > GradientCutoff) @@ -617,15 +601,16 @@ protected double NormalizeGradient(double err) } return err; } - public void matrixXvectorADD(SimpleLayer dest, SimpleCell[] srcvec, Matrix srcmatrix, int DestSize, int SrcSize, int type) + + public void matrixXvectorADD(SimpleLayer dest, SimpleCell[] srcvec, Matrix srcmatrix, int DestSize, int SrcSize, int type) { if (type == 0) { //ac mod Parallel.For(0, DestSize, parallelOption, i => { - float[] vector_i 
= srcmatrix[i]; - float cellOutput = 0; + double[] vector_i = srcmatrix[i]; + double cellOutput = 0; for (int j = 0; j < SrcSize; j++) { cellOutput += srcvec[j].cellOutput * vector_i[j]; @@ -639,7 +624,7 @@ public void matrixXvectorADD(SimpleLayer dest, SimpleCell[] srcvec, Matrix { - float er = 0; + double er = 0; for (int j = 0; j < SrcSize; j++) { er += srcvec[j].er * srcmatrix[j][i]; @@ -650,20 +635,18 @@ public void matrixXvectorADD(SimpleLayer dest, SimpleCell[] srcvec, Matrix srcmatrix, int DestSize, int SrcSize, int type) + public void matrixXvectorADD(SimpleLayer dest, SimpleLayer srcvec, Matrix srcmatrix, int DestSize, int SrcSize, int type) { if (type == 0) { //ac mod Parallel.For(0, DestSize, parallelOption, i => { - float[] vector_i = srcmatrix[i]; - float cellOutput = 0; - for (int j = 0; j < SrcSize; j+=Vector.Count) + double[] vector_i = srcmatrix[i]; + double cellOutput = 0; + for (int j = 0; j < SrcSize; j++) { - Vector v1 = new Vector(srcvec.cellOutput, j); - Vector v2 = new Vector(vector_i, j); - cellOutput += Vector.Dot(v1, v2); + cellOutput += srcvec.cellOutput[j] * vector_i[j]; } dest.cellOutput[i] = cellOutput; }); @@ -673,7 +656,7 @@ public void matrixXvectorADD(SimpleLayer dest, SimpleLayer srcvec, Matrix { Parallel.For(0, DestSize, parallelOption, i => { - float er = 0; + double er = 0; for (int j = 0; j < SrcSize; j++) { er += srcvec.er[j] * srcmatrix[j][i]; @@ -684,7 +667,7 @@ public void matrixXvectorADD(SimpleLayer dest, SimpleLayer srcvec, Matrix } } - public int[] GetBestResult(Matrix ys) + public int[] GetBestResult(Matrix ys) { int[] output = new int[ys.Height]; @@ -698,7 +681,7 @@ public int[] GetBestResult(Matrix ys) public int[] DecodeNN(Sequence seq) { - Matrix ys = PredictSentence(seq, RunningMode.Test); + Matrix ys = PredictSentence(seq, RunningMode.Test); return GetBestResult(ys); } @@ -707,11 +690,11 @@ public int[][] DecodeNBestCRF(Sequence seq, int N) { //ys contains the output of RNN for each word - Matrix ys = PredictSentence(seq, RunningMode.Test); + Matrix ys = PredictSentence(seq, RunningMode.Test); int n = seq.States.Length; int K = L2; - Matrix STP = CRFTagTransWeights; + Matrix STP = CRFTagTransWeights; PAIR[, ,] vPath = new PAIR[n, K, N]; int DUMP_LABEL = -1; double[,] vPreAlpha = new double[K, N]; @@ -796,7 +779,7 @@ public int[][] DecodeNBestCRF(Sequence seq, int N) return vTagOutput; } - public int[] Viterbi(Matrix ys, int seqLen) + public int[] Viterbi(Matrix ys, int seqLen) { int[,] vPath = new int[seqLen, L2]; @@ -850,7 +833,7 @@ public int[] Viterbi(Matrix ys, int seqLen) public int[] DecodeCRF(Sequence seq) { //ys contains the output of RNN for each word - Matrix ys = PredictSentence(seq, RunningMode.Test); + Matrix ys = PredictSentence(seq, RunningMode.Test); return Viterbi(ys, seq.States.Length); } @@ -894,7 +877,7 @@ public void ComputeOutputLayerErr(State state, int timeat) public virtual bool ValidateNet(DataSet validationSet, int iter) { - Logger.WriteLine("Start validation ..."); + Logger.WriteLine("[TRACE] Start validation ..."); int wordcn = 0; int tknErrCnt = 0; int sentErrCnt = 0; @@ -914,7 +897,7 @@ public virtual bool ValidateNet(DataSet validationSet, int iter) } else { - Matrix m; + Matrix m; m = PredictSentence(pSequence, RunningMode.Validate); predicted = GetBestResult(m); } @@ -932,8 +915,8 @@ public virtual bool ValidateNet(DataSet validationSet, int iter) double tknErrRatio = (double)tknErrCnt / (double)wordcn * 100.0; double sentErrRatio = (double)sentErrCnt / (double)numSequence * 100.0; - 
Logger.WriteLine("In validation: error token ratio = {0}% error sentence ratio = {1}%", tknErrRatio, sentErrRatio); - Logger.WriteLine("In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl); + Logger.WriteLine("[TRACE] In validation: error token ratio = {0}% error sentence ratio = {1}%", tknErrRatio, sentErrRatio); + Logger.WriteLine("[TRACE] In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl); Logger.WriteLine(""); bool bUpdate = false; diff --git a/RNNSharp/RNNEncoder.cs b/RNNSharp/RNNEncoder.cs index 6fe958a..bb4b7db 100644 --- a/RNNSharp/RNNEncoder.cs +++ b/RNNSharp/RNNEncoder.cs @@ -64,7 +64,7 @@ public void Train() rnn.MaxIter = m_modelSetting.MaxIteration; rnn.IsCRFTraining = m_modelSetting.IsCRFTraining; rnn.LearningRate = m_modelSetting.LearningRate; - rnn.GradientCutoff = 15.0f; + rnn.GradientCutoff = 15.0; rnn.Dropout = m_modelSetting.Dropout; rnn.L1 = m_modelSetting.NumHidden; @@ -82,7 +82,7 @@ public void Train() Logger.WriteLine(""); - Logger.WriteLine("Iterative training begins ..."); + Logger.WriteLine("[TRACE] Iterative training begins ..."); double lastPPL = double.MaxValue; double lastAlpha = rnn.LearningRate; int iter = 0; diff --git a/RNNSharp/RNNSharp.csproj b/RNNSharp/RNNSharp.csproj index c451a41..9c72646 100644 --- a/RNNSharp/RNNSharp.csproj +++ b/RNNSharp/RNNSharp.csproj @@ -37,10 +37,7 @@ - - ..\packages\System.Numerics.Vectors.4.1.0\lib\net46\System.Numerics.Vectors.dll - True - + @@ -73,11 +70,6 @@ - - - Designer - -