diff --git a/RNNSharp/BiRNN.cs b/RNNSharp/BiRNN.cs
index d57b635..8b47a7e 100644
--- a/RNNSharp/BiRNN.cs
+++ b/RNNSharp/BiRNN.cs
@@ -1,11 +1,11 @@
 using System;
-using System.Collections.Generic;
 using System.IO;
-using System.Linq;
-using System.Text;
 using System.Threading.Tasks;
 using AdvUtils;
 
+/// <summary>
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+/// </summary>
 namespace RNNSharp
 {
     class BiRNN : RNN
@@ -18,19 +18,37 @@ public BiRNN(RNN s_forwardRNN, RNN s_backwardRNN)
             forwardRNN = s_forwardRNN;
             backwardRNN = s_backwardRNN;
 
-            m_modeldirection = MODELDIRECTION.BI_DIRECTIONAL;
+            ModelType = forwardRNN.ModelType;
+            ModelDirection = MODELDIRECTION.BI_DIRECTIONAL;
         }
 
-        public override void SetFeatureDimension(int denseFeatueSize, int sparseFeatureSize, int tagSize)
+        public override int L0
         {
-            fea_size = denseFeatueSize;
-            L0 = sparseFeatureSize;
-            L2 = tagSize;
+            get
+            {
+                return forwardRNN.L0;
+            }
 
-            forwardRNN.SetFeatureDimension(denseFeatueSize, sparseFeatureSize, tagSize);
-            backwardRNN.SetFeatureDimension(denseFeatueSize, sparseFeatureSize, tagSize);
+            set
+            {
+                forwardRNN.L0 = value;
+                backwardRNN.L0 = value;
+            }
         }
 
+        public override int L2
+        {
+            get
+            {
+                return forwardRNN.L2;
+            }
+
+            set
+            {
+                forwardRNN.L2 = value;
+                backwardRNN.L2 = value;
+            }
+        }
 
         public override void initWeights()
         {
@@ -39,72 +57,123 @@ public override void initWeights()
 
         }
 
-        public override void SetModelFile(string strModelFile)
+        public override string ModelFile
         {
-            m_strModelFile = strModelFile;
+            get { return forwardRNN.ModelFile; }
+            set
+            {
+                forwardRNN.ModelFile = value;
+                backwardRNN.ModelFile = value;
+            }
+        }
 
-            forwardRNN.mat_hidden2output = mat_hidden2output;
-            backwardRNN.mat_hidden2output = mat_hidden2output;
+        public override long SaveStep
+        {
+            get
+            {
+                return forwardRNN.SaveStep;
+            }
 
-            forwardRNN.SetModelFile(strModelFile);
-            backwardRNN.SetModelFile(strModelFile);
+            set
+            {
+                forwardRNN.SaveStep = value;
+                backwardRNN.SaveStep = value;
+            }
         }
 
-        public override void SetSaveStep(long savestep)
+        public override int MaxIter
         {
-            m_SaveStep = savestep;
+            get
+            {
+                return forwardRNN.MaxIter;
+            }
 
-            forwardRNN.SetSaveStep(savestep);
-            backwardRNN.SetSaveStep(savestep);
+            set
+            {
+                forwardRNN.MaxIter = value;
+                backwardRNN.MaxIter = value;
+            }
         }
 
-        public override void SetMaxIter(int _nMaxIter)
+        public override bool IsCRFTraining
        {
-            m_MaxIter = _nMaxIter;
+            get { return forwardRNN.IsCRFTraining; }
 
-            forwardRNN.SetMaxIter(_nMaxIter);
-            backwardRNN.SetMaxIter(_nMaxIter);
+            set
+            {
+                forwardRNN.IsCRFTraining = value;
+                backwardRNN.IsCRFTraining = value;
+            }
         }
 
-
-        public override void SetCRFTraining(bool b)
+        public override float LearningRate
         {
-            m_bCRFTraining = b;
+            get
+            {
+                return forwardRNN.LearningRate;
+            }
 
-            forwardRNN.SetCRFTraining(b);
-            backwardRNN.SetCRFTraining(b);
+            set
+            {
+                forwardRNN.LearningRate = value;
+                backwardRNN.LearningRate = value;
+            }
         }
 
-        public override void SetLearningRate(double newAlpha)
+        public override float GradientCutoff
         {
-            alpha = newAlpha;
+            get
+            {
+                return forwardRNN.GradientCutoff;
+            }
 
-            forwardRNN.SetLearningRate(newAlpha);
-            backwardRNN.SetLearningRate(newAlpha);
+            set
+            {
+                forwardRNN.GradientCutoff = value;
+                backwardRNN.GradientCutoff = value;
+            }
         }
 
-        public override void SetGradientCutoff(double newGradient)
+        public override float Dropout
         {
-            gradient_cutoff = newGradient;
+            get
+            {
+                return forwardRNN.Dropout;
+            }
 
-            forwardRNN.SetGradientCutoff(newGradient);
-            backwardRNN.SetGradientCutoff(newGradient);
+            set
+            {
+                forwardRNN.Dropout = value;
+                backwardRNN.Dropout = value;
+            }
         }
 
-        public override void SetDropout(double newDropout)
+        public override int L1
         {
-            dropout = newDropout;
+            get
+            {
+                return forwardRNN.L1;
+            }
 
-            forwardRNN.SetDropout(newDropout);
-            backwardRNN.SetDropout(newDropout);
+            set
+            {
+                forwardRNN.L1 = value;
+                backwardRNN.L1 = value;
+            }
         }
 
-        public override void SetHiddenLayerSize(int newsize)
+        public override int DenseFeatureSize
         {
-            L1 = newsize;
+            get
+            {
+                return forwardRNN.DenseFeatureSize;
+            }
 
-            forwardRNN.SetHiddenLayerSize(newsize);
-            backwardRNN.SetHiddenLayerSize(newsize);
+            set
+            {
+                forwardRNN.DenseFeatureSize = value;
+                backwardRNN.DenseFeatureSize = value;
+            }
         }
 
         public override void GetHiddenLayer(Matrix<neuron> m, int curStatus)
@@ -118,20 +187,20 @@ public override void initMem()
             backwardRNN.initMem();
 
             //Create and initialise the weights from hidden to output layer, these are just normal weights
-            mat_hidden2output = new Matrix<double>(L2, L1);
+            Hidden2OutputWeight = new Matrix<double>(L2, L1);
 
-            for (int i = 0; i < mat_hidden2output.GetHeight(); i++)
+            for (int i = 0; i < Hidden2OutputWeight.GetHeight(); i++)
             {
-                for (int j = 0; j < mat_hidden2output.GetWidth(); j++)
+                for (int j = 0; j < Hidden2OutputWeight.GetWidth(); j++)
                 {
-                    mat_hidden2output[i][j] = RandInitWeight();
+                    Hidden2OutputWeight[i][j] = RandInitWeight();
                 }
             }
         }
 
         public neuron[][] InnerDecode(Sequence pSequence, out Matrix<neuron> outputHiddenLayer, out Matrix<double> rawOutputLayer)
         {
-            int numStates = pSequence.GetSize();
+            int numStates = pSequence.States.Length;
             Matrix<neuron> mForward = null;
             Matrix<neuron> mBackward = null;
@@ -144,7 +213,7 @@ public neuron[][] InnerDecode(Sequence pSequence, out Matrix outputHidde
                 mForward = new Matrix<neuron>(numStates, forwardRNN.L1);
                 for (int curState = 0; curState < numStates; curState++)
                 {
-                    State state = pSequence.Get(curState);
+                    State state = pSequence.States[curState];
                     forwardRNN.setInputLayer(state, curState, numStates, null);
                     forwardRNN.computeNet(state, null);      //compute probability distribution
 
@@ -157,7 +226,7 @@ public neuron[][] InnerDecode(Sequence pSequence, out Matrix outputHidde
                 mBackward = new Matrix<neuron>(numStates, backwardRNN.L1);
                 for (int curState = numStates - 1; curState >= 0; curState--)
                 {
-                    State state = pSequence.Get(curState);
+                    State state = pSequence.States[curState];
                     backwardRNN.setInputLayer(state, curState, numStates, null, false);
                     backwardRNN.computeNet(state, null);      //compute probability distribution
 
@@ -181,7 +250,7 @@ public neuron[][] InnerDecode(Sequence pSequence, out Matrix outputHidde
             Parallel.For(0, numStates, parallelOption, curState =>
             {
                 seqOutput[curState] = new neuron[L2];
-                matrixXvectorADD(seqOutput[curState], mergedHiddenLayer[curState], mat_hidden2output, 0, L2, 0, L1, 0);
+                matrixXvectorADD(seqOutput[curState], mergedHiddenLayer[curState], Hidden2OutputWeight, 0, L2, 0, L1, 0);
 
                 for (int i = 0; i < L2; i++)
                 {
@@ -198,10 +267,10 @@ public neuron[][] InnerDecode(Sequence pSequence, out Matrix outputHidde
             return seqOutput;
         }
 
-        public override Matrix<double> learnSentenceForRNNCRF(Sequence pSequence, RunningMode runningMode)
+        public override int[] PredictSentenceCRF(Sequence pSequence, RunningMode runningMode)
         {
             //Reset the network
-            int numStates = pSequence.GetSize();
+            int numStates = pSequence.States.Length;
 
             //Predict output
             Matrix<neuron> mergedHiddenLayer = null;
             Matrix<double> rawOutputLayer = null;
@@ -209,37 +278,43 @@ public override Matrix learnSentenceForRNNCRF(Sequence pSequence, Runnin
 
             ForwardBackward(numStates, rawOutputLayer);
 
-            //Get the best result
-            for (int i = 0; i < numStates; i++)
+            if (runningMode != RunningMode.Test)
             {
-                State state = pSequence.Get(i);
-                logp += Math.Log10(mat_CRFSeqOutput[i][state.GetLabel()]);
-                counter++;
+                //Get the best result
+                for (int i = 0; i < numStates; i++)
+                {
+                    logp += Math.Log10(CRFSeqOutput[i][pSequence.States[i].Label]);
+                }
             }
 
-            UpdateBigramTransition(pSequence);
+            int[] predict = Viterbi(rawOutputLayer, numStates);
 
-            //Update hidden-output layer weights
-            for (int curState = 0; curState < numStates; curState++)
+            if (runningMode == RunningMode.Train)
             {
-                State state = pSequence.Get(curState);
-                //For standard RNN
-                for (int c = 0; c < L2; c++)
+                UpdateBigramTransition(pSequence);
+
+                //Update hidden-output layer weights
+                for (int curState = 0; curState < numStates; curState++)
                 {
-                    seqOutput[curState][c].er = -mat_CRFSeqOutput[curState][c];
+                    int label = pSequence.States[curState].Label;
+                    //For standard RNN
+                    for (int c = 0; c < L2; c++)
+                    {
+                        seqOutput[curState][c].er = -CRFSeqOutput[curState][c];
+                    }
+                    seqOutput[curState][label].er = 1 - CRFSeqOutput[curState][label];
                 }
-                seqOutput[curState][state.GetLabel()].er = 1 - mat_CRFSeqOutput[curState][state.GetLabel()];
-            }
 
-            LearnTwoRNN(pSequence, mergedHiddenLayer, seqOutput);
+                LearnTwoRNN(pSequence, mergedHiddenLayer, seqOutput);
+            }
 
-            return mat_CRFSeqOutput;
+            return predict;
         }
 
         public override Matrix<double> PredictSentence(Sequence pSequence, RunningMode runningMode)
         {
             //Reset the network
-            int numStates = pSequence.GetSize();
+            int numStates = pSequence.States.Length;
 
             //Predict output
             Matrix<neuron> mergedHiddenLayer = null;
@@ -251,9 +326,7 @@ public override Matrix PredictSentence(Sequence pSequence, RunningMode r
                 //Merge forward and backward
                 for (int curState = 0; curState < numStates; curState++)
                 {
-                    State state = pSequence.Get(curState);
-                    logp += Math.Log10(seqOutput[curState][state.GetLabel()].cellOutput);
-                    counter++;
+                    logp += Math.Log10(seqOutput[curState][pSequence.States[curState].Label].cellOutput);
                 }
             }
 
@@ -262,13 +335,13 @@ public override Matrix PredictSentence(Sequence pSequence, RunningMode r
                 //Update hidden-output layer weights
                 for (int curState = 0; curState < numStates; curState++)
                 {
-                    State state = pSequence.Get(curState);
+                    int label = pSequence.States[curState].Label;
                     //For standard RNN
                     for (int c = 0; c < L2; c++)
                     {
                         seqOutput[curState][c].er = -seqOutput[curState][c].cellOutput;
                    }
-                    seqOutput[curState][state.GetLabel()].er = 1 - seqOutput[curState][state.GetLabel()].cellOutput;
+                    seqOutput[curState][label].er = 1 - seqOutput[curState][label].cellOutput;
                 }
 
                 LearnTwoRNN(pSequence, mergedHiddenLayer, seqOutput);
@@ -281,21 +354,21 @@ private void LearnTwoRNN(Sequence pSequence, Matrix mergedHiddenLayer, n
         {
             netReset(true);
 
-            int numStates = pSequence.GetSize();
-            forwardRNN.mat_hidden2output = mat_hidden2output.CopyTo();
-            backwardRNN.mat_hidden2output = mat_hidden2output.CopyTo();
+            int numStates = pSequence.States.Length;
+            forwardRNN.Hidden2OutputWeight = Hidden2OutputWeight.CopyTo();
+            backwardRNN.Hidden2OutputWeight = Hidden2OutputWeight.CopyTo();
 
             Parallel.Invoke(() =>
             {
                 for (int curState = 0; curState < numStates; curState++)
                 {
-                    for (int i = 0; i < mat_hidden2output.GetHeight(); i++)
+                    for (int i = 0; i < Hidden2OutputWeight.GetHeight(); i++)
                    {
                         //update weights for hidden to output layer
-                        for (int k = 0; k < mat_hidden2output.GetWidth(); k++)
+                        for (int k = 0; k < Hidden2OutputWeight.GetWidth(); k++)
                         {
-                            mat_hidden2output[i][k] += alpha * mergedHiddenLayer[curState][k].cellOutput * seqOutput[curState][i].er;
+                            Hidden2OutputWeight[i][k] += LearningRate * mergedHiddenLayer[curState][k].cellOutput * seqOutput[curState][i].er;
                         }
                     }
                 }
@@ -308,12 +381,12 @@ private void LearnTwoRNN(Sequence pSequence, Matrix mergedHiddenLayer, n
                 for (int curState = 0; curState < numStates; curState++)
                 {
                     // error propagation
-                    State state = pSequence.Get(curState);
+                    State state = pSequence.States[curState];
                     forwardRNN.setInputLayer(state, curState, numStates, null);
                     forwardRNN.computeNet(state, null);      //compute probability distribution
 
                     //Copy output result to forward network's output
-                    forwardRNN.neuOutput = seqOutput[curState];
+                    forwardRNN.OutputLayer = seqOutput[curState];
 
                     forwardRNN.learnNet(state, curState, true);
                     forwardRNN.LearnBackTime(state, numStates, curState);
@@ -327,12 +400,12 @@ private void LearnTwoRNN(Sequence pSequence, Matrix mergedHiddenLayer, n
                     int curState2 = numStates - 1 - curState;
 
                     // error propagation
-                    State state2 = pSequence.Get(curState2);
+                    State state2 = pSequence.States[curState2];
                     backwardRNN.setInputLayer(state2, curState2, numStates, null, false);
                     backwardRNN.computeNet(state2, null);      //compute probability distribution
 
                     //Copy output result to backward network's output
-                    backwardRNN.neuOutput = seqOutput[curState2];
+                    backwardRNN.OutputLayer = seqOutput[curState2];
 
                     backwardRNN.learnNet(state2, curState2, true);
                     backwardRNN.LearnBackTime(state2, numStates, curState2);
@@ -340,36 +413,6 @@ private void LearnTwoRNN(Sequence pSequence, Matrix mergedHiddenLayer, n
             });
         }
 
-        public int GetBestOutputIndex(Matrix<double> m, int curState)
-        {
-            int imax = 0;
-            double dmax = m[curState][0];
-            for (int k = 1; k < m.GetWidth(); k++)
-            {
-                if (m[curState][k] > dmax)
-                {
-                    dmax = m[curState][k];
-                    imax = k;
-                }
-            }
-            return imax;
-        }
-
-
-        public int GetBestOutputIndex(neuron[][] m, int curState, int L2)
-        {
-            int imax = 0;
-            double dmax = m[curState][0].cellOutput;
-            for (int k = 1; k < L2; k++)
-            {
-                if (m[curState][k].cellOutput > dmax)
-                {
-                    dmax = m[curState][k].cellOutput;
-                    imax = k;
-                }
-            }
-            return imax;
-        }
 
         public override void LearnBackTime(State state, int numStates, int curState)
         {
@@ -394,11 +437,11 @@ public override void netReset(bool updateNet = false)
         public override void saveNetBin(string filename)
         {
             //Save bi-directional model
-            forwardRNN.mat_hidden2output = mat_hidden2output;
-            backwardRNN.mat_hidden2output = mat_hidden2output;
+            forwardRNN.Hidden2OutputWeight = Hidden2OutputWeight;
+            backwardRNN.Hidden2OutputWeight = Hidden2OutputWeight;
 
-            forwardRNN.mat_CRFTagTransWeights = mat_CRFTagTransWeights;
-            backwardRNN.mat_CRFTagTransWeights = mat_CRFTagTransWeights;
+            forwardRNN.CRFTagTransWeights = CRFTagTransWeights;
+            backwardRNN.CRFTagTransWeights = CRFTagTransWeights;
 
             forwardRNN.saveNetBin(filename + ".forward");
             backwardRNN.saveNetBin(filename + ".backward");
@@ -407,12 +450,12 @@ public override void saveNetBin(string filename)
             using (StreamWriter sw = new StreamWriter(filename))
             {
                 BinaryWriter fo = new BinaryWriter(sw.BaseStream);
-                fo.Write((int)m_modeltype);
-                fo.Write((int)m_modeldirection);
+                fo.Write((int)ModelType);
+                fo.Write((int)ModelDirection);
 
                 // Signature, 0 is for RNN or 1 is for RNN-CRF
                 int iflag = 0;
-                if (m_bCRFTraining == true)
+                if (IsCRFTraining == true)
                 {
                     iflag = 1;
                 }
@@ -421,7 +464,7 @@ public override void saveNetBin(string filename)
                 fo.Write(L0);
                 fo.Write(L1);
                 fo.Write(L2);
-                fo.Write(fea_size);
+                fo.Write(DenseFeatureSize);
             }
         }
 
@@ -432,31 +475,31 @@ public override void loadNetBin(string filename)
             forwardRNN.loadNetBin(filename + ".forward");
             backwardRNN.loadNetBin(filename + ".backward");
 
-            mat_hidden2output = forwardRNN.mat_hidden2output;
-            mat_CRFTagTransWeights = forwardRNN.mat_CRFTagTransWeights;
+            Hidden2OutputWeight = forwardRNN.Hidden2OutputWeight;
+            CRFTagTransWeights = forwardRNN.CRFTagTransWeights;
 
             using (StreamReader sr = new StreamReader(filename))
             {
                 BinaryReader br = new BinaryReader(sr.BaseStream);
 
-                m_modeltype = (MODELTYPE)br.ReadInt32();
-                m_modeldirection = (MODELDIRECTION)br.ReadInt32();
+                ModelType = (MODELTYPE)br.ReadInt32();
+                ModelDirection = (MODELDIRECTION)br.ReadInt32();
 
                 int iflag = br.ReadInt32();
                 if (iflag == 1)
                 {
-                    m_bCRFTraining = true;
+                    IsCRFTraining = true;
                 }
                 else
                 {
-                    m_bCRFTraining = false;
+                    IsCRFTraining = false;
                 }
 
                 //Load basic parameters
                 L0 = br.ReadInt32();
                 L1 = br.ReadInt32();
                 L2 = br.ReadInt32();
-                fea_size = br.ReadInt32();
+                DenseFeatureSize = br.ReadInt32();
            }
        }
    }
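The BiRNN changes above funnel both directions through a single output layer: InnerDecode runs the forward RNN left to right and the backward RNN right to left, keeps each direction's hidden vector per position, merges the two, then projects the merged vector through Hidden2OutputWeight and applies a softmax. A minimal standalone sketch of that decode path follows (not part of the patch; plain float arrays stand in for RNNSharp's neuron type, and averaging is assumed for the merge step, which the visible hunks do not show):

    // Sketch only: bi-directional decode with simplified types.
    // Assumes "using System;" for Math.Exp.
    static float[][] BiDecode(float[][] fwd, float[][] bwd, float[][] h2o)
    {
        int numStates = fwd.Length;      // sequence length
        int L2 = h2o.Length;             // number of output tags
        float[][] output = new float[numStates][];
        for (int t = 0; t < numStates; t++)
        {
            int L1 = fwd[t].Length;      // hidden layer size
            float[] merged = new float[L1];
            for (int k = 0; k < L1; k++)
                merged[k] = 0.5f * (fwd[t][k] + bwd[t][k]);   // assumed merge

            // project through hidden-to-output weights, then softmax
            output[t] = new float[L2];
            double sum = 0;
            for (int i = 0; i < L2; i++)
            {
                double net = 0;
                for (int k = 0; k < L1; k++) net += h2o[i][k] * merged[k];
                output[t][i] = (float)Math.Exp(net);
                sum += output[t][i];
            }
            for (int i = 0; i < L2; i++) output[t][i] /= (float)sum;
        }
        return output;
    }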
diff --git a/RNNSharp/DataSet.cs b/RNNSharp/DataSet.cs
index 3254335..5cb684a 100644
--- a/RNNSharp/DataSet.cs
+++ b/RNNSharp/DataSet.cs
@@ -1,133 +1,90 @@
 using System;
 using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 
+/// <summary>
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+/// </summary>
 namespace RNNSharp
 {
     public class DataSet
     {
-        List<Sequence> m_Data;
-        int m_tagSize;
-        List<List<double>> m_LabelBigramTransition;
-
-        /// <summary>
-        /// Split current corpus into two parts according given ratio
-        /// </summary>
-        /// <param name="ratio"></param>
-        /// <param name="ds1"></param>
-        /// <param name="ds2"></param>
-        public void SplitDataSet(double ratio, out DataSet ds1, out DataSet ds2)
-        {
-            Random rnd = new Random(DateTime.Now.Millisecond);
-            ds1 = new DataSet(m_tagSize);
-            ds2 = new DataSet(m_tagSize);
-
-            for (int i = 0; i < m_Data.Count; i++)
-            {
-                if (rnd.NextDouble() < ratio)
-                {
-                    ds1.Add(m_Data[i]);
-                }
-                else
-                {
-                    ds2.Add(m_Data[i]);
-                }
-            }
-
-            ds1.BuildLabelBigramTransition();
-            ds2.BuildLabelBigramTransition();
-        }
-
-
-        public void Add(Sequence sequence) { m_Data.Add(sequence); }
+        public List<Sequence> SequenceList { get; set; }
+        public int TagSize { get; set; }
+        public List<List<float>> CRFLabelBigramTransition { get; set; }
 
         public void Shuffle()
         {
             Random rnd = new Random(DateTime.Now.Millisecond);
-            for (int i = 0; i < m_Data.Count; i++)
+            for (int i = 0; i < SequenceList.Count; i++)
             {
-                int m = rnd.Next() % m_Data.Count;
-                Sequence tmp = m_Data[i];
-                m_Data[i] = m_Data[m];
-                m_Data[m] = tmp;
+                int m = rnd.Next() % SequenceList.Count;
+                Sequence tmp = SequenceList[i];
+                SequenceList[i] = SequenceList[m];
+                SequenceList[m] = tmp;
             }
         }
 
         public DataSet(int tagSize)
         {
-            m_tagSize = tagSize;
-            m_Data = new List<Sequence>();
-            m_LabelBigramTransition = new List<List<double>>();
-        }
-
-        public int GetSize()
-        {
-            return m_Data.Count;
+            TagSize = tagSize;
+            SequenceList = new List<Sequence>();
+            CRFLabelBigramTransition = new List<List<float>>();
         }
 
-        public Sequence Get(int i) { return m_Data[i]; }
-        public int GetTagSize() { return m_tagSize; }
-
-
-        public int GetDenseDimension()
+        public int DenseFeatureSize()
         {
-            if (0 == m_Data.Count) return 0;
-            return m_Data[0].GetDenseDimension();
+            if (0 == SequenceList.Count) return 0;
+            return SequenceList[0].GetDenseDimension();
         }
 
         public int GetSparseDimension()
         {
-            if (0 == m_Data.Count) return 0;
-            return m_Data[0].GetSparseDimension();
+            if (0 == SequenceList.Count) return 0;
+            return SequenceList[0].GetSparseDimension();
        }
 
-
-        public List<List<double>> GetLabelBigramTransition() { return m_LabelBigramTransition; }
-
-
-        public void BuildLabelBigramTransition(double smooth = 1.0)
+        public void BuildLabelBigramTransition(float smooth = 1.0f)
         {
-            m_LabelBigramTransition = new List<List<double>>();
+            CRFLabelBigramTransition = new List<List<float>>();
 
-            for (int i = 0; i < m_tagSize; i++)
+            for (int i = 0; i < TagSize; i++)
            {
-                m_LabelBigramTransition.Add(new List<double>());
+                CRFLabelBigramTransition.Add(new List<float>());
            }
 
-            for (int i = 0; i < m_tagSize; i++)
+            for (int i = 0; i < TagSize; i++)
             {
-                for (int j = 0; j < m_tagSize; j++)
+                for (int j = 0; j < TagSize; j++)
                 {
-                    m_LabelBigramTransition[i].Add(smooth);
+                    CRFLabelBigramTransition[i].Add(smooth);
                 }
             }
 
-            for (int i = 0; i < m_Data.Count; i++)
+            for (int i = 0; i < SequenceList.Count; i++)
             {
-                var sequence = m_Data[i];
-                if (sequence.GetSize() <= 1)
+                var sequence = SequenceList[i];
+                if (sequence.States.Length <= 1)
                     continue;
 
-                int pLabel = sequence.Get(0).GetLabel();
-                for (int j = 1; j < sequence.GetSize(); j++)
+                int pLabel = sequence.States[0].Label;
+                for (int j = 1; j < sequence.States.Length; j++)
                 {
-                    int label = sequence.Get(j).GetLabel();
-                    m_LabelBigramTransition[label][pLabel]++;
+                    int label = sequence.States[j].Label;
+                    CRFLabelBigramTransition[label][pLabel]++;
                     pLabel = label;
                 }
             }
 
-            for (int i = 0; i < m_tagSize; i++)
+            for (int i = 0; i < TagSize; i++)
             {
                 double sum = 0;
-                for (int j = 0; j < m_tagSize; j++)
+                for (int j = 0; j < TagSize; j++)
                {
-                    sum += m_LabelBigramTransition[i][j];
+                    sum += CRFLabelBigramTransition[i][j];
                }
 
-                for (int j = 0; j < m_tagSize; j++)
+                for (int j = 0; j < TagSize; j++)
                {
-                    m_LabelBigramTransition[i][j] = Math.Log(m_LabelBigramTransition[i][j] / sum);
+                    CRFLabelBigramTransition[i][j] = (float)Math.Log(CRFLabelBigramTransition[i][j] / sum);
                }
            }
        }
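BuildLabelBigramTransition above is add-one-style smoothed bigram estimation over the training labels: every cell of CRFLabelBigramTransition starts at smooth, each observed (previous label, current label) pair increments its cell, each row is normalized, and the matrix is stored in log space. For a two-tag corpus with smooth = 1, a row with final counts {3, 1} becomes {log 0.75, log 0.25}. A hedged standalone sketch of the same computation (assumes "using System;"; types are simplified):

    // Sketch of DataSet.BuildLabelBigramTransition with plain arrays.
    static float[][] BuildTransition(int[][] labelSeqs, int tagSize, float smooth)
    {
        float[][] trans = new float[tagSize][];           // trans[cur][prev]
        for (int i = 0; i < tagSize; i++)
        {
            trans[i] = new float[tagSize];
            for (int j = 0; j < tagSize; j++) trans[i][j] = smooth;
        }

        foreach (int[] seq in labelSeqs)                  // count label bigrams
            for (int t = 1; t < seq.Length; t++)
                trans[seq[t]][seq[t - 1]]++;

        for (int i = 0; i < tagSize; i++)                 // row-normalize, then log
        {
            double sum = 0;
            for (int j = 0; j < tagSize; j++) sum += trans[i][j];
            for (int j = 0; j < tagSize; j++)
                trans[i][j] = (float)Math.Log(trans[i][j] / sum);
        }
        return trans;
    }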
diff --git a/RNNSharp/Featurizer.cs b/RNNSharp/Featurizer.cs
index 73337cf..088182e 100644
--- a/RNNSharp/Featurizer.cs
+++ b/RNNSharp/Featurizer.cs
@@ -1,12 +1,11 @@
 using System;
 using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 using System.IO;
-using Txt2Vec;
 using AdvUtils;
 
+/// <summary>
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+/// </summary>
 namespace RNNSharp
 {
     enum TFEATURE_WEIGHT_TYPE_ENUM
@@ -17,14 +16,14 @@ enum TFEATURE_WEIGHT_TYPE_ENUM
 
     public class Featurizer
     {
+        public TagSet TagSet { get; set; }
+
         Dictionary<string, List<int>> m_FeatureConfiguration;
         int m_SparseDimension;
         int m_DenseDimension;
         int m_WordEmbeddingCloumn;
         TFEATURE_WEIGHT_TYPE_ENUM m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
 
         WordEMWrapFeaturizer m_WordEmbedding;
-        TagSet m_TagSet;
         TemplateFeaturizer m_TFeaturizer;
 
         static string TFEATURE_CONTEXT = "TFEATURE_CONTEXT";
@@ -35,12 +34,6 @@ public class Featurizer
         static string WORDEMBEDDING_COLUMN = "WORDEMBEDDING_COLUMN";
         static string TFEATURE_WEIGHT_TYPE = "TFEATURE_WEIGHT_TYPE";
 
-        public TagSet GetTagSet()
-        {
-            return m_TagSet;
-        }
-
-
         //The format of configuration file
         public void LoadFeatureConfigFromFile(string strFileName)
         {
@@ -125,7 +118,7 @@ public int TruncPosition(int current, int lower, int upper)
         public Featurizer(string strFeatureConfigFileName, TagSet tagSet)
         {
             LoadFeatureConfigFromFile(strFeatureConfigFileName);
-            m_TagSet = tagSet;
+            TagSet = tagSet;
             InitComponentFeaturizer();
         }
 
@@ -143,7 +136,7 @@ void InitComponentFeaturizer()
 
             if (fc.ContainsKey(RT_FEATURE_CONTEXT) == true)
             {
-                m_SparseDimension += m_TagSet.GetSize() * fc[RT_FEATURE_CONTEXT].Count;
+                m_SparseDimension += TagSet.GetSize() * fc[RT_FEATURE_CONTEXT].Count;
             }
 
             m_DenseDimension = 0;
@@ -173,7 +166,7 @@ public void ShowFeatureSize()
                 Logger.WriteLine(Logger.Level.info, "Template feature context size: {0}", m_TFeaturizer.GetFeatureSize() * fc[TFEATURE_CONTEXT].Count);
 
             if (fc.ContainsKey(RT_FEATURE_CONTEXT) == true)
-                Logger.WriteLine(Logger.Level.info, "Run time feature size: {0}", m_TagSet.GetSize() * fc[RT_FEATURE_CONTEXT].Count);
+                Logger.WriteLine(Logger.Level.info, "Run time feature size: {0}", TagSet.GetSize() * fc[RT_FEATURE_CONTEXT].Count);
 
             if (fc.ContainsKey(WORDEMBEDDING_CONTEXT) == true)
                 Logger.WriteLine(Logger.Level.info, "Word embedding feature size: {0}", m_WordEmbedding.GetDimension() * fc[WORDEMBEDDING_CONTEXT].Count);
@@ -181,7 +174,7 @@ public void ShowFeatureSize()
 
         void ExtractSparseFeature(int currentState, int numStates, List<string> features, State pState)
         {
-            Dictionary<int, double> sparseFeature = new Dictionary<int, double>();
+            Dictionary<int, float> sparseFeature = new Dictionary<int, float>();
             int start = 0;
 
             var fc = m_FeatureConfiguration;
@@ -224,14 +217,14 @@ void ExtractSparseFeature(int currentState, int numStates, List featur
             if (fc.ContainsKey(RT_FEATURE_CONTEXT) == true)
             {
                 List<int> v = fc[RT_FEATURE_CONTEXT];
-                pState.SetNumRuntimeFeature(v.Count);
+                pState.RuntimeFeatures = new PriviousLabelFeature[v.Count];
 
                 for (int j = 0; j < v.Count; j++)
                 {
                     if (v[j] < 0)
                     {
                         pState.AddRuntimeFeaturePlacehold(j, v[j], sparseFeature.Count, start);
                         sparseFeature[start] = 0; //Placeholder for a position
-                        start += m_TagSet.GetSize();
+                        start += TagSet.GetSize();
                     }
                     else
                     {
@@ -240,7 +233,7 @@ void ExtractSparseFeature(int currentState, int numStates, List featur
                 }
             }
 
-            SparseVector spSparseFeature = pState.GetSparseData();
+            SparseVector spSparseFeature = pState.SparseData;
             spSparseFeature.SetDimension(m_SparseDimension);
             spSparseFeature.SetData(sparseFeature);
         }
@@ -284,19 +277,16 @@ public Vector ExtractDenseFeature(int currentState, int numStates, List<string> features)
 
         public Sequence ExtractFeatures(Sentence sentence)
         {
-            Sequence sequence = new Sequence();
-            int n = sentence.GetTokenSize();
-            List<string> features = sentence.GetFeatureSet();
+            int n = sentence.TokensList.Count;
+            Sequence sequence = new Sequence(n);
 
             //For each token, get its sparse and dense feature set according to the configuration and training corpus
-            sequence.SetSize(n);
             for (int i = 0; i < n; i++)
             {
-                State state = sequence.Get(i);
-                ExtractSparseFeature(i, n, features, state);
+                State state = sequence.States[i];
+                ExtractSparseFeature(i, n, sentence.TokensList, state);
 
-                var spDenseFeature = ExtractDenseFeature(i, n, features);
-                state.SetDenseData(spDenseFeature);
+                state.DenseData = ExtractDenseFeature(i, n, sentence.TokensList);
             }
 
             return sequence;
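One layout detail in the Featurizer changes is easy to miss: the sparse vector is a fixed-offset concatenation, template features first, then one TagSet.GetSize()-wide block per runtime-feature offset; sparseFeature[start] = 0 merely reserves a block until SetRuntimeFeature fills in the predicted label at decode time. A hypothetical walk-through of the offsets (all sizes below are invented for illustration; only the layout rule comes from the diff, and the snippet assumes "using System;"):

    int tfeatureSize = 1000;   // stand-in for m_TFeaturizer.GetFeatureSize()
    int tfeatureContexts = 3;  // stand-in for fc[TFEATURE_CONTEXT].Count
    int tagSize = 20;          // stand-in for TagSet.GetSize()
    int rtContexts = 2;        // stand-in for fc[RT_FEATURE_CONTEXT].Count

    int start = tfeatureSize * tfeatureContexts;      // 3000: template block
    int sparseDim = start + tagSize * rtContexts;     // 3040: total dimension

    for (int j = 0; j < rtContexts; j++)
    {
        int blockBegin = start + j * tagSize;
        // At decode time a 1 is written at blockBegin + predictedLabel;
        // until then the placeholder keeps the entry (and offsets) stable.
        Console.WriteLine($"runtime block {j}: [{blockBegin}, {blockBegin + tagSize})");
    }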
diff --git a/RNNSharp/LSTMRNN.cs b/RNNSharp/LSTMRNN.cs
index 1b629b1..ae080e5 100644
--- a/RNNSharp/LSTMRNN.cs
+++ b/RNNSharp/LSTMRNN.cs
@@ -1,63 +1,64 @@
 using System;
 using System.Collections.Generic;
-using System.Linq;
-using System.Text;
 using System.Threading.Tasks;
 using System.IO;
 using AdvUtils;
 
+/// <summary>
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+/// </summary>
 namespace RNNSharp
 {
     public class LSTMCell
     {
         //input gate
-        public double netIn;
-        public double yIn;
+        public float netIn;
+        public float yIn;
 
         //forget gate
-        public double netForget;
-        public double yForget;
+        public float netForget;
+        public float yForget;
 
         //cell state
-        public double netCellState;
-        public double previousCellState;
-        public double cellState;
+        public float netCellState;
+        public float previousCellState;
+        public float cellState;
 
         //internal weights and deltas
-        public double wCellIn;
-        public double wCellForget;
-        public double wCellOut;
+        public float wCellIn;
+        public float wCellForget;
+        public float wCellOut;
 
         //partial derivatives
-        public double dSWCellIn;
-        public double dSWCellForget;
+        public float dSWCellIn;
+        public float dSWCellForget;
         //double dSWCellState;
 
         //output gate
-        public double netOut;
-        public double yOut;
+        public float netOut;
+        public float yOut;
 
         //cell output
-        public double cellOutput;
+        public float cellOutput;
         public bool mask;
     }
 
     public struct LSTMWeight
     {
         //variables
-        public double wInputCell;
-        public double wInputInputGate;
-        public double wInputForgetGate;
-        public double wInputOutputGate;
+        public float wInputCell;
+        public float wInputInputGate;
+        public float wInputForgetGate;
+        public float wInputOutputGate;
     }
 
     public struct LSTMWeightDerivative
     {
         //partial derivatives. don't need partial derivative for output gate as it uses BP not RTRL
-        public double dSInputCell;
-        public double dSInputInputGate;
-        public double dSInputForgetGate;
+        public float dSInputCell;
+        public float dSInputInputGate;
+        public float dSInputForgetGate;
     }
 
     public class LSTMRNN : RNN
@@ -71,7 +72,7 @@ public class LSTMRNN : RNN
 
         public LSTMRNN()
         {
-            m_modeltype = MODELTYPE.LSTM;
+            ModelType = MODELTYPE.LSTM;
         }
 
@@ -87,6 +88,17 @@ public LSTMWeight[][] loadLSTMWeight(BinaryReader br)
         {
             int w = br.ReadInt32();
             int h = br.ReadInt32();
+            int vqSize = br.ReadInt32();
+
+            Logger.WriteLine("Loading LSTM-Weight: width:{0}, height:{1}, vqSize:{2}...", w, h, vqSize);
+
+            List<double> codeBook = new List<double>();
+            for (int i = 0; i < vqSize; i++)
+            {
+                codeBook.Add(br.ReadDouble());
+            }
+
+
             LSTMWeight[][] m = new LSTMWeight[w][];
 
             for (int i = 0; i < w; i++)
@@ -94,10 +106,17 @@ public LSTMWeight[][] loadLSTMWeight(BinaryReader br)
                 m[i] = new LSTMWeight[h];
                 for (int j = 0; j < h; j++)
                 {
-                    m[i][j].wInputCell = br.ReadSingle();
-                    m[i][j].wInputForgetGate = br.ReadSingle();
-                    m[i][j].wInputInputGate = br.ReadSingle();
-                    m[i][j].wInputOutputGate = br.ReadSingle();
+                    int vqIdx = br.ReadByte();
+                    m[i][j].wInputCell = (float)codeBook[vqIdx];
+
+                    vqIdx = br.ReadByte();
+                    m[i][j].wInputForgetGate = (float)codeBook[vqIdx];
+
+                    vqIdx = br.ReadByte();
+                    m[i][j].wInputInputGate = (float)codeBook[vqIdx];
+
+                    vqIdx = br.ReadByte();
+                    m[i][j].wInputOutputGate = (float)codeBook[vqIdx];
                 }
             }
 
@@ -106,26 +125,47 @@ private void saveLSTMWeight(LSTMWeight[][] weight, BinaryWriter fo)
         {
-            if (weight == null || weight.Length == 0)
-            {
-                fo.Write(0);
-                fo.Write(0);
-            }
+            int w = weight.Length;
+            int h = weight[0].Length;
+            int vqSize = 256;
+
+            Logger.WriteLine("Saving LSTM weight matrix. width:{0}, height:{1}, vqSize:{2}", w, h, vqSize);
 
             fo.Write(weight.Length);
             fo.Write(weight[0].Length);
 
-            int w = weight.Length;
-            int h = weight[0].Length;
+            //Build vector quantization model
+            VectorQuantization vq = new VectorQuantization();
+            for (int i = 0; i < w; i++)
+            {
+                for (int j = 0; j < h; j++)
+                {
+                    vq.Add(weight[i][j].wInputCell);
+                    vq.Add(weight[i][j].wInputForgetGate);
+                    vq.Add(weight[i][j].wInputInputGate);
+                    vq.Add(weight[i][j].wInputOutputGate);
+                }
+            }
+
+
+            double distortion = vq.BuildCodebook(vqSize);
+            Logger.WriteLine("Distortion: {0}", distortion);
+
+            //Save VQ codebook into file
+            fo.Write(vqSize);
+            for (int j = 0; j < vqSize; j++)
+            {
+                fo.Write(vq.CodeBook[j]);
+            }
 
             for (int i = 0; i < w; i++)
             {
                 for (int j = 0; j < h; j++)
                 {
-                    fo.Write((float)weight[i][j].wInputCell);
-                    fo.Write((float)weight[i][j].wInputForgetGate);
-                    fo.Write((float)weight[i][j].wInputInputGate);
-                    fo.Write((float)weight[i][j].wInputOutputGate);
+                    fo.Write((byte)vq.ComputeVQ(weight[i][j].wInputCell));
+                    fo.Write((byte)vq.ComputeVQ(weight[i][j].wInputForgetGate));
+                    fo.Write((byte)vq.ComputeVQ(weight[i][j].wInputInputGate));
+                    fo.Write((byte)vq.ComputeVQ(weight[i][j].wInputOutputGate));
                }
            }
 
@@ -138,49 +178,53 @@ public override void loadNetBin(string filename)
             StreamReader sr = new StreamReader(filename);
             BinaryReader br = new BinaryReader(sr.BaseStream);
 
-            m_modeltype = (MODELTYPE)br.ReadInt32();
-            if (m_modeltype != MODELTYPE.LSTM)
+            ModelType = (MODELTYPE)br.ReadInt32();
+            if (ModelType != MODELTYPE.LSTM)
             {
                 throw new Exception("Invalid model format: must be LSTM-RNN format");
             }
 
-            m_modeldirection = (MODELDIRECTION)br.ReadInt32();
+            ModelDirection = (MODELDIRECTION)br.ReadInt32();
 
             int iflag = br.ReadInt32();
             if (iflag == 1)
             {
-                m_bCRFTraining = true;
+                IsCRFTraining = true;
             }
             else
             {
-                m_bCRFTraining = false;
+                IsCRFTraining = false;
             }
 
             //Load basic parameters
             L0 = br.ReadInt32();
             L1 = br.ReadInt32();
             L2 = br.ReadInt32();
-            fea_size = br.ReadInt32();
+            DenseFeatureSize = br.ReadInt32();
 
             //Create cells of each layer
             CreateCell(br);
 
             //Load weight matrix between each two layer pairs
             //weight input->hidden
+            Logger.WriteLine("Loading input2hidden weights...");
             input2hidden = loadLSTMWeight(br);
 
-            if (fea_size > 0)
+            if (DenseFeatureSize > 0)
             {
                 //weight fea->hidden
+                Logger.WriteLine("Loading feature2hidden weights...");
                 feature2hidden = loadLSTMWeight(br);
             }
 
             //weight hidden->output
-            mat_hidden2output = loadMatrixBin(br);
+            Logger.WriteLine("Loading hidden2output weights...");
+            Hidden2OutputWeight = loadMatrixBin(br);
 
             if (iflag == 1)
             {
-                mat_CRFTagTransWeights = loadMatrixBin(br);
+                Logger.WriteLine("Loading CRF tag trans weights...");
+                CRFTagTransWeights = loadMatrixBin(br);
             }
 
             sr.Close();
@@ -203,13 +247,13 @@ public override void saveNetBin(string filename)
             StreamWriter sw = new StreamWriter(filename);
             BinaryWriter fo = new BinaryWriter(sw.BaseStream);
 
-            fo.Write((int)m_modeltype);
+            fo.Write((int)ModelType);
 
-            fo.Write((int)m_modeldirection);
+            fo.Write((int)ModelDirection);
 
             // Signature, 0 is for RNN or 1 is for RNN-CRF
             int iflag = 0;
-            if (m_bCRFTraining == true)
+            if (IsCRFTraining == true)
             {
                 iflag = 1;
             }
@@ -218,27 +262,32 @@ public override void saveNetBin(string filename)
             fo.Write(L0);
             fo.Write(L1);
             fo.Write(L2);
-            fo.Write(fea_size);
+            fo.Write(DenseFeatureSize);
 
             //Save hidden layer weights
+            Logger.WriteLine("Saving hidden layer weights...");
             SaveHiddenLayerWeights(fo);
 
             //weight input->hidden
+            Logger.WriteLine("Saving input2hidden weights...");
             saveLSTMWeight(input2hidden, fo);
 
-            if (fea_size > 0)
+            if (DenseFeatureSize > 0)
             {
                 //weight fea->hidden
+                Logger.WriteLine("Saving feature2hidden weights...");
                 saveLSTMWeight(feature2hidden, fo);
             }
 
             //weight hidden->output
-            saveMatrixBin(mat_hidden2output, fo);
+            Logger.WriteLine("Saving hidden2output weights...");
+            saveMatrixBin(Hidden2OutputWeight, fo);
 
             if (iflag == 1)
             {
                 // Save Bigram
-                saveMatrixBin(mat_CRFTagTransWeights, fo);
+                Logger.WriteLine("Saving CRF tag trans weights...");
+                saveMatrixBin(CRFTagTransWeights, fo);
             }
 
             fo.Close();
@@ -293,13 +342,13 @@ public override void initWeights()
                 }
             }
 
-            if (fea_size > 0)
+            if (DenseFeatureSize > 0)
             {
                 feature2hidden = new LSTMWeight[L1][];
                 for (int i = 0; i < L1; i++)
                 {
-                    feature2hidden[i] = new LSTMWeight[fea_size];
-                    for (int j = 0; j < fea_size; j++)
+                    feature2hidden[i] = new LSTMWeight[DenseFeatureSize];
+                    for (int j = 0; j < DenseFeatureSize; j++)
                     {
                         feature2hidden[i][j] = LSTMWeightInit();
                     }
@@ -307,13 +356,13 @@ public override void initWeights()
             }
 
             //Create and initialise the weights from hidden to output layer, these are just normal weights
-            mat_hidden2output = new Matrix<double>(L2, L1);
+            Hidden2OutputWeight = new Matrix<double>(L2, L1);
 
-            for (int i = 0; i < mat_hidden2output.GetHeight(); i++)
+            for (int i = 0; i < Hidden2OutputWeight.GetHeight(); i++)
             {
-                for (int j = 0; j < mat_hidden2output.GetWidth(); j++)
+                for (int j = 0; j < Hidden2OutputWeight.GetWidth(); j++)
                 {
-                    mat_hidden2output[i][j] = RandInitWeight();
+                    Hidden2OutputWeight[i][j] = RandInitWeight();
                }
            }
        }
@@ -350,7 +399,7 @@ public override void initMem()
             CreateCell(null);
 
             input2hiddenDeri = new LSTMWeightDerivative[L1][];
-            if (fea_size > 0)
+            if (DenseFeatureSize > 0)
             {
                 feature2hiddenDeri = new LSTMWeightDerivative[L1][];
             }
@@ -359,9 +408,9 @@ public override void initMem()
             {
                 input2hiddenDeri[i] = new LSTMWeightDerivative[L0];
 
-                if (fea_size > 0)
+                if (DenseFeatureSize > 0)
                {
-                    feature2hiddenDeri[i] = new LSTMWeightDerivative[fea_size];
+                    feature2hiddenDeri[i] = new LSTMWeightDerivative[DenseFeatureSize];
                }
            }
 
@@ -371,13 +420,13 @@ private void CreateCell(BinaryReader br)
         {
-            neuFeatures = new double[fea_size];
-            neuOutput = new neuron[L2];
+            neuFeatures = new SingleVector(DenseFeatureSize);
+            OutputLayer = new neuron[L2];
 
             for (int a = 0; a < L2; a++)
             {
-                neuOutput[a].cellOutput = 0;
-                neuOutput[a].er = 0;
+                OutputLayer[a].cellOutput = 0;
+                OutputLayer[a].er = 0;
             }
 
             neuHidden = new LSTMCell[L1];
@@ -392,9 +441,9 @@ private void CreateCell(BinaryReader br)
                 //Load weight from input file
                 for (int i = 0; i < L1; i++)
                 {
-                    neuHidden[i].wCellIn = br.ReadDouble();
-                    neuHidden[i].wCellForget = br.ReadDouble();
-                    neuHidden[i].wCellOut = br.ReadDouble();
+                    neuHidden[i].wCellIn = br.ReadSingle();
+                    neuHidden[i].wCellForget = br.ReadSingle();
+                    neuHidden[i].wCellOut = br.ReadSingle();
                 }
             }
             else
@@ -436,7 +485,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
             }
 
             //Get sparse feature and apply it into hidden layer
-            var sparse = state.GetSparseData();
+            var sparse = state.SparseData;
             int sparseFeatureSize = sparse.GetNumberOfEntries();
 
             //put variables for derivatives in weight class and cell class
@@ -444,34 +493,36 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
             {
                 LSTMWeightDerivative[] w_i = input2hiddenDeri[i];
                 LSTMCell c = neuHidden[i];
+                float Sigmoid2Derivative_ci_netCellState_mul_ci_yIn = (float)(Sigmoid2Derivative(c.netCellState) * c.yIn);
+                float Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn = (float)(Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn));
+                float ci_previousCellState_mul_SigmoidDerivative_ci_netForget = (float)(c.previousCellState * SigmoidDerivative(c.netForget));
+
                 for (int k = 0; k < sparseFeatureSize; k++)
                 {
                     var entry = sparse.GetEntry(k);
                     LSTMWeightDerivative w = w_i[entry.Key];
-                    w_i[entry.Key].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative(c.netCellState) * c.yIn * entry.Value;
-                    w_i[entry.Key].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn) * entry.Value;
-                    w_i[entry.Key].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * SigmoidDerivative(c.netForget) * entry.Value;
-
+                    w_i[entry.Key].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative_ci_netCellState_mul_ci_yIn * entry.Value;
+                    w_i[entry.Key].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * entry.Value;
+                    w_i[entry.Key].dSInputForgetGate = w.dSInputForgetGate * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * entry.Value;
                 }
 
-                if (fea_size > 0)
+                if (DenseFeatureSize > 0)
                 {
                     w_i = feature2hiddenDeri[i];
-                    for (int j = 0; j < fea_size; j++)
+                    for (int j = 0; j < DenseFeatureSize; j++)
                     {
                         LSTMWeightDerivative w = w_i[j];
-                        w_i[j].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative(c.netCellState) * c.yIn * neuFeatures[j];
-                        w_i[j].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn) * neuFeatures[j];
-                        w_i[j].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * SigmoidDerivative(c.netForget) * neuFeatures[j];
-
+                        w_i[j].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative_ci_netCellState_mul_ci_yIn * neuFeatures[j];
+                        w_i[j].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * neuFeatures[j];
+                        w_i[j].dSInputForgetGate = w.dSInputForgetGate * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * neuFeatures[j];
                     }
                 }
 
                 //partial derivatives for internal connections
-                c.dSWCellIn = c.dSWCellIn * c.yForget + Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn) * c.cellState;
+                c.dSWCellIn = c.dSWCellIn * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * c.cellState;
 
                 //partial derivatives for internal connections, initially zero as dS is zero and previous cell state is zero
-                c.dSWCellForget = c.dSWCellForget * c.yForget + c.previousCellState * SigmoidDerivative(c.netForget) * c.previousCellState;
+                c.dSWCellForget = c.dSWCellForget * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * c.previousCellState;
 
                 neuHidden[i] = c;
             });
@@ -482,18 +533,18 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
                 LSTMCell c = neuHidden[i];
 
                 //find the error by taking the product of the output errors and their weight connections.
-                double weightedSum = 0;
+                var weightedSum = 0.0;
                 for (int k = 0; k < L2; k++)
                 {
-                    weightedSum += neuOutput[k].er * mat_hidden2output[k][i];
+                    weightedSum += OutputLayer[k].er * Hidden2OutputWeight[k][i];
                 }
                 weightedSum = NormalizeErr(weightedSum);
 
                 //using the error find the gradient of the output gate
-                double gradientOutputGate = SigmoidDerivative(c.netOut) * c.cellState * weightedSum;
+                var gradientOutputGate = (float)(LearningRate * SigmoidDerivative(c.netOut) * c.cellState * weightedSum);
 
                 //internal cell state error
-                double cellStateError = c.yOut * weightedSum;
+                var cellStateError = (float)(LearningRate * c.yOut * weightedSum);
 
                 //weight updates
                 LSTMWeight[] w_i = input2hidden[i];
@@ -502,32 +553,32 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
                 {
                     var entry = sparse.GetEntry(k);
                     //updates weights for input to hidden layer
-                    w_i[entry.Key].wInputCell += alpha * cellStateError * wd_i[entry.Key].dSInputCell;
-                    w_i[entry.Key].wInputInputGate += alpha * cellStateError * wd_i[entry.Key].dSInputInputGate;
-                    w_i[entry.Key].wInputForgetGate += alpha * cellStateError * wd_i[entry.Key].dSInputForgetGate;
-                    w_i[entry.Key].wInputOutputGate += alpha * gradientOutputGate * entry.Value;
+                    w_i[entry.Key].wInputCell += cellStateError * wd_i[entry.Key].dSInputCell;
+                    w_i[entry.Key].wInputInputGate += cellStateError * wd_i[entry.Key].dSInputInputGate;
+                    w_i[entry.Key].wInputForgetGate += cellStateError * wd_i[entry.Key].dSInputForgetGate;
+                    w_i[entry.Key].wInputOutputGate += gradientOutputGate * entry.Value;
                 }
 
-                if (fea_size > 0)
+                if (DenseFeatureSize > 0)
                 {
                     w_i = feature2hidden[i];
                     wd_i = feature2hiddenDeri[i];
-                    for (int j = 0; j < fea_size; j++)
+                    for (int j = 0; j < DenseFeatureSize; j++)
                     {
                         //make the delta equal to the learning rate multiplied by the gradient multiplied by the input for the connection
                         //update connection weights
-                        w_i[j].wInputCell += alpha * cellStateError * wd_i[j].dSInputCell;
-                        w_i[j].wInputInputGate += alpha * cellStateError * wd_i[j].dSInputInputGate;
-                        w_i[j].wInputForgetGate += alpha * cellStateError * wd_i[j].dSInputForgetGate;
-                        w_i[j].wInputOutputGate += alpha * gradientOutputGate * neuFeatures[j];
+                        w_i[j].wInputCell += cellStateError * wd_i[j].dSInputCell;
+                        w_i[j].wInputInputGate += cellStateError * wd_i[j].dSInputInputGate;
+                        w_i[j].wInputForgetGate += cellStateError * wd_i[j].dSInputForgetGate;
+                        w_i[j].wInputOutputGate += gradientOutputGate * neuFeatures[j];
                     }
                 }
 
                 //update internal weights
-                c.wCellIn += alpha * cellStateError * c.dSWCellIn;
-                c.wCellForget += alpha * cellStateError * c.dSWCellForget;
-                c.wCellOut += alpha * gradientOutputGate * c.cellState;
+                c.wCellIn += cellStateError * c.dSWCellIn;
+                c.wCellForget += cellStateError * c.dSWCellForget;
+                c.wCellOut += gradientOutputGate * c.cellState;
 
                 neuHidden[i] = c;
             });
@@ -537,7 +588,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
             {
                 for (int k = 0; k < L2; k++)
                 {
-                    mat_hidden2output[k][i] += alpha * neuHidden[i].cellOutput * neuOutput[k].er;
+                    Hidden2OutputWeight[k][i] += (float)(LearningRate * neuHidden[i].cellOutput * OutputLayer[k].er);
                }
            });
        }
@@ -548,7 +599,7 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
         {
             //inputs(t) -> hidden(t)
             //Get sparse feature and apply it into hidden layer
-            var sparse = state.GetSparseData();
+            var sparse = state.SparseData;
             int sparseFeatureSize = sparse.GetNumberOfEntries();
 
             Parallel.For(0, L1, parallelOption, j =>
@@ -577,10 +628,11 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
                     cell_j.netOut += entry.Value * w.wInputOutputGate;
                 }
 
+
                 //fea(t) -> hidden(t)
-                if (fea_size > 0)
+                if (DenseFeatureSize > 0)
                 {
-                    for (int i = 0; i < fea_size; i++)
+                    for (int i = 0; i < DenseFeatureSize; i++)
                     {
                         LSTMWeight w = feature2hidden[j][i];
                         cell_j.netIn += neuFeatures[i] * w.wInputInputGate;
@@ -593,11 +645,11 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
                 //include internal connection multiplied by the previous cell state
                 cell_j.netIn += cell_j.previousCellState * cell_j.wCellIn;
                 //squash input
-                cell_j.yIn = Sigmoid(cell_j.netIn);
+                cell_j.yIn = (float)Sigmoid(cell_j.netIn);
 
                 //include internal connection multiplied by the previous cell state
                 cell_j.netForget += cell_j.previousCellState * cell_j.wCellForget;
-                cell_j.yForget = Sigmoid(cell_j.netForget);
+                cell_j.yForget = (float)Sigmoid(cell_j.netForget);
 
                 if (cell_j.mask == true)
                 {
@@ -606,14 +658,14 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
                 else
                 {
                     //cell state is equal to the previous cell state multiplied by the forget gate and the cell inputs multiplied by the input gate
-                    cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * Sigmoid2(cell_j.netCellState);
+                    cell_j.cellState = (float)(cell_j.yForget * cell_j.previousCellState + cell_j.yIn * Sigmoid2(cell_j.netCellState));
                 }
 
                 ////include the internal connection multiplied by the CURRENT cell state
                 cell_j.netOut += cell_j.cellState * cell_j.wCellOut;
 
                 //squash output gate
-                cell_j.yOut = Sigmoid(cell_j.netOut);
+                cell_j.yOut = (float)(Sigmoid(cell_j.netOut));
 
                 cell_j.cellOutput = cell_j.cellState * cell_j.yOut;
 
@@ -621,53 +673,47 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
                 neuHidden[j] = cell_j;
             });
 
-            matrixXvectorADD(neuOutput, neuHidden, mat_hidden2output, 0, L2, 0, L1);
+            matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, 0, L2, 0, L1);
             if (doutput != null)
             {
                 for (int i = 0; i < L2; i++)
                 {
-                    doutput[i] = neuOutput[i].cellOutput;
+                    doutput[i] = OutputLayer[i].cellOutput;
                }
            }
 
             //activation 2   --softmax on words
-            SoftmaxLayer(neuOutput);
+            SoftmaxLayer(OutputLayer);
         }
 
         public override void netReset(bool updateNet = false)   //cleans hidden layer activation + bptt history
         {
-            for (int a = 0; a < L1; a++)
-            {
-                neuHidden[a].mask = false;
-            }
-
-            if (updateNet == true)
-            {
-                //Train mode
-                for (int a = 0; a < L1; a++)
-                {
-                    if (rand.NextDouble() < dropout)
-                    {
-                        neuHidden[a].mask = true;
-                    }
-                }
-            }
-
             Parallel.For(0, L1, parallelOption, i =>
             {
+                neuHidden[i].mask = false;
                 LSTMCellInit(neuHidden[i]);
                 if (updateNet == true)
                 {
                     Array.Clear(input2hiddenDeri[i], 0, L0);
-                    if (fea_size > 0)
+                    if (DenseFeatureSize > 0)
                    {
-                        Array.Clear(feature2hiddenDeri[i], 0, fea_size);
+                        Array.Clear(feature2hiddenDeri[i], 0, DenseFeatureSize);
                    }
                }
            });
-
+            if (updateNet == true)
+            {
+                //Train mode
+                for (int a = 0; a < L1; a++)
+                {
+                    if (rand.NextDouble() < Dropout)
+                    {
+                        neuHidden[a].mask = true;
+                    }
+                }
+            }
        }
    }
diff --git a/RNNSharp/MathUtil.cs b/RNNSharp/MathUtil.cs
index f40ce1b..fbb09a2 100644
--- a/RNNSharp/MathUtil.cs
+++ b/RNNSharp/MathUtil.cs
@@ -1,9 +1,8 @@
 using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 
+/// <summary>
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+/// </summary>
 namespace RNNSharp
 {
     class MathUtil
diff --git a/RNNSharp/Matrix.cs b/RNNSharp/Matrix.cs
index f4b6d89..68c957b 100644
--- a/RNNSharp/Matrix.cs
+++ b/RNNSharp/Matrix.cs
@@ -1,10 +1,7 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using System.IO;
-
+
+/// <summary>
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+/// </summary>
 namespace RNNSharp
 {
     public class Matrix<T>
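All of the new serializers, saveLSTMWeight/loadLSTMWeight above and saveMatrixBin/loadMatrixBin in RNN.cs below, share one scheme: build a 256-entry codebook over the weight values, write the codebook as doubles, then write a single byte index per weight instead of a 4-byte float, roughly a 4x reduction of the weight payload at the cost of the logged distortion. A minimal round-trip sketch follows; a uniform grid stands in for AdvUtils' VectorQuantization, whose codebook construction is not shown in the diff:

    // Sketch of the VQ save/load round trip; the real codebook is learned
    // from the data, not a uniform grid.
    using System;
    using System.IO;

    class VqSketch
    {
        static void Main()
        {
            double[] weights = { -0.30, -0.12, 0.05, 0.44, 0.91 };
            const int vqSize = 256;
            const double min = -1.0, max = 1.0;   // assumed weight range

            double[] codeBook = new double[vqSize];
            for (int i = 0; i < vqSize; i++)
                codeBook[i] = min + (max - min) * i / (vqSize - 1);

            var ms = new MemoryStream();
            var fo = new BinaryWriter(ms);
            fo.Write(vqSize);                                // codebook size
            foreach (double c in codeBook) fo.Write(c);      // codebook as doubles
            foreach (double w in weights)                    // one byte per weight
                fo.Write((byte)Math.Round((w - min) / (max - min) * (vqSize - 1)));

            ms.Position = 0;
            var br = new BinaryReader(ms);
            int n = br.ReadInt32();
            double[] loaded = new double[n];
            for (int i = 0; i < n; i++) loaded[i] = br.ReadDouble();
            foreach (double w in weights)                    // decode byte indices
                Console.WriteLine($"{w} -> {loaded[br.ReadByte()]:F4}");
        }
    }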
diff --git a/RNNSharp/ModelSetting.cs b/RNNSharp/ModelSetting.cs
index fe897c6..8a49556 100644
--- a/RNNSharp/ModelSetting.cs
+++ b/RNNSharp/ModelSetting.cs
@@ -1,84 +1,37 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using AdvUtils;
+using AdvUtils;
 
+/// <summary>
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+/// </summary>
 namespace RNNSharp
 {
     public class ModelSetting
     {
-        public string GetModelFile() { return m_strModelFile; }
-        public void SetModelFile(string modelFile) { m_strModelFile = modelFile; }
-
-        public int GetNumHidden() { return m_NumHidden; }
-        public void SetNumHidden(int n) { m_NumHidden = n; }
-
-        public double GetLearningRate(){ return m_LearningRate; }
-        public void SetLearningRate(double r) { m_LearningRate = r; }
-
-        public double GetDropout() { return m_Dropout; }
-        public void SetDropout(double r) { m_Dropout = r; }
-
-        public int GetBptt() { return m_Bptt; }
-        public void SetBptt(int n) { m_Bptt = n; }
-
-
-        public int GetModelType() { return m_ModelType; }
-        public void SetModelType(int n) { m_ModelType = n; }
-
-        public int GetMaxIteration() { return m_MaxIteration; }
-        public void SetMaxIteration(int i) { m_MaxIteration = i; }
-
-        public virtual bool IsCRFTraining() { return m_bCRFTraining; }
-        public void SetCRFTraining(bool s) { m_bCRFTraining = s; }
-
-        public void SetDir(int dir)
-        {
-            m_iDir = dir;
-        }
-
-        public int GetModelDirection()
-        {
-            return m_iDir;
-        }
-
-        public void SetSaveStep(long savestep)
-        {
-            m_SaveStep = savestep;
-        }
-
-        public long GetSaveStep()
-        {
-            return m_SaveStep;
-        }
-
-        string m_strModelFile;
-        int m_NumHidden;
-        double m_LearningRate;
-        double m_Dropout;
-        int m_Bptt;
-        int m_MaxIteration;
-        bool m_bCRFTraining;
-        long m_SaveStep;
-        int m_ModelType;
-        int m_iDir;
+        public string ModelFile { get; set; }
+        public int NumHidden { get; set; }
+        public float LearningRate { get; set; }
+        public float Dropout { get; set; }
+        public int Bptt { get; set; }
+        public int MaxIteration { get; set; }
+        public bool IsCRFTraining { get; set; }
+        public long SaveStep { get; set; }
+        public int ModelType { get; set; }
+        public int ModelDirection { get; set; }
 
         public void DumpSetting()
         {
-            Logger.WriteLine(Logger.Level.info, "Model File: {0}", m_strModelFile);
-            if (m_ModelType == 0)
+            Logger.WriteLine(Logger.Level.info, "Model File: {0}", ModelFile);
+            if (ModelType == 0)
             {
                 Logger.WriteLine(Logger.Level.info, "Model Structure: Simple RNN");
-                Logger.WriteLine(Logger.Level.info, "BPTT: {0}", m_Bptt);
+                Logger.WriteLine(Logger.Level.info, "BPTT: {0}", Bptt);
             }
-            else if (m_ModelType == 1)
+            else if (ModelType == 1)
             {
                 Logger.WriteLine(Logger.Level.info, "Model Structure: LSTM-RNN");
             }
 
-            if (m_iDir == 0)
+            if (ModelDirection == 0)
             {
                 Logger.WriteLine(Logger.Level.info, "RNN Direction: Forward");
             }
@@ -87,24 +40,24 @@ public void DumpSetting()
                 Logger.WriteLine(Logger.Level.info, "RNN Direction: Bi-directional");
             }
 
-            Logger.WriteLine(Logger.Level.info, "Learning rate: {0}", m_LearningRate);
-            Logger.WriteLine(Logger.Level.info, "Dropout: {0}", m_Dropout);
-            Logger.WriteLine(Logger.Level.info, "Max Iteration: {0}", m_MaxIteration);
-            Logger.WriteLine(Logger.Level.info, "Hidden layer size: {0}", m_NumHidden);
-            Logger.WriteLine(Logger.Level.info, "RNN-CRF: {0}", m_bCRFTraining);
-            if (m_SaveStep > 0)
+            Logger.WriteLine(Logger.Level.info, "Learning rate: {0}", LearningRate);
+            Logger.WriteLine(Logger.Level.info, "Dropout: {0}", Dropout);
+            Logger.WriteLine(Logger.Level.info, "Max Iteration: {0}", MaxIteration);
+            Logger.WriteLine(Logger.Level.info, "Hidden layer size: {0}", NumHidden);
+            Logger.WriteLine(Logger.Level.info, "RNN-CRF: {0}", IsCRFTraining);
+            if (SaveStep > 0)
             {
-                Logger.WriteLine(Logger.Level.info, "Save temporary model after every {0} sentences", m_SaveStep);
+                Logger.WriteLine(Logger.Level.info, "Save temporary model after every {0} sentences", SaveStep);
             }
         }
 
         public ModelSetting()
         {
-            m_MaxIteration = 20;
-            m_Bptt = 4;
-            m_LearningRate = 0.1;
-            m_NumHidden = 200;
-            m_bCRFTraining = true;
+            MaxIteration = 20;
+            Bptt = 4;
+            LearningRate = 0.1f;
+            NumHidden = 200;
+            IsCRFTraining = true;
        }
    }
 }
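With the getter/setter pairs collapsed into auto-properties, configuring a run becomes plain object-initializer code. A hypothetical usage sketch (values are illustrative only; the ModelType and ModelDirection encodings follow DumpSetting above):

    // Hypothetical configuration; 0/1 meanings per DumpSetting.
    var setting = new ModelSetting
    {
        ModelFile = "model.bin",
        NumHidden = 200,
        LearningRate = 0.1f,
        Dropout = 0.0f,
        Bptt = 4,
        MaxIteration = 20,
        IsCRFTraining = true,
        SaveStep = 100000,
        ModelType = 1,       // 0 = simple RNN, 1 = LSTM-RNN
        ModelDirection = 1   // 0 = forward, 1 = bi-directional
    };
    setting.DumpSetting();   // logs the effective settings via AdvUtils.Logger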
setTagBigramTransition(List> m) + public virtual void setTagBigramTransition(List> m) { - if (null == mat_CRFTagTransWeights) - mat_CRFTagTransWeights = new Matrix(L2, L2); + CRFTagTransWeights = new Matrix(L2, L2); for (int i = 0; i < L2; i++) for (int j = 0; j < L2; j++) - mat_CRFTagTransWeights[i][j] = m[i][j]; + CRFTagTransWeights[i][j] = m[i][j]; } //Save matrix into file as binary format - protected void saveMatrixBin(Matrix mat, BinaryWriter fo) + protected void saveMatrixBin(Matrix mat, BinaryWriter fo, bool BuildVQ = true) { int width = mat.GetWidth(); int height = mat.GetHeight(); @@ -103,12 +91,59 @@ protected void saveMatrixBin(Matrix mat, BinaryWriter fo) fo.Write(width); fo.Write(height); - //Save the data in matrix - for (int r = 0; r < height; r++) + if (BuildVQ == false) { - for (int c = 0; c < width; c++) + Logger.WriteLine("Saving matrix without VQ..."); + fo.Write(0); // non-VQ + + //Save the data in matrix + for (int r = 0; r < height; r++) { - fo.Write((float)(mat[r][c])); + for (int c = 0; c < width; c++) + { + fo.Write((float)mat[r][c]); + } + } + } + else + { + //Build vector quantization matrix + int vqSize = 256; + VectorQuantization vq = new VectorQuantization(); + Logger.WriteLine("Saving matrix with VQ {0}...", vqSize); + + int valSize = 0; + for (int i = 0; i < height; i++) + { + for (int j = 0; j < width; j++) + { + vq.Add(mat[i][j]); + valSize++; + } + } + + if (vqSize > valSize) + { + vqSize = valSize; + } + + double distortion = vq.BuildCodebook(vqSize); + Logger.WriteLine("Distortion: {0}, vqSize: {1}", distortion, vqSize); + + //Save VQ codebook into file + fo.Write(vqSize); + for (int j = 0; j < vqSize; j++) + { + fo.Write(vq.CodeBook[j]); + } + + //Save the data in matrix + for (int r = 0; r < height; r++) + { + for (int c = 0; c < width; c++) + { + fo.Write((byte)vq.ComputeVQ(mat[r][c])); + } } } } @@ -117,14 +152,37 @@ protected Matrix loadMatrixBin(BinaryReader br) { int width = br.ReadInt32(); int height = br.ReadInt32(); + int vqSize = br.ReadInt32(); + Logger.WriteLine("Loading matrix. width: {0}, height: {1}, vqSize: {2}", width, height, vqSize); Matrix m = new Matrix(height, width); - - for (int r = 0; r < height; r++) + if (vqSize == 0) + { + for (int r = 0; r < height; r++) + { + for (int c = 0; c < width; c++) + { + m[r][c] = br.ReadSingle(); + } + } + } + else { - for (int c = 0; c < width; c++) + List codeBook = new List(); + + for (int i = 0; i < vqSize; i++) + { + codeBook.Add(br.ReadDouble()); + } + + + for (int r = 0; r < height; r++) { - m[r][c] = br.ReadSingle(); + for (int c = 0; c < width; c++) + { + int vqIndex = br.ReadByte(); + m[r][c] = codeBook[vqIndex]; + } } } @@ -133,10 +191,10 @@ protected Matrix loadMatrixBin(BinaryReader br) public void setInputLayer(State state, int curState, int numStates, int[] predicted, bool forward = true) { - if (predicted != null) + if (predicted != null && state.RuntimeFeatures != null) { // set runtime feature - for (int i = 0; i < state.GetNumRuntimeFeature(); i++) + for (int i = 0; i < state.RuntimeFeatures.Length; i++) { for (int j = 0; j < L2; j++) { @@ -144,7 +202,7 @@ public void setInputLayer(State state, int curState, int numStates, int[] predic state.SetRuntimeFeature(i, j, 0); } - int pos = curState + ((forward == true) ? 1 : -1) * state.GetRuntimeFeature(i).OffsetToCurrentState; + int pos = curState + ((forward == true) ? 
1 : -1) * state.RuntimeFeatures[i].OffsetToCurrentState; if (pos >= 0 && pos < numStates) { state.SetRuntimeFeature(i, predicted[pos], 1); @@ -152,66 +210,7 @@ public void setInputLayer(State state, int curState, int numStates, int[] predic } } - var dense = state.GetDenseData(); - for (int i = 0; i < dense.GetDimension(); i++) - { - neuFeatures[i] = dense[i]; - } - } - - public long m_SaveStep; - public virtual void SetSaveStep(long savestep) - { - m_SaveStep = savestep; - } - - protected int m_MaxIter; - public int MaxIter { get { return m_MaxIter; } } - public virtual void SetMaxIter(int _nMaxIter) - { - m_MaxIter = _nMaxIter; - } - - public RNN() - { - gradient_cutoff = 15; - - alpha = 0.1; - dropout = 0; - logp = 0; - llogp = -100000000; - minTknErrRatio = double.MaxValue; - L1 = 30; - - fea_size = 0; - - neuFeatures = null; - neuOutput = null; - } - - public void SetModelDirection(int dir) - { - m_modeldirection = (MODELDIRECTION)dir; - } - - - public virtual void SetFeatureDimension(int denseFeatueSize, int sparseFeatureSize, int tagSize) - { - fea_size = denseFeatueSize; - L0 = sparseFeatureSize; - L2 = tagSize; - } - - public virtual void SetCRFTraining(bool b) { m_bCRFTraining = b; } - public virtual void SetGradientCutoff(double newGradient) { gradient_cutoff = newGradient; } - public virtual void SetLearningRate(double newAlpha) { alpha = newAlpha; } - public virtual void SetDropout(double newDropout) { dropout = newDropout; } - public virtual void SetHiddenLayerSize(int newsize) { L1 = newsize;} - public virtual void SetModelFile(string strModelFile) { m_strModelFile = strModelFile; } - - public bool IsCRFModel() - { - return m_bCRFTraining; + neuFeatures = state.DenseData; } public double exp_10(double num) { return Math.Exp(num * 2.302585093); } @@ -222,7 +221,7 @@ public bool IsCRFModel() public virtual Matrix PredictSentence(Sequence pSequence, RunningMode runningMode) { - int numStates = pSequence.GetSize(); + int numStates = pSequence.States.Length; Matrix m = new Matrix(numStates, L2); int[] predicted = new int[numStates]; bool isTraining = true; @@ -238,15 +237,14 @@ public virtual Matrix PredictSentence(Sequence pSequence, RunningMode ru netReset(isTraining); for (int curState = 0; curState < numStates; curState++) { - State state = pSequence.Get(curState); + State state = pSequence.States[curState]; setInputLayer(state, curState, numStates, predicted); computeNet(state, m[curState], isTraining); predicted[curState] = GetBestOutputIndex(); if (runningMode != RunningMode.Test) { - logp += Math.Log10(neuOutput[state.GetLabel()].cellOutput); - counter++; + logp += Math.Log10(OutputLayer[state.Label].cellOutput); } if (runningMode == RunningMode.Train) @@ -282,70 +280,58 @@ public void SoftmaxLayer(neuron[] layer) public int GetBestOutputIndex() { int imax = 0; - double dmax = neuOutput[0].cellOutput; + double dmax = OutputLayer[0].cellOutput; for (int k = 1; k < L2; k++) { - if (neuOutput[k].cellOutput > dmax) + if (OutputLayer[k].cellOutput > dmax) { - dmax = neuOutput[k].cellOutput; + dmax = OutputLayer[k].cellOutput; imax = k; } } return imax; } - public virtual Matrix learnSentenceForRNNCRF(Sequence pSequence, RunningMode runningMode) + public virtual int[] PredictSentenceCRF(Sequence pSequence, RunningMode runningMode) { - //Reset the network - netReset(false); - int numStates = pSequence.GetSize(); - - int[] predicted_nn = new int[numStates]; - m_RawOutput = new Matrix(numStates, L2);// new double[numStates][]; - for (int curState = 0; curState < numStates; 
-        public virtual Matrix learnSentenceForRNNCRF(Sequence pSequence, RunningMode runningMode)
+        public virtual int[] PredictSentenceCRF(Sequence pSequence, RunningMode runningMode)
         {
-            //Reset the network
-            netReset(false);
-            int numStates = pSequence.GetSize();
-
-            int[] predicted_nn = new int[numStates];
-            m_RawOutput = new Matrix(numStates, L2);// new double[numStates][];
-            for (int curState = 0; curState < numStates; curState++)
-            {
-                State state = pSequence.Get(curState);
-
-                setInputLayer(state, curState, numStates, predicted_nn);
-                computeNet(state, m_RawOutput[curState]); //compute probability distribution
-
-                predicted_nn[curState] = GetBestOutputIndex();
-            }
+            int numStates = pSequence.States.Length;

-            ForwardBackward(numStates, m_RawOutput);
+            Matrix nnOutput = PredictSentence(pSequence, RunningMode.Test);
+            ForwardBackward(numStates, nnOutput);

-            //Get the best result
-            int[] predicted = new int[numStates];
-            for (int i = 0; i < numStates; i++)
+            if (runningMode != RunningMode.Test)
             {
-                State state = pSequence.Get(i);
-                logp += Math.Log10(mat_CRFSeqOutput[i][state.GetLabel()]);
-
-                predicted[i] = GetBestZIndex(i);
+                //Get the best result
+                for (int i = 0; i < numStates; i++)
+                {
+                    logp += Math.Log10(CRFSeqOutput[i][pSequence.States[i].Label]);
+                }
             }

-            UpdateBigramTransition(pSequence);
+            int[] predicted = Viterbi(nnOutput, numStates);

-            netReset(true);
-            for (int curState = 0; curState < numStates; curState++)
+            if (runningMode == RunningMode.Train)
             {
-                // error propogation
-                State state = pSequence.Get(curState);
-                setInputLayer(state, curState, numStates, predicted_nn);
-                computeNet(state, m_RawOutput[curState]); //compute probability distribution
-
-                counter++;
+                UpdateBigramTransition(pSequence);
+                netReset(true);
+                for (int curState = 0; curState < numStates; curState++)
+                {
+                    // error propagation
+                    State state = pSequence.States[curState];
+                    setInputLayer(state, curState, numStates, null);
+                    computeNet(state, null); //compute probability distribution

-                learnNet(state, curState);
-                LearnBackTime(state, numStates, curState);
+                    learnNet(state, curState);
+                    LearnBackTime(state, numStates, curState);
+                }
             }

-            return mat_CRFSeqOutput;
+            return predicted;
         }

         public void UpdateBigramTransition(Sequence seq)
         {
-            int numStates = seq.GetSize();
+            int numStates = seq.States.Length;
             Matrix m_DeltaBigramLM = new Matrix(L2, L2);

             for (int timeat = 1; timeat < numStates; timeat++)
@@ -354,43 +340,25 @@ public void UpdateBigramTransition(Sequence seq)
                 {
                     for (int j = 0; j < L2; j++)
                     {
-                        m_DeltaBigramLM[i][j] -= (mat_CRFTagTransWeights[i][j] * mat_CRFSeqOutput[timeat][i] * mat_CRFSeqOutput[timeat - 1][j]);
+                        m_DeltaBigramLM[i][j] -= (CRFTagTransWeights[i][j] * CRFSeqOutput[timeat][i] * CRFSeqOutput[timeat - 1][j]);
                     }
                 }

-                int iTagId = seq.Get(timeat).GetLabel();
-                int iLastTagId = seq.Get(timeat - 1).GetLabel();
+                int iTagId = seq.States[timeat].Label;
+                int iLastTagId = seq.States[timeat - 1].Label;
                 m_DeltaBigramLM[iTagId][iLastTagId] += 1;
             }

-            counterTokenForLM++;
-
             //Update tag Bigram LM
             for (int b = 0;b < L2;b++)
             {
                 for (int a = 0; a < L2; a++)
                 {
-                    mat_CRFTagTransWeights[b][a] += alpha * m_DeltaBigramLM[b][a];
+                    CRFTagTransWeights[b][a] += LearningRate * m_DeltaBigramLM[b][a];
                 }
             }
         }

-        public int GetBestZIndex(int currStatus)
-        {
-            //Get the output tag
-            int imax = 0;
-            double dmax = mat_CRFSeqOutput[currStatus][0];
-            for (int j = 1; j < L2; j++)
-            {
-                if (mat_CRFSeqOutput[currStatus][j] > dmax)
-                {
-                    dmax = mat_CRFSeqOutput[currStatus][j];
-                    imax = j;
-                }
-            }
-            return imax;
-        }
-
         public void ForwardBackward(int numStates, Matrix m_RawOutput)
         {
             //forward
@@ -405,7 +373,7 @@ public void ForwardBackward(int numStates, Matrix m_RawOutput)
                 {
                     for (int k = 0; k < L2; k++)
                     {
-                        double fbgm = mat_CRFTagTransWeights[j][k];
+                        double fbgm = CRFTagTransWeights[j][k];
                         double finit = alphaSet[i - 1][k];
                         double ftmp = fbgm + finit;
@@ -429,7 +397,7 @@ public void ForwardBackward(int numStates, Matrix m_RawOutput)
                 {
                     for (int 
k = 0; k < L2; k++) { - double fbgm = mat_CRFTagTransWeights[k][j]; + double fbgm = CRFTagTransWeights[k][j]; double finit = betaSet[i + 1][k]; double ftmp = fbgm + finit; @@ -443,7 +411,6 @@ public void ForwardBackward(int numStates, Matrix m_RawOutput) } //Z_ - double Z_ = 0.0; for (int i = 0; i < L2; i++) { @@ -452,14 +419,15 @@ public void ForwardBackward(int numStates, Matrix m_RawOutput) } //Calculate the output probability of each node - mat_CRFSeqOutput = new Matrix(numStates, L2); + CRFSeqOutput = new Matrix(numStates, L2); for (int i = 0; i < numStates; i++) { for (int j = 0; j < L2; j++) { - mat_CRFSeqOutput[i][j] = Math.Exp(alphaSet[i][j] + betaSet[i][j] - m_RawOutput[i][j] - Z_); + CRFSeqOutput[i][j] = Math.Exp(alphaSet[i][j] + betaSet[i][j] - m_RawOutput[i][j] - Z_); } } + } @@ -471,9 +439,9 @@ private double random(double min, double max) return rand.NextDouble() * (max - min) + min; } - public double RandInitWeight() + public float RandInitWeight() { - return random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1); + return (float)(random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1)); } @@ -483,43 +451,35 @@ public double RandInitWeight() public virtual double TrainNet(DataSet trainingSet, int iter) { DateTime start = DateTime.Now; - int[] predicted; - Logger.WriteLine(Logger.Level.info, "[TRACE] Iter " + iter + " begins with learning rate alpha = " + alpha + " ..."); + Logger.WriteLine(Logger.Level.info, "[TRACE] Iter " + iter + " begins with learning rate alpha = " + LearningRate + " ..."); //Initialize varibles - counter = 0; logp = 0; - counterTokenForLM = 0; //Shffle training corpus trainingSet.Shuffle(); - int numSequence = trainingSet.GetSize(); + int numSequence = trainingSet.SequenceList.Count; + int wordCnt = 0; int tknErrCnt = 0; int sentErrCnt = 0; Logger.WriteLine(Logger.Level.info, "[TRACE] Progress = 0/" + numSequence / 1000.0 + "K\r"); for (int curSequence = 0; curSequence < numSequence; curSequence++) { - Sequence pSequence = trainingSet.Get(curSequence); - int numStates = pSequence.GetSize(); - - if (numStates < 3) - continue; + Sequence pSequence = trainingSet.SequenceList[curSequence]; + int numStates = pSequence.States.Length; + wordCnt += numStates; - Matrix m; - if (m_bCRFTraining == true) + int[] predicted; + if (IsCRFTraining == true) { - m = learnSentenceForRNNCRF(pSequence, RunningMode.Train); + predicted = PredictSentenceCRF(pSequence, RunningMode.Train); } else { + Matrix m; m = PredictSentence(pSequence, RunningMode.Train); - } - - predicted = new int[pSequence.GetSize()]; - for (int i = 0; i < pSequence.GetSize(); i++) - { - predicted[i] = MathUtil.GetMaxProbIndex(m[i]); + predicted = GetBestResult(m); } int newTknErrCnt = GetErrorTokenNum(pSequence, predicted); @@ -532,24 +492,24 @@ public virtual double TrainNet(DataSet trainingSet, int iter) if ((curSequence + 1) % 1000 == 0) { Logger.WriteLine(Logger.Level.info, "[TRACE] Progress = {0} ", (curSequence + 1) / 1000 + "K/" + numSequence / 1000.0 + "K"); - Logger.WriteLine(Logger.Level.info, " train cross-entropy = {0} ", -logp / Math.Log10(2.0) / counter); - Logger.WriteLine(Logger.Level.info, " Error token ratio = {0}%", (double)tknErrCnt / (double)counter * 100); - Logger.WriteLine(Logger.Level.info, " Error sentence ratio = {0}%", (double)sentErrCnt / (double)curSequence * 100); + Logger.WriteLine(Logger.Level.info, " train cross-entropy = {0} ", -logp / Math.Log10(2.0) / wordCnt); + Logger.WriteLine(Logger.Level.info, " Error token ratio = {0}%", (double)tknErrCnt / 
(double)wordCnt * 100.0); + Logger.WriteLine(Logger.Level.info, " Error sentence ratio = {0}%", (double)sentErrCnt / (double)curSequence * 100.0); } - if (m_SaveStep > 0 && (curSequence + 1) % m_SaveStep == 0) + if (SaveStep > 0 && (curSequence + 1) % SaveStep == 0) { //After processed every m_SaveStep sentences, save current model into a temporary file Logger.WriteLine(Logger.Level.info, "Saving temporary model into file..."); - saveNetBin(m_strModelFile + ".tmp"); + saveNetBin(ModelTempFile); } } DateTime now = DateTime.Now; TimeSpan duration = now.Subtract(start); - double entropy = -logp / Math.Log10(2.0) / counter; - double ppl = exp_10(-logp / counter); + double entropy = -logp / Math.Log10(2.0) / wordCnt; + double ppl = exp_10(-logp / wordCnt); Logger.WriteLine(Logger.Level.info, "[TRACE] Iter " + iter + " completed"); Logger.WriteLine(Logger.Level.info, "[TRACE] Sentences = " + numSequence + ", time escape = " + duration + "s, speed = " + numSequence / duration.TotalSeconds); Logger.WriteLine(Logger.Level.info, "[TRACE] In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl); @@ -572,15 +532,17 @@ public static void CheckModelFileType(string filename, out MODELTYPE modelType, modelType = (MODELTYPE)br.ReadInt32(); modelDir = (MODELDIRECTION)br.ReadInt32(); } + + Logger.WriteLine("Get model type {0} and direction {1}", modelType, modelDir); } protected double NormalizeErr(double err) { - if (err > gradient_cutoff) - err = gradient_cutoff; - if (err < -gradient_cutoff) - err = -gradient_cutoff; + if (err > GradientCutoff) + err = GradientCutoff; + if (err < -GradientCutoff) + err = -GradientCutoff; return err; } @@ -618,13 +580,12 @@ public void matrixXvectorADD(neuron[] dest, neuron[] srcvec, Matrix srcm } } - public int[] DecodeNN(Sequence seq) + + public int[] GetBestResult(Matrix ys) { - Matrix ys = PredictSentence(seq, RunningMode.Test); - int n = seq.GetSize(); - int[] output = new int[n]; + int[] output = new int[ys.GetHeight()]; - for (int i = 0; i < n; i++) + for (int i = 0; i < ys.GetHeight(); i++) { output[i] = MathUtil.GetMaxProbIndex(ys[i]); } @@ -632,6 +593,12 @@ public int[] DecodeNN(Sequence seq) return output; } + public int[] DecodeNN(Sequence seq) + { + Matrix ys = PredictSentence(seq, RunningMode.Test); + return GetBestResult(ys); + } + public int[][] DecodeNBestCRF(Sequence seq, int N) { @@ -639,9 +606,9 @@ public int[][] DecodeNBestCRF(Sequence seq, int N) //ys contains the output of RNN for each word Matrix ys = PredictSentence(seq, RunningMode.Test); - int n = seq.GetSize(); + int n = seq.States.Length; int K = L2; - Matrix STP = mat_CRFTagTransWeights; + Matrix STP = CRFTagTransWeights; PAIR[, ,] vPath = new PAIR[n, K, N]; int DUMP_LABEL = -1; double[,] vPreAlpha = new double[K, N]; @@ -726,41 +693,33 @@ public int[][] DecodeNBestCRF(Sequence seq, int N) return vTagOutput; } - public int[] DecodeCRF(Sequence seq) + public int[] Viterbi(Matrix ys, int seqLen) { - //ys contains the output of RNN for each word - Matrix ys = PredictSentence(seq, RunningMode.Test); - - int n = seq.GetSize(); - int K = L2; - Matrix STP = mat_CRFTagTransWeights; - int[,] vPath = new int[n, K]; + int[,] vPath = new int[seqLen, L2]; - double[] vPreAlpha = new double[K]; - double[] vAlpha = new double[K]; + double[] vPreAlpha = new double[L2]; + double[] vAlpha = new double[L2]; int nStartTagIndex = 0; - double MIN_VALUE = double.MinValue; //viterbi algorithm - for (int i = 0; i < K; i++) + for (int i = 0; i < L2; i++) { vPreAlpha[i] = 
ys[0][i];
                 if (i != nStartTagIndex)
-                    vPreAlpha[i] += MIN_VALUE;
+                    vPreAlpha[i] += double.MinValue;
                 vPath[0, i] = nStartTagIndex;
             }

-            for (int t = 1; t < n; t++)
+            for (int t = 1; t < seqLen; t++)
             {
-                for (int j = 0; j < K; j++)
+                for (int j = 0; j < L2; j++)
                 {
                     vPath[t, j] = 0;
-                    vAlpha[j] = MIN_VALUE;
+                    vAlpha[j] = double.MinValue;

-                    for (int i = 0; i < K; i++)
+                    for (int i = 0; i < L2; i++)
                     {
-                        double score = vPreAlpha[i] + STP[j][i] + ys[t][j];
-
+                        double score = vPreAlpha[i] + CRFTagTransWeights[j][i] + ys[t][j];
                         if (score > vAlpha[j])
                         {
                             vAlpha[j] = score;
@@ -769,14 +728,14 @@ public int[] DecodeCRF(Sequence seq)
                     }
                 }
                 vPreAlpha = vAlpha;
-                vAlpha = new double[K];
+                vAlpha = new double[L2];
             }

             //backtrace to get the best result path
-            int[] tagOutputs = new int[n];
-            tagOutputs[n - 1] = nStartTagIndex;
-            int nNextTag = tagOutputs[n - 1];
-            for (int t = n - 2; t >= 0; t--)
+            int[] tagOutputs = new int[seqLen];
+            tagOutputs[seqLen - 1] = nStartTagIndex;
+            int nNextTag = tagOutputs[seqLen - 1];
+            for (int t = seqLen - 2; t >= 0; t--)
             {
                 tagOutputs[t] = vPath[t + 1, nNextTag];
                 nNextTag = tagOutputs[t];
@@ -785,14 +744,20 @@ public int[] DecodeCRF(Sequence seq)
             return tagOutputs;
         }

+        public int[] DecodeCRF(Sequence seq)
+        {
+            //ys contains the output of RNN for each word
+            Matrix ys = PredictSentence(seq, RunningMode.Test);
+            return Viterbi(ys, seq.States.Length);
+        }
+
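Viterbi above maximizes ys[t][j] plus the tag-transition score over whole tag paths, with position 0 pinned to the start tag and the backtrace pinned to tag 0 at the end (the begin/end rows every Sequence now carries). A standalone toy version of the same recurrence, with hypothetical scores and three tags, to make the bookkeeping concrete:

    using System;

    class ViterbiToy
    {
        static void Main()
        {
            // Hypothetical per-position tag scores ys[t][j] and transition scores
            // trans[j][i] (i = previous tag, j = current tag), mirroring Viterbi() with L2 = 3
            double[][] ys =
            {
                new[] { 5.0, 0.1, 0.2 },   // position 0 is the start state, tag 0
                new[] { 0.3, 2.0, 0.4 },
                new[] { 0.2, 0.3, 1.5 },
            };
            double[][] trans =
            {
                new[] { 0.0, -1.0, -1.0 },
                new[] { 0.5,  0.0, -2.0 },
                new[] { 0.5, -2.0,  0.0 },
            };

            int seqLen = ys.Length, K = 3;
            double[] vPreAlpha = (double[])ys[0].Clone();
            int[,] vPath = new int[seqLen, K];

            for (int t = 1; t < seqLen; t++)
            {
                double[] vAlpha = new double[K];
                for (int j = 0; j < K; j++)
                {
                    vAlpha[j] = double.MinValue;
                    for (int i = 0; i < K; i++)
                    {
                        double score = vPreAlpha[i] + trans[j][i] + ys[t][j];
                        if (score > vAlpha[j]) { vAlpha[j] = score; vPath[t, j] = i; }
                    }
                }
                vPreAlpha = vAlpha;
            }

            // Backtrace; the last position is pinned to tag 0, like the end-of-sentence row
            int[] tags = new int[seqLen];
            for (int t = seqLen - 2; t >= 0; t--)
            {
                tags[t] = vPath[t + 1, tags[t + 1]];
            }
            Console.WriteLine(string.Join(" ", tags)); // prints "0 1 0" for these scores
        }
    }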
         private int GetErrorTokenNum(Sequence seq, int[] predicted)
         {
             int tknErrCnt = 0;
-            int numStates = seq.GetSize();
+            int numStates = seq.States.Length;
             for (int curState = 0; curState < numStates; curState++)
             {
-                State state = seq.Get(curState);
-                if (predicted[curState] != state.GetLabel())
+                if (predicted[curState] != seq.States[curState].Label)
                 {
                     tknErrCnt++;
                 }
@@ -803,61 +768,52 @@ private int GetErrorTokenNum(Sequence seq, int[] predicted)

         public void CalculateOutputLayerError(State state, int timeat)
         {
-            if (m_bCRFTraining == true)
+            if (IsCRFTraining == true)
             {
                 //For RNN-CRF, use joint probability of output layer nodes and transition between contigous nodes
                 for (int c = 0; c < L2; c++)
                 {
-                    neuOutput[c].er = -mat_CRFSeqOutput[timeat][c];
+                    OutputLayer[c].er = -CRFSeqOutput[timeat][c];
                 }
-                neuOutput[state.GetLabel()].er = 1 - mat_CRFSeqOutput[timeat][state.GetLabel()];
+                OutputLayer[state.Label].er = 1 - CRFSeqOutput[timeat][state.Label];
             }
             else
             {
                 //For standard RNN
                 for (int c = 0; c < L2; c++)
                 {
-                    neuOutput[c].er = -neuOutput[c].cellOutput;
+                    OutputLayer[c].er = -OutputLayer[c].cellOutput;
                 }
-                neuOutput[state.GetLabel()].er = 1 - neuOutput[state.GetLabel()].cellOutput;
+                OutputLayer[state.Label].er = 1 - OutputLayer[state.Label].cellOutput;
             }
         }

-
-        public virtual bool ValidateNet(DataSet validationSet)
+        public virtual bool ValidateNet(DataSet validationSet, int iter)
         {
             Logger.WriteLine(Logger.Level.info, "[TRACE] Start validation ...");
             int wordcn = 0;
-            int[] predicted;
             int tknErrCnt = 0;
             int sentErrCnt = 0;

             //Initialize varibles
-            counter = 0;
             logp = 0;
-            counterTokenForLM = 0;
-
-            int numSequence = validationSet.GetSize();
+            int numSequence = validationSet.SequenceList.Count;
             for (int curSequence = 0; curSequence < numSequence; curSequence++)
             {
-                Sequence pSequence = validationSet.Get(curSequence);
-                wordcn += pSequence.GetSize();
+                Sequence pSequence = validationSet.SequenceList[curSequence];
+                wordcn += pSequence.States.Length;

-                Matrix m;
-                if (m_bCRFTraining == true)
+                int[] predicted;
+                if (IsCRFTraining == true)
                 {
-                    m = learnSentenceForRNNCRF(pSequence, RunningMode.Validate);
+                    predicted = PredictSentenceCRF(pSequence, RunningMode.Validate);
                 }
                 else
                 {
+                    Matrix m;
                     m = PredictSentence(pSequence, RunningMode.Validate);
-                }
-
-                predicted = new int[pSequence.GetSize()];
-                for (int i = 0; i < pSequence.GetSize(); i++)
-                {
-                    predicted[i] = MathUtil.GetMaxProbIndex(m[i]);
+                    predicted = GetBestResult(m);
                 }

                 int newTknErrCnt = GetErrorTokenNum(pSequence, predicted);
@@ -868,10 +824,10 @@ public virtual bool ValidateNet(DataSet validationSet)
                 }
             }

-            double entropy = -logp / Math.Log10(2.0) / counter;
-            double ppl = exp_10(-logp / counter);
-            double tknErrRatio = (double)tknErrCnt / (double)wordcn * 100;
-            double sentErrRatio = (double)sentErrCnt / (double)numSequence * 100;
+            double entropy = -logp / Math.Log10(2.0) / wordcn;
+            double ppl = exp_10(-logp / wordcn);
+            double tknErrRatio = (double)tknErrCnt / (double)wordcn * 100.0;
+            double sentErrRatio = (double)sentErrCnt / (double)numSequence * 100.0;

             Logger.WriteLine(Logger.Level.info, "[TRACE] In validation: error token ratio = {0}% error sentence ratio = {1}%", tknErrRatio, sentErrRatio);
             Logger.WriteLine(Logger.Level.info, "[TRACE] In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl);
diff --git a/RNNSharp/RNNDecoder.cs b/RNNSharp/RNNDecoder.cs
index 96ae4a7..d9021e1 100644
--- a/RNNSharp/RNNDecoder.cs
+++ b/RNNSharp/RNNDecoder.cs
@@ -1,11 +1,9 @@
 using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using System.IO;
 using AdvUtils;

+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
 namespace RNNSharp
 {
     public class RNNDecoder
@@ -47,34 +45,22 @@ public RNNDecoder(string strModelFileName, Featurizer featurizer)
             }

             m_Rnn.loadNetBin(strModelFileName);
-            Logger.WriteLine(Logger.Level.info, "CRF Model: {0}", m_Rnn.IsCRFModel());
+            Logger.WriteLine(Logger.Level.info, "CRF Model: {0}", m_Rnn.IsCRFTraining);
             m_Featurizer = featurizer;
         }


         public int[][] ProcessNBest(Sentence sent, int nbest)
         {
-            if (m_Rnn.IsCRFModel() == false)
+            if (m_Rnn.IsCRFTraining == false)
             {
-                return null;
+                throw new ArgumentException("N-best result is only for RNN-CRF model.");
             }

             Sequence seq = m_Featurizer.ExtractFeatures(sent);
             int[][] predicted = m_Rnn.DecodeNBestCRF(seq, nbest);
-
-            //Remove the beginning and end character from result
-            int[][] results = new int[nbest][];
-
-            for (int k = 0; k < nbest; k++)
-            {
-                results[k] = new int[predicted[k].Length - 2];
-                for (int i = 1; i < predicted[k].Length - 1; i++)
-                {
-                    results[k][i - 1] = predicted[k][i];
-                }
-            }

-            return results;
+            return predicted;
         }


@@ -82,7 +68,7 @@ public int[] Process(Sentence sent)
         {
             Sequence seq = m_Featurizer.ExtractFeatures(sent);
             int[] predicted;
-            if (m_Rnn.IsCRFModel() == true)
+            if (m_Rnn.IsCRFTraining == true)
             {
                 predicted = m_Rnn.DecodeCRF(seq);
             }
@@ -91,14 +77,7 @@ public int[] Process(Sentence sent)
                 predicted = m_Rnn.DecodeNN(seq);
             }

-            //Remove the beginning and end character from result
-            int[] results = new int[predicted.Length - 2];
-            for (int i = 1; i < predicted.Length - 1; i++)
-            {
-                results[i - 1] = predicted[i];
-            }
-
-            return results;
+            return predicted;
         }
     }
 }
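Because the begin/end trimming is gone, Process and ProcessNBest now return one tag id per row, including the inserted begin/end positions. A hedged usage sketch of the n-best path; the file paths are hypothetical, while the constructors and calls (TagSet, Featurizer, RNNDecoder, ProcessNBest) are the ones used elsewhere in this patch:

    using System;
    using System.Collections.Generic;
    using RNNSharp;

    class NBestDemo
    {
        static void Main()
        {
            // Hypothetical paths for the tag set, feature config and trained CRF model
            TagSet tagSet = new TagSet("tags.txt");
            Featurizer featurizer = new Featurizer("features.config", tagSet);
            RNNDecoder decoder = new RNNDecoder("model.crf.bin", featurizer);

            Sentence sent = new Sentence(new List<string[]>
            {
                new[] { "John" },
                new[] { "lives" },
            });

            // Throws ArgumentException if the model was not trained with -crf 1
            int[][] nbest = decoder.ProcessNBest(sent, 3);
            for (int k = 0; k < nbest.Length; k++)
            {
                Console.WriteLine("hypothesis {0}: {1}", k, string.Join(" ", nbest[k]));
            }
        }
    }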
diff --git a/RNNSharp/RNNEncoder.cs b/RNNSharp/RNNEncoder.cs
index 6e21131..3f8c4ac 100644
--- a/RNNSharp/RNNEncoder.cs
+++ b/RNNSharp/RNNEncoder.cs
@@ -1,51 +1,32 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using System.IO;
-using AdvUtils;
+using AdvUtils;

+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
 namespace RNNSharp
 {
     public class RNNEncoder
     {
         ModelSetting m_modelSetting;
-        DataSet m_TrainingSet;
-        DataSet m_ValidationSet;
-        List<List<double>> m_LabelBigramTransition;
-
-        public void SetLabelBigramTransition(List<List<double>> m)
-        {
-            m_LabelBigramTransition = m;
-        }
+        public DataSet TrainingSet { get; set; }
+        public DataSet ValidationSet { get; set; }

         public RNNEncoder(ModelSetting modelSetting)
         {
             m_modelSetting = modelSetting;
         }
-
-        public void SetTrainingSet(DataSet train)
-        {
-            m_TrainingSet = train;
-        }
-        public void SetValidationSet(DataSet validation)
-        {
-            m_ValidationSet = validation;
-        }
-
         public void Train()
         {
             RNN rnn;

-            if (m_modelSetting.GetModelDirection() == 0)
+            if (m_modelSetting.ModelDirection == 0)
             {
-                if (m_modelSetting.GetModelType() == 0)
+                if (m_modelSetting.ModelType == 0)
                 {
                     SimpleRNN sRNN = new SimpleRNN();

-                    sRNN.setBPTT(m_modelSetting.GetBptt() + 1);
+                    sRNN.setBPTT(m_modelSetting.Bptt + 1);
                     sRNN.setBPTTBlock(10);

                     rnn = sRNN;
@@ -57,15 +38,15 @@ public void Train()
             }
             else
             {
-                if (m_modelSetting.GetModelType() == 0)
+                if (m_modelSetting.ModelType == 0)
                 {
                     SimpleRNN sForwardRNN = new SimpleRNN();
                     SimpleRNN sBackwardRNN = new SimpleRNN();

-                    sForwardRNN.setBPTT(m_modelSetting.GetBptt() + 1);
+                    sForwardRNN.setBPTT(m_modelSetting.Bptt + 1);
                     sForwardRNN.setBPTTBlock(10);

-                    sBackwardRNN.setBPTT(m_modelSetting.GetBptt() + 1);
+                    sBackwardRNN.setBPTT(m_modelSetting.Bptt + 1);
                     sBackwardRNN.setBPTTBlock(10);

                     rnn = new BiRNN(sForwardRNN, sBackwardRNN);
@@ -76,37 +57,33 @@ public void Train()
                 }
             }

-            //Set model type
-            rnn.SetModelDirection(m_modelSetting.GetModelDirection());
+            rnn.ModelDirection = (MODELDIRECTION)m_modelSetting.ModelDirection;
+            rnn.ModelFile = m_modelSetting.ModelFile;
+            rnn.SaveStep = m_modelSetting.SaveStep;
+            rnn.MaxIter = m_modelSetting.MaxIteration;
+            rnn.IsCRFTraining = m_modelSetting.IsCRFTraining;
+            rnn.LearningRate = m_modelSetting.LearningRate;
+            rnn.GradientCutoff = 15.0f;
+            rnn.Dropout = m_modelSetting.Dropout;
+            rnn.L1 = m_modelSetting.NumHidden;

-            //Set feature dimension
-            rnn.SetFeatureDimension(m_TrainingSet.GetDenseDimension(),
-                m_TrainingSet.GetSparseDimension(),
-                m_TrainingSet.GetTagSize());
-
-
-            rnn.SetModelFile(m_modelSetting.GetModelFile());
-            rnn.SetSaveStep(m_modelSetting.GetSaveStep());
-            rnn.SetMaxIter(m_modelSetting.GetMaxIteration());
-            rnn.SetCRFTraining(m_modelSetting.IsCRFTraining());
-            rnn.SetLearningRate(m_modelSetting.GetLearningRate());
-            rnn.SetGradientCutoff(15.0);
-            rnn.SetDropout(m_modelSetting.GetDropout());
-            rnn.SetHiddenLayerSize(m_modelSetting.GetNumHidden());
+            rnn.DenseFeatureSize = TrainingSet.DenseFeatureSize();
+            rnn.L0 = TrainingSet.GetSparseDimension();
+            rnn.L2 = TrainingSet.TagSize;

             rnn.initMem();

             //Create tag-bigram transition probability matrix only for sequence RNN mode
-            if (m_modelSetting.IsCRFTraining() == true)
+            if (m_modelSetting.IsCRFTraining)
             {
-                rnn.setTagBigramTransition(m_LabelBigramTransition);
+                rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition);
             }

             Logger.WriteLine(Logger.Level.info, "");

             Logger.WriteLine(Logger.Level.info, "[TRACE] Iterative training begins ...");
             double lastPPL = double.MaxValue;
-            double lastAlpha = rnn.Alpha;
+            double lastAlpha = rnn.LearningRate;
             int iter = 0;
             while (true)
             {
@@ -117,37 +94,31 @@ public void Train()
                 }

                 //Start to train model
-                double ppl = rnn.TrainNet(m_TrainingSet, iter);
+                double ppl = rnn.TrainNet(TrainingSet, iter);

                 //Validate the model by validated corpus
                 bool betterValidateNet = false;
-                if (rnn.ValidateNet(m_ValidationSet) == true)
+                if (rnn.ValidateNet(ValidationSet, iter) == true)
                 {
                     //If current model is better than before, save it into file
-                    Logger.WriteLine(Logger.Level.info, "Saving better model into file {0}...", m_modelSetting.GetModelFile());
-                    rnn.saveNetBin(m_modelSetting.GetModelFile());
+                    Logger.WriteLine(Logger.Level.info, "Saving better model into file {0}...", m_modelSetting.ModelFile);
+                    rnn.saveNetBin(m_modelSetting.ModelFile);
                     betterValidateNet = true;
                 }
-                //else
-                //{
-                //    Logger.WriteLine(Logger.Level.info, "Loading previous best model from file {0}...", m_modelSetting.GetModelFile());
-                //    rnn.loadNetBin(m_modelSetting.GetModelFile());
-                //}
-
-                if (ppl >= lastPPL && lastAlpha != rnn.Alpha)
+                if (ppl >= lastPPL && lastAlpha != rnn.LearningRate)
                 {
                     //Although we reduce alpha value, we still cannot get better result.
                     Logger.WriteLine(Logger.Level.info, "Current perplexity({0}) is larger than the previous one({1}). End training early.", ppl, lastPPL);
-                    Logger.WriteLine(Logger.Level.info, "Current alpha: {0}, the previous alpha: {1}", rnn.Alpha, lastAlpha);
+                    Logger.WriteLine(Logger.Level.info, "Current alpha: {0}, the previous alpha: {1}", rnn.LearningRate, lastAlpha);
                     break;
                 }
-                lastAlpha = rnn.Alpha;
+                lastAlpha = rnn.LearningRate;

                 if (betterValidateNet == false)
                 {
-                    rnn.Alpha = rnn.Alpha / 2.0;
+                    rnn.LearningRate = rnn.LearningRate / 2.0f;
                 }

                 lastPPL = ppl;
diff --git a/RNNSharp/Sentence.cs b/RNNSharp/Sentence.cs
index 103361d..956aa1b 100644
--- a/RNNSharp/Sentence.cs
+++ b/RNNSharp/Sentence.cs
@@ -1,65 +1,81 @@
 using System;
 using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 using AdvUtils;

+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
 namespace RNNSharp
 {
     public class Sentence
     {
-        private List<string[]> m_features;
+        public List<string[]> TokensList { get; }

-        public List<string[]> GetFeatureSet()
+        public Sentence(List<string[]> tokensList)
         {
-            return m_features;
-        }
+            int dim = 0;
+            TokensList = new List<string[]>();

-        public int GetTokenSize()
-        {
-            return m_features.Count;
-        }
+            if (tokensList.Count == 0)
+            {
+                return;
+            }

-        public void DumpFeatures()
-        {
-            foreach (string[] features in m_features)
+            //Check if dimension is consistent inside the sentence
+            foreach (string[] tokens in tokensList)
             {
-                StringBuilder sb = new StringBuilder();
-                foreach (string strFeature in features)
+                if (dim > 0 && tokens.Length != dim)
                 {
-                    sb.Append(strFeature);
-                    sb.Append('\t');
+                    string err = ReportInvalidateTokens(tokensList, dim, tokens);
+                    throw new FormatException(String.Format("Invalid record: {0}", err));
                 }
-                Logger.WriteLine(Logger.Level.info, sb.ToString().Trim());
+                dim = tokens.Length;
+                TokensList.Add(tokens);
             }
-        }
-
-        public virtual void SetFeatures(List<string> tokenList)
-        {
-            m_features = new List<string[]>();

-            //Add the begining term for current record
-            string[] curfeature = new string[2];
-            curfeature[0] = "<s>";
-            curfeature[1] = "O";
-            m_features.Add(curfeature);
+            //Add begin/end of sentence flag into feature
+            string[] beginFeatures = new string[dim];
+            string[] endFeatures = new string[dim];

-            foreach (string s in tokenList)
+            for (int i = 0; i < dim - 1; i++)
             {
-                string[] tokens = s.Split('\t');
-                m_features.Add(tokens);
+                beginFeatures[i] = "<s>";
+                endFeatures[i] = "</s>";
             }

-            //Add the end term of current record
-            curfeature = new string[2];
-            curfeature[0] = "</s>";
-            curfeature[1] = "O";
-            m_features.Add(curfeature);
+            beginFeatures[dim - 1] = TagSet.DefaultTag;
+            endFeatures[dim - 1] = TagSet.DefaultTag;
+
+            TokensList.Insert(0, beginFeatures);
+            TokensList.Add(endFeatures);
         }
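In practice the new constructor turns a parsed record directly into the padded token grid. A short sketch with hypothetical three-column rows (word, a feature, tag), relying only on the constructor and TokensList property added above:

    using System;
    using System.Collections.Generic;
    using RNNSharp;

    class SentenceDemo
    {
        static void Main()
        {
            var tokens = new List<string[]>
            {
                new[] { "John",  "NNP", "B-PER" },
                new[] { "lives", "VBZ", "O" },
            };

            // The constructor validates that every row has the same width, then
            // inserts a begin row and appends an end row tagged TagSet.DefaultTag.
            Sentence sent = new Sentence(tokens);

            Console.WriteLine(sent.TokensList.Count); // 4: begin row + 2 tokens + end row
            Console.WriteLine(sent);                  // uses the ToString() override below
        }
    }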

+        public override string ToString()
+        {
+            StringBuilder sb = new StringBuilder();
+            foreach (string[] tokens in TokensList)
+            {
+                foreach (string token in tokens)
+                {
+                    sb.Append(token);
+                    sb.Append('\t');
+                }
+                sb.AppendLine();
+            }
+
+            return sb.ToString();
+        }

+        private string ReportInvalidateTokens(List<string[]> tokenList, int dim, string[] badTokens)
+        {
+            StringBuilder sb = new StringBuilder();
+            sb.AppendLine(String.Format("Inconsistent feature dimension in the record. It's {0}, but it should be {1}", badTokens.Length, dim));
+            sb.AppendLine(ToString());
+
+            Logger.WriteLine(Logger.Level.err, sb.ToString());
+
+            return sb.ToString();
+        }
     }
 }
diff --git a/RNNSharp/Sequence.cs b/RNNSharp/Sequence.cs
index f80e2ce..d6f8d76 100644
--- a/RNNSharp/Sequence.cs
+++ b/RNNSharp/Sequence.cs
@@ -1,71 +1,62 @@
 using System;
 using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using AdvUtils;

+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
 namespace RNNSharp
 {
     public class Sequence
     {
-        State[] m_States;
-        int m_NumStates;
-
-        public int GetSize() { return m_NumStates; }
-        public State Get(int i) { return m_States[i]; }
-
+        public State[] States { get; }

         public int GetDenseDimension()
         {
-            if (0 == m_NumStates) return 0;
-            else return m_States[0].GetDenseDimension();
+            if (0 == States.Length || States[0].DenseData == null)
+            {
+                return 0;
+            }
+            else
+            {
+                return States[0].DenseData.GetDimension();
+            }
         }

         public int GetSparseDimension()
         {
-            if (0 == m_NumStates) return 0;
-            else return m_States[0].GetSparseDimension();
+            if (0 == States.Length) return 0;
+            else return States[0].SparseData.GetDimension();
         }

-        public bool SetLabel(Sentence sent, TagSet tagSet)
+        public void SetLabel(Sentence sent, TagSet tagSet)
         {
-            List<string[]> features = sent.GetFeatureSet();
-            if (features.Count != m_States.Length)
+            List<string[]> tokensList = sent.TokensList;
+            if (tokensList.Count != States.Length)
             {
-                return false;
+                throw new DataMisalignedException(String.Format("Error: Inconsistent token({0}) and state({1}) size. Tokens list: {2}",
+                    tokensList.Count, States.Length, sent.ToString()));
             }

-            for (int i = 0; i < features.Count; i++)
+            for (int i = 0; i < tokensList.Count; i++)
             {
-                string strTagName = features[i][features[i].Length - 1];
+                string strTagName = tokensList[i][tokensList[i].Length - 1];
                 int tagId = tagSet.GetIndex(strTagName);
                 if (tagId < 0)
                 {
-                    Logger.WriteLine(Logger.Level.info, "Error: tag {0} is unknown.", strTagName);
-                    return false;
+                    throw new DataMisalignedException(String.Format("Error: tag {0} is unknown. 
Tokens list: {1}", + strTagName, sent.ToString())); } - m_States[i].SetLabel(tagId); + States[i].Label = tagId; } - - return true; } - public void SetSize(int numStates) + public Sequence(int numStates) { - if (m_NumStates != numStates) + States = new State[numStates]; + for (int i = 0; i < numStates; i++) { - m_NumStates = numStates; - m_States = null; - if (m_NumStates > 0) - { - m_States = new State[m_NumStates]; - for (int i = 0; i < m_NumStates; i++) - { - m_States[i] = new State(); - } - } + States[i] = new State(); } } diff --git a/RNNSharp/SimpleRNN.cs b/RNNSharp/SimpleRNN.cs index 0ee0669..621c5ce 100644 --- a/RNNSharp/SimpleRNN.cs +++ b/RNNSharp/SimpleRNN.cs @@ -1,11 +1,11 @@ using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; using System.Threading.Tasks; using System.IO; using AdvUtils; +/// +/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) +/// namespace RNNSharp { public class SimpleRNN : RNN @@ -29,10 +29,9 @@ public class SimpleRNN : RNN public SimpleRNN() { - m_modeltype = MODELTYPE.SIMPLE; - gradient_cutoff = 15; - dropout = 0; - llogp = -100000000; + ModelType = MODELTYPE.SIMPLE; + GradientCutoff = 15; + Dropout = 0; L1 = 30; bptt = 5; @@ -41,12 +40,12 @@ public SimpleRNN() bptt_fea = null; - fea_size = 0; + DenseFeatureSize = 0; neuLastHidden = null; neuFeatures = null; neuHidden = null; - neuOutput = null; + OutputLayer = null; } public void setBPTT(int newval) { bptt = newval; } @@ -67,18 +66,18 @@ public override void initWeights() for (b = 0; b < L1; b++) { - for (a = 0; a < fea_size; a++) + for (a = 0; a < DenseFeatureSize; a++) { mat_feature2hidden[b][a] = RandInitWeight(); } } - for (b = 0; b < mat_hidden2output.GetHeight(); b++) + for (b = 0; b < Hidden2OutputWeight.GetHeight(); b++) { for (a = 0; a < L1; a++) { - mat_hidden2output[b][a] = RandInitWeight(); + Hidden2OutputWeight[b][a] = RandInitWeight(); } } @@ -111,7 +110,7 @@ public void computeHiddenActivity(bool isTrain) if (isTrain == false) { - neuHidden[a].cellOutput = neuHidden[a].cellOutput * (1.0 - dropout); + neuHidden[a].cellOutput = neuHidden[a].cellOutput * (1.0 - Dropout); } if (neuHidden[a].cellOutput > 50) neuHidden[a].cellOutput = 50; //for numerical stability @@ -131,7 +130,7 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr matrixXvectorADD(neuHidden, neuLastHidden, mat_hiddenBpttWeight, 0, L1, 0, L1, 0); //Apply feature values on hidden layer - var sparse = state.GetSparseData(); + var sparse = state.SparseData; int n = sparse.GetNumberOfEntries(); Parallel.For(0, L1, parallelOption, b => { @@ -146,9 +145,9 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr //Dense features: //fea(t) -> hidden(t) - if (fea_size > 0) + if (DenseFeatureSize > 0) { - for (int j = 0; j < fea_size; j++) + for (int j = 0; j < DenseFeatureSize; j++) { neuHidden[b].cellOutput += neuFeatures[j] * mat_feature2hidden[b][j]; } @@ -159,21 +158,19 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr computeHiddenActivity(isTrain); //Calculate output layer - matrixXvectorADD(neuOutput, neuHidden, mat_hidden2output, 0, L2, 0, L1, 0); + matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, 0, L2, 0, L1, 0); if (doutput != null) { for (int i = 0; i < L2; i++) { - doutput[i] = neuOutput[i].cellOutput; + doutput[i] = OutputLayer[i].cellOutput; } } //activation 2 --softmax on words - SoftmaxLayer(neuOutput); + SoftmaxLayer(OutputLayer); } - - public override void learnNet(State 
state, int timeat, bool biRNN = false) { if (biRNN == false) @@ -182,7 +179,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false) } //error output->hidden for words from specific class - matrixXvectorADD(neuHidden, neuOutput, mat_hidden2output, 0, L1, 0, L2, 1); + matrixXvectorADD(neuHidden, OutputLayer, Hidden2OutputWeight, 0, L1, 0, L2, 1); //Apply drop out on error in hidden layer for (int i = 0; i < L1; i++) @@ -198,7 +195,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false) { for (int c = 0; c < L2; c++) { - mat_hidden2output[c][a] += alpha * neuOutput[c].er * neuHidden[a].cellOutput; + Hidden2OutputWeight[c][a] += LearningRate * OutputLayer[c].er * neuHidden[a].cellOutput; } }); } @@ -217,11 +214,11 @@ void learnBptt(State state) neuHidden[a].er *= neuHidden[a].cellOutput * (1 - neuHidden[a].cellOutput); //dense weight update fea->0 - if (fea_size > 0) + if (DenseFeatureSize > 0) { - for (int i = 0; i < fea_size; i++) + for (int i = 0; i < DenseFeatureSize; i++) { - mat_bptt_synf[a][i] += neuHidden[a].er * bptt_fea[i + step * fea_size]; + mat_bptt_synf[a][i] += neuHidden[a].er * bptt_fea[i + step * DenseFeatureSize]; } } @@ -269,17 +266,17 @@ void learnBptt(State state) //Update bptt feature weights for (int i = 0; i < L1; i++) { - mat_hiddenBpttWeight[b][i] += alpha * mat_bptt_syn0_ph[b][i]; + mat_hiddenBpttWeight[b][i] += LearningRate * mat_bptt_syn0_ph[b][i]; //Clean bptt weight error mat_bptt_syn0_ph[b][i] = 0; } //Update dense feature weights - if (fea_size > 0) + if (DenseFeatureSize > 0) { - for (int i = 0; i < fea_size; i++) + for (int i = 0; i < DenseFeatureSize; i++) { - mat_feature2hidden[b][i] += alpha * mat_bptt_synf[b][i]; + mat_feature2hidden[b][i] += LearningRate * mat_bptt_synf[b][i]; //Clean dense feature weights error mat_bptt_synf[b][i] = 0; } @@ -295,7 +292,7 @@ void learnBptt(State state) for (int i = 0; i < sparse.GetNumberOfEntries(); i++) { int pos = sparse.GetEntry(i).Key; - mat_input2hidden[b][pos] += alpha * mat_bptt_syn0_w[b][pos]; + mat_input2hidden[b][pos] += LearningRate * mat_bptt_syn0_w[b][pos]; //Clean sparse feature weight error mat_bptt_syn0_w[b][pos] = 0; @@ -309,19 +306,19 @@ public void resetBpttMem() { bptt_inputs = new SparseVector[MAX_RNN_HIST]; bptt_hidden = new neuron[(bptt + bptt_block + 1) * L1]; - bptt_fea = new double[(bptt + bptt_block + 2) * fea_size]; + bptt_fea = new double[(bptt + bptt_block + 2) * DenseFeatureSize]; mat_bptt_syn0_w = new Matrix(L1, L0); mat_bptt_syn0_ph = new Matrix(L1, L1); - mat_bptt_synf = new Matrix(L1, fea_size); + mat_bptt_synf = new Matrix(L1, DenseFeatureSize); } public override void initMem() { CreateCells(); - mat_hidden2output = new Matrix(L2, L1); + Hidden2OutputWeight = new Matrix(L2, L1); mat_input2hidden = new Matrix(L1, L0); - mat_feature2hidden = new Matrix(L1, fea_size); + mat_feature2hidden = new Matrix(L1, DenseFeatureSize); mat_hiddenBpttWeight = new Matrix(L1, L1); @@ -346,16 +343,16 @@ public override void netReset(bool updateNet = false) //cleans hidden layer ac //Train mode for (int a = 0; a < L1; a++) { - if (rand.NextDouble() < dropout) + if (rand.NextDouble() < Dropout) { neuHidden[a].mask = true; } } - } - Array.Clear(bptt_inputs, 0, MAX_RNN_HIST); - Array.Clear(bptt_hidden, 0, (bptt + bptt_block + 1) * L1); - Array.Clear(bptt_fea, 0, (bptt + bptt_block + 2) * fea_size); + Array.Clear(bptt_inputs, 0, MAX_RNN_HIST); + Array.Clear(bptt_hidden, 0, (bptt + bptt_block + 1) * L1); + Array.Clear(bptt_fea, 0, (bptt + bptt_block + 2) * 
DenseFeatureSize); + } } @@ -375,13 +372,17 @@ public override void LearnBackTime(State state, int numStates, int curState) { bptt_inputs[a] = bptt_inputs[a - 1]; Array.Copy(bptt_hidden, (a - 1) * L1, bptt_hidden, a * L1, L1); - Array.Copy(bptt_fea, (a - 1) * fea_size, bptt_fea, a * fea_size, fea_size); + Array.Copy(bptt_fea, (a - 1) * DenseFeatureSize, bptt_fea, a * DenseFeatureSize, DenseFeatureSize); } - bptt_inputs[0] = state.GetSparseData(); + bptt_inputs[0] = state.SparseData; //Save hidden and feature layer nodes values for bptt Array.Copy(neuHidden, 0, bptt_hidden, 0, L1); - Array.Copy(neuFeatures, 0, bptt_fea, 0, fea_size); + + for (int i = 0; i < DenseFeatureSize; i++) + { + bptt_fea[i] = neuFeatures[i]; + } // time to learn bptt if (((curState % bptt_block) == 0) || (curState == numStates - 1)) @@ -397,44 +398,50 @@ public override void loadNetBin(string filename) StreamReader sr = new StreamReader(filename); BinaryReader br = new BinaryReader(sr.BaseStream); - m_modeltype = (MODELTYPE)br.ReadInt32(); - if (m_modeltype != MODELTYPE.SIMPLE) + ModelType = (MODELTYPE)br.ReadInt32(); + if (ModelType != MODELTYPE.SIMPLE) { throw new Exception("Invalidated model format: must be simple RNN"); } - m_modeldirection = (MODELDIRECTION)br.ReadInt32(); + ModelDirection = (MODELDIRECTION)br.ReadInt32(); int iflag = br.ReadInt32(); if (iflag == 1) { - m_bCRFTraining = true; + IsCRFTraining = true; } else { - m_bCRFTraining = false; + IsCRFTraining = false; } //Load basic parameters L0 = br.ReadInt32(); L1 = br.ReadInt32(); L2 = br.ReadInt32(); - fea_size = br.ReadInt32(); + DenseFeatureSize = br.ReadInt32(); //Create cells of each layer CreateCells(); //Load weight matrix between each two layer pairs + Logger.WriteLine("Loading input2hidden weights..."); mat_input2hidden = loadMatrixBin(br); + + Logger.WriteLine("Loading bptt hidden weights..."); mat_hiddenBpttWeight = loadMatrixBin(br); + Logger.WriteLine("Loading feature2hidden weights..."); mat_feature2hidden = loadMatrixBin(br); - mat_hidden2output = loadMatrixBin(br); + Logger.WriteLine("Loading hidden2output weights..."); + Hidden2OutputWeight = loadMatrixBin(br); if (iflag == 1) { - mat_CRFTagTransWeights = loadMatrixBin(br); + Logger.WriteLine("Loading CRF tag trans weights..."); + CRFTagTransWeights = loadMatrixBin(br); } sr.Close(); @@ -442,8 +449,8 @@ public override void loadNetBin(string filename) private void CreateCells() { - neuFeatures = new double[fea_size]; - neuOutput = new neuron[L2]; + neuFeatures = new SingleVector(DenseFeatureSize); + OutputLayer = new neuron[L2]; neuHidden = new neuron[L1]; } @@ -453,12 +460,12 @@ public override void saveNetBin(string filename) StreamWriter sw = new StreamWriter(filename); BinaryWriter fo = new BinaryWriter(sw.BaseStream); - fo.Write((int)m_modeltype); - fo.Write((int)m_modeldirection); + fo.Write((int)ModelType); + fo.Write((int)ModelDirection); // Signiture , 0 is for RNN or 1 is for RNN-CRF int iflag = 0; - if (m_bCRFTraining == true) + if (IsCRFTraining == true) { iflag = 1; } @@ -467,23 +474,28 @@ public override void saveNetBin(string filename) fo.Write(L0); fo.Write(L1); fo.Write(L2); - fo.Write(fea_size); + fo.Write(DenseFeatureSize); //weight input->hidden + Logger.WriteLine("Saving input2hidden weights..."); saveMatrixBin(mat_input2hidden, fo); + + Logger.WriteLine("Saving bptt hidden weights..."); saveMatrixBin(mat_hiddenBpttWeight, fo); //weight fea->hidden + Logger.WriteLine("Saving feature2hidden weights..."); saveMatrixBin(mat_feature2hidden, fo); //weight 
hidden->output - saveMatrixBin(mat_hidden2output, fo); + Logger.WriteLine("Saving hidden2output weights..."); + saveMatrixBin(Hidden2OutputWeight, fo); if (iflag == 1) { // Save Bigram - saveMatrixBin(mat_CRFTagTransWeights, fo); + saveMatrixBin(CRFTagTransWeights, fo); } fo.Close(); diff --git a/RNNSharp/SparseVector.cs b/RNNSharp/SparseVector.cs index 475c6dd..8a2045f 100644 --- a/RNNSharp/SparseVector.cs +++ b/RNNSharp/SparseVector.cs @@ -1,43 +1,35 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.Collections.Generic; +/// +/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) +/// namespace RNNSharp { public class SparseVector : SingleVector { - KeyValuePair[] m_Data; + KeyValuePair[] m_Data; int m_Dimension; int m_Size; - - public KeyValuePair GetEntry(int pos) { return m_Data[pos]; } + public KeyValuePair GetEntry(int pos) { return m_Data[pos]; } public override int GetDimension() { return m_Dimension; } public int GetNumberOfEntries() { return m_Size; } - public void ChangeValue(int positionInSparseVector, int dimension, double value) + public void ChangeValue(int positionInSparseVector, int dimension, float value) { - m_Data[positionInSparseVector] = new KeyValuePair(dimension, value); + m_Data[positionInSparseVector] = new KeyValuePair(dimension, value); } public void SetDimension(int s) { m_Dimension = s; } - - public KeyValuePair[] GetIndexValues() - { - return m_Data; - } - - public void SetData(Dictionary m) + public void SetData(Dictionary m) { m_Size = m.Count; - m_Data = new KeyValuePair[m_Size]; + m_Data = new KeyValuePair[m_Size]; int count = 0; - foreach (KeyValuePair pair in m) + foreach (KeyValuePair pair in m) { m_Data[count] = pair; count++; diff --git a/RNNSharp/State.cs b/RNNSharp/State.cs index 2d8436a..fde905b 100644 --- a/RNNSharp/State.cs +++ b/RNNSharp/State.cs @@ -1,9 +1,7 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - + +/// +/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) +/// namespace RNNSharp { public class PriviousLabelFeature @@ -16,80 +14,31 @@ public class PriviousLabelFeature public class State { //Store sparse features, such as template features - SparseVector m_SparseData = new SparseVector(); - + public SparseVector SparseData { get;} //Store dense features, such as word embedding - Vector m_spDenseData = null; - + public Vector DenseData { get; set; } + public int Label { get; set; } //Store run time features - PriviousLabelFeature[] m_RuntimeFeatures; - int m_NumRuntimeFeature; - - int m_Label; - - public int GetLabel() { return m_Label; } - - public SparseVector GetSparseData() { return m_SparseData; } - - public Vector GetDenseData() { return m_spDenseData; } + public PriviousLabelFeature[] RuntimeFeatures { get; set; } - - public PriviousLabelFeature GetRuntimeFeature(int i) { return m_RuntimeFeatures[i]; } - - public int GetNumRuntimeFeature() { return m_NumRuntimeFeature; } - - - public void SetNumRuntimeFeature(int n) + public State() { - if (m_NumRuntimeFeature != n) - { - m_NumRuntimeFeature = n; - m_RuntimeFeatures = null; - if (m_NumRuntimeFeature > 0) - m_RuntimeFeatures = new PriviousLabelFeature[m_NumRuntimeFeature]; - } + SparseData = new SparseVector(); } - - public void SetRuntimeFeature(int i, int offset, double v) + public void SetRuntimeFeature(int i, int offset, float v) { - PriviousLabelFeature f = m_RuntimeFeatures[i]; - 
m_SparseData.ChangeValue(f.PositionInSparseVector, f.StartInDimension + offset, v); + PriviousLabelFeature f = RuntimeFeatures[i]; + SparseData.ChangeValue(f.PositionInSparseVector, f.StartInDimension + offset, v); } - - public void SetDenseData(Vector dense) - { - m_spDenseData = dense; - } - - public void SetLabel(int label) - { - m_Label = label; - } - - - public int GetDenseDimension() - { - if (null != m_spDenseData) - return m_spDenseData.GetDimension(); - else - return 0; - } - - public int GetSparseDimension() - { - return m_SparseData.GetDimension(); - } - - public void AddRuntimeFeaturePlacehold(int i, int offsetToCurentState, int posInSparseVector, int startInDimension) { PriviousLabelFeature r = new PriviousLabelFeature(); r.OffsetToCurrentState = offsetToCurentState; r.StartInDimension = startInDimension; r.PositionInSparseVector = posInSparseVector; - m_RuntimeFeatures[i] = r; + RuntimeFeatures[i] = r; } } diff --git a/RNNSharp/TagSet.cs b/RNNSharp/TagSet.cs index f4afba0..8594cd2 100644 --- a/RNNSharp/TagSet.cs +++ b/RNNSharp/TagSet.cs @@ -1,15 +1,15 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.Collections.Generic; using System.IO; +/// +/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) +/// namespace RNNSharp { public class TagSet { - public Dictionary m_Tag2Index = new Dictionary(); + public Dictionary m_Tag2Index; + public static string DefaultTag = "SentBE"; public int GetSize() { @@ -41,30 +41,30 @@ public int GetIndex(string strTagName) } - //Load the tag id and its name mapping from given file - //Format: tagid /t tag name + //Load tag name from given file + //Format: each line has one tag name public TagSet(string strTagFileName) { - StreamReader fin = new StreamReader(strTagFileName); + m_Tag2Index = new Dictionary(); + int idx = 0; + m_Tag2Index.Add(DefaultTag, idx); + idx++; - int idx; - string strTagName; string strLine = null; - while ((strLine = fin.ReadLine()) != null) + using (StreamReader fin = new StreamReader(strTagFileName)) { - strLine = strLine.Trim(); - if (strLine.Length == 0) + while ((strLine = fin.ReadLine()) != null) { - continue; - } - - string[] items = strLine.Split('\t'); - idx = int.Parse(items[0]); - strTagName = items[1]; + strLine = strLine.Trim(); + if (strLine.Length == 0) + { + continue; + } - m_Tag2Index.Add(strTagName, idx); + m_Tag2Index.Add(strLine, idx); + idx++; + } } - fin.Close(); } } } diff --git a/RNNSharp/TemplateFeaturizer.cs b/RNNSharp/TemplateFeaturizer.cs index 4dfa81e..8ee4926 100644 --- a/RNNSharp/TemplateFeaturizer.cs +++ b/RNNSharp/TemplateFeaturizer.cs @@ -1,11 +1,12 @@ using System; using System.Collections.Generic; -using System.Linq; using System.Text; -using System.Threading.Tasks; using System.IO; using AdvUtils; +/// +/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) +/// namespace RNNSharp { //Template feature processor @@ -32,11 +33,6 @@ public int GetFeatureSize() return m_maxFeatureId; } - public List GetFeatureTemplates() - { - return m_Templates; - } - //Extract feature id list from given record and start position public List GetFeatureIds(List record, int startX) { diff --git a/RNNSharp/Vector.cs b/RNNSharp/Vector.cs index 92d692f..6d1ed68 100644 --- a/RNNSharp/Vector.cs +++ b/RNNSharp/Vector.cs @@ -1,9 +1,9 @@ using System; using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +/// +/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) 
+/// namespace RNNSharp { public class Vector @@ -13,7 +13,7 @@ public virtual int GetDimension() return 0; } - public virtual double this[int i] + public virtual float this[int i] { get { @@ -58,7 +58,7 @@ public void Append(SingleVector vector) } - public override double this[int i] + public override float this[int i] { get { @@ -74,7 +74,7 @@ public override double this[int i] public class SingleVector : Vector { - private double[] m_innerData; + private float[] m_innerData; int m_nLen; public override int GetDimension() { return m_nLen; } @@ -83,10 +83,10 @@ public SingleVector() m_innerData = null; } - public SingleVector(int nLen, double[] val) + public SingleVector(int nLen, float[] val) { m_nLen = nLen; - m_innerData = new double[m_nLen]; + m_innerData = new float[m_nLen]; for (int i = 0; i < m_nLen; i++) { m_innerData[i] = val[i]; @@ -95,12 +95,12 @@ public SingleVector(int nLen, double[] val) public SingleVector(int nLen) { - m_innerData = new double[nLen]; + m_innerData = new float[nLen]; m_nLen = nLen; } - public override double this[int i] + public override float this[int i] { get { @@ -111,33 +111,5 @@ public override double this[int i] m_innerData[i] = value; } } - - - public SingleVector Set(SingleVector rhs, int startOffset) - { - for (int i = 0; i < rhs.GetDimension(); i++) - { - m_innerData[i + startOffset] = rhs.m_innerData[i]; - } - return this; - } - - public void Normalize() - { - - double sum = 0; - for (int i = 0; i < m_nLen; i++) - { - sum += m_innerData[i] * m_innerData[i]; - } - - if (0 == sum) return; - double df = Math.Sqrt(sum); - - for (int i = 0; i < m_nLen; i++) - { - m_innerData[i] /= df; - } - } } } diff --git a/RNNSharp/WordEMWrapFeaturizer.cs b/RNNSharp/WordEMWrapFeaturizer.cs index 708eb2c..d78768a 100644 --- a/RNNSharp/WordEMWrapFeaturizer.cs +++ b/RNNSharp/WordEMWrapFeaturizer.cs @@ -1,9 +1,8 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.Collections.Generic; +/// +/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) +/// namespace RNNSharp { public class WordEMWrapFeaturizer @@ -14,25 +13,22 @@ public class WordEMWrapFeaturizer public WordEMWrapFeaturizer(string filename) { - Txt2Vec.Decoder decoder = new Txt2Vec.Decoder(); - decoder.LoadBinaryModel(filename); + Txt2Vec.Model model = new Txt2Vec.Model(); + model.LoadBinaryModel(filename); - string[] terms = decoder.GetAllTerms(); - vectorSize = decoder.GetVectorSize(); + string[] terms = model.GetAllTerms(); + vectorSize = model.VectorSize; m_WordEmbedding = new Dictionary(); m_UnkEmbedding = new SingleVector(vectorSize); foreach (string term in terms) { - double[] vector = decoder.GetVector(term); + float[] vector = model.GetVector(term); if (vector != null) { SingleVector spVector = new SingleVector(vectorSize, vector); - - spVector.Normalize(); - m_WordEmbedding.Add(term, spVector); } } diff --git a/RNNSharp/neuron.cs b/RNNSharp/neuron.cs index 6689ef1..2457cd9 100644 --- a/RNNSharp/neuron.cs +++ b/RNNSharp/neuron.cs @@ -1,9 +1,7 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - + +/// +/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) +/// namespace RNNSharp { public struct neuron diff --git a/RNNSharpConsole/Program.cs b/RNNSharpConsole/Program.cs index 36cbd48..8ee9065 100644 --- a/RNNSharpConsole/Program.cs +++ b/RNNSharpConsole/Program.cs @@ -1,12 +1,13 @@ using System; using System.Collections.Generic; -using 
System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 using System.IO;
 using RNNSharp;
 using AdvUtils;

+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
 namespace RNNSharpConsole
 {
     class Program
@@ -22,8 +23,8 @@ class Program
         static int layersize = 200;
         static int iCRF = 0;
         static long savestep = 0;
-        static double alpha = 0.1;
-        static double dropout = 0;
+        static float alpha = 0.1f;
+        static float dropout = 0;
         static int bptt = 4;
         static int modelType = 0;
         static int nBest = 1;
@@ -31,7 +32,7 @@ class Program

         static void UsageTitle()
         {
-            Console.WriteLine("Recurrent Neural Network Toolkit v1.1 by Zhongkai Fu (fuzhongkai@gmail.com)");
+            Console.WriteLine("Recurrent Neural Network Toolkit v1.2 by Zhongkai Fu (fuzhongkai@gmail.com)");
         }

         static void Usage()
@@ -136,8 +137,8 @@ static void InitParameters(string[] args)
             if ((i = ArgPos("-modeltype", args)) >= 0) modelType = int.Parse(args[i + 1]);
             if ((i = ArgPos("-crf", args)) >= 0) iCRF = int.Parse(args[i + 1]);
             if ((i = ArgPos("-maxiter", args)) >= 0) maxIter = int.Parse(args[i + 1]);
-            if ((i = ArgPos("-alpha", args)) >= 0) alpha = double.Parse(args[i + 1]);
-            if ((i = ArgPos("-dropout", args)) >= 0) dropout = double.Parse(args[i + 1]);
+            if ((i = ArgPos("-alpha", args)) >= 0) alpha = float.Parse(args[i + 1]);
+            if ((i = ArgPos("-dropout", args)) >= 0) dropout = float.Parse(args[i + 1]);
             if ((i = ArgPos("-bptt", args)) >= 0) bptt = int.Parse(args[i + 1]);
             if ((i = ArgPos("-nbest", args)) >= 0) nBest = int.Parse(args[i + 1]);
             if ((i = ArgPos("-dir", args)) >= 0) iDir = int.Parse(args[i + 1]);
@@ -221,28 +222,21 @@ static void LoadDataset(string strFileName, Featurizer featurizer, DataSet dataS

             while (true)
             {
-                List<string> tokenList = ReadRecord(sr);
-                if (tokenList.Count == 0)
+                //Extract features from it and convert it into sequence
+                Sentence sent = new Sentence(ReadRecord(sr));
+                if (sent.TokensList.Count <= 2)
                 {
-                    //No more record
+                    //No more record, it only contains <s> and </s>
                     break;
                 }

-                //Extract features from it and convert it into sequence
-                Sentence sent = new Sentence();
-                sent.SetFeatures(tokenList);
                 Sequence seq = featurizer.ExtractFeatures(sent);
+
                 //Set label for the sequence
-                if (seq.SetLabel(sent, featurizer.GetTagSet()) == false)
-                {
-                    Logger.WriteLine(Logger.Level.info, "Error: Invalidated record.");
-                    sent.DumpFeatures();
-                    continue;
-                }
+                seq.SetLabel(sent, featurizer.TagSet);

                 //Add the sequence into data set
-                dataSet.Add(seq);
+                dataSet.SequenceList.Add(seq);

                 //Show state at every 1000 record
                 RecordCount++;
@@ -284,9 +278,9 @@ static void Main(string[] args)
             }
         }

-        private static List<string> ReadRecord(StreamReader sr)
+        private static List<string[]> ReadRecord(StreamReader sr)
         {
-            List<string> record = new List<string>();
+            List<string[]> record = new List<string[]>();
             string strLine = null;

             //Read each line from file
@@ -299,7 +293,7 @@ private static List<string> ReadRecord(StreamReader sr)
                 return record;
             }

-            record.Add(strLine);
+            record.Add(strLine.Split('\t'));
         }

         return record;
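ReadRecord now tab-splits each line itself, so a record is a list of string[] rows terminated by a blank line, ready for the new Sentence constructor. A minimal stand-in mirroring that loop, fed from an in-memory corpus with hypothetical two-column rows:

    using System;
    using System.Collections.Generic;
    using System.IO;

    class ReadRecordDemo
    {
        static List<string[]> ReadRecord(StreamReader sr)
        {
            var record = new List<string[]>();
            string strLine;
            while ((strLine = sr.ReadLine()) != null)
            {
                strLine = strLine.Trim();
                if (strLine.Length == 0)
                {
                    return record; // a blank line ends the record
                }
                record.Add(strLine.Split('\t'));
            }
            return record;
        }

        static void Main()
        {
            string corpus = "John\tB-PER\nlives\tO\n\nBoston\tB-LOC\n";
            var bytes = System.Text.Encoding.UTF8.GetBytes(corpus);
            using (var sr = new StreamReader(new MemoryStream(bytes)))
            {
                List<string[]> rec = ReadRecord(sr);
                Console.WriteLine(rec.Count); // 2 rows in the first record
            }
        }
    }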
@@ -314,7 +308,7 @@ private static void Test()
                 return;
             }

-            //Load tag id and its name from file
+            //Load tag name
             TagSet tagSet = new TagSet(strTagFile);

             if (String.IsNullOrEmpty(strModelFile) == true)
@@ -342,12 +336,10 @@ private static void Test()
             Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
             featurizer.ShowFeatureSize();

-            //Create an instance for the model
-            // Model model = new Model(strModelFile);
-
             //Create instance for decoder
             RNNSharp.RNNDecoder decoder = new RNNSharp.RNNDecoder(strModelFile, featurizer);
+
             if (File.Exists(strTestFile) == false)
             {
                 Logger.WriteLine(Logger.Level.err, "FAILED: The test corpus {0} isn't existed.", strTestFile);
@@ -360,25 +352,23 @@ private static void Test()

             while (true)
             {
-                List<string> tokenList = ReadRecord(sr);
-                if (tokenList.Count == 0)
+                Sentence sent = new Sentence(ReadRecord(sr));
+                if (sent.TokensList.Count <= 2)
                 {
-                    //No more record
+                    //No more record, it only contains <s> and </s>
                     break;
                 }

-                Sentence sent = new Sentence();
-                sent.SetFeatures(tokenList);
-
                 if (nBest == 1)
                 {
                     int[] output = decoder.Process(sent);

                     //Output decoded result
                     //Append the decoded result into the end of feature set of each token
                     StringBuilder sb = new StringBuilder();
-                    for (int i = 0; i < tokenList.Count; i++)
+                    for (int i = 0; i < sent.TokensList.Count; i++)
                     {
-                        sb.Append(tokenList[i]);
+                        string tokens = String.Join("\t", sent.TokensList[i]);
+                        sb.Append(tokens);
                         sb.Append("\t");
                         sb.Append(tagSet.GetTagName(output[i]));
                         sb.AppendLine();
@@ -389,19 +379,13 @@ private static void Test()
                 else
                 {
                     int[][] output = decoder.ProcessNBest(sent, nBest);
-                    if (output == null)
-                    {
-                        Logger.WriteLine(Logger.Level.err, "FAILED: decode failed. Dump current sentence...");
-                        sent.DumpFeatures();
-                        return;
-                    }
-
                     StringBuilder sb = new StringBuilder();
                     for (int i = 0; i < nBest; i++)
                     {
-                        for (int j = 0; j < tokenList.Count; j++)
+                        for (int j = 0; j < sent.TokensList.Count; j++)
                         {
-                            sb.Append(tokenList[j]);
+                            string tokens = String.Join("\t", sent.TokensList[j]);
+                            sb.Append(tokens);
                             sb.Append("\t");
                             sb.Append(tagSet.GetTagName(output[i][j]));
                             sb.AppendLine();
@@ -433,23 +417,23 @@ private static void Train()

             //Create configuration instance and set parameters
             ModelSetting RNNConfig = new ModelSetting();
-            RNNConfig.SetModelFile(strModelFile);
-            RNNConfig.SetNumHidden(layersize);
-            RNNConfig.SetCRFTraining((iCRF == 1) ? true : false);
-            RNNConfig.SetDir(iDir);
-            RNNConfig.SetModelType(modelType);
-            RNNConfig.SetMaxIteration(maxIter);
-            RNNConfig.SetSaveStep(savestep);
-            RNNConfig.SetLearningRate(alpha);
-            RNNConfig.SetDropout(dropout);
-            RNNConfig.SetBptt(bptt);
+            RNNConfig.ModelFile = strModelFile;
+            RNNConfig.NumHidden = layersize;
+            RNNConfig.IsCRFTraining = (iCRF == 1) ? 
true : false; + RNNConfig.ModelDirection = iDir; + RNNConfig.ModelType = modelType; + RNNConfig.MaxIteration = maxIter; + RNNConfig.SaveStep = savestep; + RNNConfig.LearningRate = alpha; + RNNConfig.Dropout = dropout; + RNNConfig.Bptt = bptt; //Dump RNN setting on console RNNConfig.DumpSetting(); if (File.Exists(strFeatureConfigFile) == false) { - Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} isn't existed.", strFeatureConfigFile); + Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} doesn't exist.", strFeatureConfigFile); UsageTrain(); return; } @@ -463,38 +447,37 @@ private static void Train() UsageTrain(); return; } - if (String.IsNullOrEmpty(strTrainFile) == true) + + if (File.Exists(strTrainFile) == false) { - Logger.WriteLine(Logger.Level.err, "FAILED: The training corpus isn't specified."); + Logger.WriteLine(Logger.Level.err, "FAILED: The training corpus doesn't exist."); UsageTrain(); return; } - //LoadFeatureConfig training corpus and extract feature set - DataSet dataSetTrain = new DataSet(tagSet.GetSize()); - LoadDataset(strTrainFile, featurizer, dataSetTrain); - - DataSet dataSetValidation = null; - if (String.IsNullOrEmpty(strValidFile) == true) + if (File.Exists(strValidFile) == false) { - Logger.WriteLine(Logger.Level.err, "FAILED: The validation corpus isn't specified."); + Logger.WriteLine(Logger.Level.err, "FAILED: The validation corpus doesn't exist."); + UsageTrain(); return; } - //LoadFeatureConfig validated corpus and extract feature set - dataSetValidation = new DataSet(tagSet.GetSize()); - LoadDataset(strValidFile, featurizer, dataSetValidation); //Create RNN encoder and save necessary parameters RNNEncoder encoder = new RNNEncoder(RNNConfig); - encoder.SetTrainingSet(dataSetTrain); - encoder.SetValidationSet(dataSetValidation); + + //LoadFeatureConfig training corpus and extract feature set + encoder.TrainingSet = new DataSet(tagSet.GetSize()); + LoadDataset(strTrainFile, featurizer, encoder.TrainingSet); + + //LoadFeatureConfig validated corpus and extract feature set + encoder.ValidationSet = new DataSet(tagSet.GetSize()); + LoadDataset(strValidFile, featurizer, encoder.ValidationSet); if (iCRF == 1) { Logger.WriteLine(Logger.Level.info, "Initialize output tag bigram transition probability..."); //Build tag bigram transition matrix - dataSetTrain.BuildLabelBigramTransition(); - encoder.SetLabelBigramTransition(dataSetTrain.GetLabelBigramTransition()); + encoder.TrainingSet.BuildLabelBigramTransition(); } //Start to train the model diff --git a/RNNSharpOverview.jpg b/RNNSharpOverview.jpg index f047cc7..b0577c4 100644 Binary files a/RNNSharpOverview.jpg and b/RNNSharpOverview.jpg differ diff --git a/TFeatureBin/Program.cs b/TFeatureBin/Program.cs index 1e8e625..f59cfd3 100644 --- a/TFeatureBin/Program.cs +++ b/TFeatureBin/Program.cs @@ -1,12 +1,13 @@ using System; using System.Collections.Generic; -using System.Linq; using System.Text; -using System.Threading.Tasks; using System.IO; using AdvUtils; using RNNSharp; +/// +/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com) +/// namespace TFeatureBin { class Program @@ -167,56 +168,60 @@ static IDictionary ExtractFeatureSetFromFile() Logger.WriteLine(Logger.Level.info, "Generate feature set..."); BigDictionary feature2freq = new BigDictionary(); - List record = new List(); - StreamReader srCorpus = new StreamReader(strInputFile, Encoding.UTF8); + + + List tokenList = new List(); string strLine = null; - while ((strLine = srCorpus.ReadLine()) != null) 
+ Sentence sentence = null; + + using (StreamReader srCorpus = new StreamReader(strInputFile, Encoding.UTF8)) { - strLine = strLine.Trim(); - if (strLine.Length == 0) + while ((strLine = srCorpus.ReadLine()) != null) { - //The end of current record - for (int i = 0; i < record.Count; i++) + strLine = strLine.Trim(); + if (strLine.Length == 0) { - //Get feature of current token - List featureList = templateFeaturizer.GenerateFeature(record, i); - foreach (string strFeature in featureList) + //The end of current record + sentence = new Sentence(tokenList); + for (int i = 0; i < sentence.TokensList.Count; i++) { - if (feature2freq.ContainsKey(strFeature) == false) + //Get feature of i-th token + List featureList = templateFeaturizer.GenerateFeature(sentence.TokensList, i); + foreach (string strFeature in featureList) { - feature2freq.Add(strFeature, 0); + if (feature2freq.ContainsKey(strFeature) == false) + { + feature2freq.Add(strFeature, 0); + } + feature2freq[strFeature]++; } - feature2freq[strFeature]++; } - } - record.Clear(); - } - else - { - string[] items = strLine.Split('\t'); - record.Add(items); + tokenList.Clear(); + } + else + { + tokenList.Add(strLine.Split('\t')); + } } - } - - //The end of current record - for (int i = 0; i < record.Count; i++) - { - //Get feature of current token - List featureList = templateFeaturizer.GenerateFeature(record, i); - foreach (string strFeature in featureList) + //The end of current record + sentence = new Sentence(tokenList); + for (int i = 0; i < sentence.TokensList.Count; i++) { - if (feature2freq.ContainsKey(strFeature) == false) + //Get feature of i-th token + List featureList = templateFeaturizer.GenerateFeature(sentence.TokensList, i); + foreach (string strFeature in featureList) { - feature2freq.Add(strFeature, 0); + if (feature2freq.ContainsKey(strFeature) == false) + { + feature2freq.Add(strFeature, 0); + } + feature2freq[strFeature]++; } - feature2freq[strFeature]++; } } - srCorpus.Close(); - //Only save the feature whose frequency is not less than minfreq Logger.WriteLine(Logger.Level.info, "Filter out features whose frequency is less than {0}", minfreq); SortedDictionary features = new SortedDictionary(StringComparer.Ordinal); diff --git a/dll/AdvUtils.dll b/dll/AdvUtils.dll index 337a33a..ebb81b3 100644 Binary files a/dll/AdvUtils.dll and b/dll/AdvUtils.dll differ diff --git a/dll/txt2vec.dll b/dll/txt2vec.dll index ae77c5c..35fb8f1 100644 Binary files a/dll/txt2vec.dll and b/dll/txt2vec.dll differ
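The extraction loop above tallies how often each template feature fires, once per record and once more for the trailing record. The same tallying in isolation, as a compact sketch: the template file path is hypothetical, and the single-argument TemplateFeaturizer constructor is an assumption (its construction site is outside these hunks); GenerateFeature(tokens, i) is the call used above.

    using System;
    using System.Collections.Generic;
    using RNNSharp;

    class FeatureCountDemo
    {
        static void Main()
        {
            // Assumed: TemplateFeaturizer loads its templates from a file path
            TemplateFeaturizer tf = new TemplateFeaturizer("features.template");

            var tokens = new List<string[]>
            {
                new[] { "John", "B-PER" },
                new[] { "lives", "O" },
            };

            // Count how often each generated feature string occurs
            var feature2freq = new Dictionary<string, int>();
            for (int i = 0; i < tokens.Count; i++)
            {
                foreach (string f in tf.GenerateFeature(tokens, i))
                {
                    int freq;
                    feature2freq.TryGetValue(f, out freq);
                    feature2freq[f] = freq + 1;
                }
            }

            foreach (var kv in feature2freq)
            {
                Console.WriteLine("{0}\t{1}", kv.Key, kv.Value);
            }
        }
    }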