diff --git a/RNNSharp/BiRNN.cs b/RNNSharp/BiRNN.cs
index d57b635..8b47a7e 100644
--- a/RNNSharp/BiRNN.cs
+++ b/RNNSharp/BiRNN.cs
@@ -1,11 +1,11 @@
using System;
-using System.Collections.Generic;
using System.IO;
-using System.Linq;
-using System.Text;
using System.Threading.Tasks;
using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
class BiRNN : RNN
@@ -18,19 +18,37 @@ public BiRNN(RNN s_forwardRNN, RNN s_backwardRNN)
forwardRNN = s_forwardRNN;
backwardRNN = s_backwardRNN;
- m_modeldirection = MODELDIRECTION.BI_DIRECTIONAL;
+ ModelType = forwardRNN.ModelType;
+ ModelDirection = MODELDIRECTION.BI_DIRECTIONAL;
}
- public override void SetFeatureDimension(int denseFeatueSize, int sparseFeatureSize, int tagSize)
+ public override int L0
{
- fea_size = denseFeatueSize;
- L0 = sparseFeatureSize;
- L2 = tagSize;
+ get
+ {
+ return forwardRNN.L0;
+ }
- forwardRNN.SetFeatureDimension(denseFeatueSize, sparseFeatureSize, tagSize);
- backwardRNN.SetFeatureDimension(denseFeatueSize, sparseFeatureSize, tagSize);
+ set
+ {
+ forwardRNN.L0 = value;
+ backwardRNN.L0 = value;
+ }
}
+ public override int L2
+ {
+ get
+ {
+ return forwardRNN.L2;
+ }
+
+ set
+ {
+ forwardRNN.L2 = value;
+ backwardRNN.L2 = value;
+ }
+ }
public override void initWeights()
{
@@ -39,72 +57,123 @@ public override void initWeights()
}
- public override void SetModelFile(string strModelFile)
+ public override string ModelFile
{
- m_strModelFile = strModelFile;
+ get { return forwardRNN.ModelFile; }
+ set
+ {
+ forwardRNN.ModelFile = value;
+ backwardRNN.ModelFile = value;
+ }
+ }
- forwardRNN.mat_hidden2output = mat_hidden2output;
- backwardRNN.mat_hidden2output = mat_hidden2output;
+ public override long SaveStep
+ {
+ get
+ {
+ return forwardRNN.SaveStep;
+ }
- forwardRNN.SetModelFile(strModelFile);
- backwardRNN.SetModelFile(strModelFile);
+ set
+ {
+ forwardRNN.SaveStep = value;
+ backwardRNN.SaveStep = value;
+ }
}
- public override void SetSaveStep(long savestep)
+ public override int MaxIter
{
- m_SaveStep = savestep;
+ get
+ {
+ return forwardRNN.MaxIter;
+ }
- forwardRNN.SetSaveStep(savestep);
- backwardRNN.SetSaveStep(savestep);
+ set
+ {
+ forwardRNN.MaxIter = value;
+ backwardRNN.MaxIter = value;
+ }
}
- public override void SetMaxIter(int _nMaxIter)
+ public override bool IsCRFTraining
{
- m_MaxIter = _nMaxIter;
+ get { return forwardRNN.IsCRFTraining; }
- forwardRNN.SetMaxIter(_nMaxIter);
- backwardRNN.SetMaxIter(_nMaxIter);
+ set
+ {
+ forwardRNN.IsCRFTraining = value;
+ backwardRNN.IsCRFTraining = value;
+ }
}
-
- public override void SetCRFTraining(bool b)
+ public override float LearningRate
{
- m_bCRFTraining = b;
+ get
+ {
+ return forwardRNN.LearningRate;
+ }
- forwardRNN.SetCRFTraining(b);
- backwardRNN.SetCRFTraining(b);
+ set
+ {
+ forwardRNN.LearningRate = value;
+ backwardRNN.LearningRate = value;
+ }
}
- public override void SetLearningRate(double newAlpha)
+ public override float GradientCutoff
{
- alpha = newAlpha;
+ get
+ {
+ return forwardRNN.GradientCutoff;
+ }
- forwardRNN.SetLearningRate(newAlpha);
- backwardRNN.SetLearningRate(newAlpha);
+ set
+ {
+ forwardRNN.GradientCutoff = value;
+ backwardRNN.GradientCutoff = value;
+ }
}
- public override void SetGradientCutoff(double newGradient)
+ public override float Dropout
{
- gradient_cutoff = newGradient;
+ get
+ {
+ return forwardRNN.Dropout;
+ }
- forwardRNN.SetGradientCutoff(newGradient);
- backwardRNN.SetGradientCutoff(newGradient);
+ set
+ {
+ forwardRNN.Dropout = value;
+ backwardRNN.Dropout = value;
+ }
}
- public override void SetDropout(double newDropout)
+ public override int L1
{
- dropout = newDropout;
+ get
+ {
+ return forwardRNN.L1;
+ }
- forwardRNN.SetDropout(newDropout);
- backwardRNN.SetDropout(newDropout);
+ set
+ {
+ forwardRNN.L1 = value;
+ backwardRNN.L1 = value;
+ }
}
- public override void SetHiddenLayerSize(int newsize)
+ public override int DenseFeatureSize
{
- L1 = newsize;
+ get
+ {
+ return forwardRNN.DenseFeatureSize;
+ }
- forwardRNN.SetHiddenLayerSize(newsize);
- backwardRNN.SetHiddenLayerSize(newsize);
+ set
+ {
+ forwardRNN.DenseFeatureSize = value;
+ backwardRNN.DenseFeatureSize = value;
+ }
}
public override void GetHiddenLayer(Matrix m, int curStatus)
@@ -118,20 +187,20 @@ public override void initMem()
backwardRNN.initMem();
//Create and initialise the weights from hidden to output layer; these are just normal weights
- mat_hidden2output = new Matrix(L2, L1);
+ Hidden2OutputWeight = new Matrix(L2, L1);
- for (int i = 0; i < mat_hidden2output.GetHeight(); i++)
+ for (int i = 0; i < Hidden2OutputWeight.GetHeight(); i++)
{
- for (int j = 0; j < mat_hidden2output.GetWidth(); j++)
+ for (int j = 0; j < Hidden2OutputWeight.GetWidth(); j++)
{
- mat_hidden2output[i][j] = RandInitWeight();
+ Hidden2OutputWeight[i][j] = RandInitWeight();
}
}
}
public neuron[][] InnerDecode(Sequence pSequence, out Matrix outputHiddenLayer, out Matrix rawOutputLayer)
{
- int numStates = pSequence.GetSize();
+ int numStates = pSequence.States.Length;
Matrix mForward = null;
Matrix mBackward = null;
@@ -144,7 +213,7 @@ public neuron[][] InnerDecode(Sequence pSequence, out Matrix outputHidde
mForward = new Matrix(numStates, forwardRNN.L1);
for (int curState = 0; curState < numStates; curState++)
{
- State state = pSequence.Get(curState);
+ State state = pSequence.States[curState];
forwardRNN.setInputLayer(state, curState, numStates, null);
forwardRNN.computeNet(state, null); //compute probability distribution
@@ -157,7 +226,7 @@ public neuron[][] InnerDecode(Sequence pSequence, out Matrix outputHidde
mBackward = new Matrix(numStates, backwardRNN.L1);
for (int curState = numStates - 1; curState >= 0; curState--)
{
- State state = pSequence.Get(curState);
+ State state = pSequence.States[curState];
backwardRNN.setInputLayer(state, curState, numStates, null, false);
backwardRNN.computeNet(state, null); //compute probability distribution
@@ -181,7 +250,7 @@ public neuron[][] InnerDecode(Sequence pSequence, out Matrix outputHidde
Parallel.For(0, numStates, parallelOption, curState =>
{
seqOutput[curState] = new neuron[L2];
- matrixXvectorADD(seqOutput[curState], mergedHiddenLayer[curState], mat_hidden2output, 0, L2, 0, L1, 0);
+ matrixXvectorADD(seqOutput[curState], mergedHiddenLayer[curState], Hidden2OutputWeight, 0, L2, 0, L1, 0);
for (int i = 0; i < L2; i++)
{
@@ -198,10 +267,10 @@ public neuron[][] InnerDecode(Sequence pSequence, out Matrix outputHidde
return seqOutput;
}
- public override Matrix learnSentenceForRNNCRF(Sequence pSequence, RunningMode runningMode)
+ public override int[] PredictSentenceCRF(Sequence pSequence, RunningMode runningMode)
{
//Reset the network
- int numStates = pSequence.GetSize();
+ int numStates = pSequence.States.Length;
//Predict output
Matrix mergedHiddenLayer = null;
Matrix rawOutputLayer = null;
@@ -209,37 +278,43 @@ public override Matrix learnSentenceForRNNCRF(Sequence pSequence, Runnin
ForwardBackward(numStates, rawOutputLayer);
- //Get the best result
- for (int i = 0; i < numStates; i++)
+ if (runningMode != RunningMode.Test)
{
- State state = pSequence.Get(i);
- logp += Math.Log10(mat_CRFSeqOutput[i][state.GetLabel()]);
- counter++;
+ //Get the best result
+ for (int i = 0; i < numStates; i++)
+ {
+ logp += Math.Log10(CRFSeqOutput[i][pSequence.States[i].Label]);
+ }
}
- UpdateBigramTransition(pSequence);
+ int[] predict = Viterbi(rawOutputLayer, numStates);
- //Update hidden-output layer weights
- for (int curState = 0; curState < numStates; curState++)
+ if (runningMode == RunningMode.Train)
{
- State state = pSequence.Get(curState);
- //For standard RNN
- for (int c = 0; c < L2; c++)
+ UpdateBigramTransition(pSequence);
+
+ //Update hidden-output layer weights
+ for (int curState = 0; curState < numStates; curState++)
{
- seqOutput[curState][c].er = -mat_CRFSeqOutput[curState][c];
+ int label = pSequence.States[curState].Label;
+ //For standard RNN
+ for (int c = 0; c < L2; c++)
+ {
+ seqOutput[curState][c].er = -CRFSeqOutput[curState][c];
+ }
+ seqOutput[curState][label].er = 1 - CRFSeqOutput[curState][label];
}
- seqOutput[curState][state.GetLabel()].er = 1 - mat_CRFSeqOutput[curState][state.GetLabel()];
- }
- LearnTwoRNN(pSequence, mergedHiddenLayer, seqOutput);
+ LearnTwoRNN(pSequence, mergedHiddenLayer, seqOutput);
+ }
- return mat_CRFSeqOutput;
+ return predict;
}
public override Matrix PredictSentence(Sequence pSequence, RunningMode runningMode)
{
//Reset the network
- int numStates = pSequence.GetSize();
+ int numStates = pSequence.States.Length;
//Predict output
Matrix mergedHiddenLayer = null;
@@ -251,9 +326,7 @@ public override Matrix PredictSentence(Sequence pSequence, RunningMode r
//Merge forward and backward
for (int curState = 0; curState < numStates; curState++)
{
- State state = pSequence.Get(curState);
- logp += Math.Log10(seqOutput[curState][state.GetLabel()].cellOutput);
- counter++;
+ logp += Math.Log10(seqOutput[curState][pSequence.States[curState].Label].cellOutput);
}
}
@@ -262,13 +335,13 @@ public override Matrix PredictSentence(Sequence pSequence, RunningMode r
//Update hidden-output layer weights
for (int curState = 0; curState < numStates; curState++)
{
- State state = pSequence.Get(curState);
+ int label = pSequence.States[curState].Label;
//For standard RNN
for (int c = 0; c < L2; c++)
{
seqOutput[curState][c].er = -seqOutput[curState][c].cellOutput;
}
- seqOutput[curState][state.GetLabel()].er = 1 - seqOutput[curState][state.GetLabel()].cellOutput;
+ seqOutput[curState][label].er = 1 - seqOutput[curState][label].cellOutput;
}
LearnTwoRNN(pSequence, mergedHiddenLayer, seqOutput);
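
The error assignment above is the standard softmax cross-entropy gradient: for output unit c at time t with gold label y_t, the code stores

    er_c = \mathbb{1}\{c = y_t\} - p_c = -\,\partial L / \partial z_c

so the subsequent updates in LearnTwoRNN move weights by LearningRate * activation * er, ascending the log-likelihood. PredictSentenceCRF applies the same form with the per-position CRF marginal CRFSeqOutput[t][c] in place of the local softmax output p_c.
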
@@ -281,21 +354,21 @@ private void LearnTwoRNN(Sequence pSequence, Matrix mergedHiddenLayer, n
{
netReset(true);
- int numStates = pSequence.GetSize();
- forwardRNN.mat_hidden2output = mat_hidden2output.CopyTo();
- backwardRNN.mat_hidden2output = mat_hidden2output.CopyTo();
+ int numStates = pSequence.States.Length;
+ forwardRNN.Hidden2OutputWeight = Hidden2OutputWeight.CopyTo();
+ backwardRNN.Hidden2OutputWeight = Hidden2OutputWeight.CopyTo();
Parallel.Invoke(() =>
{
for (int curState = 0; curState < numStates; curState++)
{
- for (int i = 0; i < mat_hidden2output.GetHeight(); i++)
+ for (int i = 0; i < Hidden2OutputWeight.GetHeight(); i++)
{
//update weights for hidden to output layer
- for (int k = 0; k < mat_hidden2output.GetWidth(); k++)
+ for (int k = 0; k < Hidden2OutputWeight.GetWidth(); k++)
{
- mat_hidden2output[i][k] += alpha * mergedHiddenLayer[curState][k].cellOutput * seqOutput[curState][i].er;
+ Hidden2OutputWeight[i][k] += LearningRate * mergedHiddenLayer[curState][k].cellOutput * seqOutput[curState][i].er;
}
}
}
@@ -308,12 +381,12 @@ private void LearnTwoRNN(Sequence pSequence, Matrix mergedHiddenLayer, n
for (int curState = 0; curState < numStates; curState++)
{
// error propagation
- State state = pSequence.Get(curState);
+ State state = pSequence.States[curState];
forwardRNN.setInputLayer(state, curState, numStates, null);
forwardRNN.computeNet(state, null); //compute probability distribution
//Copy output result to the forward network's output
- forwardRNN.neuOutput = seqOutput[curState];
+ forwardRNN.OutputLayer = seqOutput[curState];
forwardRNN.learnNet(state, curState, true);
forwardRNN.LearnBackTime(state, numStates, curState);
@@ -327,12 +400,12 @@ private void LearnTwoRNN(Sequence pSequence, Matrix mergedHiddenLayer, n
int curState2 = numStates - 1 - curState;
// error propagation
- State state2 = pSequence.Get(curState2);
+ State state2 = pSequence.States[curState2];
backwardRNN.setInputLayer(state2, curState2, numStates, null, false);
backwardRNN.computeNet(state2, null); //compute probability distribution
//Copy output result to the backward network's output
- backwardRNN.neuOutput = seqOutput[curState2];
+ backwardRNN.OutputLayer = seqOutput[curState2];
backwardRNN.learnNet(state2, curState2, true);
backwardRNN.LearnBackTime(state2, numStates, curState2);
@@ -340,36 +413,6 @@ private void LearnTwoRNN(Sequence pSequence, Matrix mergedHiddenLayer, n
});
}
- public int GetBestOutputIndex(Matrix m, int curState)
- {
- int imax = 0;
- double dmax = m[curState][0];
- for (int k = 1; k < m.GetWidth(); k++)
- {
- if (m[curState][k] > dmax)
- {
- dmax = m[curState][k];
- imax = k;
- }
- }
- return imax;
- }
-
-
- public int GetBestOutputIndex(neuron[][] m, int curState, int L2)
- {
- int imax = 0;
- double dmax = m[curState][0].cellOutput;
- for (int k = 1; k < L2; k++)
- {
- if (m[curState][k].cellOutput > dmax)
- {
- dmax = m[curState][k].cellOutput;
- imax = k;
- }
- }
- return imax;
- }
public override void LearnBackTime(State state, int numStates, int curState)
{
@@ -394,11 +437,11 @@ public override void netReset(bool updateNet = false)
public override void saveNetBin(string filename)
{
//Save bi-directional model
- forwardRNN.mat_hidden2output = mat_hidden2output;
- backwardRNN.mat_hidden2output = mat_hidden2output;
+ forwardRNN.Hidden2OutputWeight = Hidden2OutputWeight;
+ backwardRNN.Hidden2OutputWeight = Hidden2OutputWeight;
- forwardRNN.mat_CRFTagTransWeights = mat_CRFTagTransWeights;
- backwardRNN.mat_CRFTagTransWeights = mat_CRFTagTransWeights;
+ forwardRNN.CRFTagTransWeights = CRFTagTransWeights;
+ backwardRNN.CRFTagTransWeights = CRFTagTransWeights;
forwardRNN.saveNetBin(filename + ".forward");
backwardRNN.saveNetBin(filename + ".backward");
@@ -407,12 +450,12 @@ public override void saveNetBin(string filename)
using (StreamWriter sw = new StreamWriter(filename))
{
BinaryWriter fo = new BinaryWriter(sw.BaseStream);
- fo.Write((int)m_modeltype);
- fo.Write((int)m_modeldirection);
+ fo.Write((int)ModelType);
+ fo.Write((int)ModelDirection);
// Signature: 0 is for RNN, 1 is for RNN-CRF
int iflag = 0;
- if (m_bCRFTraining == true)
+ if (IsCRFTraining == true)
{
iflag = 1;
}
@@ -421,7 +464,7 @@ public override void saveNetBin(string filename)
fo.Write(L0);
fo.Write(L1);
fo.Write(L2);
- fo.Write(fea_size);
+ fo.Write(DenseFeatureSize);
}
}
@@ -432,31 +475,31 @@ public override void loadNetBin(string filename)
forwardRNN.loadNetBin(filename + ".forward");
backwardRNN.loadNetBin(filename + ".backward");
- mat_hidden2output = forwardRNN.mat_hidden2output;
- mat_CRFTagTransWeights = forwardRNN.mat_CRFTagTransWeights;
+ Hidden2OutputWeight = forwardRNN.Hidden2OutputWeight;
+ CRFTagTransWeights = forwardRNN.CRFTagTransWeights;
using (StreamReader sr = new StreamReader(filename))
{
BinaryReader br = new BinaryReader(sr.BaseStream);
- m_modeltype = (MODELTYPE)br.ReadInt32();
- m_modeldirection = (MODELDIRECTION)br.ReadInt32();
+ ModelType = (MODELTYPE)br.ReadInt32();
+ ModelDirection = (MODELDIRECTION)br.ReadInt32();
int iflag = br.ReadInt32();
if (iflag == 1)
{
- m_bCRFTraining = true;
+ IsCRFTraining = true;
}
else
{
- m_bCRFTraining = false;
+ IsCRFTraining = false;
}
//Load basic parameters
L0 = br.ReadInt32();
L1 = br.ReadInt32();
L2 = br.ReadInt32();
- fea_size = br.ReadInt32();
+ DenseFeatureSize = br.ReadInt32();
}
}
}
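
InnerDecode runs the two directions independently and then combines their hidden activations per time step before the shared Hidden2OutputWeight is applied. The code that fills mergedHiddenLayer is not part of this diff, so the element-wise sum below is an illustrative assumption (both directions share hidden size L1, which makes a sum or an average well-typed), and the plain float buffers are a simplification of the neuron arrays:

    // Hypothetical sketch of the per-state merge InnerDecode relies on.
    static float[][] MergeHiddenLayers(float[][] forward, float[][] backward, int L1)
    {
        int numStates = forward.Length;
        var merged = new float[numStates][];
        for (int t = 0; t < numStates; t++)
        {
            merged[t] = new float[L1];
            for (int i = 0; i < L1; i++)
            {
                // Assumed combine rule; the diff only guarantees the result has width L1.
                merged[t][i] = forward[t][i] + backward[t][i];
            }
        }
        return merged;
    }

Whatever the real combine rule is, the invariant that matters is that mergedHiddenLayer[curState] has width L1, so matrixXvectorADD(seqOutput[curState], mergedHiddenLayer[curState], Hidden2OutputWeight, 0, L2, 0, L1, 0) stays well-formed.
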
diff --git a/RNNSharp/DataSet.cs b/RNNSharp/DataSet.cs
index 3254335..5cb684a 100644
--- a/RNNSharp/DataSet.cs
+++ b/RNNSharp/DataSet.cs
@@ -1,133 +1,90 @@
using System;
using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class DataSet
{
- List m_Data;
- int m_tagSize;
- List> m_LabelBigramTransition;
-
- ///
- /// Split current corpus into two parts according given ratio
- ///
- ///
- ///
- ///
- public void SplitDataSet(double ratio, out DataSet ds1, out DataSet ds2)
- {
- Random rnd = new Random(DateTime.Now.Millisecond);
- ds1 = new DataSet(m_tagSize);
- ds2 = new DataSet(m_tagSize);
-
- for (int i = 0; i < m_Data.Count; i++)
- {
- if (rnd.NextDouble() < ratio)
- {
- ds1.Add(m_Data[i]);
- }
- else
- {
- ds2.Add(m_Data[i]);
- }
- }
-
- ds1.BuildLabelBigramTransition();
- ds2.BuildLabelBigramTransition();
- }
-
- public void Add(Sequence sequence) { m_Data.Add(sequence); }
+ public List SequenceList { get; set; }
+ public int TagSize { get; set; }
+ public List> CRFLabelBigramTransition { get; set; }
public void Shuffle()
{
Random rnd = new Random(DateTime.Now.Millisecond);
- for (int i = 0; i < m_Data.Count; i++)
+ for (int i = 0; i < SequenceList.Count; i++)
{
- int m = rnd.Next() % m_Data.Count;
- Sequence tmp = m_Data[i];
- m_Data[i] = m_Data[m];
- m_Data[m] = tmp;
+ int m = rnd.Next() % SequenceList.Count;
+ Sequence tmp = SequenceList[i];
+ SequenceList[i] = SequenceList[m];
+ SequenceList[m] = tmp;
}
}
public DataSet(int tagSize)
{
- m_tagSize = tagSize;
- m_Data = new List();
- m_LabelBigramTransition = new List>();
- }
-
- public int GetSize()
- {
- return m_Data.Count;
+ TagSize = tagSize;
+ SequenceList = new List();
+ CRFLabelBigramTransition = new List>();
}
- public Sequence Get(int i) { return m_Data[i]; }
- public int GetTagSize() { return m_tagSize; }
-
-
- public int GetDenseDimension()
+ public int DenseFeatureSize()
{
- if (0 == m_Data.Count) return 0;
- return m_Data[0].GetDenseDimension();
+ if (0 == SequenceList.Count) return 0;
+ return SequenceList[0].GetDenseDimension();
}
public int GetSparseDimension()
{
- if (0 == m_Data.Count) return 0;
- return m_Data[0].GetSparseDimension();
+ if (0 == SequenceList.Count) return 0;
+ return SequenceList[0].GetSparseDimension();
}
-
- public List> GetLabelBigramTransition() { return m_LabelBigramTransition; }
-
-
- public void BuildLabelBigramTransition(double smooth = 1.0)
+ public void BuildLabelBigramTransition(float smooth = 1.0f)
{
- m_LabelBigramTransition = new List>();
+ CRFLabelBigramTransition = new List>();
- for (int i = 0; i < m_tagSize; i++)
+ for (int i = 0; i < TagSize; i++)
{
- m_LabelBigramTransition.Add(new List());
+ CRFLabelBigramTransition.Add(new List());
}
- for (int i = 0; i < m_tagSize; i++)
+ for (int i = 0; i < TagSize; i++)
{
- for (int j = 0; j < m_tagSize; j++)
+ for (int j = 0; j < TagSize; j++)
{
- m_LabelBigramTransition[i].Add(smooth);
+ CRFLabelBigramTransition[i].Add(smooth);
}
}
- for (int i = 0; i < m_Data.Count; i++)
+ for (int i = 0; i < SequenceList.Count; i++)
{
- var sequence = m_Data[i];
- if (sequence.GetSize() <= 1)
+ var sequence = SequenceList[i];
+ if (sequence.States.Length <= 1)
continue;
- int pLabel = sequence.Get(0).GetLabel();
- for (int j = 1; j < sequence.GetSize(); j++)
+ int pLabel = sequence.States[0].Label;
+ for (int j = 1; j < sequence.States.Length; j++)
{
- int label = sequence.Get(j).GetLabel();
- m_LabelBigramTransition[label][pLabel]++;
+ int label = sequence.States[j].Label;
+ CRFLabelBigramTransition[label][pLabel]++;
pLabel = label;
}
}
- for (int i = 0; i < m_tagSize; i++)
+ for (int i = 0; i < TagSize; i++)
{
double sum = 0;
- for (int j = 0; j < m_tagSize; j++)
+ for (int j = 0; j < TagSize; j++)
{
- sum += m_LabelBigramTransition[i][j];
+ sum += CRFLabelBigramTransition[i][j];
}
- for (int j = 0; j < m_tagSize; j++)
+ for (int j = 0; j < TagSize; j++)
{
- m_LabelBigramTransition[i][j] = Math.Log(m_LabelBigramTransition[i][j] / sum);
+ CRFLabelBigramTransition[i][j] = (float)Math.Log(CRFLabelBigramTransition[i][j] / sum);
}
}
}
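
BuildLabelBigramTransition thus computes a Laplace-smoothed log frequency for every tag pair. Writing count(j -> i) for the number of training transitions from previous tag j to current tag i:

    CRFLabelBigramTransition[i][j] = \log \frac{count(j \to i) + smooth}{\sum_k \big( count(k \to i) + smooth \big)}

Note the indexing: the row is the current label and the column the previous one, matching CRFLabelBigramTransition[label][pLabel]++ above, so each row is normalized over the possible predecessors of its tag.
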
diff --git a/RNNSharp/Featurizer.cs b/RNNSharp/Featurizer.cs
index 73337cf..088182e 100644
--- a/RNNSharp/Featurizer.cs
+++ b/RNNSharp/Featurizer.cs
@@ -1,12 +1,11 @@
using System;
using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
using System.IO;
-using Txt2Vec;
using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
enum TFEATURE_WEIGHT_TYPE_ENUM
@@ -17,14 +16,14 @@ enum TFEATURE_WEIGHT_TYPE_ENUM
public class Featurizer
{
+ public TagSet TagSet { get; set; }
+
Dictionary> m_FeatureConfiguration;
int m_SparseDimension;
int m_DenseDimension;
int m_WordEmbeddingCloumn;
TFEATURE_WEIGHT_TYPE_ENUM m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
-
WordEMWrapFeaturizer m_WordEmbedding;
- TagSet m_TagSet;
TemplateFeaturizer m_TFeaturizer;
static string TFEATURE_CONTEXT = "TFEATURE_CONTEXT";
@@ -35,12 +34,6 @@ public class Featurizer
static string WORDEMBEDDING_COLUMN = "WORDEMBEDDING_COLUMN";
static string TFEATURE_WEIGHT_TYPE = "TFEATURE_WEIGHT_TYPE";
- public TagSet GetTagSet()
- {
- return m_TagSet;
- }
-
-
//The format of the configuration file
public void LoadFeatureConfigFromFile(string strFileName)
{
@@ -125,7 +118,7 @@ public int TruncPosition(int current, int lower, int upper)
public Featurizer(string strFeatureConfigFileName, TagSet tagSet)
{
LoadFeatureConfigFromFile(strFeatureConfigFileName);
- m_TagSet = tagSet;
+ TagSet = tagSet;
InitComponentFeaturizer();
}
@@ -143,7 +136,7 @@ void InitComponentFeaturizer()
if (fc.ContainsKey(RT_FEATURE_CONTEXT) == true)
{
- m_SparseDimension += m_TagSet.GetSize() * fc[RT_FEATURE_CONTEXT].Count;
+ m_SparseDimension += TagSet.GetSize() * fc[RT_FEATURE_CONTEXT].Count;
}
m_DenseDimension = 0;
@@ -173,7 +166,7 @@ public void ShowFeatureSize()
Logger.WriteLine(Logger.Level.info, "Template feature context size: {0}", m_TFeaturizer.GetFeatureSize() * fc[TFEATURE_CONTEXT].Count);
if (fc.ContainsKey(RT_FEATURE_CONTEXT) == true)
- Logger.WriteLine(Logger.Level.info, "Run time feature size: {0}", m_TagSet.GetSize() * fc[RT_FEATURE_CONTEXT].Count);
+ Logger.WriteLine(Logger.Level.info, "Run time feature size: {0}", TagSet.GetSize() * fc[RT_FEATURE_CONTEXT].Count);
if (fc.ContainsKey(WORDEMBEDDING_CONTEXT) == true)
Logger.WriteLine(Logger.Level.info, "Word embedding feature size: {0}", m_WordEmbedding.GetDimension() * fc[WORDEMBEDDING_CONTEXT].Count);
@@ -181,7 +174,7 @@ public void ShowFeatureSize()
void ExtractSparseFeature(int currentState, int numStates, List features, State pState)
{
- Dictionary sparseFeature = new Dictionary();
+ Dictionary sparseFeature = new Dictionary();
int start = 0;
var fc = m_FeatureConfiguration;
@@ -224,14 +217,14 @@ void ExtractSparseFeature(int currentState, int numStates, List featur
if (fc.ContainsKey(RT_FEATURE_CONTEXT) == true)
{
List v = fc[RT_FEATURE_CONTEXT];
- pState.SetNumRuntimeFeature(v.Count);
+ pState.RuntimeFeatures = new PriviousLabelFeature[v.Count];
for (int j = 0; j < v.Count; j++)
{
if (v[j] < 0)
{
pState.AddRuntimeFeaturePlacehold(j, v[j], sparseFeature.Count, start);
sparseFeature[start] = 0; //Reserve a placeholder position
- start += m_TagSet.GetSize();
+ start += TagSet.GetSize();
}
else
{
@@ -240,7 +233,7 @@ void ExtractSparseFeature(int currentState, int numStates, List featur
}
}
- SparseVector spSparseFeature = pState.GetSparseData();
+ SparseVector spSparseFeature = pState.SparseData;
spSparseFeature.SetDimension(m_SparseDimension);
spSparseFeature.SetData(sparseFeature);
}
@@ -284,19 +277,16 @@ public Vector ExtractDenseFeature(int currentState, int numStates, List features = sentence.GetFeatureSet();
+ int n = sentence.TokensList.Count;
+ Sequence sequence = new Sequence(n);
//For each token, get its sparse and dense feature sets according to the configuration and training corpus
- sequence.SetSize(n);
for (int i = 0; i < n; i++)
{
- State state = sequence.Get(i);
- ExtractSparseFeature(i, n, features, state);
+ State state = sequence.States[i];
+ ExtractSparseFeature(i, n, sentence.TokensList, state);
- var spDenseFeature = ExtractDenseFeature(i, n, features);
- state.SetDenseData(spDenseFeature);
+ state.DenseData = ExtractDenseFeature(i, n, sentence.TokensList);
}
return sequence;
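
ExtractSparseFeature lays the configured feature groups out in one flat index space: each group owns a contiguous block of indices, and start advances by the block's dimension whether or not anything in the block fired (runtime features additionally reserve TagSet.GetSize() slots per context position). A self-contained sketch of that bookkeeping; the Group record and the values are hypothetical, not RNNSharp types:

    using System;
    using System.Collections.Generic;

    class SparsePackingSketch
    {
        // Hypothetical stand-in for one feature group: its width in the flat
        // index space and which of its local indices fired for this token.
        sealed record Group(int Dimension, int[] ActiveIndices);

        static Dictionary<int, float> Pack(IEnumerable<Group> groups)
        {
            var sparse = new Dictionary<int, float>();
            int start = 0;                    // running offset, as in ExtractSparseFeature
            foreach (var g in groups)
            {
                foreach (int i in g.ActiveIndices)
                    sparse[start + i] = 1.0f; // BINARY weight type; FREQUENCY would accumulate
                start += g.Dimension;         // the block is reserved even if nothing fired
            }
            return sparse;
        }

        static void Main()
        {
            // A 5-wide template block with local features 1 and 3 active,
            // followed by a 3-wide runtime block with previous tag 2 active.
            var sp = Pack(new[] { new Group(5, new[] { 1, 3 }), new Group(3, new[] { 2 }) });
            foreach (var kv in sp)
                Console.WriteLine($"{kv.Key} -> {kv.Value}"); // keys 1, 3 and 7
        }
    }
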
diff --git a/RNNSharp/LSTMRNN.cs b/RNNSharp/LSTMRNN.cs
index 1b629b1..ae080e5 100644
--- a/RNNSharp/LSTMRNN.cs
+++ b/RNNSharp/LSTMRNN.cs
@@ -1,63 +1,64 @@
using System;
using System.Collections.Generic;
-using System.Linq;
-using System.Text;
using System.Threading.Tasks;
using System.IO;
using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class LSTMCell
{
//input gate
- public double netIn;
- public double yIn;
+ public float netIn;
+ public float yIn;
//forget gate
- public double netForget;
- public double yForget;
+ public float netForget;
+ public float yForget;
//cell state
- public double netCellState;
- public double previousCellState;
- public double cellState;
+ public float netCellState;
+ public float previousCellState;
+ public float cellState;
//internal weights and deltas
- public double wCellIn;
- public double wCellForget;
- public double wCellOut;
+ public float wCellIn;
+ public float wCellForget;
+ public float wCellOut;
//partial derivatives
- public double dSWCellIn;
- public double dSWCellForget;
+ public float dSWCellIn;
+ public float dSWCellForget;
//double dSWCellState;
//output gate
- public double netOut;
- public double yOut;
+ public float netOut;
+ public float yOut;
//cell output
- public double cellOutput;
+ public float cellOutput;
public bool mask;
}
public struct LSTMWeight
{
//variables
- public double wInputCell;
- public double wInputInputGate;
- public double wInputForgetGate;
- public double wInputOutputGate;
+ public float wInputCell;
+ public float wInputInputGate;
+ public float wInputForgetGate;
+ public float wInputOutputGate;
}
public struct LSTMWeightDerivative
{
//partial derivatives; the output gate needs none since it uses BP rather than RTRL
- public double dSInputCell;
- public double dSInputInputGate;
- public double dSInputForgetGate;
+ public float dSInputCell;
+ public float dSInputInputGate;
+ public float dSInputForgetGate;
}
public class LSTMRNN : RNN
@@ -71,7 +72,7 @@ public class LSTMRNN : RNN
public LSTMRNN()
{
- m_modeltype = MODELTYPE.LSTM;
+ ModelType = MODELTYPE.LSTM;
}
@@ -87,6 +88,17 @@ public LSTMWeight[][] loadLSTMWeight(BinaryReader br)
{
int w = br.ReadInt32();
int h = br.ReadInt32();
+ int vqSize = br.ReadInt32();
+
+ Logger.WriteLine("Loading LSTM-Weight: width:{0}, height:{1}, vqSize:{2}...", w, h, vqSize);
+
+ List codeBook = new List();
+ for (int i = 0; i < vqSize; i++)
+ {
+ codeBook.Add(br.ReadDouble());
+ }
+
+
LSTMWeight[][] m = new LSTMWeight[w][];
for (int i = 0; i < w; i++)
@@ -94,10 +106,17 @@ public LSTMWeight[][] loadLSTMWeight(BinaryReader br)
m[i] = new LSTMWeight[h];
for (int j = 0; j < h; j++)
{
- m[i][j].wInputCell = br.ReadSingle();
- m[i][j].wInputForgetGate = br.ReadSingle();
- m[i][j].wInputInputGate = br.ReadSingle();
- m[i][j].wInputOutputGate = br.ReadSingle();
+ int vqIdx = br.ReadByte();
+ m[i][j].wInputCell = (float)codeBook[vqIdx];
+
+ vqIdx = br.ReadByte();
+ m[i][j].wInputForgetGate = (float)codeBook[vqIdx];
+
+ vqIdx = br.ReadByte();
+ m[i][j].wInputInputGate = (float)codeBook[vqIdx];
+
+ vqIdx = br.ReadByte();
+ m[i][j].wInputOutputGate = (float)codeBook[vqIdx];
}
}
@@ -106,26 +125,47 @@ public LSTMWeight[][] loadLSTMWeight(BinaryReader br)
private void saveLSTMWeight(LSTMWeight[][] weight, BinaryWriter fo)
{
- if (weight == null || weight.Length == 0)
- {
- fo.Write(0);
- fo.Write(0);
- }
+ int w = weight.Length;
+ int h = weight[0].Length;
+ int vqSize = 256;
+
+ Logger.WriteLine("Saving LSTM weight matrix. width:{0}, height:{1}, vqSize:{2}", w, h, vqSize);
fo.Write(weight.Length);
fo.Write(weight[0].Length);
- int w = weight.Length;
- int h = weight[0].Length;
+ //Build vector quantization model
+ VectorQuantization vq = new VectorQuantization();
+ for (int i = 0; i < w; i++)
+ {
+ for (int j = 0; j < h; j++)
+ {
+ vq.Add(weight[i][j].wInputCell);
+ vq.Add(weight[i][j].wInputForgetGate);
+ vq.Add(weight[i][j].wInputInputGate);
+ vq.Add(weight[i][j].wInputOutputGate);
+ }
+ }
+
+
+ double distortion = vq.BuildCodebook(vqSize);
+ Logger.WriteLine("Distortion: {0}", distortion);
+
+ //Save VQ codebook into file
+ fo.Write(vqSize);
+ for (int j = 0; j < vqSize; j++)
+ {
+ fo.Write(vq.CodeBook[j]);
+ }
for (int i = 0; i < w; i++)
{
for (int j = 0; j < h; j++)
{
- fo.Write((float)weight[i][j].wInputCell);
- fo.Write((float)weight[i][j].wInputForgetGate);
- fo.Write((float)weight[i][j].wInputInputGate);
- fo.Write((float)weight[i][j].wInputOutputGate);
+ fo.Write((byte)vq.ComputeVQ(weight[i][j].wInputCell));
+ fo.Write((byte)vq.ComputeVQ(weight[i][j].wInputForgetGate));
+ fo.Write((byte)vq.ComputeVQ(weight[i][j].wInputInputGate));
+ fo.Write((byte)vq.ComputeVQ(weight[i][j].wInputOutputGate));
}
}
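
saveLSTMWeight now stores each of the four gate weights as one byte indexing a learned 256-entry codebook, and loadLSTMWeight above reverses the mapping. The essential round trip, as a sketch using only the VectorQuantization members this diff already calls (Add, BuildCodebook, ComputeVQ, CodeBook); the weight values are illustrative:

    // Quantize a float array to byte indices and restore it (lossily).
    float[] weights = { 0.12f, -0.53f, 0.11f, 0.74f };

    VectorQuantization vq = new VectorQuantization();
    foreach (float w in weights) vq.Add(w);

    double distortion = vq.BuildCodebook(256);  // returns the total quantization error
    byte[] encoded = Array.ConvertAll(weights, w => (byte)vq.ComputeVQ(w));
    float[] restored = Array.ConvertAll(encoded, b => (float)vq.CodeBook[b]);
    // 4 bytes per weight become 1; 'restored' differs from 'weights' by the distortion.

(saveMatrixBin in RNN.cs additionally caps the codebook size at the number of values, so a toy array like this one would get an exact codebook.)
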
@@ -138,49 +178,53 @@ public override void loadNetBin(string filename)
StreamReader sr = new StreamReader(filename);
BinaryReader br = new BinaryReader(sr.BaseStream);
- m_modeltype = (MODELTYPE)br.ReadInt32();
- if (m_modeltype != MODELTYPE.LSTM)
+ ModelType = (MODELTYPE)br.ReadInt32();
+ if (ModelType != MODELTYPE.LSTM)
{
throw new Exception("Invalid model format: must be LSTM-RNN format");
}
- m_modeldirection = (MODELDIRECTION)br.ReadInt32();
+ ModelDirection = (MODELDIRECTION)br.ReadInt32();
int iflag = br.ReadInt32();
if (iflag == 1)
{
- m_bCRFTraining = true;
+ IsCRFTraining = true;
}
else
{
- m_bCRFTraining = false;
+ IsCRFTraining = false;
}
//Load basic parameters
L0 = br.ReadInt32();
L1 = br.ReadInt32();
L2 = br.ReadInt32();
- fea_size = br.ReadInt32();
+ DenseFeatureSize = br.ReadInt32();
//Create cells of each layer
CreateCell(br);
//Load weight matrix between each two layer pairs
//weight input->hidden
+ Logger.WriteLine("Loading input2hidden weights...");
input2hidden = loadLSTMWeight(br);
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
//weight fea->hidden
+ Logger.WriteLine("Loading feature2hidden weights...");
feature2hidden = loadLSTMWeight(br);
}
//weight hidden->output
- mat_hidden2output = loadMatrixBin(br);
+ Logger.WriteLine("Loading hidden2output weights...");
+ Hidden2OutputWeight = loadMatrixBin(br);
if (iflag == 1)
{
- mat_CRFTagTransWeights = loadMatrixBin(br);
+ Logger.WriteLine("Loading CRF tag trans weights...");
+ CRFTagTransWeights = loadMatrixBin(br);
}
sr.Close();
@@ -203,13 +247,13 @@ public override void saveNetBin(string filename)
StreamWriter sw = new StreamWriter(filename);
BinaryWriter fo = new BinaryWriter(sw.BaseStream);
- fo.Write((int)m_modeltype);
+ fo.Write((int)ModelType);
- fo.Write((int)m_modeldirection);
+ fo.Write((int)ModelDirection);
// Signature: 0 is for RNN, 1 is for RNN-CRF
int iflag = 0;
- if (m_bCRFTraining == true)
+ if (IsCRFTraining == true)
{
iflag = 1;
}
@@ -218,27 +262,32 @@ public override void saveNetBin(string filename)
fo.Write(L0);
fo.Write(L1);
fo.Write(L2);
- fo.Write(fea_size);
+ fo.Write(DenseFeatureSize);
//Save hidden layer weights
+ Logger.WriteLine("Saving hidden layer weights...");
SaveHiddenLayerWeights(fo);
//weight input->hidden
+ Logger.WriteLine("Saving input2hidden weights...");
saveLSTMWeight(input2hidden, fo);
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
//weight fea->hidden
+ Logger.WriteLine("Saving feature2hidden weights...");
saveLSTMWeight(feature2hidden, fo);
}
//weight hidden->output
- saveMatrixBin(mat_hidden2output, fo);
+ Logger.WriteLine("Saving hidden2output weights...");
+ saveMatrixBin(Hidden2OutputWeight, fo);
if (iflag == 1)
{
// Save Bigram
- saveMatrixBin(mat_CRFTagTransWeights, fo);
+ Logger.WriteLine("Saving CRF tag trans weights...");
+ saveMatrixBin(CRFTagTransWeights, fo);
}
fo.Close();
@@ -293,13 +342,13 @@ public override void initWeights()
}
}
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
feature2hidden = new LSTMWeight[L1][];
for (int i = 0; i < L1; i++)
{
- feature2hidden[i] = new LSTMWeight[fea_size];
- for (int j = 0; j < fea_size; j++)
+ feature2hidden[i] = new LSTMWeight[DenseFeatureSize];
+ for (int j = 0; j < DenseFeatureSize; j++)
{
feature2hidden[i][j] = LSTMWeightInit();
}
@@ -307,13 +356,13 @@ public override void initWeights()
}
//Create and initialise the weights from hidden to output layer; these are just normal weights
- mat_hidden2output = new Matrix(L2, L1);
+ Hidden2OutputWeight = new Matrix(L2, L1);
- for (int i = 0; i < mat_hidden2output.GetHeight(); i++)
+ for (int i = 0; i < Hidden2OutputWeight.GetHeight(); i++)
{
- for (int j = 0; j < mat_hidden2output.GetWidth(); j++)
+ for (int j = 0; j < Hidden2OutputWeight.GetWidth(); j++)
{
- mat_hidden2output[i][j] = RandInitWeight();
+ Hidden2OutputWeight[i][j] = RandInitWeight();
}
}
}
@@ -350,7 +399,7 @@ public override void initMem()
CreateCell(null);
input2hiddenDeri = new LSTMWeightDerivative[L1][];
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
feature2hiddenDeri = new LSTMWeightDerivative[L1][];
}
@@ -359,9 +408,9 @@ public override void initMem()
{
input2hiddenDeri[i] = new LSTMWeightDerivative[L0];
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
- feature2hiddenDeri[i] = new LSTMWeightDerivative[fea_size];
+ feature2hiddenDeri[i] = new LSTMWeightDerivative[DenseFeatureSize];
}
}
@@ -371,13 +420,13 @@ public override void initMem()
private void CreateCell(BinaryReader br)
{
- neuFeatures = new double[fea_size];
- neuOutput = new neuron[L2];
+ neuFeatures = new SingleVector(DenseFeatureSize);
+ OutputLayer = new neuron[L2];
for (int a = 0; a < L2; a++)
{
- neuOutput[a].cellOutput = 0;
- neuOutput[a].er = 0;
+ OutputLayer[a].cellOutput = 0;
+ OutputLayer[a].er = 0;
}
neuHidden = new LSTMCell[L1];
@@ -392,9 +441,9 @@ private void CreateCell(BinaryReader br)
//Load weight from input file
for (int i = 0; i < L1; i++)
{
- neuHidden[i].wCellIn = br.ReadDouble();
- neuHidden[i].wCellForget = br.ReadDouble();
- neuHidden[i].wCellOut = br.ReadDouble();
+ neuHidden[i].wCellIn = br.ReadSingle();
+ neuHidden[i].wCellForget = br.ReadSingle();
+ neuHidden[i].wCellOut = br.ReadSingle();
}
}
else
@@ -436,7 +485,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
}
//Get the sparse features and apply them to the hidden layer
- var sparse = state.GetSparseData();
+ var sparse = state.SparseData;
int sparseFeatureSize = sparse.GetNumberOfEntries();
//put variables for derivatives in weight class and cell class
@@ -444,34 +493,36 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
{
LSTMWeightDerivative[] w_i = input2hiddenDeri[i];
LSTMCell c = neuHidden[i];
+ float Sigmoid2Derivative_ci_netCellState_mul_ci_yIn = (float)(Sigmoid2Derivative(c.netCellState) * c.yIn);
+ float Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn = (float)(Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn));
+ float ci_previousCellState_mul_SigmoidDerivative_ci_netForget = (float)(c.previousCellState * SigmoidDerivative(c.netForget));
+
for (int k = 0; k < sparseFeatureSize; k++)
{
var entry = sparse.GetEntry(k);
LSTMWeightDerivative w = w_i[entry.Key];
- w_i[entry.Key].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative(c.netCellState) * c.yIn * entry.Value;
- w_i[entry.Key].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn) * entry.Value;
- w_i[entry.Key].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * SigmoidDerivative(c.netForget) * entry.Value;
-
+ w_i[entry.Key].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative_ci_netCellState_mul_ci_yIn * entry.Value;
+ w_i[entry.Key].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * entry.Value;
+ w_i[entry.Key].dSInputForgetGate = w.dSInputForgetGate * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * entry.Value;
}
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
w_i = feature2hiddenDeri[i];
- for (int j = 0; j < fea_size; j++)
+ for (int j = 0; j < DenseFeatureSize; j++)
{
LSTMWeightDerivative w = w_i[j];
- w_i[j].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative(c.netCellState) * c.yIn * neuFeatures[j];
- w_i[j].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn) * neuFeatures[j];
- w_i[j].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * SigmoidDerivative(c.netForget) * neuFeatures[j];
-
+ w_i[j].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative_ci_netCellState_mul_ci_yIn * neuFeatures[j];
+ w_i[j].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * neuFeatures[j];
+ w_i[j].dSInputForgetGate = w.dSInputForgetGate * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * neuFeatures[j];
}
}
//partial derivatives for internal connections
- c.dSWCellIn = c.dSWCellIn * c.yForget + Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn) * c.cellState;
+ c.dSWCellIn = c.dSWCellIn * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * c.cellState;
//partial derivatives for internal connections, initially zero as dS is zero and previous cell state is zero
- c.dSWCellForget = c.dSWCellForget * c.yForget + c.previousCellState * SigmoidDerivative(c.netForget) * c.previousCellState;
+ c.dSWCellForget = c.dSWCellForget * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * c.previousCellState;
neuHidden[i] = c;
});
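
The three hoisted locals above are the cell-level factors of the RTRL recurrences; they depend on cell i but not on the input index k, so lifting them out of the sparse and dense loops saves repeated transcendental evaluations per connection. Each derivative triple follows the same recurrence,

    dS_{ik}(t) = y^{forget}_i(t)\, dS_{ik}(t-1) + D_i(t)\, x_k(t)

where D_i is Sigmoid2Derivative(netCellState) * yIn for the cell weight, Sigmoid2(netCellState) * SigmoidDerivative(netIn) for the input gate, previousCellState * SigmoidDerivative(netForget) for the forget gate, and x_k is the sparse entry value or dense feature.
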
@@ -482,18 +533,18 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
LSTMCell c = neuHidden[i];
//find the error by taking the product of the output errors and their connection weights.
- double weightedSum = 0;
+ var weightedSum = 0.0;
for (int k = 0; k < L2; k++)
{
- weightedSum += neuOutput[k].er * mat_hidden2output[k][i];
+ weightedSum += OutputLayer[k].er * Hidden2OutputWeight[k][i];
}
weightedSum = NormalizeErr(weightedSum);
//using the error, find the gradient of the output gate
- double gradientOutputGate = SigmoidDerivative(c.netOut) * c.cellState * weightedSum;
+ var gradientOutputGate = (float)(LearningRate * SigmoidDerivative(c.netOut) * c.cellState * weightedSum);
//internal cell state error
- double cellStateError = c.yOut * weightedSum;
+ var cellStateError = (float)(LearningRate * c.yOut * weightedSum);
//weight updates
LSTMWeight[] w_i = input2hidden[i];
@@ -502,32 +553,32 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
{
var entry = sparse.GetEntry(k);
//updates weights for input to hidden layer
- w_i[entry.Key].wInputCell += alpha * cellStateError * wd_i[entry.Key].dSInputCell;
- w_i[entry.Key].wInputInputGate += alpha * cellStateError * wd_i[entry.Key].dSInputInputGate;
- w_i[entry.Key].wInputForgetGate += alpha * cellStateError * wd_i[entry.Key].dSInputForgetGate;
- w_i[entry.Key].wInputOutputGate += alpha * gradientOutputGate * entry.Value;
+ w_i[entry.Key].wInputCell += cellStateError * wd_i[entry.Key].dSInputCell;
+ w_i[entry.Key].wInputInputGate += cellStateError * wd_i[entry.Key].dSInputInputGate;
+ w_i[entry.Key].wInputForgetGate += cellStateError * wd_i[entry.Key].dSInputForgetGate;
+ w_i[entry.Key].wInputOutputGate += gradientOutputGate * entry.Value;
}
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
w_i = feature2hidden[i];
wd_i = feature2hiddenDeri[i];
- for (int j = 0; j < fea_size; j++)
+ for (int j = 0; j < DenseFeatureSize; j++)
{
//make the delta equal to the learning rate multiplied by the gradient multiplied by the input for the connection
//update connection weights
- w_i[j].wInputCell += alpha * cellStateError * wd_i[j].dSInputCell;
- w_i[j].wInputInputGate += alpha * cellStateError * wd_i[j].dSInputInputGate;
- w_i[j].wInputForgetGate += alpha * cellStateError * wd_i[j].dSInputForgetGate;
- w_i[j].wInputOutputGate += alpha * gradientOutputGate * neuFeatures[j];
+ w_i[j].wInputCell += cellStateError * wd_i[j].dSInputCell;
+ w_i[j].wInputInputGate += cellStateError * wd_i[j].dSInputInputGate;
+ w_i[j].wInputForgetGate += cellStateError * wd_i[j].dSInputForgetGate;
+ w_i[j].wInputOutputGate += gradientOutputGate * neuFeatures[j];
}
}
//update internal weights
- c.wCellIn += alpha * cellStateError * c.dSWCellIn;
- c.wCellForget += alpha * cellStateError * c.dSWCellForget;
- c.wCellOut += alpha * gradientOutputGate * c.cellState;
+ c.wCellIn += cellStateError * c.dSWCellIn;
+ c.wCellForget += cellStateError * c.dSWCellForget;
+ c.wCellOut += gradientOutputGate * c.cellState;
neuHidden[i] = c;
});
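
LearningRate is now folded into cellStateError and gradientOutputGate at the point they are computed, where the previous code multiplied by alpha inside every connection update; the loops above therefore do one multiplication less per weight and stay numerically equivalent up to the earlier cast to float.
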
@@ -537,7 +588,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
{
for (int k = 0; k < L2; k++)
{
- mat_hidden2output[k][i] += alpha * neuHidden[i].cellOutput * neuOutput[k].er;
+ Hidden2OutputWeight[k][i] += (float)(LearningRate * neuHidden[i].cellOutput * OutputLayer[k].er);
}
});
}
@@ -548,7 +599,7 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
{
//inputs(t) -> hidden(t)
//Get the sparse features and apply them to the hidden layer
- var sparse = state.GetSparseData();
+ var sparse = state.SparseData;
int sparseFeatureSize = sparse.GetNumberOfEntries();
Parallel.For(0, L1, parallelOption, j =>
@@ -577,10 +628,11 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
cell_j.netOut += entry.Value * w.wInputOutputGate;
}
+
//fea(t) -> hidden(t)
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
- for (int i = 0; i < fea_size; i++)
+ for (int i = 0; i < DenseFeatureSize; i++)
{
LSTMWeight w = feature2hidden[j][i];
cell_j.netIn += neuFeatures[i] * w.wInputInputGate;
@@ -593,11 +645,11 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
//include internal connection multiplied by the previous cell state
cell_j.netIn += cell_j.previousCellState * cell_j.wCellIn;
//squash input
- cell_j.yIn = Sigmoid(cell_j.netIn);
+ cell_j.yIn = (float)Sigmoid(cell_j.netIn);
//include internal connection multiplied by the previous cell state
cell_j.netForget += cell_j.previousCellState * cell_j.wCellForget;
- cell_j.yForget = Sigmoid(cell_j.netForget);
+ cell_j.yForget = (float)Sigmoid(cell_j.netForget);
if (cell_j.mask == true)
{
@@ -606,14 +658,14 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
else
{
//cell state equals the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
- cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * Sigmoid2(cell_j.netCellState);
+ cell_j.cellState = (float)(cell_j.yForget * cell_j.previousCellState + cell_j.yIn * Sigmoid2(cell_j.netCellState));
}
//include the internal connection multiplied by the CURRENT cell state
cell_j.netOut += cell_j.cellState * cell_j.wCellOut;
//squash output gate
- cell_j.yOut = Sigmoid(cell_j.netOut);
+ cell_j.yOut = (float)(Sigmoid(cell_j.netOut));
cell_j.cellOutput = cell_j.cellState * cell_j.yOut;
@@ -621,53 +673,47 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
neuHidden[j] = cell_j;
});
- matrixXvectorADD(neuOutput, neuHidden, mat_hidden2output, 0, L2, 0, L1);
+ matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, 0, L2, 0, L1);
if (doutput != null)
{
for (int i = 0; i < L2; i++)
{
- doutput[i] = neuOutput[i].cellOutput;
+ doutput[i] = OutputLayer[i].cellOutput;
}
}
//activation 2 -- softmax over the output layer
- SoftmaxLayer(neuOutput);
+ SoftmaxLayer(OutputLayer);
}
public override void netReset(bool updateNet = false) //cleans hidden layer activation + bptt history
{
- for (int a = 0; a < L1; a++)
- {
- neuHidden[a].mask = false;
- }
-
- if (updateNet == true)
- {
- //Train mode
- for (int a = 0; a < L1; a++)
- {
- if (rand.NextDouble() < dropout)
- {
- neuHidden[a].mask = true;
- }
- }
- }
-
Parallel.For(0, L1, parallelOption, i =>
{
+ neuHidden[i].mask = false;
LSTMCellInit(neuHidden[i]);
if (updateNet == true)
{
Array.Clear(input2hiddenDeri[i], 0, L0);
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
- Array.Clear(feature2hiddenDeri[i], 0, fea_size);
+ Array.Clear(feature2hiddenDeri[i], 0, DenseFeatureSize);
}
}
});
-
+ if (updateNet == true)
+ {
+ //Train mode
+ for (int a = 0; a < L1; a++)
+ {
+ if (rand.NextDouble() < Dropout)
+ {
+ neuHidden[a].mask = true;
+ }
+ }
+ }
}
}
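
Collecting the computeNet updates, each hidden cell j implements a standard peephole LSTM forward pass. In the diff's naming, with \sigma the logistic sigmoid and g the Sigmoid2 squashing function (defined outside this diff):

    \begin{aligned}
    y^{in}_j     &= \sigma\big(net^{in}_j + w^{cellIn}_j\, s_j(t-1)\big) \\
    y^{forget}_j &= \sigma\big(net^{forget}_j + w^{cellForget}_j\, s_j(t-1)\big) \\
    s_j(t)       &= y^{forget}_j\, s_j(t-1) + y^{in}_j\, g\big(net^{cell}_j\big) \\
    y^{out}_j    &= \sigma\big(net^{out}_j + w^{cellOut}_j\, s_j(t)\big) \\
    h_j(t)       &= y^{out}_j\, s_j(t)
    \end{aligned}

Here the net terms accumulate the sparse, dense, and recurrent contributions before the peephole additions, and h_j(t) (cellOutput) feeds matrixXvectorADD into the output layer.
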
diff --git a/RNNSharp/MathUtil.cs b/RNNSharp/MathUtil.cs
index f40ce1b..fbb09a2 100644
--- a/RNNSharp/MathUtil.cs
+++ b/RNNSharp/MathUtil.cs
@@ -1,9 +1,8 @@
using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
class MathUtil
diff --git a/RNNSharp/Matrix.cs b/RNNSharp/Matrix.cs
index f4b6d89..68c957b 100644
--- a/RNNSharp/Matrix.cs
+++ b/RNNSharp/Matrix.cs
@@ -1,10 +1,7 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using System.IO;
-
+
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class Matrix
diff --git a/RNNSharp/ModelSetting.cs b/RNNSharp/ModelSetting.cs
index fe897c6..8a49556 100644
--- a/RNNSharp/ModelSetting.cs
+++ b/RNNSharp/ModelSetting.cs
@@ -1,84 +1,37 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using AdvUtils;
+using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class ModelSetting
{
- public string GetModelFile() { return m_strModelFile; }
- public void SetModelFile(string modelFile) { m_strModelFile = modelFile; }
-
- public int GetNumHidden() { return m_NumHidden; }
- public void SetNumHidden(int n) { m_NumHidden = n; }
-
- public double GetLearningRate(){ return m_LearningRate; }
- public void SetLearningRate(double r) { m_LearningRate = r; }
-
- public double GetDropout() { return m_Dropout; }
- public void SetDropout(double r) { m_Dropout = r; }
-
- public int GetBptt() { return m_Bptt; }
- public void SetBptt(int n) { m_Bptt = n; }
-
-
- public int GetModelType() { return m_ModelType; }
- public void SetModelType(int n) { m_ModelType = n; }
-
- public int GetMaxIteration() { return m_MaxIteration; }
- public void SetMaxIteration(int i) { m_MaxIteration = i; }
-
- public virtual bool IsCRFTraining() { return m_bCRFTraining; }
- public void SetCRFTraining(bool s) { m_bCRFTraining = s; }
-
- public void SetDir(int dir)
- {
- m_iDir = dir;
- }
-
- public int GetModelDirection()
- {
- return m_iDir;
- }
-
- public void SetSaveStep(long savestep)
- {
- m_SaveStep = savestep;
- }
-
- public long GetSaveStep()
- {
- return m_SaveStep;
- }
-
- string m_strModelFile;
- int m_NumHidden;
- double m_LearningRate;
- double m_Dropout;
- int m_Bptt;
- int m_MaxIteration;
- bool m_bCRFTraining;
- long m_SaveStep;
- int m_ModelType;
- int m_iDir;
+ public string ModelFile { get; set; }
+ public int NumHidden { get; set; }
+ public float LearningRate { get; set; }
+ public float Dropout { get; set; }
+ public int Bptt { get; set; }
+ public int MaxIteration { get; set; }
+ public bool IsCRFTraining { get; set; }
+ public long SaveStep { get; set; }
+ public int ModelType { get; set; }
+ public int ModelDirection { get; set; }
public void DumpSetting()
{
- Logger.WriteLine(Logger.Level.info, "Model File: {0}", m_strModelFile);
- if (m_ModelType == 0)
+ Logger.WriteLine(Logger.Level.info, "Model File: {0}", ModelFile);
+ if (ModelType == 0)
{
Logger.WriteLine(Logger.Level.info, "Model Structure: Simple RNN");
- Logger.WriteLine(Logger.Level.info, "BPTT: {0}", m_Bptt);
+ Logger.WriteLine(Logger.Level.info, "BPTT: {0}", Bptt);
}
- else if (m_ModelType == 1)
+ else if (ModelType == 1)
{
Logger.WriteLine(Logger.Level.info, "Model Structure: LSTM-RNN");
}
- if (m_iDir == 0)
+ if (ModelDirection == 0)
{
Logger.WriteLine(Logger.Level.info, "RNN Direction: Forward");
}
@@ -87,24 +40,24 @@ public void DumpSetting()
Logger.WriteLine(Logger.Level.info, "RNN Direction: Bi-directional");
}
- Logger.WriteLine(Logger.Level.info, "Learning rate: {0}", m_LearningRate);
- Logger.WriteLine(Logger.Level.info, "Dropout: {0}", m_Dropout);
- Logger.WriteLine(Logger.Level.info, "Max Iteration: {0}", m_MaxIteration);
- Logger.WriteLine(Logger.Level.info, "Hidden layer size: {0}", m_NumHidden);
- Logger.WriteLine(Logger.Level.info, "RNN-CRF: {0}", m_bCRFTraining);
- if (m_SaveStep > 0)
+ Logger.WriteLine(Logger.Level.info, "Learning rate: {0}", LearningRate);
+ Logger.WriteLine(Logger.Level.info, "Dropout: {0}", Dropout);
+ Logger.WriteLine(Logger.Level.info, "Max Iteration: {0}", MaxIteration);
+ Logger.WriteLine(Logger.Level.info, "Hidden layer size: {0}", NumHidden);
+ Logger.WriteLine(Logger.Level.info, "RNN-CRF: {0}", IsCRFTraining);
+ if (SaveStep > 0)
{
- Logger.WriteLine(Logger.Level.info, "Save temporary model after every {0} sentences", m_SaveStep);
+ Logger.WriteLine(Logger.Level.info, "Save temporary model after every {0} sentences", SaveStep);
}
}
public ModelSetting()
{
- m_MaxIteration = 20;
- m_Bptt = 4;
- m_LearningRate = 0.1;
- m_NumHidden = 200;
- m_bCRFTraining = true;
+ MaxIteration = 20;
+ Bptt = 4;
+ LearningRate = 0.1f;
+ NumHidden = 200;
+ IsCRFTraining = true;
}
}
}
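
With the getter/setter pairs collapsed into auto-properties, a configuration is now a plain object initializer over the constructor defaults (MaxIteration = 20, Bptt = 4, LearningRate = 0.1f, NumHidden = 200, IsCRFTraining = true). A usage sketch with illustrative values:

    ModelSetting setting = new ModelSetting
    {
        ModelFile = "ner.model",  // hypothetical path
        ModelType = 1,            // 1 = LSTM-RNN, 0 = simple RNN (see DumpSetting)
        ModelDirection = 1,       // 0 = forward, otherwise bi-directional
        Dropout = 0.1f,
        SaveStep = 100000         // save a temporary model every 100k sentences
    };
    setting.DumpSetting();        // logs the configuration through AdvUtils.Logger
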
diff --git a/RNNSharp/RNN.cs b/RNNSharp/RNN.cs
index 2798ff5..ca06946 100644
--- a/RNNSharp/RNN.cs
+++ b/RNNSharp/RNN.cs
@@ -1,12 +1,12 @@
using System;
using System.Collections.Generic;
-using System.Linq;
-using System.Text;
using System.Threading.Tasks;
-using System.Threading;
using System.IO;
using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public enum MODELTYPE
@@ -42,59 +42,47 @@ public PAIR(T f, K s)
abstract public class RNN
{
- protected double logp, llogp;
- protected double minTknErrRatio;
- protected long counter;
- protected double dropout;
+ public virtual bool IsCRFTraining { get; set; }
+ public virtual string ModelFile { get; set; }
+ public string ModelTempFile { get { return ModelFile + ".tmp"; } }
+ public virtual MODELDIRECTION ModelDirection { get; set; }
+ public virtual float GradientCutoff { get; set; }
+ public virtual float Dropout { get; set; }
+ public virtual float LearningRate { get; set; }
+ public virtual int MaxIter { get; set; }
+ public virtual long SaveStep { get; set; }
+ public virtual int DenseFeatureSize { get; set; }
+ public virtual int L0 { get; set; }
+ public virtual int L1 { get; set; }
+ public virtual int L2 { get; set; }
+
+ public MODELTYPE ModelType { get; set; }
+ public Matrix CRFTagTransWeights { get; set; }
+ public neuron[] OutputLayer { get; set; }
+ public Matrix Hidden2OutputWeight;
+
+ // CRF result output
+ protected Matrix CRFSeqOutput;
+ protected double logp;
+ protected double minTknErrRatio = double.MaxValue;
protected ParallelOptions parallelOption = new ParallelOptions();
- protected double gradient_cutoff;
- protected bool m_bCRFTraining = false;
- protected MODELTYPE m_modeltype;
- protected MODELDIRECTION m_modeldirection;
- protected string m_strModelFile;
-
protected static Random rand = new Random(DateTime.Now.Millisecond);
//multiple processor declaration
- protected int L0;
- public int L1;
- public int L2;
- protected int fea_size;
-
- protected double alpha;
- public double Alpha
- {
- get { return alpha; }
- set { alpha = value; }
- }
-
- protected double[] neuFeatures; //features in input layer
- public neuron[] neuOutput; //neurons in output layer
- public Matrix mat_hidden2output;
-
- protected const int MAX_RNN_HIST = 512;
-
- protected Matrix m_RawOutput;
- protected int counterTokenForLM;
-
- // for Viterbi decoding
- public Matrix mat_CRFTagTransWeights;
-
- /// for sequence training
- public Matrix mat_CRFSeqOutput;
+ protected Vector neuFeatures; //features in input layer
+ protected const int MAX_RNN_HIST = 64;
- public virtual void setTagBigramTransition(List> m)
+ public virtual void setTagBigramTransition(List> m)
{
- if (null == mat_CRFTagTransWeights)
- mat_CRFTagTransWeights = new Matrix(L2, L2);
+ CRFTagTransWeights = new Matrix(L2, L2);
for (int i = 0; i < L2; i++)
for (int j = 0; j < L2; j++)
- mat_CRFTagTransWeights[i][j] = m[i][j];
+ CRFTagTransWeights[i][j] = m[i][j];
}
//Save matrix into file as binary format
- protected void saveMatrixBin(Matrix mat, BinaryWriter fo)
+ protected void saveMatrixBin(Matrix mat, BinaryWriter fo, bool BuildVQ = true)
{
int width = mat.GetWidth();
int height = mat.GetHeight();
@@ -103,12 +91,59 @@ protected void saveMatrixBin(Matrix mat, BinaryWriter fo)
fo.Write(width);
fo.Write(height);
- //Save the data in matrix
- for (int r = 0; r < height; r++)
+ if (BuildVQ == false)
{
- for (int c = 0; c < width; c++)
+ Logger.WriteLine("Saving matrix without VQ...");
+ fo.Write(0); // non-VQ
+
+ //Save the data in matrix
+ for (int r = 0; r < height; r++)
{
- fo.Write((float)(mat[r][c]));
+ for (int c = 0; c < width; c++)
+ {
+ fo.Write((float)mat[r][c]);
+ }
+ }
+ }
+ else
+ {
+ //Build vector quantization matrix
+ int vqSize = 256;
+ VectorQuantization vq = new VectorQuantization();
+ Logger.WriteLine("Saving matrix with VQ {0}...", vqSize);
+
+ int valSize = 0;
+ for (int i = 0; i < height; i++)
+ {
+ for (int j = 0; j < width; j++)
+ {
+ vq.Add(mat[i][j]);
+ valSize++;
+ }
+ }
+
+ if (vqSize > valSize)
+ {
+ vqSize = valSize;
+ }
+
+ double distortion = vq.BuildCodebook(vqSize);
+ Logger.WriteLine("Distortion: {0}, vqSize: {1}", distortion, vqSize);
+
+ //Save VQ codebook into file
+ fo.Write(vqSize);
+ for (int j = 0; j < vqSize; j++)
+ {
+ fo.Write(vq.CodeBook[j]);
+ }
+
+ //Save the data in matrix
+ for (int r = 0; r < height; r++)
+ {
+ for (int c = 0; c < width; c++)
+ {
+ fo.Write((byte)vq.ComputeVQ(mat[r][c]));
+ }
}
}
}
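
For a height x width matrix, the VQ branch trades 4 * height * width bytes of raw floats for height * width bytes of indices plus a codebook of vqSize doubles (at most 8 * 256 = 2 KB), approaching a 4x reduction once height * width is much larger than 2048. The vqSize > valSize guard keeps a tiny matrix from being assigned a codebook larger than its own data.
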
@@ -117,14 +152,37 @@ protected Matrix loadMatrixBin(BinaryReader br)
{
int width = br.ReadInt32();
int height = br.ReadInt32();
+ int vqSize = br.ReadInt32();
+ Logger.WriteLine("Loading matrix. width: {0}, height: {1}, vqSize: {2}", width, height, vqSize);
Matrix m = new Matrix(height, width);
-
- for (int r = 0; r < height; r++)
+ if (vqSize == 0)
+ {
+ for (int r = 0; r < height; r++)
+ {
+ for (int c = 0; c < width; c++)
+ {
+ m[r][c] = br.ReadSingle();
+ }
+ }
+ }
+ else
{
- for (int c = 0; c < width; c++)
+ List codeBook = new List();
+
+ for (int i = 0; i < vqSize; i++)
+ {
+ codeBook.Add(br.ReadDouble());
+ }
+
+
+ for (int r = 0; r < height; r++)
{
- m[r][c] = br.ReadSingle();
+ for (int c = 0; c < width; c++)
+ {
+ int vqIndex = br.ReadByte();
+ m[r][c] = codeBook[vqIndex];
+ }
}
}
@@ -133,10 +191,10 @@ protected Matrix loadMatrixBin(BinaryReader br)
public void setInputLayer(State state, int curState, int numStates, int[] predicted, bool forward = true)
{
- if (predicted != null)
+ if (predicted != null && state.RuntimeFeatures != null)
{
// set runtime feature
- for (int i = 0; i < state.GetNumRuntimeFeature(); i++)
+ for (int i = 0; i < state.RuntimeFeatures.Length; i++)
{
for (int j = 0; j < L2; j++)
{
@@ -144,7 +202,7 @@ public void setInputLayer(State state, int curState, int numStates, int[] predic
state.SetRuntimeFeature(i, j, 0);
}
- int pos = curState + ((forward == true) ? 1 : -1) * state.GetRuntimeFeature(i).OffsetToCurrentState;
+ int pos = curState + ((forward == true) ? 1 : -1) * state.RuntimeFeatures[i].OffsetToCurrentState;
if (pos >= 0 && pos < numStates)
{
state.SetRuntimeFeature(i, predicted[pos], 1);
@@ -152,66 +210,7 @@ public void setInputLayer(State state, int curState, int numStates, int[] predic
}
}
- var dense = state.GetDenseData();
- for (int i = 0; i < dense.GetDimension(); i++)
- {
- neuFeatures[i] = dense[i];
- }
- }
-
- public long m_SaveStep;
- public virtual void SetSaveStep(long savestep)
- {
- m_SaveStep = savestep;
- }
-
- protected int m_MaxIter;
- public int MaxIter { get { return m_MaxIter; } }
- public virtual void SetMaxIter(int _nMaxIter)
- {
- m_MaxIter = _nMaxIter;
- }
-
- public RNN()
- {
- gradient_cutoff = 15;
-
- alpha = 0.1;
- dropout = 0;
- logp = 0;
- llogp = -100000000;
- minTknErrRatio = double.MaxValue;
- L1 = 30;
-
- fea_size = 0;
-
- neuFeatures = null;
- neuOutput = null;
- }
-
- public void SetModelDirection(int dir)
- {
- m_modeldirection = (MODELDIRECTION)dir;
- }
-
-
- public virtual void SetFeatureDimension(int denseFeatueSize, int sparseFeatureSize, int tagSize)
- {
- fea_size = denseFeatueSize;
- L0 = sparseFeatureSize;
- L2 = tagSize;
- }
-
- public virtual void SetCRFTraining(bool b) { m_bCRFTraining = b; }
- public virtual void SetGradientCutoff(double newGradient) { gradient_cutoff = newGradient; }
- public virtual void SetLearningRate(double newAlpha) { alpha = newAlpha; }
- public virtual void SetDropout(double newDropout) { dropout = newDropout; }
- public virtual void SetHiddenLayerSize(int newsize) { L1 = newsize;}
- public virtual void SetModelFile(string strModelFile) { m_strModelFile = strModelFile; }
-
- public bool IsCRFModel()
- {
- return m_bCRFTraining;
+ neuFeatures = state.DenseData;
}
public double exp_10(double num) { return Math.Exp(num * 2.302585093); }
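
(The constant 2.302585093 is ln 10, so exp_10(num) computes 10^num; it is the inverse of the Math.Log10 calls that accumulate logp.)
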
@@ -222,7 +221,7 @@ public bool IsCRFModel()
public virtual Matrix PredictSentence(Sequence pSequence, RunningMode runningMode)
{
- int numStates = pSequence.GetSize();
+ int numStates = pSequence.States.Length;
Matrix m = new Matrix(numStates, L2);
int[] predicted = new int[numStates];
bool isTraining = true;
@@ -238,15 +237,14 @@ public virtual Matrix PredictSentence(Sequence pSequence, RunningMode ru
netReset(isTraining);
for (int curState = 0; curState < numStates; curState++)
{
- State state = pSequence.Get(curState);
+ State state = pSequence.States[curState];
setInputLayer(state, curState, numStates, predicted);
computeNet(state, m[curState], isTraining);
predicted[curState] = GetBestOutputIndex();
if (runningMode != RunningMode.Test)
{
- logp += Math.Log10(neuOutput[state.GetLabel()].cellOutput);
- counter++;
+ logp += Math.Log10(OutputLayer[state.Label].cellOutput);
}
if (runningMode == RunningMode.Train)
@@ -282,70 +280,58 @@ public void SoftmaxLayer(neuron[] layer)
public int GetBestOutputIndex()
{
int imax = 0;
- double dmax = neuOutput[0].cellOutput;
+ double dmax = OutputLayer[0].cellOutput;
for (int k = 1; k < L2; k++)
{
- if (neuOutput[k].cellOutput > dmax)
+ if (OutputLayer[k].cellOutput > dmax)
{
- dmax = neuOutput[k].cellOutput;
+ dmax = OutputLayer[k].cellOutput;
imax = k;
}
}
return imax;
}
- public virtual Matrix learnSentenceForRNNCRF(Sequence pSequence, RunningMode runningMode)
+ public virtual int[] PredictSentenceCRF(Sequence pSequence, RunningMode runningMode)
{
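+ //Run the RNN for per-state output scores, derive CRF marginals via forward-backward, then Viterbi-decode the best tag path; in training mode, also update transition weights and backpropagate errors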
- //Reset the network
- netReset(false);
- int numStates = pSequence.GetSize();
-
- int[] predicted_nn = new int[numStates];
- m_RawOutput = new Matrix(numStates, L2);// new double[numStates][];
- for (int curState = 0; curState < numStates; curState++)
- {
- State state = pSequence.Get(curState);
-
- setInputLayer(state, curState, numStates, predicted_nn);
- computeNet(state, m_RawOutput[curState]); //compute probability distribution
-
- predicted_nn[curState] = GetBestOutputIndex();
- }
+ int numStates = pSequence.States.Length;
- ForwardBackward(numStates, m_RawOutput);
+ Matrix nnOutput = PredictSentence(pSequence, RunningMode.Test);
+ ForwardBackward(numStates, nnOutput);
- //Get the best result
- int[] predicted = new int[numStates];
- for (int i = 0; i < numStates; i++)
+ if (runningMode != RunningMode.Test)
{
- State state = pSequence.Get(i);
- logp += Math.Log10(mat_CRFSeqOutput[i][state.GetLabel()]);
-
- predicted[i] = GetBestZIndex(i);
+ //Get the best result
+ for (int i = 0; i < numStates; i++)
+ {
+ logp += Math.Log10(CRFSeqOutput[i][pSequence.States[i].Label]);
+ }
}
- UpdateBigramTransition(pSequence);
+ int[] predicted = Viterbi(nnOutput, numStates);
- netReset(true);
- for (int curState = 0; curState < numStates; curState++)
+ if (runningMode == RunningMode.Train)
{
- // error propogation
- State state = pSequence.Get(curState);
- setInputLayer(state, curState, numStates, predicted_nn);
- computeNet(state, m_RawOutput[curState]); //compute probability distribution
-
- counter++;
+ UpdateBigramTransition(pSequence);
+ netReset(true);
+ for (int curState = 0; curState < numStates; curState++)
+ {
+ // error propagation
+ State state = pSequence.States[curState];
+ setInputLayer(state, curState, numStates, null);
+ computeNet(state, null); //compute probability distribution
- learnNet(state, curState);
- LearnBackTime(state, numStates, curState);
+ learnNet(state, curState);
+ LearnBackTime(state, numStates, curState);
+ }
}
- return mat_CRFSeqOutput;
+ return predicted;
}
public void UpdateBigramTransition(Sequence seq)
{
- int numStates = seq.GetSize();
+ int numStates = seq.States.Length;
Matrix m_DeltaBigramLM = new Matrix(L2, L2);
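//Delta for transition weights: +1 for each observed gold tag bigram, minus the current weight scaled by the product of adjacent node marginals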
for (int timeat = 1; timeat < numStates; timeat++)
@@ -354,43 +340,25 @@ public void UpdateBigramTransition(Sequence seq)
{
for (int j = 0; j < L2; j++)
{
- m_DeltaBigramLM[i][j] -= (mat_CRFTagTransWeights[i][j] * mat_CRFSeqOutput[timeat][i] * mat_CRFSeqOutput[timeat - 1][j]);
+ m_DeltaBigramLM[i][j] -= (CRFTagTransWeights[i][j] * CRFSeqOutput[timeat][i] * CRFSeqOutput[timeat - 1][j]);
}
}
- int iTagId = seq.Get(timeat).GetLabel();
- int iLastTagId = seq.Get(timeat - 1).GetLabel();
+ int iTagId = seq.States[timeat].Label;
+ int iLastTagId = seq.States[timeat - 1].Label;
m_DeltaBigramLM[iTagId][iLastTagId] += 1;
}
- counterTokenForLM++;
-
//Update tag Bigram LM
for (int b = 0;b < L2;b++)
{
for (int a = 0; a < L2; a++)
{
- mat_CRFTagTransWeights[b][a] += alpha * m_DeltaBigramLM[b][a];
+ CRFTagTransWeights[b][a] += LearningRate * m_DeltaBigramLM[b][a];
}
}
}
- public int GetBestZIndex(int currStatus)
- {
- //Get the output tag
- int imax = 0;
- double dmax = mat_CRFSeqOutput[currStatus][0];
- for (int j = 1; j < L2; j++)
- {
- if (mat_CRFSeqOutput[currStatus][j] > dmax)
- {
- dmax = mat_CRFSeqOutput[currStatus][j];
- imax = j;
- }
- }
- return imax;
- }
-
public void ForwardBackward(int numStates, Matrix m_RawOutput)
{
//forward
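//(log-space recursion: alphaSet[i][j] = logsumexp over k of (trans[j][k] + alphaSet[i-1][k]), plus the emission score at position i)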
@@ -405,7 +373,7 @@ public void ForwardBackward(int numStates, Matrix m_RawOutput)
{
for (int k = 0; k < L2; k++)
{
- double fbgm = mat_CRFTagTransWeights[j][k];
+ double fbgm = CRFTagTransWeights[j][k];
double finit = alphaSet[i - 1][k];
double ftmp = fbgm + finit;
@@ -429,7 +397,7 @@ public void ForwardBackward(int numStates, Matrix m_RawOutput)
{
for (int k = 0; k < L2; k++)
{
- double fbgm = mat_CRFTagTransWeights[k][j];
+ double fbgm = CRFTagTransWeights[k][j];
double finit = betaSet[i + 1][k];
double ftmp = fbgm + finit;
@@ -443,7 +411,6 @@ public void ForwardBackward(int numStates, Matrix m_RawOutput)
}
//Z_: the log partition function (normalizer) over all tag sequences
-
double Z_ = 0.0;
for (int i = 0; i < L2; i++)
{
@@ -452,14 +419,15 @@ public void ForwardBackward(int numStates, Matrix m_RawOutput)
}
//Calculate the output probability of each node
- mat_CRFSeqOutput = new Matrix(numStates, L2);
+ CRFSeqOutput = new Matrix(numStates, L2);
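+ //Node marginal: exp(alpha + beta - emission - Z_); the emission is subtracted once because both alpha and beta include it at position i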
for (int i = 0; i < numStates; i++)
{
for (int j = 0; j < L2; j++)
{
- mat_CRFSeqOutput[i][j] = Math.Exp(alphaSet[i][j] + betaSet[i][j] - m_RawOutput[i][j] - Z_);
+ CRFSeqOutput[i][j] = Math.Exp(alphaSet[i][j] + betaSet[i][j] - m_RawOutput[i][j] - Z_);
}
}
+
}
@@ -471,9 +439,9 @@ private double random(double min, double max)
return rand.NextDouble() * (max - min) + min;
}
- public double RandInitWeight()
+ public float RandInitWeight()
{
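//Sum of three uniform(-0.1, 0.1) samples: a cheap bell-shaped initializer with zero mean and range (-0.3, 0.3)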
- return random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1);
+ return (float)(random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1));
}
@@ -483,43 +451,35 @@ public double RandInitWeight()
public virtual double TrainNet(DataSet trainingSet, int iter)
{
DateTime start = DateTime.Now;
- int[] predicted;
- Logger.WriteLine(Logger.Level.info, "[TRACE] Iter " + iter + " begins with learning rate alpha = " + alpha + " ...");
+ Logger.WriteLine(Logger.Level.info, "[TRACE] Iter " + iter + " begins with learning rate alpha = " + LearningRate + " ...");
//Initialize variables
- counter = 0;
logp = 0;
- counterTokenForLM = 0;
//Shuffle the training corpus
trainingSet.Shuffle();
- int numSequence = trainingSet.GetSize();
+ int numSequence = trainingSet.SequenceList.Count;
+ int wordCnt = 0;
int tknErrCnt = 0;
int sentErrCnt = 0;
Logger.WriteLine(Logger.Level.info, "[TRACE] Progress = 0/" + numSequence / 1000.0 + "K\r");
for (int curSequence = 0; curSequence < numSequence; curSequence++)
{
- Sequence pSequence = trainingSet.Get(curSequence);
- int numStates = pSequence.GetSize();
-
- if (numStates < 3)
- continue;
+ Sequence pSequence = trainingSet.SequenceList[curSequence];
+ int numStates = pSequence.States.Length;
+ wordCnt += numStates;
- Matrix m;
- if (m_bCRFTraining == true)
+ int[] predicted;
+ if (IsCRFTraining == true)
{
- m = learnSentenceForRNNCRF(pSequence, RunningMode.Train);
+ predicted = PredictSentenceCRF(pSequence, RunningMode.Train);
}
else
{
+ Matrix m;
m = PredictSentence(pSequence, RunningMode.Train);
- }
-
- predicted = new int[pSequence.GetSize()];
- for (int i = 0; i < pSequence.GetSize(); i++)
- {
- predicted[i] = MathUtil.GetMaxProbIndex(m[i]);
+ predicted = GetBestResult(m);
}
int newTknErrCnt = GetErrorTokenNum(pSequence, predicted);
@@ -532,24 +492,24 @@ public virtual double TrainNet(DataSet trainingSet, int iter)
if ((curSequence + 1) % 1000 == 0)
{
Logger.WriteLine(Logger.Level.info, "[TRACE] Progress = {0} ", (curSequence + 1) / 1000 + "K/" + numSequence / 1000.0 + "K");
- Logger.WriteLine(Logger.Level.info, " train cross-entropy = {0} ", -logp / Math.Log10(2.0) / counter);
- Logger.WriteLine(Logger.Level.info, " Error token ratio = {0}%", (double)tknErrCnt / (double)counter * 100);
- Logger.WriteLine(Logger.Level.info, " Error sentence ratio = {0}%", (double)sentErrCnt / (double)curSequence * 100);
+ Logger.WriteLine(Logger.Level.info, " train cross-entropy = {0} ", -logp / Math.Log10(2.0) / wordCnt);
+ Logger.WriteLine(Logger.Level.info, " Error token ratio = {0}%", (double)tknErrCnt / (double)wordCnt * 100.0);
+ Logger.WriteLine(Logger.Level.info, " Error sentence ratio = {0}%", (double)sentErrCnt / (double)curSequence * 100.0);
}
- if (m_SaveStep > 0 && (curSequence + 1) % m_SaveStep == 0)
+ if (SaveStep > 0 && (curSequence + 1) % SaveStep == 0)
{
//After processing every SaveStep sentences, save the current model into a temporary file
Logger.WriteLine(Logger.Level.info, "Saving temporary model into file...");
- saveNetBin(m_strModelFile + ".tmp");
+ saveNetBin(ModelTempFile);
}
}
DateTime now = DateTime.Now;
TimeSpan duration = now.Subtract(start);
- double entropy = -logp / Math.Log10(2.0) / counter;
- double ppl = exp_10(-logp / counter);
+ double entropy = -logp / Math.Log10(2.0) / wordCnt;
+ double ppl = exp_10(-logp / wordCnt);
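+ //entropy is in bits per token; perplexity = 10^(-average log10 probability per token)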
Logger.WriteLine(Logger.Level.info, "[TRACE] Iter " + iter + " completed");
Logger.WriteLine(Logger.Level.info, "[TRACE] Sentences = " + numSequence + ", time escape = " + duration + "s, speed = " + numSequence / duration.TotalSeconds);
Logger.WriteLine(Logger.Level.info, "[TRACE] In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl);
@@ -572,15 +532,17 @@ public static void CheckModelFileType(string filename, out MODELTYPE modelType,
modelType = (MODELTYPE)br.ReadInt32();
modelDir = (MODELDIRECTION)br.ReadInt32();
}
+
+ Logger.WriteLine("Get model type {0} and direction {1}", modelType, modelDir);
}
protected double NormalizeErr(double err)
{
- if (err > gradient_cutoff)
- err = gradient_cutoff;
- if (err < -gradient_cutoff)
- err = -gradient_cutoff;
+ if (err > GradientCutoff)
+ err = GradientCutoff;
+ if (err < -GradientCutoff)
+ err = -GradientCutoff;
return err;
}
@@ -618,13 +580,12 @@ public void matrixXvectorADD(neuron[] dest, neuron[] srcvec, Matrix srcm
}
}
- public int[] DecodeNN(Sequence seq)
+
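+ //Returns the argmax tag index for each row of the output matrix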
+ public int[] GetBestResult(Matrix ys)
{
- Matrix ys = PredictSentence(seq, RunningMode.Test);
- int n = seq.GetSize();
- int[] output = new int[n];
+ int[] output = new int[ys.GetHeight()];
- for (int i = 0; i < n; i++)
+ for (int i = 0; i < ys.GetHeight(); i++)
{
output[i] = MathUtil.GetMaxProbIndex(ys[i]);
}
@@ -632,6 +593,12 @@ public int[] DecodeNN(Sequence seq)
return output;
}
+ public int[] DecodeNN(Sequence seq)
+ {
+ Matrix ys = PredictSentence(seq, RunningMode.Test);
+ return GetBestResult(ys);
+ }
+
public int[][] DecodeNBestCRF(Sequence seq, int N)
{
@@ -639,9 +606,9 @@ public int[][] DecodeNBestCRF(Sequence seq, int N)
//ys contains the output of RNN for each word
Matrix ys = PredictSentence(seq, RunningMode.Test);
- int n = seq.GetSize();
+ int n = seq.States.Length;
int K = L2;
- Matrix STP = mat_CRFTagTransWeights;
+ Matrix STP = CRFTagTransWeights;
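//N-best Viterbi: keep the N highest-scoring partial paths for each (position, tag); vPath stores (previous tag, previous rank) back-pointers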
PAIR<int, int>[, ,] vPath = new PAIR<int, int>[n, K, N];
int DUMP_LABEL = -1;
double[,] vPreAlpha = new double[K, N];
@@ -726,41 +693,33 @@ public int[][] DecodeNBestCRF(Sequence seq, int N)
return vTagOutput;
}
- public int[] DecodeCRF(Sequence seq)
+ public int[] Viterbi(Matrix ys, int seqLen)
{
- //ys contains the output of RNN for each word
- Matrix ys = PredictSentence(seq, RunningMode.Test);
-
- int n = seq.GetSize();
- int K = L2;
- Matrix STP = mat_CRFTagTransWeights;
- int[,] vPath = new int[n, K];
+ int[,] vPath = new int[seqLen, L2];
- double[] vPreAlpha = new double[K];
- double[] vAlpha = new double[K];
+ double[] vPreAlpha = new double[L2];
+ double[] vAlpha = new double[L2];
int nStartTagIndex = 0;
- double MIN_VALUE = double.MinValue;
//Viterbi algorithm: dynamic programming for the single best tag sequence
- for (int i = 0; i < K; i++)
+ for (int i = 0; i < L2; i++)
{
vPreAlpha[i] = ys[0][i];
if (i != nStartTagIndex)
- vPreAlpha[i] += MIN_VALUE;
+ vPreAlpha[i] += double.MinValue;
vPath[0, i] = nStartTagIndex;
}
- for (int t = 1; t < n; t++)
+ for (int t = 1; t < seqLen; t++)
{
- for (int j = 0; j < K; j++)
+ for (int j = 0; j < L2; j++)
{
vPath[t, j] = 0;
- vAlpha[j] = MIN_VALUE;
+ vAlpha[j] = double.MinValue;
- for (int i = 0; i < K; i++)
+ for (int i = 0; i < L2; i++)
{
- double score = vPreAlpha[i] + STP[j][i] + ys[t][j];
-
+ double score = vPreAlpha[i] + CRFTagTransWeights[j][i] + ys[t][j];
if (score > vAlpha[j])
{
vAlpha[j] = score;
@@ -769,14 +728,14 @@ public int[] DecodeCRF(Sequence seq)
}
}
vPreAlpha = vAlpha;
- vAlpha = new double[K];
+ vAlpha = new double[L2];
}
//backtrace to get the best result path
- int[] tagOutputs = new int[n];
- tagOutputs[n - 1] = nStartTagIndex;
- int nNextTag = tagOutputs[n - 1];
- for (int t = n - 2; t >= 0; t--)
+ int[] tagOutputs = new int[seqLen];
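+ //Force the final state to the sentence-boundary tag (index 0), which every sequence is padded with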
+ tagOutputs[seqLen - 1] = nStartTagIndex;
+ int nNextTag = tagOutputs[seqLen - 1];
+ for (int t = seqLen - 2; t >= 0; t--)
{
tagOutputs[t] = vPath[t + 1, nNextTag];
nNextTag = tagOutputs[t];
@@ -785,14 +744,20 @@ public int[] DecodeCRF(Sequence seq)
return tagOutputs;
}
+ public int[] DecodeCRF(Sequence seq)
+ {
+ //ys contains the output of RNN for each word
+ Matrix ys = PredictSentence(seq, RunningMode.Test);
+ return Viterbi(ys, seq.States.Length);
+ }
+
private int GetErrorTokenNum(Sequence seq, int[] predicted)
{
int tknErrCnt = 0;
- int numStates = seq.GetSize();
+ int numStates = seq.States.Length;
for (int curState = 0; curState < numStates; curState++)
{
- State state = seq.Get(curState);
- if (predicted[curState] != state.GetLabel())
+ if (predicted[curState] != seq.States[curState].Label)
{
tknErrCnt++;
}
@@ -803,61 +768,52 @@ private int GetErrorTokenNum(Sequence seq, int[] predicted)
public void CalculateOutputLayerError(State state, int timeat)
{
- if (m_bCRFTraining == true)
+ if (IsCRFTraining == true)
{
//For RNN-CRF, use the joint probability of output layer nodes and transitions between contiguous nodes
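//(cross-entropy gradient at the output: er[c] = 1{c == gold label} - P(c); here P comes from the CRF sequence marginals)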
for (int c = 0; c < L2; c++)
{
- neuOutput[c].er = -mat_CRFSeqOutput[timeat][c];
+ OutputLayer[c].er = -CRFSeqOutput[timeat][c];
}
- neuOutput[state.GetLabel()].er = 1 - mat_CRFSeqOutput[timeat][state.GetLabel()];
+ OutputLayer[state.Label].er = 1 - CRFSeqOutput[timeat][state.Label];
}
else
{
//For standard RNN
for (int c = 0; c < L2; c++)
{
- neuOutput[c].er = -neuOutput[c].cellOutput;
+ OutputLayer[c].er = -OutputLayer[c].cellOutput;
}
- neuOutput[state.GetLabel()].er = 1 - neuOutput[state.GetLabel()].cellOutput;
+ OutputLayer[state.Label].er = 1 - OutputLayer[state.Label].cellOutput;
}
}
-
- public virtual bool ValidateNet(DataSet validationSet)
+ public virtual bool ValidateNet(DataSet validationSet, int iter)
{
Logger.WriteLine(Logger.Level.info, "[TRACE] Start validation ...");
int wordcn = 0;
- int[] predicted;
int tknErrCnt = 0;
int sentErrCnt = 0;
//Initialize variables
- counter = 0;
logp = 0;
- counterTokenForLM = 0;
-
- int numSequence = validationSet.GetSize();
+ int numSequence = validationSet.SequenceList.Count;
for (int curSequence = 0; curSequence < numSequence; curSequence++)
{
- Sequence pSequence = validationSet.Get(curSequence);
- wordcn += pSequence.GetSize();
+ Sequence pSequence = validationSet.SequenceList[curSequence];
+ wordcn += pSequence.States.Length;
- Matrix m;
- if (m_bCRFTraining == true)
+ int[] predicted;
+ if (IsCRFTraining == true)
{
- m = learnSentenceForRNNCRF(pSequence, RunningMode.Validate);
+ predicted = PredictSentenceCRF(pSequence, RunningMode.Validate);
}
else
{
+ Matrix m;
m = PredictSentence(pSequence, RunningMode.Validate);
- }
-
- predicted = new int[pSequence.GetSize()];
- for (int i = 0; i < pSequence.GetSize(); i++)
- {
- predicted[i] = MathUtil.GetMaxProbIndex(m[i]);
+ predicted = GetBestResult(m);
}
int newTknErrCnt = GetErrorTokenNum(pSequence, predicted);
@@ -868,10 +824,10 @@ public virtual bool ValidateNet(DataSet validationSet)
}
}
- double entropy = -logp / Math.Log10(2.0) / counter;
- double ppl = exp_10(-logp / counter);
- double tknErrRatio = (double)tknErrCnt / (double)wordcn * 100;
- double sentErrRatio = (double)sentErrCnt / (double)numSequence * 100;
+ double entropy = -logp / Math.Log10(2.0) / wordcn;
+ double ppl = exp_10(-logp / wordcn);
+ double tknErrRatio = (double)tknErrCnt / (double)wordcn * 100.0;
+ double sentErrRatio = (double)sentErrCnt / (double)numSequence * 100.0;
Logger.WriteLine(Logger.Level.info, "[TRACE] In validation: error token ratio = {0}% error sentence ratio = {1}%", tknErrRatio, sentErrRatio);
Logger.WriteLine(Logger.Level.info, "[TRACE] In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl);
diff --git a/RNNSharp/RNNDecoder.cs b/RNNSharp/RNNDecoder.cs
index 96ae4a7..d9021e1 100644
--- a/RNNSharp/RNNDecoder.cs
+++ b/RNNSharp/RNNDecoder.cs
@@ -1,11 +1,9 @@
using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using System.IO;
using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class RNNDecoder
@@ -47,34 +45,22 @@ public RNNDecoder(string strModelFileName, Featurizer featurizer)
}
m_Rnn.loadNetBin(strModelFileName);
- Logger.WriteLine(Logger.Level.info, "CRF Model: {0}", m_Rnn.IsCRFModel());
+ Logger.WriteLine(Logger.Level.info, "CRF Model: {0}", m_Rnn.IsCRFTraining);
m_Featurizer = featurizer;
}
public int[][] ProcessNBest(Sentence sent, int nbest)
{
- if (m_Rnn.IsCRFModel() == false)
+ if (m_Rnn.IsCRFTraining == false)
{
- return null;
+ throw new ArgumentException("N-best result is only for RNN-CRF model.");
}
Sequence seq = m_Featurizer.ExtractFeatures(sent);
int[][] predicted = m_Rnn.DecodeNBestCRF(seq, nbest);
-
- //Remove the beginning and end character from result
- int[][] results = new int[nbest][];
-
- for (int k = 0; k < nbest; k++)
- {
- results[k] = new int[predicted[k].Length - 2];
- for (int i = 1; i < predicted[k].Length - 1; i++)
- {
- results[k][i - 1] = predicted[k][i];
- }
- }
- return results;
+ return predicted;
}
@@ -82,7 +68,7 @@ public int[] Process(Sentence sent)
{
Sequence seq = m_Featurizer.ExtractFeatures(sent);
int[] predicted;
- if (m_Rnn.IsCRFModel() == true)
+ if (m_Rnn.IsCRFTraining == true)
{
predicted = m_Rnn.DecodeCRF(seq);
}
@@ -91,14 +77,7 @@ public int[] Process(Sentence sent)
predicted = m_Rnn.DecodeNN(seq);
}
- //Remove the beginning and end character from result
- int[] results = new int[predicted.Length - 2];
- for (int i = 1; i < predicted.Length - 1; i++)
- {
- results[i - 1] = predicted[i];
- }
-
- return results;
+ return predicted;
}
}
}
diff --git a/RNNSharp/RNNEncoder.cs b/RNNSharp/RNNEncoder.cs
index 6e21131..3f8c4ac 100644
--- a/RNNSharp/RNNEncoder.cs
+++ b/RNNSharp/RNNEncoder.cs
@@ -1,51 +1,32 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using System.IO;
-using AdvUtils;
+using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class RNNEncoder
{
ModelSetting m_modelSetting;
- DataSet m_TrainingSet;
- DataSet m_ValidationSet;
- List<List<double>> m_LabelBigramTransition;
-
- public void SetLabelBigramTransition(List<List<double>> m)
- {
- m_LabelBigramTransition = m;
- }
+ public DataSet TrainingSet { get; set; }
+ public DataSet ValidationSet { get; set; }
public RNNEncoder(ModelSetting modelSetting)
{
m_modelSetting = modelSetting;
}
-
- public void SetTrainingSet(DataSet train)
- {
- m_TrainingSet = train;
- }
- public void SetValidationSet(DataSet validation)
- {
- m_ValidationSet = validation;
- }
-
public void Train()
{
RNN rnn;
- if (m_modelSetting.GetModelDirection() == 0)
+ if (m_modelSetting.ModelDirection == 0)
{
- if (m_modelSetting.GetModelType() == 0)
+ if (m_modelSetting.ModelType == 0)
{
SimpleRNN sRNN = new SimpleRNN();
- sRNN.setBPTT(m_modelSetting.GetBptt() + 1);
+ sRNN.setBPTT(m_modelSetting.Bptt + 1);
sRNN.setBPTTBlock(10);
rnn = sRNN;
@@ -57,15 +38,15 @@ public void Train()
}
else
{
- if (m_modelSetting.GetModelType() == 0)
+ if (m_modelSetting.ModelType == 0)
{
SimpleRNN sForwardRNN = new SimpleRNN();
SimpleRNN sBackwardRNN = new SimpleRNN();
- sForwardRNN.setBPTT(m_modelSetting.GetBptt() + 1);
+ sForwardRNN.setBPTT(m_modelSetting.Bptt + 1);
sForwardRNN.setBPTTBlock(10);
- sBackwardRNN.setBPTT(m_modelSetting.GetBptt() + 1);
+ sBackwardRNN.setBPTT(m_modelSetting.Bptt + 1);
sBackwardRNN.setBPTTBlock(10);
rnn = new BiRNN(sForwardRNN, sBackwardRNN);
@@ -76,37 +57,33 @@ public void Train()
}
}
- //Set model type
- rnn.SetModelDirection(m_modelSetting.GetModelDirection());
+ rnn.ModelDirection = (MODELDIRECTION)m_modelSetting.ModelDirection;
+ rnn.ModelFile = m_modelSetting.ModelFile;
+ rnn.SaveStep = m_modelSetting.SaveStep;
+ rnn.MaxIter = m_modelSetting.MaxIteration;
+ rnn.IsCRFTraining = m_modelSetting.IsCRFTraining;
+ rnn.LearningRate = m_modelSetting.LearningRate;
+ rnn.GradientCutoff = 15.0f;
+ rnn.Dropout = m_modelSetting.Dropout;
+ rnn.L1 = m_modelSetting.NumHidden;
- //Set feature dimension
- rnn.SetFeatureDimension(m_TrainingSet.GetDenseDimension(),
- m_TrainingSet.GetSparseDimension(),
- m_TrainingSet.GetTagSize());
-
-
- rnn.SetModelFile(m_modelSetting.GetModelFile());
- rnn.SetSaveStep(m_modelSetting.GetSaveStep());
- rnn.SetMaxIter(m_modelSetting.GetMaxIteration());
- rnn.SetCRFTraining(m_modelSetting.IsCRFTraining());
- rnn.SetLearningRate(m_modelSetting.GetLearningRate());
- rnn.SetGradientCutoff(15.0);
- rnn.SetDropout(m_modelSetting.GetDropout());
- rnn.SetHiddenLayerSize(m_modelSetting.GetNumHidden());
+ rnn.DenseFeatureSize = TrainingSet.DenseFeatureSize();
+ rnn.L0 = TrainingSet.GetSparseDimension();
+ rnn.L2 = TrainingSet.TagSize;
rnn.initMem();
//Create tag-bigram transition probability matrix only for sequence RNN mode
- if (m_modelSetting.IsCRFTraining() == true)
+ if (m_modelSetting.IsCRFTraining)
{
- rnn.setTagBigramTransition(m_LabelBigramTransition);
+ rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition);
}
Logger.WriteLine(Logger.Level.info, "");
Logger.WriteLine(Logger.Level.info, "[TRACE] Iterative training begins ...");
double lastPPL = double.MaxValue;
- double lastAlpha = rnn.Alpha;
+ double lastAlpha = rnn.LearningRate;
int iter = 0;
while (true)
{
@@ -117,37 +94,31 @@ public void Train()
}
//Start to train model
- double ppl = rnn.TrainNet(m_TrainingSet, iter);
+ double ppl = rnn.TrainNet(TrainingSet, iter);
//Validate the model on the validation corpus
bool betterValidateNet = false;
- if (rnn.ValidateNet(m_ValidationSet) == true)
+ if (rnn.ValidateNet(ValidationSet, iter) == true)
{
//If current model is better than before, save it into file
- Logger.WriteLine(Logger.Level.info, "Saving better model into file {0}...", m_modelSetting.GetModelFile());
- rnn.saveNetBin(m_modelSetting.GetModelFile());
+ Logger.WriteLine(Logger.Level.info, "Saving better model into file {0}...", m_modelSetting.ModelFile);
+ rnn.saveNetBin(m_modelSetting.ModelFile);
betterValidateNet = true;
}
- //else
- //{
- // Logger.WriteLine(Logger.Level.info, "Loading previous best model from file {0}...", m_modelSetting.GetModelFile());
- // rnn.loadNetBin(m_modelSetting.GetModelFile());
- //}
-
- if (ppl >= lastPPL && lastAlpha != rnn.Alpha)
+ if (ppl >= lastPPL && lastAlpha != rnn.LearningRate)
{
//Even after reducing the alpha value, we still cannot get a better result.
Logger.WriteLine(Logger.Level.info, "Current perplexity({0}) is larger than the previous one({1}). End training early.", ppl, lastPPL);
- Logger.WriteLine(Logger.Level.info, "Current alpha: {0}, the previous alpha: {1}", rnn.Alpha, lastAlpha);
+ Logger.WriteLine(Logger.Level.info, "Current alpha: {0}, the previous alpha: {1}", rnn.LearningRate, lastAlpha);
break;
}
- lastAlpha = rnn.Alpha;
+ lastAlpha = rnn.LearningRate;
if (betterValidateNet == false)
{
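//Halve the learning rate when the validation result did not improve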
- rnn.Alpha = rnn.Alpha / 2.0;
+ rnn.LearningRate = rnn.LearningRate / 2.0f;
}
lastPPL = ppl;
diff --git a/RNNSharp/Sentence.cs b/RNNSharp/Sentence.cs
index 103361d..956aa1b 100644
--- a/RNNSharp/Sentence.cs
+++ b/RNNSharp/Sentence.cs
@@ -1,65 +1,81 @@
using System;
using System.Collections.Generic;
-using System.Linq;
using System.Text;
-using System.Threading.Tasks;
using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class Sentence
{
- private List<string[]> m_features;
+ public List<string[]> TokensList { get; }
- public List<string[]> GetFeatureSet()
+ public Sentence(List<string[]> tokensList)
{
- return m_features;
- }
+ int dim = 0;
+ TokensList = new List<string[]>();
- public int GetTokenSize()
- {
- return m_features.Count;
- }
+ if (tokensList.Count == 0)
+ {
+ return;
+ }
- public void DumpFeatures()
- {
- foreach (string[] features in m_features)
+ //Check that the feature dimension is consistent within the sentence
+ foreach (string[] tokens in tokensList)
{
- StringBuilder sb = new StringBuilder();
- foreach (string strFeature in features)
+ if (dim > 0 && tokens.Length != dim)
{
- sb.Append(strFeature);
- sb.Append('\t');
+ string err = ReportInvalidateTokens(tokensList, dim, tokens);
+ throw new FormatException(String.Format("Invalid record: {0}", err));
}
- Logger.WriteLine(Logger.Level.info, sb.ToString().Trim());
+ dim = tokens.Length;
+ TokensList.Add(tokens);
}
- }
-
- public virtual void SetFeatures(List<string> tokenList)
- {
- m_features = new List<string[]>();
- //Add the begining term for current record
- string[] curfeature = new string[2];
- curfeature[0] = "";
- curfeature[1] = "O";
- m_features.Add(curfeature);
+ //Add begin/end-of-sentence tokens into the feature set
+ string[] beginFeatures = new string[dim];
+ string[] endFeatures = new string[dim];
- foreach (string s in tokenList)
+ for (int i = 0; i < dim - 1; i++)
{
- string[] tokens = s.Split('\t');
- m_features.Add(tokens);
+ beginFeatures[i] = "";
+ endFeatures[i] = "";
}
- //Add the end term of current record
- curfeature = new string[2];
- curfeature[0] = "";
- curfeature[1] = "O";
- m_features.Add(curfeature);
+ beginFeatures[dim - 1] = TagSet.DefaultTag;
+ endFeatures[dim - 1] = TagSet.DefaultTag;
+
+ TokensList.Insert(0, beginFeatures);
+ TokensList.Add(endFeatures);
}
+ public override string ToString()
+ {
+ StringBuilder sb = new StringBuilder();
+ foreach (string[] tokens in TokensList)
+ {
+ foreach (string token in tokens)
+ {
+ sb.Append(token);
+ sb.Append('\t');
+ }
+ sb.AppendLine();
+ }
+
+ return sb.ToString();
+ }
+ private string ReportInvalidateTokens(List<string[]> tokenList, int dim, string[] badTokens)
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.AppendLine(String.Format("Inconsistent feature dimension in the record.It's {0}, but it should be {1}", badTokens.Length, dim));
+ sb.AppendLine(ToString());
+ Logger.WriteLine(Logger.Level.err, sb.ToString());
+ return sb.ToString();
+ }
}
}
diff --git a/RNNSharp/Sequence.cs b/RNNSharp/Sequence.cs
index f80e2ce..d6f8d76 100644
--- a/RNNSharp/Sequence.cs
+++ b/RNNSharp/Sequence.cs
@@ -1,71 +1,62 @@
using System;
using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class Sequence
{
- State[] m_States;
- int m_NumStates;
-
- public int GetSize() { return m_NumStates; }
- public State Get(int i) { return m_States[i]; }
-
+ public State[] States { get; }
public int GetDenseDimension()
{
- if (0 == m_NumStates) return 0;
- else return m_States[0].GetDenseDimension();
+ if (0 == States.Length || States[0].DenseData == null)
+ {
+ return 0;
+ }
+ else
+ {
+ return States[0].DenseData.GetDimension();
+ }
}
public int GetSparseDimension()
{
- if (0 == m_NumStates) return 0;
- else return m_States[0].GetSparseDimension();
+ if (0 == States.Length) return 0;
+ else return States[0].SparseData.GetDimension();
}
- public bool SetLabel(Sentence sent, TagSet tagSet)
+ public void SetLabel(Sentence sent, TagSet tagSet)
{
- List<string[]> features = sent.GetFeatureSet();
- if (features.Count != m_States.Length)
+ List<string[]> tokensList = sent.TokensList;
+ if (tokensList.Count != States.Length)
{
- return false;
+ throw new DataMisalignedException(String.Format("Error: Inconsistent token({0}) and state({1}) size. Tokens list: {2}",
+ tokensList.Count, States.Length, sent.ToString()));
}
- for (int i = 0; i < features.Count; i++)
+ for (int i = 0; i < tokensList.Count; i++)
{
- string strTagName = features[i][features[i].Length - 1];
+ string strTagName = tokensList[i][tokensList[i].Length - 1];
int tagId = tagSet.GetIndex(strTagName);
if (tagId < 0)
{
- Logger.WriteLine(Logger.Level.info, "Error: tag {0} is unknown.", strTagName);
- return false;
+ throw new DataMisalignedException(String.Format("Error: tag {0} is unknown. Tokens list: {1}",
+ strTagName, sent.ToString()));
}
- m_States[i].SetLabel(tagId);
+ States[i].Label = tagId;
}
-
- return true;
}
- public void SetSize(int numStates)
+ public Sequence(int numStates)
{
- if (m_NumStates != numStates)
+ States = new State[numStates];
+ for (int i = 0; i < numStates; i++)
{
- m_NumStates = numStates;
- m_States = null;
- if (m_NumStates > 0)
- {
- m_States = new State[m_NumStates];
- for (int i = 0; i < m_NumStates; i++)
- {
- m_States[i] = new State();
- }
- }
+ States[i] = new State();
}
}
diff --git a/RNNSharp/SimpleRNN.cs b/RNNSharp/SimpleRNN.cs
index 0ee0669..621c5ce 100644
--- a/RNNSharp/SimpleRNN.cs
+++ b/RNNSharp/SimpleRNN.cs
@@ -1,11 +1,11 @@
using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
using System.Threading.Tasks;
using System.IO;
using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class SimpleRNN : RNN
@@ -29,10 +29,9 @@ public class SimpleRNN : RNN
public SimpleRNN()
{
- m_modeltype = MODELTYPE.SIMPLE;
- gradient_cutoff = 15;
- dropout = 0;
- llogp = -100000000;
+ ModelType = MODELTYPE.SIMPLE;
+ GradientCutoff = 15;
+ Dropout = 0;
L1 = 30;
bptt = 5;
@@ -41,12 +40,12 @@ public SimpleRNN()
bptt_fea = null;
- fea_size = 0;
+ DenseFeatureSize = 0;
neuLastHidden = null;
neuFeatures = null;
neuHidden = null;
- neuOutput = null;
+ OutputLayer = null;
}
public void setBPTT(int newval) { bptt = newval; }
@@ -67,18 +66,18 @@ public override void initWeights()
for (b = 0; b < L1; b++)
{
- for (a = 0; a < fea_size; a++)
+ for (a = 0; a < DenseFeatureSize; a++)
{
mat_feature2hidden[b][a] = RandInitWeight();
}
}
- for (b = 0; b < mat_hidden2output.GetHeight(); b++)
+ for (b = 0; b < Hidden2OutputWeight.GetHeight(); b++)
{
for (a = 0; a < L1; a++)
{
- mat_hidden2output[b][a] = RandInitWeight();
+ Hidden2OutputWeight[b][a] = RandInitWeight();
}
}
@@ -111,7 +110,7 @@ public void computeHiddenActivity(bool isTrain)
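//At test time, scale hidden activations by (1 - Dropout) to compensate for units dropped during training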
if (isTrain == false)
{
- neuHidden[a].cellOutput = neuHidden[a].cellOutput * (1.0 - dropout);
+ neuHidden[a].cellOutput = neuHidden[a].cellOutput * (1.0 - Dropout);
}
if (neuHidden[a].cellOutput > 50) neuHidden[a].cellOutput = 50; //for numerical stability
@@ -131,7 +130,7 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
matrixXvectorADD(neuHidden, neuLastHidden, mat_hiddenBpttWeight, 0, L1, 0, L1, 0);
//Apply feature values on hidden layer
- var sparse = state.GetSparseData();
+ var sparse = state.SparseData;
int n = sparse.GetNumberOfEntries();
Parallel.For(0, L1, parallelOption, b =>
{
@@ -146,9 +145,9 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
//Dense features:
//fea(t) -> hidden(t)
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
- for (int j = 0; j < fea_size; j++)
+ for (int j = 0; j < DenseFeatureSize; j++)
{
neuHidden[b].cellOutput += neuFeatures[j] * mat_feature2hidden[b][j];
}
@@ -159,21 +158,19 @@ public override void computeNet(State state, double[] doutput, bool isTrain = tr
computeHiddenActivity(isTrain);
//Calculate output layer
- matrixXvectorADD(neuOutput, neuHidden, mat_hidden2output, 0, L2, 0, L1, 0);
+ matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, 0, L2, 0, L1, 0);
if (doutput != null)
{
for (int i = 0; i < L2; i++)
{
- doutput[i] = neuOutput[i].cellOutput;
+ doutput[i] = OutputLayer[i].cellOutput;
}
}
//Activation 2: softmax over the output layer
- SoftmaxLayer(neuOutput);
+ SoftmaxLayer(OutputLayer);
}
-
-
public override void learnNet(State state, int timeat, bool biRNN = false)
{
if (biRNN == false)
@@ -182,7 +179,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
}
//Propagate output-layer error back to the hidden layer
- matrixXvectorADD(neuHidden, neuOutput, mat_hidden2output, 0, L1, 0, L2, 1);
+ matrixXvectorADD(neuHidden, OutputLayer, Hidden2OutputWeight, 0, L1, 0, L2, 1);
//Apply drop out on error in hidden layer
for (int i = 0; i < L1; i++)
@@ -198,7 +195,7 @@ public override void learnNet(State state, int timeat, bool biRNN = false)
{
for (int c = 0; c < L2; c++)
{
- mat_hidden2output[c][a] += alpha * neuOutput[c].er * neuHidden[a].cellOutput;
+ Hidden2OutputWeight[c][a] += LearningRate * OutputLayer[c].er * neuHidden[a].cellOutput;
}
});
}
@@ -217,11 +214,11 @@ void learnBptt(State state)
neuHidden[a].er *= neuHidden[a].cellOutput * (1 - neuHidden[a].cellOutput);
//Accumulate dense feature (feature->hidden) weight gradients
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
- for (int i = 0; i < fea_size; i++)
+ for (int i = 0; i < DenseFeatureSize; i++)
{
- mat_bptt_synf[a][i] += neuHidden[a].er * bptt_fea[i + step * fea_size];
+ mat_bptt_synf[a][i] += neuHidden[a].er * bptt_fea[i + step * DenseFeatureSize];
}
}
@@ -269,17 +266,17 @@ void learnBptt(State state)
//Update recurrent hidden-to-hidden (BPTT) weights
for (int i = 0; i < L1; i++)
{
- mat_hiddenBpttWeight[b][i] += alpha * mat_bptt_syn0_ph[b][i];
+ mat_hiddenBpttWeight[b][i] += LearningRate * mat_bptt_syn0_ph[b][i];
//Clean bptt weight error
mat_bptt_syn0_ph[b][i] = 0;
}
//Update dense feature weights
- if (fea_size > 0)
+ if (DenseFeatureSize > 0)
{
- for (int i = 0; i < fea_size; i++)
+ for (int i = 0; i < DenseFeatureSize; i++)
{
- mat_feature2hidden[b][i] += alpha * mat_bptt_synf[b][i];
+ mat_feature2hidden[b][i] += LearningRate * mat_bptt_synf[b][i];
//Clean dense feature weights error
mat_bptt_synf[b][i] = 0;
}
@@ -295,7 +292,7 @@ void learnBptt(State state)
for (int i = 0; i < sparse.GetNumberOfEntries(); i++)
{
int pos = sparse.GetEntry(i).Key;
- mat_input2hidden[b][pos] += alpha * mat_bptt_syn0_w[b][pos];
+ mat_input2hidden[b][pos] += LearningRate * mat_bptt_syn0_w[b][pos];
//Clean sparse feature weight error
mat_bptt_syn0_w[b][pos] = 0;
@@ -309,19 +306,19 @@ public void resetBpttMem()
{
bptt_inputs = new SparseVector[MAX_RNN_HIST];
bptt_hidden = new neuron[(bptt + bptt_block + 1) * L1];
- bptt_fea = new double[(bptt + bptt_block + 2) * fea_size];
+ bptt_fea = new double[(bptt + bptt_block + 2) * DenseFeatureSize];
mat_bptt_syn0_w = new Matrix(L1, L0);
mat_bptt_syn0_ph = new Matrix(L1, L1);
- mat_bptt_synf = new Matrix(L1, fea_size);
+ mat_bptt_synf = new Matrix(L1, DenseFeatureSize);
}
public override void initMem()
{
CreateCells();
- mat_hidden2output = new Matrix(L2, L1);
+ Hidden2OutputWeight = new Matrix(L2, L1);
mat_input2hidden = new Matrix(L1, L0);
- mat_feature2hidden = new Matrix(L1, fea_size);
+ mat_feature2hidden = new Matrix(L1, DenseFeatureSize);
mat_hiddenBpttWeight = new Matrix(L1, L1);
@@ -346,16 +343,16 @@ public override void netReset(bool updateNet = false) //cleans hidden layer ac
//Train mode
for (int a = 0; a < L1; a++)
{
- if (rand.NextDouble() < dropout)
+ if (rand.NextDouble() < Dropout)
{
neuHidden[a].mask = true;
}
}
- }
- Array.Clear(bptt_inputs, 0, MAX_RNN_HIST);
- Array.Clear(bptt_hidden, 0, (bptt + bptt_block + 1) * L1);
- Array.Clear(bptt_fea, 0, (bptt + bptt_block + 2) * fea_size);
+ Array.Clear(bptt_inputs, 0, MAX_RNN_HIST);
+ Array.Clear(bptt_hidden, 0, (bptt + bptt_block + 1) * L1);
+ Array.Clear(bptt_fea, 0, (bptt + bptt_block + 2) * DenseFeatureSize);
+ }
}
@@ -375,13 +372,17 @@ public override void LearnBackTime(State state, int numStates, int curState)
{
bptt_inputs[a] = bptt_inputs[a - 1];
Array.Copy(bptt_hidden, (a - 1) * L1, bptt_hidden, a * L1, L1);
- Array.Copy(bptt_fea, (a - 1) * fea_size, bptt_fea, a * fea_size, fea_size);
+ Array.Copy(bptt_fea, (a - 1) * DenseFeatureSize, bptt_fea, a * DenseFeatureSize, DenseFeatureSize);
}
- bptt_inputs[0] = state.GetSparseData();
+ bptt_inputs[0] = state.SparseData;
//Save hidden and feature layer nodes values for bptt
Array.Copy(neuHidden, 0, bptt_hidden, 0, L1);
- Array.Copy(neuFeatures, 0, bptt_fea, 0, fea_size);
+
+ for (int i = 0; i < DenseFeatureSize; i++)
+ {
+ bptt_fea[i] = neuFeatures[i];
+ }
// time to learn bptt
if (((curState % bptt_block) == 0) || (curState == numStates - 1))
@@ -397,44 +398,50 @@ public override void loadNetBin(string filename)
StreamReader sr = new StreamReader(filename);
BinaryReader br = new BinaryReader(sr.BaseStream);
- m_modeltype = (MODELTYPE)br.ReadInt32();
- if (m_modeltype != MODELTYPE.SIMPLE)
+ ModelType = (MODELTYPE)br.ReadInt32();
+ if (ModelType != MODELTYPE.SIMPLE)
{
throw new Exception("Invalidated model format: must be simple RNN");
}
- m_modeldirection = (MODELDIRECTION)br.ReadInt32();
+ ModelDirection = (MODELDIRECTION)br.ReadInt32();
int iflag = br.ReadInt32();
if (iflag == 1)
{
- m_bCRFTraining = true;
+ IsCRFTraining = true;
}
else
{
- m_bCRFTraining = false;
+ IsCRFTraining = false;
}
//Load basic parameters
L0 = br.ReadInt32();
L1 = br.ReadInt32();
L2 = br.ReadInt32();
- fea_size = br.ReadInt32();
+ DenseFeatureSize = br.ReadInt32();
//Create cells of each layer
CreateCells();
//Load weight matrix between each two layer pairs
+ Logger.WriteLine("Loading input2hidden weights...");
mat_input2hidden = loadMatrixBin(br);
+
+ Logger.WriteLine("Loading bptt hidden weights...");
mat_hiddenBpttWeight = loadMatrixBin(br);
+ Logger.WriteLine("Loading feature2hidden weights...");
mat_feature2hidden = loadMatrixBin(br);
- mat_hidden2output = loadMatrixBin(br);
+ Logger.WriteLine("Loading hidden2output weights...");
+ Hidden2OutputWeight = loadMatrixBin(br);
if (iflag == 1)
{
- mat_CRFTagTransWeights = loadMatrixBin(br);
+ Logger.WriteLine("Loading CRF tag trans weights...");
+ CRFTagTransWeights = loadMatrixBin(br);
}
sr.Close();
@@ -442,8 +449,8 @@ public override void loadNetBin(string filename)
private void CreateCells()
{
- neuFeatures = new double[fea_size];
- neuOutput = new neuron[L2];
+ neuFeatures = new SingleVector(DenseFeatureSize);
+ OutputLayer = new neuron[L2];
neuHidden = new neuron[L1];
}
@@ -453,12 +460,12 @@ public override void saveNetBin(string filename)
StreamWriter sw = new StreamWriter(filename);
BinaryWriter fo = new BinaryWriter(sw.BaseStream);
- fo.Write((int)m_modeltype);
- fo.Write((int)m_modeldirection);
+ fo.Write((int)ModelType);
+ fo.Write((int)ModelDirection);
// Signature: 0 is for RNN, 1 is for RNN-CRF
int iflag = 0;
- if (m_bCRFTraining == true)
+ if (IsCRFTraining == true)
{
iflag = 1;
}
@@ -467,23 +474,28 @@ public override void saveNetBin(string filename)
fo.Write(L0);
fo.Write(L1);
fo.Write(L2);
- fo.Write(fea_size);
+ fo.Write(DenseFeatureSize);
//weight input->hidden
+ Logger.WriteLine("Saving input2hidden weights...");
saveMatrixBin(mat_input2hidden, fo);
+
+ Logger.WriteLine("Saving bptt hidden weights...");
saveMatrixBin(mat_hiddenBpttWeight, fo);
//weight fea->hidden
+ Logger.WriteLine("Saving feature2hidden weights...");
saveMatrixBin(mat_feature2hidden, fo);
//weight hidden->output
- saveMatrixBin(mat_hidden2output, fo);
+ Logger.WriteLine("Saving hidden2output weights...");
+ saveMatrixBin(Hidden2OutputWeight, fo);
if (iflag == 1)
{
// Save Bigram
- saveMatrixBin(mat_CRFTagTransWeights, fo);
+ saveMatrixBin(CRFTagTransWeights, fo);
}
fo.Close();
diff --git a/RNNSharp/SparseVector.cs b/RNNSharp/SparseVector.cs
index 475c6dd..8a2045f 100644
--- a/RNNSharp/SparseVector.cs
+++ b/RNNSharp/SparseVector.cs
@@ -1,43 +1,35 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using System.Collections.Generic;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class SparseVector : SingleVector
{
- KeyValuePair<int, double>[] m_Data;
+ KeyValuePair<int, float>[] m_Data;
int m_Dimension;
int m_Size;
-
- public KeyValuePair<int, double> GetEntry(int pos) { return m_Data[pos]; }
+ public KeyValuePair<int, float> GetEntry(int pos) { return m_Data[pos]; }
public override int GetDimension() { return m_Dimension; }
public int GetNumberOfEntries() { return m_Size; }
- public void ChangeValue(int positionInSparseVector, int dimension, double value)
+ public void ChangeValue(int positionInSparseVector, int dimension, float value)
{
- m_Data[positionInSparseVector] = new KeyValuePair<int, double>(dimension, value);
+ m_Data[positionInSparseVector] = new KeyValuePair<int, float>(dimension, value);
}
public void SetDimension(int s) { m_Dimension = s; }
-
- public KeyValuePair<int, double>[] GetIndexValues()
- {
- return m_Data;
- }
-
- public void SetData(Dictionary<int, double> m)
+ public void SetData(Dictionary<int, float> m)
{
m_Size = m.Count;
- m_Data = new KeyValuePair<int, double>[m_Size];
+ m_Data = new KeyValuePair<int, float>[m_Size];
int count = 0;
- foreach (KeyValuePair<int, double> pair in m)
+ foreach (KeyValuePair<int, float> pair in m)
{
m_Data[count] = pair;
count++;
diff --git a/RNNSharp/State.cs b/RNNSharp/State.cs
index 2d8436a..fde905b 100644
--- a/RNNSharp/State.cs
+++ b/RNNSharp/State.cs
@@ -1,9 +1,7 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
+
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class PriviousLabelFeature
@@ -16,80 +14,31 @@ public class PriviousLabelFeature
public class State
{
//Store sparse features, such as template features
- SparseVector m_SparseData = new SparseVector();
-
+ public SparseVector SparseData { get; }
//Store dense features, such as word embedding
- Vector m_spDenseData = null;
-
+ public Vector DenseData { get; set; }
+ public int Label { get; set; }
//Store run time features
- PriviousLabelFeature[] m_RuntimeFeatures;
- int m_NumRuntimeFeature;
-
- int m_Label;
-
- public int GetLabel() { return m_Label; }
-
- public SparseVector GetSparseData() { return m_SparseData; }
-
- public Vector GetDenseData() { return m_spDenseData; }
+ public PriviousLabelFeature[] RuntimeFeatures { get; set; }
-
- public PriviousLabelFeature GetRuntimeFeature(int i) { return m_RuntimeFeatures[i]; }
-
- public int GetNumRuntimeFeature() { return m_NumRuntimeFeature; }
-
-
- public void SetNumRuntimeFeature(int n)
+ public State()
{
- if (m_NumRuntimeFeature != n)
- {
- m_NumRuntimeFeature = n;
- m_RuntimeFeatures = null;
- if (m_NumRuntimeFeature > 0)
- m_RuntimeFeatures = new PriviousLabelFeature[m_NumRuntimeFeature];
- }
+ SparseData = new SparseVector();
}
-
- public void SetRuntimeFeature(int i, int offset, double v)
+ public void SetRuntimeFeature(int i, int offset, float v)
{
- PriviousLabelFeature f = m_RuntimeFeatures[i];
- m_SparseData.ChangeValue(f.PositionInSparseVector, f.StartInDimension + offset, v);
+ PriviousLabelFeature f = RuntimeFeatures[i];
+ SparseData.ChangeValue(f.PositionInSparseVector, f.StartInDimension + offset, v);
}
-
- public void SetDenseData(Vector dense)
- {
- m_spDenseData = dense;
- }
-
- public void SetLabel(int label)
- {
- m_Label = label;
- }
-
-
- public int GetDenseDimension()
- {
- if (null != m_spDenseData)
- return m_spDenseData.GetDimension();
- else
- return 0;
- }
-
- public int GetSparseDimension()
- {
- return m_SparseData.GetDimension();
- }
-
-
public void AddRuntimeFeaturePlacehold(int i, int offsetToCurentState, int posInSparseVector, int startInDimension)
{
PriviousLabelFeature r = new PriviousLabelFeature();
r.OffsetToCurrentState = offsetToCurentState;
r.StartInDimension = startInDimension;
r.PositionInSparseVector = posInSparseVector;
- m_RuntimeFeatures[i] = r;
+ RuntimeFeatures[i] = r;
}
}
diff --git a/RNNSharp/TagSet.cs b/RNNSharp/TagSet.cs
index f4afba0..8594cd2 100644
--- a/RNNSharp/TagSet.cs
+++ b/RNNSharp/TagSet.cs
@@ -1,15 +1,15 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using System.Collections.Generic;
using System.IO;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class TagSet
{
- public Dictionary<string, int> m_Tag2Index = new Dictionary<string, int>();
+ public Dictionary<string, int> m_Tag2Index;
+ public static string DefaultTag = "SentBE";
public int GetSize()
{
@@ -41,30 +41,30 @@ public int GetIndex(string strTagName)
}
- //Load the tag id and its name mapping from given file
- //Format: tagid /t tag name
+ //Load tag name from given file
+ //Format: each line has one tag name
public TagSet(string strTagFileName)
{
- StreamReader fin = new StreamReader(strTagFileName);
+ m_Tag2Index = new Dictionary<string, int>();
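+ //Reserve index 0 for the default sentence-boundary tag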
+ int idx = 0;
+ m_Tag2Index.Add(DefaultTag, idx);
+ idx++;
- int idx;
- string strTagName;
string strLine = null;
- while ((strLine = fin.ReadLine()) != null)
+ using (StreamReader fin = new StreamReader(strTagFileName))
{
- strLine = strLine.Trim();
- if (strLine.Length == 0)
+ while ((strLine = fin.ReadLine()) != null)
{
- continue;
- }
-
- string[] items = strLine.Split('\t');
- idx = int.Parse(items[0]);
- strTagName = items[1];
+ strLine = strLine.Trim();
+ if (strLine.Length == 0)
+ {
+ continue;
+ }
- m_Tag2Index.Add(strTagName, idx);
+ m_Tag2Index.Add(strLine, idx);
+ idx++;
+ }
}
- fin.Close();
}
}
}
diff --git a/RNNSharp/TemplateFeaturizer.cs b/RNNSharp/TemplateFeaturizer.cs
index 4dfa81e..8ee4926 100644
--- a/RNNSharp/TemplateFeaturizer.cs
+++ b/RNNSharp/TemplateFeaturizer.cs
@@ -1,11 +1,12 @@
using System;
using System.Collections.Generic;
-using System.Linq;
using System.Text;
-using System.Threading.Tasks;
using System.IO;
using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
//Template feature processor
@@ -32,11 +33,6 @@ public int GetFeatureSize()
return m_maxFeatureId;
}
- public List<string> GetFeatureTemplates()
- {
- return m_Templates;
- }
-
//Extract feature id list from given record and start position
public List<int> GetFeatureIds(List<string[]> record, int startX)
{
diff --git a/RNNSharp/Vector.cs b/RNNSharp/Vector.cs
index 92d692f..6d1ed68 100644
--- a/RNNSharp/Vector.cs
+++ b/RNNSharp/Vector.cs
@@ -1,9 +1,9 @@
using System;
using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class Vector
@@ -13,7 +13,7 @@ public virtual int GetDimension()
return 0;
}
- public virtual double this[int i]
+ public virtual float this[int i]
{
get
{
@@ -58,7 +58,7 @@ public void Append(SingleVector vector)
}
- public override double this[int i]
+ public override float this[int i]
{
get
{
@@ -74,7 +74,7 @@ public override double this[int i]
public class SingleVector : Vector
{
- private double[] m_innerData;
+ private float[] m_innerData;
int m_nLen;
public override int GetDimension() { return m_nLen; }
@@ -83,10 +83,10 @@ public SingleVector()
m_innerData = null;
}
- public SingleVector(int nLen, double[] val)
+ public SingleVector(int nLen, float[] val)
{
m_nLen = nLen;
- m_innerData = new double[m_nLen];
+ m_innerData = new float[m_nLen];
for (int i = 0; i < m_nLen; i++)
{
m_innerData[i] = val[i];
@@ -95,12 +95,12 @@ public SingleVector(int nLen, double[] val)
public SingleVector(int nLen)
{
- m_innerData = new double[nLen];
+ m_innerData = new float[nLen];
m_nLen = nLen;
}
- public override double this[int i]
+ public override float this[int i]
{
get
{
@@ -111,33 +111,5 @@ public override double this[int i]
m_innerData[i] = value;
}
}
-
-
- public SingleVector Set(SingleVector rhs, int startOffset)
- {
- for (int i = 0; i < rhs.GetDimension(); i++)
- {
- m_innerData[i + startOffset] = rhs.m_innerData[i];
- }
- return this;
- }
-
- public void Normalize()
- {
-
- double sum = 0;
- for (int i = 0; i < m_nLen; i++)
- {
- sum += m_innerData[i] * m_innerData[i];
- }
-
- if (0 == sum) return;
- double df = Math.Sqrt(sum);
-
- for (int i = 0; i < m_nLen; i++)
- {
- m_innerData[i] /= df;
- }
- }
}
}
diff --git a/RNNSharp/WordEMWrapFeaturizer.cs b/RNNSharp/WordEMWrapFeaturizer.cs
index 708eb2c..d78768a 100644
--- a/RNNSharp/WordEMWrapFeaturizer.cs
+++ b/RNNSharp/WordEMWrapFeaturizer.cs
@@ -1,9 +1,8 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using System.Collections.Generic;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public class WordEMWrapFeaturizer
@@ -14,25 +13,22 @@ public class WordEMWrapFeaturizer
public WordEMWrapFeaturizer(string filename)
{
- Txt2Vec.Decoder decoder = new Txt2Vec.Decoder();
- decoder.LoadBinaryModel(filename);
+ Txt2Vec.Model model = new Txt2Vec.Model();
+ model.LoadBinaryModel(filename);
- string[] terms = decoder.GetAllTerms();
- vectorSize = decoder.GetVectorSize();
+ string[] terms = model.GetAllTerms();
+ vectorSize = model.VectorSize;
m_WordEmbedding = new Dictionary<string, SingleVector>();
m_UnkEmbedding = new SingleVector(vectorSize);
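//Kept as a zero vector; used as the fallback embedding for unknown (out-of-vocabulary) terms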
foreach (string term in terms)
{
- double[] vector = decoder.GetVector(term);
+ float[] vector = model.GetVector(term);
if (vector != null)
{
SingleVector spVector = new SingleVector(vectorSize, vector);
-
- spVector.Normalize();
-
m_WordEmbedding.Add(term, spVector);
}
}
diff --git a/RNNSharp/neuron.cs b/RNNSharp/neuron.cs
index 6689ef1..2457cd9 100644
--- a/RNNSharp/neuron.cs
+++ b/RNNSharp/neuron.cs
@@ -1,9 +1,7 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
+
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharp
{
public struct neuron
diff --git a/RNNSharpConsole/Program.cs b/RNNSharpConsole/Program.cs
index 36cbd48..8ee9065 100644
--- a/RNNSharpConsole/Program.cs
+++ b/RNNSharpConsole/Program.cs
@@ -1,12 +1,13 @@
using System;
using System.Collections.Generic;
-using System.Linq;
using System.Text;
-using System.Threading.Tasks;
using System.IO;
using RNNSharp;
using AdvUtils;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace RNNSharpConsole
{
class Program
@@ -22,8 +23,8 @@ class Program
static int layersize = 200;
static int iCRF = 0;
static long savestep = 0;
- static double alpha = 0.1;
- static double dropout = 0;
+ static float alpha = 0.1f;
+ static float dropout = 0;
static int bptt = 4;
static int modelType = 0;
static int nBest = 1;
@@ -31,7 +32,7 @@ class Program
static void UsageTitle()
{
- Console.WriteLine("Recurrent Neural Network Toolkit v1.1 by Zhongkai Fu (fuzhongkai@gmail.com)");
+ Console.WriteLine("Recurrent Neural Network Toolkit v1.2 by Zhongkai Fu (fuzhongkai@gmail.com)");
}
static void Usage()
@@ -136,8 +137,8 @@ static void InitParameters(string[] args)
if ((i = ArgPos("-modeltype", args)) >= 0) modelType = int.Parse(args[i + 1]);
if ((i = ArgPos("-crf", args)) >= 0) iCRF = int.Parse(args[i + 1]);
if ((i = ArgPos("-maxiter", args)) >= 0) maxIter = int.Parse(args[i + 1]);
- if ((i = ArgPos("-alpha", args)) >= 0) alpha = double.Parse(args[i + 1]);
- if ((i = ArgPos("-dropout", args)) >= 0) dropout = double.Parse(args[i + 1]);
+ if ((i = ArgPos("-alpha", args)) >= 0) alpha = float.Parse(args[i + 1]);
+ if ((i = ArgPos("-dropout", args)) >= 0) dropout = float.Parse(args[i + 1]);
if ((i = ArgPos("-bptt", args)) >= 0) bptt = int.Parse(args[i + 1]);
if ((i = ArgPos("-nbest", args)) >= 0) nBest = int.Parse(args[i + 1]);
if ((i = ArgPos("-dir", args)) >= 0) iDir = int.Parse(args[i + 1]);
@@ -221,28 +222,21 @@ static void LoadDataset(string strFileName, Featurizer featurizer, DataSet dataS
while (true)
{
- List<string> tokenList = ReadRecord(sr);
- if (tokenList.Count == 0)
+ //Extract features from it and convert it into sequence
+ Sentence sent = new Sentence(ReadRecord(sr));
+ if (sent.TokensList.Count <= 2)
{
- //No more record
+ //No more records; the sentence contains only <s> and </s>
break;
}
- //Extract features from it and convert it into sequence
- Sentence sent = new Sentence();
- sent.SetFeatures(tokenList);
Sequence seq = featurizer.ExtractFeatures(sent);
-
+
//Set label for the sequence
- if (seq.SetLabel(sent, featurizer.GetTagSet()) == false)
- {
- Logger.WriteLine(Logger.Level.info, "Error: Invalidated record.");
- sent.DumpFeatures();
- continue;
- }
+ seq.SetLabel(sent, featurizer.TagSet);
//Add the sequence into data set
- dataSet.Add(seq);
+ dataSet.SequenceList.Add(seq);
//Show state at every 1000 record
RecordCount++;
@@ -284,9 +278,9 @@ static void Main(string[] args)
}
}
- private static List<string> ReadRecord(StreamReader sr)
+ private static List<string[]> ReadRecord(StreamReader sr)
{
- List<string> record = new List<string>();
+ List<string[]> record = new List<string[]>();
string strLine = null;
//Read each line from file
@@ -299,7 +293,7 @@ private static List ReadRecord(StreamReader sr)
return record;
}
- record.Add(strLine);
+ record.Add(strLine.Split('\t'));
}
return record;
@@ -314,7 +308,7 @@ private static void Test()
return;
}
- //Load tag id and its name from file
+ //Load tag names from the tag file
TagSet tagSet = new TagSet(strTagFile);
if (String.IsNullOrEmpty(strModelFile) == true)
@@ -342,12 +336,10 @@ private static void Test()
Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
featurizer.ShowFeatureSize();
- //Create an instance for the model
- // Model model = new Model(strModelFile);
-
//Create instance for decoder
RNNSharp.RNNDecoder decoder = new RNNSharp.RNNDecoder(strModelFile, featurizer);
+
if (File.Exists(strTestFile) == false)
{
Logger.WriteLine(Logger.Level.err, "FAILED: The test corpus {0} isn't existed.", strTestFile);
@@ -360,25 +352,23 @@ private static void Test()
while (true)
{
- List tokenList = ReadRecord(sr);
- if (tokenList.Count == 0)
+ Sentence sent = new Sentence(ReadRecord(sr));
+ if (sent.TokensList.Count <= 2)
{
- //No more record
+ //No more records; the sentence contains only <s> and </s>
break;
}
- Sentence sent = new Sentence();
- sent.SetFeatures(tokenList);
-
if (nBest == 1)
{
int[] output = decoder.Process(sent);
//Output decoded result
//Append the decoded result into the end of feature set of each token
StringBuilder sb = new StringBuilder();
- for (int i = 0; i < tokenList.Count; i++)
+ for (int i = 0; i < sent.TokensList.Count; i++)
{
- sb.Append(tokenList[i]);
+ string tokens = String.Join("\t", sent.TokensList[i]);
+ sb.Append(tokens);
sb.Append("\t");
sb.Append(tagSet.GetTagName(output[i]));
sb.AppendLine();
@@ -389,19 +379,13 @@ private static void Test()
else
{
int[][] output = decoder.ProcessNBest(sent, nBest);
- if (output == null)
- {
- Logger.WriteLine(Logger.Level.err, "FAILED: decode failed. Dump current sentence...");
- sent.DumpFeatures();
- return;
- }
-
StringBuilder sb = new StringBuilder();
for (int i = 0; i < nBest; i++)
{
- for (int j = 0; j < tokenList.Count; j++)
+ for (int j = 0; j < sent.TokensList.Count; j++)
{
- sb.Append(tokenList[j]);
+ string tokens = String.Join("\t", sent.TokensList[i]);
+ sb.Append(tokens);
sb.Append("\t");
sb.Append(tagSet.GetTagName(output[i][j]));
sb.AppendLine();
@@ -433,23 +417,23 @@ private static void Train()
//Create configuration instance and set parameters
ModelSetting RNNConfig = new ModelSetting();
- RNNConfig.SetModelFile(strModelFile);
- RNNConfig.SetNumHidden(layersize);
- RNNConfig.SetCRFTraining((iCRF == 1) ? true : false);
- RNNConfig.SetDir(iDir);
- RNNConfig.SetModelType(modelType);
- RNNConfig.SetMaxIteration(maxIter);
- RNNConfig.SetSaveStep(savestep);
- RNNConfig.SetLearningRate(alpha);
- RNNConfig.SetDropout(dropout);
- RNNConfig.SetBptt(bptt);
+ RNNConfig.ModelFile = strModelFile;
+ RNNConfig.NumHidden = layersize;
+ RNNConfig.IsCRFTraining = (iCRF == 1) ? true : false;
+ RNNConfig.ModelDirection = iDir;
+ RNNConfig.ModelType = modelType;
+ RNNConfig.MaxIteration = maxIter;
+ RNNConfig.SaveStep = savestep;
+ RNNConfig.LearningRate = alpha;
+ RNNConfig.Dropout = dropout;
+ RNNConfig.Bptt = bptt;
//Dump RNN setting on console
RNNConfig.DumpSetting();
if (File.Exists(strFeatureConfigFile) == false)
{
- Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} isn't existed.", strFeatureConfigFile);
+ Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} doesn't exist.", strFeatureConfigFile);
UsageTrain();
return;
}
@@ -463,38 +447,37 @@ private static void Train()
UsageTrain();
return;
}
- if (String.IsNullOrEmpty(strTrainFile) == true)
+
+ if (File.Exists(strTrainFile) == false)
{
- Logger.WriteLine(Logger.Level.err, "FAILED: The training corpus isn't specified.");
+ Logger.WriteLine(Logger.Level.err, "FAILED: The training corpus doesn't exist.");
UsageTrain();
return;
}
- //LoadFeatureConfig training corpus and extract feature set
- DataSet dataSetTrain = new DataSet(tagSet.GetSize());
- LoadDataset(strTrainFile, featurizer, dataSetTrain);
-
- DataSet dataSetValidation = null;
- if (String.IsNullOrEmpty(strValidFile) == true)
+ if (File.Exists(strValidFile) == false)
{
- Logger.WriteLine(Logger.Level.err, "FAILED: The validation corpus isn't specified.");
+ Logger.WriteLine(Logger.Level.err, "FAILED: The validation corpus doesn't exist.");
+ UsageTrain();
return;
}
- //LoadFeatureConfig validated corpus and extract feature set
- dataSetValidation = new DataSet(tagSet.GetSize());
- LoadDataset(strValidFile, featurizer, dataSetValidation);
//Create RNN encoder and save necessary parameters
RNNEncoder encoder = new RNNEncoder(RNNConfig);
- encoder.SetTrainingSet(dataSetTrain);
- encoder.SetValidationSet(dataSetValidation);
+
+ //Load training corpus and extract feature set
+ encoder.TrainingSet = new DataSet(tagSet.GetSize());
+ LoadDataset(strTrainFile, featurizer, encoder.TrainingSet);
+
+ //Load validation corpus and extract feature set
+ encoder.ValidationSet = new DataSet(tagSet.GetSize());
+ LoadDataset(strValidFile, featurizer, encoder.ValidationSet);
if (iCRF == 1)
{
Logger.WriteLine(Logger.Level.info, "Initialize output tag bigram transition probability...");
//Build tag bigram transition matrix
- dataSetTrain.BuildLabelBigramTransition();
- encoder.SetLabelBigramTransition(dataSetTrain.GetLabelBigramTransition());
+ encoder.TrainingSet.BuildLabelBigramTransition();
}
//Start to train the model
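
With the DataSet objects now hung directly off the encoder, dropping SetLabelBigramTransition implies RNNEncoder reads the CRF transition matrix from its own TrainingSet. The whole training flow condenses to the sketch below (grounded in the hunks above; the final encoder.Train() call is assumed, since the line behind "//Start to train the model" falls outside this hunk):

    RNNEncoder encoder = new RNNEncoder(RNNConfig);

    encoder.TrainingSet = new DataSet(tagSet.GetSize());
    LoadDataset(strTrainFile, featurizer, encoder.TrainingSet);

    encoder.ValidationSet = new DataSet(tagSet.GetSize());
    LoadDataset(strValidFile, featurizer, encoder.ValidationSet);

    if (iCRF == 1)
    {
        // CRF mode: build the tag bigram transitions in place; the encoder
        // can now read them from its own TrainingSet.
        encoder.TrainingSet.BuildLabelBigramTransition();
    }

    encoder.Train(); // assumed entry point, not shown in this hunk
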
diff --git a/RNNSharpOverview.jpg b/RNNSharpOverview.jpg
index f047cc7..b0577c4 100644
Binary files a/RNNSharpOverview.jpg and b/RNNSharpOverview.jpg differ
diff --git a/TFeatureBin/Program.cs b/TFeatureBin/Program.cs
index 1e8e625..f59cfd3 100644
--- a/TFeatureBin/Program.cs
+++ b/TFeatureBin/Program.cs
@@ -1,12 +1,13 @@
using System;
using System.Collections.Generic;
-using System.Linq;
using System.Text;
-using System.Threading.Tasks;
using System.IO;
using AdvUtils;
using RNNSharp;
+///
+/// RNNSharp written by Zhongkai Fu (fuzhongkai@gmail.com)
+///
namespace TFeatureBin
{
class Program
@@ -167,56 +168,60 @@ static IDictionary<string, int> ExtractFeatureSetFromFile()
Logger.WriteLine(Logger.Level.info, "Generate feature set...");
BigDictionary<string, int> feature2freq = new BigDictionary<string, int>();
- List<string[]> record = new List<string[]>();
- StreamReader srCorpus = new StreamReader(strInputFile, Encoding.UTF8);
+
+ List<string[]> tokenList = new List<string[]>();
string strLine = null;
- while ((strLine = srCorpus.ReadLine()) != null)
+ Sentence sentence = null;
+
+ using (StreamReader srCorpus = new StreamReader(strInputFile, Encoding.UTF8))
{
- strLine = strLine.Trim();
- if (strLine.Length == 0)
+ while ((strLine = srCorpus.ReadLine()) != null)
{
- //The end of current record
- for (int i = 0; i < record.Count; i++)
+ strLine = strLine.Trim();
+ if (strLine.Length == 0)
{
- //Get feature of current token
- List<string> featureList = templateFeaturizer.GenerateFeature(record, i);
- foreach (string strFeature in featureList)
+ //The end of current record
+ sentence = new Sentence(tokenList);
+ for (int i = 0; i < sentence.TokensList.Count; i++)
{
- if (feature2freq.ContainsKey(strFeature) == false)
+ //Get feature of i-th token
+ List<string> featureList = templateFeaturizer.GenerateFeature(sentence.TokensList, i);
+ foreach (string strFeature in featureList)
{
- feature2freq.Add(strFeature, 0);
+ if (feature2freq.ContainsKey(strFeature) == false)
+ {
+ feature2freq.Add(strFeature, 0);
+ }
+ feature2freq[strFeature]++;
}
- feature2freq[strFeature]++;
}
- }
- record.Clear();
- }
- else
- {
- string[] items = strLine.Split('\t');
- record.Add(items);
+ tokenList.Clear();
+ }
+ else
+ {
+ tokenList.Add(strLine.Split('\t'));
+ }
}
- }
-
- //The end of current record
- for (int i = 0; i < record.Count; i++)
- {
- //Get feature of current token
- List<string> featureList = templateFeaturizer.GenerateFeature(record, i);
- foreach (string strFeature in featureList)
+ //The end of current record
+ sentence = new Sentence(tokenList);
+ for (int i = 0; i < sentence.TokensList.Count; i++)
{
- if (feature2freq.ContainsKey(strFeature) == false)
+ //Get feature of i-th token
+ List<string> featureList = templateFeaturizer.GenerateFeature(sentence.TokensList, i);
+ foreach (string strFeature in featureList)
{
- feature2freq.Add(strFeature, 0);
+ if (feature2freq.ContainsKey(strFeature) == false)
+ {
+ feature2freq.Add(strFeature, 0);
+ }
+ feature2freq[strFeature]++;
}
- feature2freq[strFeature]++;
}
}
- srCorpus.Close();
-
//Only save features whose frequency is not less than minfreq
Logger.WriteLine(Logger.Level.info, "Filter out features whose frequency is less than {0}", minfreq);
SortedDictionary<string, int> features = new SortedDictionary<string, int>(StringComparer.Ordinal);
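
The restructured extractor still repeats the count-features-of-one-record block twice: once when a blank line closes a record, and once after the read loop for a trailing record with no blank line. A local helper would fold that duplication; a hedged sketch (CountFeatures is hypothetical, not part of this diff):

    // Hypothetical helper: tally every template feature generated for one record.
    static void CountFeatures(TemplateFeaturizer templateFeaturizer,
        List<string[]> tokenList, BigDictionary<string, int> feature2freq)
    {
        Sentence sentence = new Sentence(tokenList);
        for (int i = 0; i < sentence.TokensList.Count; i++)
        {
            List<string> featureList = templateFeaturizer.GenerateFeature(sentence.TokensList, i);
            foreach (string strFeature in featureList)
            {
                if (feature2freq.ContainsKey(strFeature) == false)
                {
                    feature2freq.Add(strFeature, 0);
                }
                feature2freq[strFeature]++;
            }
        }
    }

Both call sites would then shrink to CountFeatures(templateFeaturizer, tokenList, feature2freq), with tokenList.Clear() following it inside the read loop.
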
diff --git a/dll/AdvUtils.dll b/dll/AdvUtils.dll
index 337a33a..ebb81b3 100644
Binary files a/dll/AdvUtils.dll and b/dll/AdvUtils.dll differ
diff --git a/dll/txt2vec.dll b/dll/txt2vec.dll
index ae77c5c..35fb8f1 100644
Binary files a/dll/txt2vec.dll and b/dll/txt2vec.dll differ