Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
christianbuck committed Oct 26, 2015
1 parent f51e80f commit 68bb529
Show file tree
Hide file tree
Showing 23 changed files with 4,252 additions and 4,030 deletions.
3 changes: 2 additions & 1 deletion src/Aligner.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ class Aligner {
list<pair<int, int> > flattenDiagonal(stack<pair<int, int> > &traceback);

// make final alignment
stack<Chunk> makeInitialAlignment(stack<Data> data, const LengthDistributions &l_d,
stack<Chunk> makeInitialAlignment(stack<Data> data,
const LengthDistributions &l_d,
const LexicalDistributions &lex_d,
vector<list<pair<int, int> > > f_d);

Expand Down
70 changes: 38 additions & 32 deletions src/AlignmentClustering.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#include <stack>
#include <map>
#include <utility>
#include <cmath>
#include <vector>
#include <cmath>
#include <vector>
#include "Sentence.h"
#include "Data.h"
#include "Chunk.h"
Expand All @@ -14,36 +14,42 @@

using namespace std;

class AlignmentClustering
{
private:
stack<Chunk> sentence_alignment;
stack<Chunk> expanded_alignment;

//prior distributions of alignment types
float prior10_01;
float prior_11;
float prior12_21;
float prior13_31;
float prior14_41;

public:
AlignmentClustering(stack< Chunk > sentence_alignment);

stack< Chunk > getSentenceAlignment();
void setSentenceAlignment(stack< Chunk >);

stack< Chunk > getExpandedAlignment();
void setExpandedAlignment(stack< Chunk >);

//compute bead priors (P_ak()) and lambda factor for successions of insertions and deletions
void estimatePriors(stack<Chunk> a);

//distance function for making merging decisions (based on distance function as defined in Moore 2002 but with Viterbi model 1 instead of summing up over all alignments)
long double mergeDistanceFunction(LengthDistributions &l_d, LexicalDistributions &lex_d, vector<long int> st, vector<long int> tt, vector<int> ss, vector<int> ts);

void alignmentClustering(stack<Chunk> sentence_alignment, LengthDistributions &l_d, LexicalDistributions &lex_d);

// Declares the clustering stage that operates on a sentence alignment. The
// object stores a sentence alignment and an expanded alignment (both as
// stacks of Chunk) together with five prior values for alignment types.
// NOTE(review): only declarations are visible in this header; any remark below
// that goes beyond the signatures is an assumption to confirm in the .cpp.
class AlignmentClustering {
private:
// Alignment stored by setSentenceAlignment() / the constructor argument of
// the same name — presumably the input alignment; confirm in the .cpp.
stack<Chunk> sentence_alignment;
// Alignment exposed through get/setExpandedAlignment().
stack<Chunk> expanded_alignment;

// prior distributions of alignment types
// NOTE(review): the suffixes presumably name the alignment (bead) types,
// e.g. 1-0/0-1, 1-1, 1-2/2-1, 1-3/3-1, 1-4/4-1 — confirm against
// estimatePriors().
float prior10_01;
float prior_11;
float prior12_21;
float prior13_31;
float prior14_41;

public:
// Takes the sentence alignment by value (the caller's stack is copied).
AlignmentClustering(stack<Chunk> sentence_alignment);

// Accessors for the stored sentence alignment; the getter returns the stack
// by value and the setter takes it by value.
stack<Chunk> getSentenceAlignment();
void setSentenceAlignment(stack<Chunk>);

// Accessors for the stored expanded alignment; same by-value convention.
stack<Chunk> getExpandedAlignment();
void setExpandedAlignment(stack<Chunk>);

// compute bead priors (P_ak()) and lambda factor for successions of
// insertions and deletions
// `a` is the alignment the priors are estimated from (passed by value).
void estimatePriors(stack<Chunk> a);

// distance function for making merging decisions (based on distance function
// as defined in Moore 2002 but with Viterbi model 1 instead of summing up
// over all alignments)
// NOTE(review): st/tt/ss/ts are not described in this header; presumably
// source/target token ids and source/target sizes — confirm against the
// definition before relying on this.
long double mergeDistanceFunction(LengthDistributions &l_d,
LexicalDistributions &lex_d,
vector<long int> st, vector<long int> tt,
vector<int> ss, vector<int> ts);

// Entry point of the clustering step. Takes the alignment by value and the
// length and lexical distributions by non-const reference.
// NOTE(review): whether the distributions are modified is not visible here.
void alignmentClustering(stack<Chunk> sentence_alignment,
LengthDistributions &l_d,
LexicalDistributions &lex_d);
};

#endif /*ALIGNMENTCLUSTERING_H_*/
2 changes: 1 addition & 1 deletion src/Cell.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Cell // cells of the distance matrix of the DP procedure
bool initialized;

public:
Cell() : distance(0), initialized(false) {};
Cell() : distance(0), initialized(false){};
Cell(long double d, pair<int, int> p)
: distance(d), previous(p), initialized(true){};

Expand Down
71 changes: 35 additions & 36 deletions src/Chunk.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -5,42 +5,41 @@

using namespace std;

class Chunk //data structure containing each minimal alignment
{
private:

vector<long int> sourceChunkPosition; //source sentence ids in a chunk
vector<long int> targetChunkPosition; //target sentence ids in a chunk

vector< vector<long int> > sourceChunkToken;
vector< vector<long int> > targetChunkToken;

public:

Chunk( vector<long int> sourceChunkPosition, vector<long int> targetChunkPosition,
vector< vector<long int> > sourceChunkToken, vector< vector<long int> > targetChunkToken );

vector<long int> getSourceChunkPosition();
void setSourceChunkPosition(vector<long int> sourceChunkPos);
void addToSourceChunkPosition(long int sentPos);

vector<long int> getTargetChunkPosition();
void setTargetChunkPosition(vector<long int> targetChunkPos);
void addToTargetChunkPosition(long int sentPos);

vector< vector<long int> > getSourceChunkToken();
void setSourceChunkToken(vector< vector<long int> > sourceChunkTok);
void addToSourceChunkToken(vector<long int> sentToken);

vector< vector<long int> > getTargetChunkToken();
void setTargetChunkToken(vector< vector<long int> > targetChunkTok);
void addToTargetChunkToken(vector<long int> sentToken);

//compares the sentence ids in two chunks
bool comparePositions(Chunk c);

void clear();

// One alignment unit: the ids of the source and target sentences it groups,
// plus one token vector per sentence on each side.
// NOTE(review): only declarations are visible in this header; remarks about
// what the member functions do beyond their signatures are assumptions to
// confirm in the .cpp.
class Chunk // data structure containing each minimal alignment
{
private:
vector<long int> sourceChunkPosition; // source sentence ids in a chunk
vector<long int> targetChunkPosition; // target sentence ids in a chunk

// Token sequences, one inner vector per sentence — presumably stored in the
// same order as the corresponding position vector; confirm in the .cpp.
vector<vector<long int> > sourceChunkToken;
vector<vector<long int> > targetChunkToken;

public:
// Builds a chunk from the four vectors; all arguments are taken by value.
Chunk(vector<long int> sourceChunkPosition,
vector<long int> targetChunkPosition,
vector<vector<long int> > sourceChunkToken,
vector<vector<long int> > targetChunkToken);

// Source sentence ids: getter returns by value, setter takes a replacement
// vector, addTo... takes a single sentence id (presumably appended).
vector<long int> getSourceChunkPosition();
void setSourceChunkPosition(vector<long int> sourceChunkPos);
void addToSourceChunkPosition(long int sentPos);

// Target sentence ids: same convention as the source side.
vector<long int> getTargetChunkPosition();
void setTargetChunkPosition(vector<long int> targetChunkPos);
void addToTargetChunkPosition(long int sentPos);

// Source tokens: getter returns by value, setter takes a replacement,
// addTo... takes the token vector of one sentence (presumably appended).
vector<vector<long int> > getSourceChunkToken();
void setSourceChunkToken(vector<vector<long int> > sourceChunkTok);
void addToSourceChunkToken(vector<long int> sentToken);

// Target tokens: same convention as the source side.
vector<vector<long int> > getTargetChunkToken();
void setTargetChunkToken(vector<vector<long int> > targetChunkTok);
void addToTargetChunkToken(vector<long int> sentToken);

// compares the sentence ids in two chunks
// NOTE(review): the meaning of the bool result (equal vs. different) is not
// visible here — check the definition before use.
bool comparePositions(Chunk c);

// NOTE(review): presumably empties the stored vectors — confirm in the .cpp.
void clear();
};

#endif /*CHUNK_H_*/
6 changes: 3 additions & 3 deletions src/Data.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ class Data {
public:
Data(){};
Data(const string& corpus_name, const vector<Sentence>& source_sentences,
const vector<Sentence>& target_sentences);
const vector<Sentence>& target_sentences);

string getCorpusName() const;
void setCorpusName(const string& corpus_name);

// string getParagraphName() const;
// void setParagraphName(string par_name);
// string getParagraphName() const;
// void setParagraphName(string par_name);

vector<Sentence> getSourceSentences() const;
vector<Sentence> getTargetSentences() const;
Expand Down
14 changes: 8 additions & 6 deletions src/InitialAlignment.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ class InitialAlignment {
list<pair<int, int> > diagonal; // diagonal from initial alignment

public:
InitialAlignment(const list<pair<int, int> >& f_d);
InitialAlignment(const list<pair<int, int> > &f_d);

list<pair<int, int> > getDiagonal() const;
void setDiagonal(const list<pair<int, int> >& f_d);
void setDiagonal(const list<pair<int, int> > &f_d);

stack<Chunk> getAlignment();
void setAlignment(stack<Chunk>);
Expand All @@ -44,8 +44,8 @@ class InitialAlignment {
bool getReduceBand1() const;
void setReduceBand1(bool);

// bool getReduceBand2() const;
// void setReduceBand2(bool);
// bool getReduceBand2() const;
// void setReduceBand2(bool);

bool getDynamicPrgrammingState() const;
void setDynamicProgrammingState(bool);
Expand All @@ -64,9 +64,11 @@ class InitialAlignment {

// dynamic programming : makes an approximate alignment on a narrow band
// around the flat diagonal extracted from the length-based alignment. If the
// alignment comes close to the bounds reiterate and replace the flat
// alignment comes close to the bounds reiterate and replace the
// flat
// diagonal by the approximate alignment.
void dynamicProgramming(Data &d, const LengthDistributions &length_distributions,
void dynamicProgramming(Data &d,
const LengthDistributions &length_distributions,
const LexicalDistributions &lexical_distributions,
int bandwidth_1, int bandwidth_2);
};
Expand Down
54 changes: 29 additions & 25 deletions src/LengthAlignment.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,49 @@

#include <stack>
#include <utility>
#include <cmath>
#include <vector>
#include <cmath>
#include <vector>
#include "Sentence.h"
#include "Data.h"
#include "Cell.h"
#include "Chunk.h"
#include "LengthDistributions.h"

/*
Length-based sentence aligner: makes length-based alignment as in Moore or a modified version
Length-based sentence aligner: makes length-based alignment as in Moore or a
modified version
*/

class LengthAlignment {

private:
stack<pair<int,int> > trace_back;
stack<Chunk> alignment;
private:
stack<pair<int, int> > trace_back;
stack<Chunk> alignment;

protected:
// int bandwidth;

public:
LengthAlignment(){};
protected:
// int bandwidth;

stack<pair<int,int> > getTraceBack();
void setTraceBack(stack<pair<int,int> > t_b);
public:
LengthAlignment(){};

stack<Chunk> getAlignment();
void setAlignment(stack<Chunk>);

//sentence length based distance function as defined in Moore 2002
float distanceFunctionMoore(const LengthDistributions &l_d, int x1, int x2, int y1, int y2);
stack<pair<int, int> > getTraceBack();
void setTraceBack(stack<pair<int, int> > t_b);

//dynamic Programming procedure : searches a barrow band around the main diagonal (i=j). If the search comes close to the bound, the procedure reiterates
void dynamicProgramming(const Data& d, const LengthDistributions &l_d, int bandwith = 0);
stack<Chunk> getAlignment();
void setAlignment(stack<Chunk>);

//filters out the best 1-to-1 correspondences from the sentence length based alignment
stack<Chunk> filterOutBestOneToOnes(const LengthDistributions &l_d, stack<Chunk> a);
// sentence length based distance function as defined in Moore 2002
float distanceFunctionMoore(const LengthDistributions &l_d, int x1, int x2,
int y1, int y2);

// dynamic Programming procedure : searches a narrow band around the main
// diagonal (i=j). If the search comes close to the bound, the procedure
// reiterates
void dynamicProgramming(const Data &d, const LengthDistributions &l_d,
int bandwith = 0);

// filters out the best 1-to-1 correspondences from the sentence length based
// alignment
stack<Chunk> filterOutBestOneToOnes(const LengthDistributions &l_d,
stack<Chunk> a);
};
#endif/*LENGTH_ALIGNMENT_H*/
#endif /*LENGTH_ALIGNMENT_H*/
Loading

0 comments on commit 68bb529

Please sign in to comment.