Skip to content

Commit

Permalink
more optimization and a bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
Waleed Ammar committed Apr 23, 2015
1 parent 17d0d29 commit aba97d5
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 8 deletions.
11 changes: 8 additions & 3 deletions alignment/LatentCrfAligner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,17 @@ LatentCrfAligner::LatentCrfAligner(const string &textFilename,

// encode the null token which is conventionally added to the beginning of the src sentence.
NULL_TOKEN_STR = "__null__token__";
NULL_TOKEN = vocabEncoder.Encode(NULL_TOKEN_STR);
if (learningInfo.mpiWorld->rank() == 0) {
NULL_TOKEN = vocabEncoder.Encode(NULL_TOKEN_STR);
}
boost::mpi::broadcast<int64_t>(*learningInfo.mpiWorld, NULL_TOKEN, 0);
assert(NULL_TOKEN != vocabEncoder.UnkInt());

// read and encode tgt words and their classes (e.g. brown clusters)
EncodeTgtWordClasses();

if (learningInfo.mpiWorld->rank() == 0) {
EncodeTgtWordClasses();
}

// read and encode data
srcSents.clear();
tgtSents.clear();
Expand Down
9 changes: 9 additions & 0 deletions alignment/LatentCrfAligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
#define _LATENT_CRF_ALIGNER_H_

#include <fstream>

#include "mpi.h"

#include <boost/mpi/environment.hpp>
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/nonblocking.hpp>
#include <boost/mpi/collectives.hpp>


#include "../core/LatentCrfModel.h"

class LatentCrfAligner : public LatentCrfModel {
Expand Down
2 changes: 1 addition & 1 deletion core/LearningInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ class LearningInfo {

// should be called only once in the constructor
void SetSharedMemorySegment(bool create) {
size_t segmentSize = 30 * 1024; // in GBs
size_t segmentSize = 256 * 1024; // in GBs
segmentSize *= 1024 * 1024;
string SEGMENT_NAME = outputFilenamePrefix + ".segment";
using namespace boost::interprocess;
Expand Down
16 changes: 12 additions & 4 deletions core/VocabEncoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,13 +296,17 @@ class VocabEncoder {

if (end_of_file) {
break;
} else {
}

// Wait till the master has encoded this line.
if (learningInfo.mpiWorld->rank() != 0) {
boost::mpi::broadcast<std::string>(*learningInfo.mpiWorld, line, 0);
}

// skip empty lines
if(line.size() == 0) {
continue;
cerr << "Blank lines are not allowed in parallel data. Will die." << endl;
exit(1);
}
lineNumber++;

Expand All @@ -314,10 +318,14 @@ class VocabEncoder {
srcSents.resize(lineNumber+1);
tgtSents.resize(lineNumber+1);
vector<int64_t> temp;

Encode(splits, temp);

assert(splits.size() == temp.size());

// Having encoded the words in this line, the master now sends this line to the slaves.
if (learningInfo.mpiWorld->rank() == 0) {
boost::mpi::broadcast<std::string>(*learningInfo.mpiWorld, line, 0);
}

// src sent is written before tgt sent
bool src = true;
if(nullToken.size() > 0) {
Expand Down

0 comments on commit aba97d5

Please sign in to comment.