Skip to content

Commit

Permalink
Stubbing out of Scorer.
Browse files Browse the repository at this point in the history
Scorer test data.


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1646 1f5c12ca-751b-0410-a591-d2e778427230
  • Loading branch information
bhaddow committed May 14, 2008
1 parent 4af8428 commit b672d9f
Show file tree
Hide file tree
Showing 8 changed files with 12,080 additions and 21 deletions.
5 changes: 5 additions & 0 deletions mert/BleuScorer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "BleuScorer.h"

/**
 * Implementation of BLEU scoring for use in MERT.
 **/
6 changes: 6 additions & 0 deletions mert/BleuScorer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#ifndef __BLEU_SCORER_H__
#define __BLEU_SCORER_H__

#include "Scorer.h"

#endif //__BLEU_SCORER_H__
7 changes: 5 additions & 2 deletions mert/Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
OBJS= Util.o Timer.o Parameter.o \
ScoreStats.o ScoreArray.o ScoreData.o \
FeatureStats.o FeatureArray.o FeatureData.o Optimizer.o Point.o
Optimizer.o FeatureStats.o FeatureArray.o FeatureData.o Point.o BleuScorer.o

CFLAGS=-O3 -DTRACE_ENABLE
GCC=g++
LDFLAGS=
LDLIBS=-lm

all: main feature_extractor
all: main feature_extractor test_scorer

clean:
rm -f *.o
Expand All @@ -21,3 +21,6 @@ feature_extractor: $(OBJS) feature_extractor.cpp
main: $(OBJS) main.cpp
$(GCC) $(CFLAGS) $(OBJS) $(LDLIBS) -o $@ $@.cpp

test_scorer: $(OBJS) test_scorer.cpp
$(GCC) $(CFLAGS) $(OBJS) $(LDLIBS) -o $@ $@.cpp

5 changes: 2 additions & 3 deletions mert/ScoreData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
/**
 * Construct a ScoreData bound to the given scorer.
 *
 * The address of ptr is kept in theScorer, so the scorer has to stay alive
 * for as long as this object uses it. bufLen_ starts at zero and the score
 * type is taken from the scorer's name.
 **/
ScoreData::ScoreData(Scorer& ptr):
  bufLen_(0), theScorer(&ptr)
{
  // The scorer is the single source of truth for the score type. The former
  // hard-coded "BLEU4" assignment was overwritten right away and is gone.
  score_type = theScorer->getName();
}

void ScoreData::savetxt(std::ofstream& outFile)
Expand Down Expand Up @@ -107,7 +106,7 @@ void ScoreData::loadnbest(const std::string &file)
* theStatistics will contain the statistics (as a string)
* coming from the actual Scorer
*/
// theStatistics = theScorer.getStatistics(sentence_index, theSentence);
theScorer->prepareStats(sentence_index, theSentence,entry);
entry.set(theStatistics);

add(entry,sentence_index);
Expand Down
60 changes: 44 additions & 16 deletions mert/Scorer.h
Original file line number Diff line number Diff line change
@@ -1,35 +1,63 @@
#ifndef __SCORER_H__
#define __SCORER_H__

#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

class ScoreData;
class ScoreStats;

class Scorer {
/**
* Extract initial statistics from the nbestfile and write them to the stats
* file. For example, for bleu these are the ngram counts and length.
**/

public:

Scorer(const string& statsfile): _statsfile(statsfile) {}
/**
 * Create a scorer with the given name. No score data is attached yet:
 * _scoreData starts out as a null pointer.
 **/
Scorer(const std::string& name): _name(name), _scoreData(0) {}

/**
 * Return a reference to the name stored in _name.
 **/
const std::string& getName() const {return _name;}

/**
 * Set the reference files. This must be called before prepareStats.
 *
 * The given list replaces whatever reference files were stored before.
 **/
void setReferenceFiles(const std::vector<std::string>& referenceFiles) {
  // Vector assignment replaces the old contents in one step, without first
  // default-constructing placeholder strings, and it remains correct even
  // if referenceFiles happens to be _referenceFiles itself.
  _referenceFiles = referenceFiles;
}

virtual void prepare(const vector<string>& referencefiles, const string& nbestfile) {
//dummy impl
/**
 * Process the given guessed text, corresponding to the given reference sindex
 * and add the appropriate statistics to the entry.
 *
 * This version is a stub: its body contains no statements, so entry is left
 * untouched. The function is virtual so that it can be overridden.
 **/
virtual void prepareStats(int sindex, const std::string& text, ScoreStats& entry) {
// Debugging aid, disabled: echo the text being processed.
//std::cerr << text << std::endl;
}
/**
* Calculate the score of the sentences corresponding to the list of candidate
* indices. Each index indicates the 1-best choice from the n-best list.
**/
virtual float score(const vector<unsigned int>& candidates) {
//dummy impl

/**
 * Attach the score data to use. Call this before score(), which refuses to
 * run while no score data is set. Only the pointer is stored.
 **/
void setScoreData(ScoreData* scoreData) {
  this->_scoreData = scoreData;
}

/**
 * Score the sentences selected by the list of candidate indices, where each
 * index picks the 1-best choice from the n-best list.
 *
 * Throws std::runtime_error when no score data has been set. This version
 * does not look at the candidates and always yields 0.
 **/
virtual float score(const std::vector<unsigned int>& candidates) {
  if (_scoreData) {
    return 0;
  }
  throw std::runtime_error("score data not loaded");
}

protected:
string _statsfile;
private:
std::string _name;
std::vector<std::string> _referenceFiles;
ScoreData* _scoreData;

};

Expand Down
18 changes: 18 additions & 0 deletions mert/test_scorer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include <iostream>
#include <vector>

#include "ScoreData.h"
#include "BleuScorer.h"

using namespace std;

int main(int argc, char** argv) {
cout << "Testing the scorer" << endl;
//BleuScorer bs("test-scorer-data/cppstats.feats.opt");;
vector<string> references;
references.push_back("test_scorer_data/reference.txt");
//bs.prepare(references, "test-scorer-data/nbest.out");
Scorer scorer("test");
ScoreData sd(scorer);
sd.loadnbest("test_scorer_data/nbest.out");
}
10,000 changes: 10,000 additions & 0 deletions mert/test_scorer_data/nbest.out

Large diffs are not rendered by default.

2,000 changes: 2,000 additions & 0 deletions mert/test_scorer_data/reference.txt

Large diffs are not rendered by default.

0 comments on commit b672d9f

Please sign in to comment.