Skip to content

Commit

Permalink
add docID validation method
Browse files Browse the repository at this point in the history
  • Loading branch information
iorixxx committed Mar 19, 2021
1 parent d4cb639 commit c936589
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 5 deletions.
11 changes: 8 additions & 3 deletions src/main/java/edu/anadolu/LearningToSelect.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ Map<String, Double> documentRelevanceScores(String model, InfoNeed need, int n)
if (!Files.exists(thePath) || !Files.isRegularFile(thePath) || !Files.isReadable(thePath))
throw new IllegalArgumentException(thePath + " does not exist or is not a directory.");

String queryID = Integer.toString(need.id()) + "\t";
String queryID = need.id() + "\t";

Map<String, Double> documentScoreMap = new HashMap<>();

Expand All @@ -74,7 +74,7 @@ Map<String, Double> documentRelevanceScores(String model, InfoNeed need, int n)

String docId = parts[2];

if (dataSet.getNoDocumentsID().length() != docId.length() && dataSet.getNoDocumentsID().length() + 1 != docId.length())
if (!dataSet.validateDocID(docId))
throw new RuntimeException("invalid doc id : " + docId);


Expand Down Expand Up @@ -383,6 +383,11 @@ public ModelScore evaluate(Evaluator evaluator) throws IOException {
int K = -1;
int N = -1;

if (tracks.length == 1) {
System.out.println("The current train/test split mechanism requires more than one tracks.");
return new ModelScore("LTS (k=" + K + ", n=" + N + ")", Double.NaN);
}

for (int n = 20; n <= 1000; n += 10) {

Map<Integer, Double> mean = emptyDoubleMap();
Expand All @@ -392,7 +397,7 @@ public ModelScore evaluate(Evaluator evaluator) throws IOException {
List<InfoNeed> trainingQueries = trainingQueries(residualNeeds, track);
List<InfoNeed> testQueries = testQueries(residualNeeds, track);

System.out.println("training set size : " + trainingQueries.size() + " test set size : " + testQueries.size());
System.out.println("n=" + n + " training set size : " + trainingQueries.size() + " test set size : " + testQueries.size());


if (trainingQueries.size() + testQueries.size() != residualNeeds.size())
Expand Down
6 changes: 4 additions & 2 deletions src/main/java/edu/anadolu/cmdline/FeatureTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ public final class FeatureTool extends CmdLineTool {
@Option(name = "-task", required = false, usage = "task to be executed")
private String task;

/**
 * Underscore-separated list of model identifiers used by this tool.
 * Callers obtain the individual entries with {@code DEFAULT_MODELS.split("_")}.
 */
public static final String DEFAULT_MODELS = "BM25k1.2b0.75_DirichletLMc2500.0_LGDc1.0_PL2c1.0_DPH_DFIC_DFRee_DLH13";

@Override
public String getShortDescription() {
return "Feature Extraction Tool";
Expand Down Expand Up @@ -65,7 +67,7 @@ void resultListFeatures(DataSet dataset) throws IOException {
// .filter(Files::isRegularFile)
// .collect(Collectors.toList());

String[] models = "BM25k1.2b0.75_DirichletLMc2500.0_LGDc1.0_PL2c1.0_DPH_DFIC_DFRee_DLH13".split("_");
String[] models = DEFAULT_MODELS.split("_");

Map<String, Map<Integer, List<SubmissionFile.Tuple>>> theMap = new HashMap<>();

Expand Down Expand Up @@ -133,7 +135,7 @@ public void run(Properties props) throws Exception {
DataSet dataset = CollectionFactory.dataset(collection, tfd_home);

if ("labels".equals(task)) {
Evaluator evaluator = new Evaluator(dataset, tag, measure, "all", evalDirectory(dataset), "OR");
Evaluator evaluator = new Evaluator(dataset, tag, measure, DEFAULT_MODELS, "evals"/*evalDirectory(dataset)*/, "OR");
List<InfoNeed> needs = evaluator.getNeeds();

// Print header
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/edu/anadolu/datasets/ClueWeb09A.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ public String getNoDocumentsID() {
return "clueweb09-en0000-00-00000";
}

/**
 * Checks whether the given document identifier belongs to this collection.
 *
 * @param docID document identifier to check; may be {@code null}
 * @return {@code true} if {@code docID} is non-null and starts with {@code "clueweb09-"};
 *         {@code false} otherwise (including for {@code null})
 */
@Override
public boolean validateDocID(String docID) {
    // Guard against null so this validator reports "invalid" instead of throwing NullPointerException.
    return docID != null && docID.startsWith("clueweb09-");
}

@Override
public boolean spamAvailable() {
return true;
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/edu/anadolu/datasets/ClueWeb09B.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ public String getNoDocumentsID() {
return "clueweb09-en0000-00-00000";
}

/**
 * Checks whether the given document identifier belongs to this collection.
 *
 * @param docID document identifier to check; may be {@code null}
 * @return {@code true} if {@code docID} is non-null and starts with {@code "clueweb09-"};
 *         {@code false} otherwise (including for {@code null})
 */
@Override
public boolean validateDocID(String docID) {
    // Guard against null so this validator reports "invalid" instead of throwing NullPointerException.
    return docID != null && docID.startsWith("clueweb09-");
}

@Override
public boolean spamAvailable() {
return true;
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/edu/anadolu/datasets/ClueWeb12B.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ public String getNoDocumentsID() {
return "clueweb12-0000wb-00-00000";
}

/**
 * Checks whether the given document identifier belongs to this collection.
 *
 * @param docID document identifier to check; may be {@code null}
 * @return {@code true} if {@code docID} is non-null and starts with {@code "clueweb12-"};
 *         {@code false} otherwise (including for {@code null})
 */
@Override
public boolean validateDocID(String docID) {
    // Guard against null so this validator reports "invalid" instead of throwing NullPointerException.
    return docID != null && docID.startsWith("clueweb12-");
}

@Override
public boolean spamAvailable() {
return true;
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/edu/anadolu/datasets/DataSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ public abstract class DataSet {

public abstract String getNoDocumentsID();

/**
 * Default document identifier check: accepts any identifier that contains
 * at least one character after trimming.
 *
 * @param docID document identifier to check; may be {@code null}
 * @return {@code false} if {@code docID} is {@code null} or trims to the empty string;
 *         {@code true} otherwise
 */
public boolean validateDocID(String docID) {
    return docID != null && !docID.trim().isEmpty();
}

public abstract boolean spamAvailable();

private final List<InfoNeed> needs;
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/edu/anadolu/datasets/MQ09.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ public String getNoDocumentsID() {
return "clueweb09-en0000-00-00000";
}

/**
 * Checks whether the given document identifier belongs to this collection.
 *
 * @param docID document identifier to check; may be {@code null}
 * @return {@code true} if {@code docID} is non-null and starts with {@code "clueweb09-"};
 *         {@code false} otherwise (including for {@code null})
 */
@Override
public boolean validateDocID(String docID) {
    // Guard against null so this validator reports "invalid" instead of throwing NullPointerException.
    return docID != null && docID.startsWith("clueweb09-");
}

@Override
public boolean spamAvailable() {
return true;
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/edu/anadolu/datasets/NTCIR.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ public String getNoDocumentsID() {
return "clueweb12-0000wb-00-00000";
}

/**
 * Checks whether the given document identifier belongs to this collection.
 *
 * @param docID document identifier to check; may be {@code null}
 * @return {@code true} if {@code docID} is non-null and starts with {@code "clueweb12-"};
 *         {@code false} otherwise (including for {@code null})
 */
@Override
public boolean validateDocID(String docID) {
    // Guard against null so this validator reports "invalid" instead of throwing NullPointerException.
    return docID != null && docID.startsWith("clueweb12-");
}

@Override
public boolean spamAvailable() {
return true;
Expand Down

0 comments on commit c936589

Please sign in to comment.