Skip to content

Commit

Permalink
add The NTCIR-15 WWW-3 English Subtask
Browse files Browse the repository at this point in the history
  • Loading branch information
iorixxx committed Apr 8, 2021
1 parent 96f0f64 commit 4a37896
Show file tree
Hide file tree
Showing 52 changed files with 1,153,032 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
*.txt text
*.bat text eol=crlf
*.sh text eol=lf
*.xml text
*.qrels text

4 changes: 4 additions & 0 deletions scripts/features.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,8 @@ else
./run.sh TFDistribution -collection "$1" -task query
./run.sh TFDistribution -collection "$1" -task term

# ./run.sh Custom -collection $1 -task search
# ./eval.sh
# ./run.sh Custom -collection $1 -task export

fi
2 changes: 1 addition & 1 deletion src/main/java/edu/anadolu/cmdline/CustomTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ public void run(Properties props) throws Exception {
final String tag = path.getFileName().toString();

// search for a specific tag, skip the rest
if (this.tag != null && !tag.equals(this.tag)) continue;
if (!(this.tag != null && this.tag.toString().equals(tag))) continue;

try (Searcher searcher = new Searcher(path, dataset, 10000)) {
searcher.searchWithThreads(numThreads, modelBaseSet, fields, "base_spam_runs");
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/edu/anadolu/datasets/NTCIR.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.clueweb09.tracks.Track;
import org.clueweb09.tracks.WWW13;
import org.clueweb09.tracks.WWW14;
import org.clueweb09.tracks.WWW15;

import java.nio.file.Path;
import java.nio.file.Paths;
Expand All @@ -21,7 +22,7 @@ public class NTCIR extends DataSet {

NTCIR(String tfd_home) {
super(Collection.NTCIR, new Track[]{
new WWW13(tfd_home), new WWW14(tfd_home)
new WWW13(tfd_home), new WWW14(tfd_home), new WWW15(tfd_home)
}, tfd_home);
}

Expand Down
31 changes: 31 additions & 0 deletions src/main/java/org/clueweb09/tracks/WWW15.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.clueweb09.tracks;

import java.io.IOException;
import java.nio.file.Paths;

/**
 * Track for the NTCIR-15 WWW-3 English Subtask.
 * http://sakailab.com/www3english/
 *
 * <p>Extends {@link WWW13} and overrides the topic offset together with the
 * locations of the topic and relevance-judgement files, all of which are
 * resolved under the {@code topics-and-qrels} directory of the home folder.
 */
public class WWW15 extends WWW13 {

    /** Sub-directory of {@code home} holding the topic and qrels files. */
    private static final String RESOURCE_DIR = "topics-and-qrels";

    /**
     * Creates the track rooted at the given home directory.
     *
     * @param home base directory that contains the {@code topics-and-qrels} folder
     */
    public WWW15(String home) {
        // NOTE(review): this qrels path differs from the one read in populateQRelsMap();
        // confirm how the superclass uses the path it is given here.
        super(home, Paths.get(home, RESOURCE_DIR, "qrels.www.201-280.txt"));
    }

    /**
     * @return the constant 100; presumably a topic-id offset consumed by the
     *         superclass (verify against {@code WWW13})
     */
    @Override
    protected int offset() {
        return 100;
    }

    /**
     * Loads the information needs from the WWW-3 English topics file.
     *
     * @throws IOException if thrown by the delegated loader
     */
    @Override
    protected void populateInfoNeeds() throws IOException {
        populateInfoNeedsWWW(
                Paths.get(home, RESOURCE_DIR, "www3topics-E.xml"));
    }

    /**
     * Loads the relevance judgements from the WWW-3 qrels file.
     *
     * @throws IOException if thrown by the delegated loader
     */
    @Override
    protected void populateQRelsMap() throws IOException {
        // The qrels file was derived from the combined official file with:
        // tail -n -16677 ntcir15www2+3official.qrels >> ntcir15www3.qrels
        populateQRelsMap(
                Paths.get(home, RESOURCE_DIR, "ntcir15www3.qrels"));
    }
}
Loading

0 comments on commit 4a37896

Please sign in to comment.