-
Notifications
You must be signed in to change notification settings - Fork 0
/
fbData.java
136 lines (112 loc) · 4.69 KB
/
fbData.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package com.company;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import org.lemurproject.galago.core.parse.Document;
import org.lemurproject.galago.core.parse.stem.Stemmer;
import org.lemurproject.galago.core.retrieval.GroupRetrieval;
import org.lemurproject.galago.core.retrieval.Retrieval;
import org.lemurproject.galago.core.retrieval.ScoredDocument;
import org.lemurproject.galago.core.retrieval.query.AnnotatedNode;
import org.lemurproject.galago.utility.Parameters;
/**
 * Gathers per-document term statistics for a list of initially retrieved documents.
 *
 * <p>On construction every document in the result list is fetched through the supplied
 * {@link Retrieval}. For each document that can be fetched, the number of terms is recorded as
 * its length and the occurrences of every term that is not in the exclusion set are counted.
 * The counts are available in two orientations: term -&gt; document -&gt; count and
 * document -&gt; term -&gt; count.
 *
 * <p>Side effect: the {@code annotation} of every fetched {@link ScoredDocument} is replaced by
 * a fresh {@link AnnotatedNode} whose {@code extraInfo} holds the document's term count as a
 * string.
 */
public class fbData {
    private static final Logger logger = Logger.getLogger("fbData");

    Parameters fbParams;
    List<ScoredDocument> initialResults;
    // Never assigned inside this class.
    Set<String> stemmedQueryTerms;
    Retrieval retrieval;
    Set<String> exclusionTerms;
    // Number of terms per fetched document.
    Map<ScoredDocument, Integer> docLength;
    // term -> (document -> occurrences of the term in that document)
    Map<String, Map<ScoredDocument, Integer>> termCounts;
    // document -> (term -> occurrences of the term in that document)
    Map<ScoredDocument, Map<String, Integer>> termCountsReverse;

    /**
     * Builds the statistics for {@code results} immediately.
     *
     * @param r              retrieval used to fetch the documents
     * @param exclusionTerms terms that are not counted; {@code null} is treated as "exclude nothing"
     * @param results        documents to analyse
     * @param fbParams       feedback parameters; the optional string {@code "group"} selects a
     *                       group when {@code r} is a {@link GroupRetrieval}
     * @throws IOException if fetching a document fails
     */
    public fbData(Retrieval r, Set<String> exclusionTerms, List<ScoredDocument> results, Parameters fbParams) throws IOException {
        this.initialResults = results;
        this.fbParams = fbParams;
        this.retrieval = r;
        this.exclusionTerms = exclusionTerms;
        docLength = new HashMap<>();
        termCounts = new HashMap<>();
        termCountsReverse = new HashMap<>();
        process();
    }

    /**
     * Fetches every initial result and fills {@code docLength}, {@code termCounts} and
     * {@code termCountsReverse}. Documents that cannot be fetched are logged and skipped.
     */
    private void process() throws IOException {
        // Instantiate the configured stemmer up front so that an unusable "rmStemmer" setting
        // fails at construction time. Its output is deliberately not used: filtering and
        // counting both operate on the term exactly as it appears in the document.
        // NOTE(review): confirm whether exclusion filtering was meant to use stemmed forms.
        getStemmer(null, retrieval);

        Document.DocumentComponents corpusParams = new Document.DocumentComponents(true, false, true);
        String group = fbParams.get("group", (String) null);
        boolean useGroup = group != null && retrieval instanceof GroupRetrieval;

        for (ScoredDocument sd : initialResults) {
            Document doc;
            if (useGroup) {
                doc = ((GroupRetrieval) retrieval).getDocument(sd.documentName, corpusParams, group);
            } else {
                doc = retrieval.getDocument(sd.documentName, corpusParams);
            }
            if (doc == null) {
                logger.info("Failed to retrieve document: " + sd.documentName + " -- RM skipping document.");
                continue;
            }

            List<String> docTerms = doc.terms;
            docLength.put(sd, docTerms.size());
            sd.annotation = new AnnotatedNode();
            sd.annotation.extraInfo = "" + docTerms.size();

            for (String term : docTerms) {
                if (exclusionTerms != null && exclusionTerms.contains(term)) {
                    continue; // on the blacklist
                }
                termCounts.computeIfAbsent(term, t -> new HashMap<>()).merge(sd, 1, Integer::sum);
            }
        }
        termCountsReverse = convert(termCounts);
    }

    /**
     * Re-keys a term -&gt; document -&gt; count map as document -&gt; term -&gt; count.
     *
     * @param in counts keyed by term first
     * @return a new map holding the same counts keyed by document first
     */
    private Map<ScoredDocument, Map<String, Integer>> convert(Map<String, Map<ScoredDocument, Integer>> in) {
        Map<ScoredDocument, Map<String, Integer>> counts = new HashMap<>();
        for (Map.Entry<String, Map<ScoredDocument, Integer>> byTerm : in.entrySet()) {
            String term = byTerm.getKey();
            for (Map.Entry<ScoredDocument, Integer> byDoc : byTerm.getValue().entrySet()) {
                counts.computeIfAbsent(byDoc.getKey(), d -> new HashMap<>()).put(term, byDoc.getValue());
            }
        }
        return counts;
    }

    /**
     * @return the internal map (not a copy) from each fetched document to its number of terms
     */
    public Map<ScoredDocument, Integer> getDocLength() {
        return docLength;
    }

    /**
     * @return the internal map (not a copy) of counts keyed by term, then by document
     */
    public Map<String, Map<ScoredDocument, Integer>> getTermCounts() {
        return termCounts;
    }

    /**
     * @return the map of counts keyed by document, then by term; a document whose terms were all
     *         excluded has no entry
     */
    public Map<ScoredDocument, Map<String, Integer>> getTermCountsReverse() {
        return termCountsReverse;
    }

    /**
     * @return the result list that was passed to the constructor
     */
    public List<ScoredDocument> getInitialResults() {
        return initialResults;
    }

    /**
     * Instantiates the stemmer named by the retrieval's global string parameter
     * {@code "rmStemmer"}, using that class's no-argument constructor.
     *
     * @param p   not used
     * @param ret retrieval whose global parameters are consulted
     * @return a new stemmer, or {@code null} when {@code "rmStemmer"} is not set as a string
     * @throws RuntimeException if the named class cannot be loaded, instantiated, or cast to
     *                          {@link Stemmer}; the underlying failure is the cause
     */
    public static Stemmer getStemmer(Parameters p, Retrieval ret) {
        Parameters globals = ret.getGlobalParameters();
        if (!globals.isString("rmStemmer")) {
            return null;
        }
        String rmstemmer = globals.getString("rmStemmer");
        try {
            return (Stemmer) Class.forName(rmstemmer).getConstructor().newInstance();
        } catch (Exception e) {
            throw new RuntimeException("Unable to instantiate rmStemmer class: " + rmstemmer, e);
        }
    }
}