Skip to content

Commit

Permalink
updates on which NER views are allowed to include ONTONOTES
Browse files Browse the repository at this point in the history
mayhewsw committed Sep 27, 2018
1 parent e678549 commit 3a2b020
Showing 2 changed files with 23 additions and 88 deletions.
97 changes: 14 additions & 83 deletions src/main/java/io/github/mayhewsw/utils/HtmlGenerator.java
Original file line number Diff line number Diff line change
@@ -48,7 +48,13 @@ public static String getCopyableHTMLFromTA(TextAnnotation ta, Dictionary dict, b
IntPair sentspan = new IntPair(-1, -1);
String id = ta.getId();

View ner = ta.getView(ViewNames.NER_CONLL);
// The TextAnnotation must have an NER view: use NER_ONTONOTES if present, otherwise fall back to NER_CONLL.
View ner;
if(ta.hasView(ViewNames.NER_ONTONOTES)){
ner = ta.getView(ViewNames.NER_ONTONOTES);
} else{
ner = ta.getView(ViewNames.NER_CONLL);
}

View nersugg = null;
if(ta.hasView("NER_SUGGESTION")) {
@@ -148,7 +154,13 @@ public static String getHTMLfromTA(TextAnnotation ta, IntPair span, String id, S

IntPair sentspan = span;

View ner = ta.getView(ViewNames.NER_CONLL);
// The TextAnnotation must have an NER view: use NER_ONTONOTES if present, otherwise fall back to NER_CONLL.
View ner;
if(ta.hasView(ViewNames.NER_ONTONOTES)){
ner = ta.getView(ViewNames.NER_ONTONOTES);
} else{
ner = ta.getView(ViewNames.NER_CONLL);
}

View nersugg = null;
if(ta.hasView("NER_SUGGESTION")) {
@@ -273,85 +285,4 @@ public static String getHTMLfromTA(TextAnnotation ta, IntPair span, String id, S
return out;
}


// /**
// * Given a TA, this returns the HTML string.
// * @param
// * @return
// */
// public static String getHTMLfromTA_OLD(TextAnnotation ta, SessionData sd){
//
// View ner = ta.getView(ViewNames.NER_CONLL);
// View sents = ta.getView(ViewNames.SENTENCE);
//
// String[] text = ta.getTokenizedText().split(" ");
//
// ArrayList<String> suffixes = sd.suffixes;
//
// if(suffixes == null){
// new ArrayList<>();
// }else{
// suffixes.sort((String s1, String s2)-> s2.length()-s1.length());
// }
//
// // add spans to every word that is not a constituent.
// for(int t = 0; t < text.length; t++){
// String def = null;
// if(sd.dict != null && sd.dict.containsKey(text[t])){
// def = sd.dict.get(text[t]).get(0);
// }
//
// for(String suffix : suffixes){
// if(text[t].endsWith(suffix)){
// //System.out.println(text[t] + " ends with " + suffix);
// text[t] = text[t].substring(0, text[t].length()-suffix.length()) + "<span class='suffix'>" + suffix + "</span>";
// break;
// }
// }
//
// if(sd.showdefs && def != null) {
// text[t] = "<span class='token pointer def' id='tok-"+ t + "'>" + def + "</span>";
// }else{
// text[t] = "<span class='token pointer' id='tok-" + t + "'>" + text[t] + "</span>";
// }
// }
//
// for(Constituent c : ner.getConstituents()){
//
// int start = c.getStartSpan();
// int end = c.getEndSpan();
//
// // important to also include 'cons' class, as it is a keyword in the html
// text[start] = String.format("<span class='%s pointer cons' id='cons-%d-%d'>%s", c.getLabel(), start, end, text[start]);
// text[end-1] += "</span>";
// }
//
// List<Suggestion> suggestions = getdocsuggestions(ta, sd);
//
// for(Suggestion s : suggestions){
//
// int start = s.getStartSpan();
// int end = s.getEndSpan();
//
// // don't suggest spans that cover already tagged areas.
// if(ner.getConstituentsCoveringSpan(start, end).size() > 0) continue;
//
// System.out.println(start + " " + end + ": " + s.reason + " " + s);
//
// // important to also include 'cons' class, as it is a keyword in the html
// text[start] = String.format("<span class='pointer suggestion' data-toggle=\"tooltip\" title='%s' id='cons-%d-%d'>%s", s.reason, start, end, text[start]);
// text[end-1] += "</span>";
// }
//
// for(Constituent c : sents.getConstituents()){
// int start = c.getStartSpan();
// int end = c.getEndSpan();
// text[start] = "<p>" + text[start];
// text[end-1] += "</p>";
// }
//
// String out = StringUtils.join("", text);
// return out;
// }

}
14 changes: 9 additions & 5 deletions src/main/java/io/github/mayhewsw/utils/TalenCLI.java
Original file line number Diff line number Diff line change
@@ -127,14 +127,18 @@ public static void main(String[] args) throws ParseException, IOException, URISy
private static String loadTextAnnotation(String docid) throws Exception {
TextAnnotation ta = SerializationHelper.deserializeTextAnnotationFromFile(docid, true);

// add a dummy NER_CONLL view if the document does not already have one
if(!ta.hasView(ViewNames.NER_CONLL)){
View ner = new View(ViewNames.NER_CONLL, "DocumentController",ta,1.0);
ta.addView(ViewNames.NER_CONLL, ner);
}else{
// collect labels from the existing NER view (NER_CONLL, else NER_ONTONOTES); if neither exists, add a dummy NER_CONLL view
if(ta.hasView(ViewNames.NER_CONLL)){
for(Constituent c : ta.getView(ViewNames.NER_CONLL).getConstituents()){
labels.add(c.getLabel());
}
}else if(ta.hasView(ViewNames.NER_ONTONOTES)) {
for (Constituent c : ta.getView(ViewNames.NER_ONTONOTES).getConstituents()) {
labels.add(c.getLabel());
}
}else{
View ner = new View(ViewNames.NER_CONLL, "DocumentController",ta,1.0);
ta.addView(ViewNames.NER_CONLL, ner);
}

// Some style features are not wanted in this version. In particular: we want to

0 comments on commit 3a2b020

Please sign in to comment.