Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for two-word tokens and upgrade to Java 8 #4

Merged
merged 4 commits into from
Jan 25, 2018
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Handle two-word tokens by separating them with an underscore
  • Loading branch information
vivekraghuram committed Nov 12, 2017
commit 1475cb1e06797a81baadcb73c64496f58a739b53
Original file line number | Diff line number | Diff line change
Expand Up @@ -290,14 +290,31 @@ public PriorityQueue<List<T>> getBestPartialParses(Utterance<Word, String> utter
morphToken = new ArrayList<ArrayList<MorphTokenPair>>();

ArrayList<String> unknowns = new ArrayList<String>();
int i_mod = 0;

for (int i = 0; i < utterance.size(); i++) {

// try to match orth with tokens/morphology (Celex)
try {
String wordform = utterance.getElement(i).getOrthography();

String wordform = utterance.getElement(i + i_mod).getOrthography();
Set<String> lems = this.morpher.getLemmas(wordform);
if (i + 1 < utterance.size()) {
String wordform1 = wordform;
String wordform2 = utterance.getElement(i + i_mod + 1).getOrthography();

try {
Set<String> lems2 = this.morpher.getLemmas(wordform1 + "_" + wordform2);
if (lems2.size() > 0 ) {
wordform = wordform1 + "_" + wordform2;
lems = lems2;
List<Word> elements = utterance.getElements();
elements.remove(i + 1);
elements.set(i, new Word(wordform));
utterance.setElements(elements);
}
} catch (GrammarException g) {}
}

constructionInput.add(new ArrayList<Construction>());
morphToken.add(new ArrayList<MorphTokenPair>());

Expand Down Expand Up @@ -331,11 +348,11 @@ public PriorityQueue<List<T>> getBestPartialParses(Utterance<Word, String> utter
}
}
} catch (GrammarException g) {
debugPrint("Unknown input lemma: " + utterance.getElement(i).getOrthography());
debugPrint("Unknown input lemma: " + utterance.getElement(i + i_mod).getOrthography());
}
// This block will handle numbers
try {
String potentialNumber = utterance.getElement(i).getOrthography();
String potentialNumber = utterance.getElement(i + i_mod).getOrthography();
try {
double value = Double.parseDouble(potentialNumber);
Construction cxn = grammar.getConstruction("NumberType");
Expand All @@ -361,7 +378,7 @@ public PriorityQueue<List<T>> getBestPartialParses(Utterance<Word, String> utter
}
//long n = potentialNumber
} catch (GrammarException g) {
debugPrint("Could not identify number in " + utterance.getElement(i).getOrthography() +
debugPrint("Could not identify number in " + utterance.getElement(i + i_mod).getOrthography() +
" or construction NumberType not found in grammar.");
}
// This block will handle lexical constructions
Expand All @@ -373,21 +390,21 @@ public PriorityQueue<List<T>> getBestPartialParses(Utterance<Word, String> utter
morphToken.add(new ArrayList<MorphTokenPair>());
}
List<Construction> lexicalCxns = grammar.getLexicalConstruction(StringUtilities.addQuotes(utterance.getElement(
i).getOrthography()));
i + i_mod).getOrthography()));
constructionInput.get(i).addAll(lexicalCxns);
for (int k = 0; k < lexicalCxns.size(); k++) {
//morphToken.get(i).add(mt);
morphToken.get(i).add(new MorphTokenPair(null, null));
}
} catch (GrammarException g) {
debugPrint("Unknown input lexeme: " + utterance.getElement(i).getOrthography());
debugPrint("Unknown input lexeme: " + utterance.getElement(i + i_mod).getOrthography());

List<Construction> lexicalCxns = grammar.getLexicalConstruction(StringUtilities
.addQuotes(ECGConstants.UNKNOWN_ITEM));
//Construction lexicalCxns = grammar.getConstruction("NounType");
if (constructionInput.get(i).isEmpty()) {
//constructionInput.get(i).add(lexicalCxns);
unknowns.add(utterance.getElement(i).getOrthography());
unknowns.add(utterance.getElement(i + i_mod).getOrthography());
constructionInput.get(i).add(lexicalCxns.get(0));
morphToken.get(i).add(new MorphTokenPair(null, null));
}
Expand Down