Skip to content

Commit 5ee2c39

Browse files
committed
Update the number-finding pattern to accept numbers written with a leading decimal point and no integer part (e.g. ".6" instead of "0.6"); addresses #547
1 parent 93e6811 commit 5ee2c39

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/edu/stanford/nlp/ie/NumberNormalizer.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ public static void setVerbose(boolean verbose) {
8888
// Converts numbers in words to numeric form
8989
// works through trillions
9090
private static final Pattern digitsPattern = Pattern.compile("\\d+");
91-
private static final Pattern digitsPatternExtended = Pattern.compile("(\\d+\\.?\\d*)(dozen|score|hundred|thousand|million|billion|trillion)?"); // this is really just second-guessing the tokenizer
91+
private static final Pattern digitsPatternExtended = Pattern.compile("((?:\\d+\\.?\\d*)|(?:\\.\\d+))(dozen|score|hundred|thousand|million|billion|trillion)?"); // this is really just second-guessing the tokenizer
9292
private static final Pattern numPattern = Pattern.compile("[-+]?(?:\\d+(?:,\\d\\d\\d)*(?:\\.\\d*)?|\\.\\d+)");
9393
private static final Pattern numRangePattern = Pattern.compile("(" + numPattern.pattern() + ")-(" + numPattern.pattern() + ")");
9494
// private static final Pattern[] endUnitWordsPattern = new Pattern[endUnitWords.length];
@@ -372,7 +372,7 @@ public static Number wordToNumber(String str) {
372372
} else {
373373
throw new NumberFormatException("Bad number put into wordToNumber. Word is: \"" + curPart + "\", originally part of \"" + originalString + "\", piece # " + curIndex);
374374
}
375-
} else if (Character.isDigit(curPart.charAt(0))) {
375+
} else if (Character.isDigit(curPart.charAt(0)) || curPart.charAt(0) == '.') {
376376
if (curPart.endsWith("th") || curPart.endsWith("rd") || curPart.endsWith("nd") || curPart.endsWith("st")) {
377377
curPart = curPart.substring(0, curPart.length()-2).trim();
378378
}

0 commit comments

Comments
 (0)