Open
Description
With the base units supplied here:
https://github.com/stanfordnlp/CoreNLP/blob/f569983c8ad4e7890139b77775865cce1b82d4dc/src/edu/stanford/nlp/ie/qe/rules/units.txt
Meter, kilogram, and liter are extracted regardless of whether the numeric quantity and the unit are collapsed or separated by a space (e.g. 10m, 10 m, 3kg, 3 kg). Nearly all of the other unit types are extracted only
when the quantity and the unit are collapsed (e.g. 10lb, 12acre).
` AnnotationPipeline pipeline = new AnnotationPipeline();
// Reproduction pipeline: only a tokenizer, a sentence splitter and a POS tagger are added.
pipeline.addAnnotator(new TokenizerAnnotator(false,
"en"));
pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
pipeline.addAnnotator(new POSTaggerAnnotator(DefaultPaths.DEFAULT_POS_MODEL,
false));
// The NER combiner and quantifiable-entity normalizing annotators are disabled in this repro.
// pipeline.addAnnotator(new NERCombinerAnnotator(false));
//pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false));
// Extractor under test, initialised with a freshly constructed Options object.
QuantifiableEntityExtractor qex = new QuantifiableEntityExtractor();
qex.init(new Options());
// Each quantity appears once collapsed and once with a space between number and unit;
// the wording of each query records the outcome the reporter observed.
List<String> sampleQueries = ImmutableList.of(
"50lb cement works",
"50 lb cement does not extract",
"23 KG fish extracts",
"23kg fish extracts");
for (String sampleQuery : sampleQueries) {
// createDocument is not shown in this snippet; presumably it runs the pipeline over
// the query text and returns the resulting Annotation — TODO confirm.
Annotation annotations = createDocument(pipeline,
sampleQuery);
// NOTE(review): the matches are obtained before findAndAnnotateNumericExpressions is
// called on the same annotation — confirm that this call order is intended.
List<MatchedExpression> extract = qex.extract(annotations);
NumberNormalizer.findAndAnnotateNumericExpressions(annotations);
// Print the text and value of every expression matched for this query; a query with
// no matches prints nothing.
for (MatchedExpression matchedExpression : extract) {
System.out.println(sampleQuery +
": Got expression " +
matchedExpression.getText() +
" with value " +
matchedExpression.getValue());
}
}`