Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,29 @@ repository = "https://github.com/allo-media/text2num-rs"
readme = "README.md"
exclude = [".circleci", ".gitignore"]

[features]
default = ["all_languages", "std"]
std = ["phf/std"]

# Languages
de = []
en = []
es = []
fr = []
it = []
nl = []
pt = []
all_languages = [
"de",
"en",
"es",
"fr",
"it",
"nl",
"pt",
]

[dependencies]
phf = { version = "0.8", features = ["macros"] }
phf = { version = "0.8", default-features = false, features = ["macros"] }
bitflags = "1.3"
daachorse = "1"
60 changes: 50 additions & 10 deletions src/digit_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
//!
//! Everywhere, the term `position` refers to decimal positions: 0 is units, 1 is tens, etc…

use std::ops::Deref;
use alloc::vec::Vec;

use core::fmt::Write as _;

use super::error::Error;
use super::lang::MorphologicalMarker;
Expand Down Expand Up @@ -223,20 +225,29 @@ impl DigitString {
}
}

/// Renders the digits as a base 10 string, leading zeroes included.
pub fn to_string(&self) -> String {
    // The buffer is known to contain a valid string, so the conversion cannot fail.
    let digits = std::str::from_utf8(self.buffer.as_slice()).unwrap();
    let mut out = String::with_capacity(self.leading_zeroes + digits.len());
    // Emit the leading zeroes first, then the significant digits.
    out.extend(std::iter::repeat('0').take(self.leading_zeroes));
    out.push_str(digits);
    out
}

/// Tells whether this digit string carries an ordinal marker.
///
/// Simply forwards to `is_ordinal` on the stored `marker`.
pub fn is_ordinal(&self) -> bool {
self.marker.is_ordinal()
}

/// Parse digits as an integer.
///
/// Only the bytes held in the buffer contribute to the value; `leading_zeroes`
/// is not consulted. An empty buffer yields `0`.
///
/// The result saturates at `u64::MAX` when the digits describe a number that
/// does not fit in a `u64`, instead of panicking (debug builds) or silently
/// wrapping around (release builds).
pub fn parse(&self) -> u64 {
    self.buffer.iter().fold(0u64, |acc, &digit| {
        // Assumes every byte is an ASCII digit, so `digit - b'0'` is its value.
        acc.saturating_mul(10)
            .saturating_add(u64::from(digit - b'0'))
    })
}

/// Parse digits as a fractional part of a float.
///
/// The integer value of the buffer is divided by ten raised to the total
/// number of digits (leading zeroes plus buffered digits).
pub fn parse_decimal(&self) -> f64 {
    // Leading zeroes shift the value further right of the decimal point.
    let exponent = (self.leading_zeroes + self.buffer.len()) as i32;
    let mantissa = self.parse() as f64;
    // NOTE(review): `f64::powi` is provided by `std`, not `core` — confirm this
    // still builds when the crate's `std` feature is disabled.
    mantissa / 10f64.powi(exponent)
}
}

impl Deref for DigitString {
impl core::ops::Deref for DigitString {
type Target = [u8];

fn deref(&self) -> &Self::Target {
Expand All @@ -250,9 +261,22 @@ impl Default for DigitString {
}
}

/// Formal base 10 representation: the leading zeroes followed by the buffered digits.
impl core::fmt::Display for DigitString {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Write one '0' per recorded leading zero, stopping at the first formatter error.
        (0..self.leading_zeroes).try_for_each(|_| f.write_char('0'))?;
        // The unwrap panics if the buffer is not valid UTF-8.
        let digits = core::str::from_utf8(self.buffer.as_slice()).unwrap();
        f.write_str(digits)
    }
}

#[cfg(test)]
mod tests {
use alloc::string::ToString as _;

use super::*;

#[test]
fn test_put_single() -> Result<(), Error> {
let mut builder = DigitString::new();
Expand Down Expand Up @@ -457,4 +481,20 @@ mod tests {
assert!(!dstring.is_position_free(3));
assert!(!dstring.is_position_free(5));
}

#[test]
fn test_int_parse() {
    // The integer value must come from the buffer alone, whatever the
    // number of leading zeroes recorded alongside it.
    let mut digits = DigitString::new();
    digits.leading_zeroes = 1000;
    digits.buffer = b"12345000".to_vec();
    let parsed = digits.parse();
    assert_eq!(parsed, 12345000);
}

#[test]
fn test_decimal_parse() {
    // Three leading zeroes followed by "123" read as the fractional part 0.000123.
    let mut digits = DigitString::new();
    digits.leading_zeroes = 3;
    digits.buffer = b"123".to_vec();
    let fraction = digits.parse_decimal();
    assert_eq!(fraction, 0.000123);
}
}
27 changes: 2 additions & 25 deletions src/lang/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,30 +219,7 @@ impl LangInterpreter for German {
None
}
}

/// Returns the textual representation of `b` together with its numeric value.
///
/// When `b` carries an ordinal marker, the marker is appended to the digits;
/// otherwise the plain digit string is returned.
fn format_and_value(&self, b: &DigitString) -> (String, f64) {
    let repr = b.to_string();
    // NOTE(review): panics if `repr` is not a valid float literal — presumably
    // `b` is never empty here; confirm against the callers.
    let val: f64 = repr.parse().unwrap();
    if let MorphologicalMarker::Ordinal(marker) = b.marker {
        // Reuse the string built above instead of formatting `b` a second time.
        (format!("{repr}{marker}"), val)
    } else {
        (repr, val)
    }
}

/// Formats a decimal number from its integral and fractional digit strings.
///
/// The returned text joins both parts with `sep`; the returned value is
/// obtained by parsing the same parts joined with a `.`.
fn format_decimal_and_value(
    &self,
    int: &DigitString,
    dec: &DigitString,
    sep: char,
) -> (String, f64) {
    let (int_part, dec_part) = (int.to_string(), dec.to_string());
    let text = format!("{int_part}{sep}{dec_part}");
    // The numeric value always uses '.' regardless of the display separator.
    let value = format!("{int_part}.{dec_part}").parse::<f64>().unwrap();
    (text, value)
}


fn get_morph_marker(&self, word: &str) -> MorphologicalMarker {
if word.ends_with("te") {
MorphologicalMarker::Ordinal(".")
Expand All @@ -265,7 +242,7 @@ mod tests {
($text:expr, $res:expr) => {
let f = German::new();
let res = text2digits($text, &f);
dbg!(&res);
crate::tests::dbg!(&res);
assert!(res.is_ok());
assert_eq!(res.unwrap(), $res)
};
Expand Down
21 changes: 5 additions & 16 deletions src/lang/en/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! English number interpreter

use alloc::{format, string::String, vec::Vec};

use crate::digit_string::DigitString;
use crate::error::Error;

Expand Down Expand Up @@ -128,27 +130,14 @@ impl LangInterpreter for English {
}
}

/// Returns the textual representation of `b` together with its numeric value.
///
/// When `b` carries an ordinal marker, the marker is appended to the digits;
/// otherwise the plain digit string is returned.
fn format_and_value(&self, b: &DigitString) -> (String, f64) {
    let repr = b.to_string();
    // NOTE(review): panics if `repr` is not a valid float literal — presumably
    // `b` is never empty here; confirm against the callers.
    let val: f64 = repr.parse().unwrap();
    if let MorphologicalMarker::Ordinal(marker) = b.marker {
        // Reuse the string built above instead of formatting `b` a second time.
        (format!("{repr}{marker}"), val)
    } else {
        (repr, val)
    }
}

fn format_decimal_and_value(
&self,
int: &DigitString,
dec: &DigitString,
_sep: char,
) -> (String, f64) {
let irepr = int.to_string();
let drepr = dec.to_string();
let frepr = format!("{irepr}.{drepr}");
let val = frepr.parse().unwrap();
(frepr, val)
let val = int.parse() as f64 + dec.parse_decimal();
(format!("{int}.{dec}"), val)
}

fn get_morph_marker(&self, word: &str) -> MorphologicalMarker {
Expand Down Expand Up @@ -219,7 +208,7 @@ mod tests {
($text:expr, $res:expr) => {
let f = English {};
let res = text2digits($text, &f);
dbg!(&res);
crate::tests::dbg!(&res);
assert!(res.is_ok());
assert_eq!(res.unwrap(), $res)
};
Expand Down
26 changes: 8 additions & 18 deletions src/lang/es/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
//! Spanish number interpreter

use alloc::{format, string::String};

use crate::digit_string::DigitString;
use crate::error::Error;

Expand Down Expand Up @@ -135,27 +138,14 @@ impl LangInterpreter for Spanish {
}

fn format_and_value(&self, b: &DigitString) -> (String, f64) {
let repr = b.to_string();
let val: f64 = repr.parse().unwrap();
let val: f64 = b.parse() as f64;
match b.marker {
MorphologicalMarker::Fraction(_) => (format!("1/{repr}"), val.recip()),
MorphologicalMarker::Ordinal(marker) => (format!("{repr}{marker}"), val),
MorphologicalMarker::None => (repr, val),
MorphologicalMarker::Fraction(_) => (format!("1/{b}"), val.recip()),
MorphologicalMarker::Ordinal(marker) => (format!("{b}{marker}"), val),
MorphologicalMarker::None => (alloc::string::ToString::to_string(&b), val),
}
}

/// Formats a decimal number from its integral and fractional digit strings.
///
/// The value is parsed from both parts joined with a `.`, while the returned
/// text joins them with `sep`.
fn format_decimal_and_value(
    &self,
    int: &DigitString,
    dec: &DigitString,
    sep: char,
) -> (String, f64) {
    let (int_part, dec_part) = (int.to_string(), dec.to_string());
    // The numeric value always uses '.' regardless of the display separator.
    let value = format!("{int_part}.{dec_part}").parse::<f64>().unwrap();
    let text = format!("{int_part}{sep}{dec_part}");
    (text, value)
}

fn get_morph_marker(&self, word: &str) -> MorphologicalMarker {
let sing = lemmatize(word).trim_start_matches("decimo");
let is_plur = word.ends_with('s');
Expand Down Expand Up @@ -194,7 +184,7 @@ mod tests {
($text:expr, $res:expr) => {
let f = Spanish {};
let res = text2digits($text, &f);
dbg!(&res);
crate::tests::dbg!(&res);
assert!(res.is_ok());
assert_eq!(res.unwrap(), $res)
};
Expand Down
27 changes: 4 additions & 23 deletions src/lang/fr/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
//! French number interpreter.
//!
//! It supports regional variants.

use alloc::vec::Vec;

use bitflags::bitflags;

use crate::digit_string::DigitString;
Expand Down Expand Up @@ -233,28 +236,6 @@ impl LangInterpreter for French {
}
}

/// Returns the textual representation of `b` together with its numeric value.
///
/// When `b` carries an ordinal marker, the marker is appended to the digits;
/// otherwise the plain digit string is returned.
fn format_and_value(&self, b: &DigitString) -> (String, f64) {
    let repr = b.to_string();
    // NOTE(review): panics if `repr` is not a valid float literal — presumably
    // `b` is never empty here; confirm against the callers.
    let val: f64 = repr.parse().unwrap();
    if let MorphologicalMarker::Ordinal(marker) = b.marker {
        // Reuse the string built above instead of formatting `b` a second time.
        (format!("{repr}{marker}"), val)
    } else {
        (repr, val)
    }
}

/// Formats a decimal number from its integral and fractional digit strings.
///
/// The value is parsed from both parts joined with a `.`, while the returned
/// text joins them with `sep`.
fn format_decimal_and_value(
    &self,
    int: &DigitString,
    dec: &DigitString,
    sep: char,
) -> (String, f64) {
    let (int_part, dec_part) = (int.to_string(), dec.to_string());
    // The numeric value always uses '.' regardless of the display separator.
    let value = format!("{int_part}.{dec_part}").parse::<f64>().unwrap();
    let text = format!("{int_part}{sep}{dec_part}");
    (text, value)
}

fn get_morph_marker(&self, word: &str) -> MorphologicalMarker {
if word.ends_with("ème") {
MorphologicalMarker::Ordinal("ème")
Expand Down Expand Up @@ -329,7 +310,7 @@ mod tests {
($text:expr, $res:expr) => {
let f = French {};
let res = text2digits($text, &f);
dbg!(&res);
crate::tests::dbg!(&res);
assert!(res.is_ok());
assert_eq!(res.unwrap(), $res)
};
Expand Down
22 changes: 1 addition & 21 deletions src/lang/it/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,26 +273,6 @@ impl LangInterpreter for Italian {
None
}
}
/// Returns the textual representation of `b` together with its numeric value.
///
/// When `b` carries an ordinal marker, the marker is appended to the digits;
/// otherwise the plain digit string is returned.
fn format_and_value(&self, b: &DigitString) -> (String, f64) {
    let repr = b.to_string();
    // NOTE(review): panics if `repr` is not a valid float literal — presumably
    // `b` is never empty here; confirm against the callers.
    let val: f64 = repr.parse().unwrap();
    if let MorphologicalMarker::Ordinal(marker) = b.marker {
        // Reuse the string built above instead of formatting `b` a second time.
        (format!("{repr}{marker}"), val)
    } else {
        (repr, val)
    }
}
/// Formats a decimal number from its integral and fractional digit strings.
///
/// The value is parsed from both parts joined with a `.`, while the returned
/// text joins them with `sep`.
fn format_decimal_and_value(
    &self,
    int: &DigitString,
    dec: &DigitString,
    sep: char,
) -> (String, f64) {
    let (int_part, dec_part) = (int.to_string(), dec.to_string());
    // The numeric value always uses '.' regardless of the display separator.
    let value = format!("{int_part}.{dec_part}").parse::<f64>().unwrap();
    let text = format!("{int_part}{sep}{dec_part}");
    (text, value)
}

fn is_linking(&self, word: &str) -> bool {
INSIGNIFICANT.contains(word)
Expand All @@ -308,7 +288,7 @@ mod tests {
($text:expr, $res:expr) => {
let f = Italian::default();
let res = text2digits($text, &f);
dbg!(&res);
crate::tests::dbg!(&res);
assert!(res.is_ok());
assert_eq!(res.unwrap(), $res)
};
Expand Down
Loading