Skip to content

Commit

Permalink
Add PluralRules (#119)
Browse files Browse the repository at this point in the history
* Add icu::pluralrules

* Apply reviewers feedback

* Second round of feedback

* Remove float to PluralOperands

* cargo fmt

* Improve PluralCategory variant docs
  • Loading branch information
zbraniecki authored Aug 12, 2020
1 parent 9a296e8 commit c4dc724
Show file tree
Hide file tree
Showing 34 changed files with 4,096 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/actions-rs/grcov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ ignore:
- "/*"
- "C:/*"
- "../*"
- "components/pluralrules/src/data/*"
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ members = [
"components/uniset",
"components/locale",
"components/num-util",
"components/pluralrules",
]
46 changes: 46 additions & 0 deletions components/pluralrules/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
[package]
name = "icu-pluralrules"
description = "Unicode Plural Rules categorizer for numeric input."
version = "0.0.1"
authors = ["The ICU4X Project Developers"]
edition = "2018"
readme = "README.md"
repository = "https://github.com/unicode-org/icu4x"
license-file = "../../LICENSE"
categories = ["internationalization"]
include = [
"src/**/*",
"Cargo.toml",
"README.md"
]

[dependencies]
icu-locale = { path = "../locale" }
serde = { version = "1.0", optional = true, features = ["derive"] }
serde_json = {version = "1.0", optional = true }
bincode = { version = "1.3", optional = true }
serde-tuple-vec-map = { version = "1.0", optional = true }

[dev-dependencies]
criterion = "0.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = {version = "1.0" }
icu-locale = { path = "../locale", features = ["serde"] }

[features]
default = []
io = ["serde", "serde_json", "serde-tuple-vec-map"]
io-json = ["io"]
io-bincode = ["io", "bincode"]

[[bin]]
name = "generate_res"
required-features = ["io-json", "io-bincode"]

[[bench]]
name = "pluralrules"
harness = false

[[bench]]
name = "operands"
harness = false
46 changes: 46 additions & 0 deletions components/pluralrules/benches/fixtures/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use icu_locale::LanguageIdentifier;
use icu_pluralrules::PluralCategory;
use std::collections::HashMap;

use serde::Deserialize;

/// Numeric sample inputs for the benchmarks, deserialized from a JSON fixture.
///
/// Each field corresponds to a top-level key of the same name in the fixture.
#[derive(Deserialize)]
pub(crate) struct NumbersFixture {
/// Signed integer samples (JSON key `isize`), held as `i64`.
pub isize: Vec<i64>,
/// Unsigned integer samples (JSON key `usize`), held as `u64`.
pub usize: Vec<u64>,
/// Numbers in textual form (JSON key `string`), kept as strings so the
/// benchmarks can exercise string parsing.
pub string: Vec<String>,
}

/// Plural-rule fixture data, deserialized from a JSON fixture.
#[derive(Deserialize)]
pub(crate) struct PluralsFixture {
/// Per-locale rule strings, keyed by a locale string (JSON key `rules`).
pub rules: HashMap<String, LocalePluralRulesFixture>,
/// Language identifiers listed under the JSON key `langs`.
pub langs: Vec<LanguageIdentifier>,
}

#[derive(Deserialize)]
pub(crate) struct LocalePluralRulesFixture {
#[cfg_attr(feature = "serde", serde(rename = "pluralRule-count-zero"))]
pub zero: Option<String>,
#[cfg_attr(feature = "serde", serde(rename = "pluralRule-count-one"))]
pub one: Option<String>,
#[cfg_attr(feature = "serde", serde(rename = "pluralRule-count-two"))]
pub two: Option<String>,
#[cfg_attr(feature = "serde", serde(rename = "pluralRule-count-few"))]
pub few: Option<String>,
#[cfg_attr(feature = "serde", serde(rename = "pluralRule-count-many"))]
pub many: Option<String>,
}

impl LocalePluralRulesFixture {
    /// Looks up the rule string stored for `category`.
    ///
    /// Returns `None` when the fixture holds no rule for that category, and
    /// always for `PluralCategory::Other`, which has no backing field.
    // This module is included by more than one bench target and not all of
    // them call `get`, so the dead-code lint is silenced here.
    #[allow(dead_code)]
    pub(crate) fn get(&self, category: &PluralCategory) -> Option<&String> {
        let rule = match category {
            PluralCategory::Zero => &self.zero,
            PluralCategory::One => &self.one,
            PluralCategory::Two => &self.two,
            PluralCategory::Few => &self.few,
            PluralCategory::Many => &self.many,
            PluralCategory::Other => return None,
        };
        rule.as_ref()
    }
}
14 changes: 14 additions & 0 deletions components/pluralrules/benches/fixtures/numbers.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"isize": [
1, 2, 3, 4, 5, 25, 134, 910293019, 12, 1412, -12, 15, 2931, 31231, 3123, 13231, 91, 0, 231, -2,
-45, 33, 728, 2, 291, 24, 479, 291, 778, 919, 93
],
"usize": [
1, 2, 3, 4, 5, 25, 134, 910293019, 12, 1412, 12, 15, 2931, 31231, 3123, 13231, 91, 0, 231, 2,
45, 33, 728, 2, 291, 24, 479, 291, 778, 919, 93
],
"string": [
"1", "2", "3", "4", "5", "25", "134", "910293019", "-12", "1412", "12", "15", "2931", "31231", "3123", "13231", "91", "0", "231", "-2",
"0.0", "1.0", "1.5", "2.3", "-3.30", "-0.2", "2.25", "230.12", "12.254", "2.1", "4.44", "99.99", "100.222", "31.32", "100.00", "0.12", "-2.00"
]
}
11 changes: 11 additions & 0 deletions components/pluralrules/benches/fixtures/plurals.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"rules": {
"pl": {
"pluralRule-count-one": "i = 1 and v = 0",
"pluralRule-count-few": "v = 0 and i % 10 = 2..4 and i % 100 != 12..14",
"pluralRule-count-many": "v = 0 and i != 1 and i % 10 = 0..1 or v = 0 and i % 10 = 5..9 or v = 0 and i % 100 = 12..14",
"pluralRule-count-other": ""
}
},
"langs": ["uk", "de", "sk", "ar", "fr", "it", "en", "cs", "es", "zh"]
}
11 changes: 11 additions & 0 deletions components/pluralrules/benches/helpers/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
use std::fs::File;
use std::io::{BufReader, Error};

pub(crate) fn read_fixture<T>(path: &str) -> Result<T, Error>
where
T: serde::de::DeserializeOwned,
{
let file = File::open(path)?;
let reader = BufReader::new(file);
Ok(serde_json::from_reader(reader)?)
}
45 changes: 45 additions & 0 deletions components/pluralrules/benches/operands.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
mod fixtures;
mod helpers;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::convert::TryInto;

use icu_pluralrules::PluralOperands;

const DATA_PATH: &str = "./benches/fixtures/numbers.json";

/// Benchmarks construction of `PluralOperands` from unsigned integers,
/// signed integers and strings loaded from the numbers fixture.
///
/// Each constructed value is passed through `black_box` so the optimizer
/// cannot discard the conversion being measured; only the closure's return
/// value is shielded by `iter`, and these closures return `()`.
///
/// # Panics
///
/// Panics if the fixture cannot be read, or if a signed or string sample
/// fails to convert.
fn operands(c: &mut Criterion) {
    let data: fixtures::NumbersFixture =
        helpers::read_fixture(DATA_PATH).expect("Failed to read a fixture");

    c.bench_function("operands/create/usize", |b| {
        b.iter(|| {
            for s in &data.usize {
                let ops: PluralOperands = black_box(*s).into();
                black_box(ops);
            }
        })
    });

    c.bench_function("operands/create/isize", |b| {
        b.iter(|| {
            for s in &data.isize {
                let ops: PluralOperands = black_box(*s)
                    .try_into()
                    .expect("Failed to parse a number into an operands.");
                black_box(ops);
            }
        })
    });

    c.bench_function("operands/create/string", |b| {
        b.iter(|| {
            for s in &data.string {
                let ops: PluralOperands = black_box(s)
                    .parse()
                    .expect("Failed to parse a number into an operands.");
                black_box(ops);
            }
        })
    });
}

criterion_group!(benches, operands,);
criterion_main!(benches);
97 changes: 97 additions & 0 deletions components/pluralrules/benches/pluralrules.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
mod fixtures;
mod helpers;

use criterion::{black_box, criterion_group, criterion_main, Criterion};

use icu_pluralrules::PluralCategory;

/// Benchmarks the plural rule lexer and parser on the Polish rules from the
/// plurals fixture and, when the `io-json` feature is enabled, rule selection
/// and `PluralRules` construction backed by the JSON data provider.
///
/// Every measured result is passed through `black_box` so the optimizer
/// cannot discard the work; only the closure's return value is shielded by
/// `iter`, and these closures return `()`.
///
/// # Panics
///
/// Panics if a fixture cannot be read, if the fixture has no `"pl"` entry,
/// or (with `io-json`) if a `PluralRules` instance cannot be constructed.
fn plurals_bench(c: &mut Criterion) {
    use icu_pluralrules::rules::{parse, Lexer};

    let path = "./benches/fixtures/plurals.json";
    let data: fixtures::PluralsFixture =
        helpers::read_fixture(path).expect("Failed to read a fixture");

    let pl_data = data
        .rules
        .get("pl")
        .expect("Polish data should be in the fixture.");
    // Keep only the categories for which the fixture actually has a rule.
    let pl_data: Vec<&String> = PluralCategory::all()
        .filter_map(|cat| pl_data.get(cat))
        .collect();

    let mut group = c.benchmark_group("plurals/parser");

    group.bench_function("lex", |b| {
        b.iter(|| {
            for val in &pl_data {
                let lexer = Lexer::new(black_box(val.as_bytes()));
                // Counting drains the lexer; black-box the count so the
                // iteration is not elided.
                black_box(lexer.count());
            }
        })
    });

    group.bench_function("parse", |b| {
        b.iter(|| {
            for val in &pl_data {
                black_box(parse(black_box(val.as_bytes())));
            }
        })
    });

    group.finish();

    #[cfg(feature = "io-json")]
    {
        use criterion::BenchmarkId;
        use icu_locale::LanguageIdentifier;
        use icu_pluralrules::data::io::json::DataProvider;
        use icu_pluralrules::PluralOperands;
        use icu_pluralrules::{PluralRuleType, PluralRules};

        let path = "./benches/fixtures/numbers.json";
        let num_data: fixtures::NumbersFixture =
            helpers::read_fixture(path).expect("Failed to read a fixture");

        // Measures integer -> operands conversion together with selection.
        c.bench_function("plurals/convert+select/json", |b| {
            let loc: LanguageIdentifier = "pl".parse().unwrap();
            let dtp = DataProvider {};
            let pr = PluralRules::try_new(loc, PluralRuleType::Cardinal, &dtp).unwrap();
            b.iter(|| {
                for s in &num_data.usize {
                    black_box(pr.select(*s));
                }
            })
        });

        // Measures selection alone: operands are built once, outside `iter`.
        c.bench_function("plurals/select/json", |b| {
            let loc: LanguageIdentifier = "pl".parse().unwrap();
            let dtp = DataProvider {};
            let pr = PluralRules::try_new(loc, PluralRuleType::Cardinal, &dtp).unwrap();
            let operands: Vec<PluralOperands> =
                num_data.usize.iter().map(|d| (*d).into()).collect();
            b.iter(|| {
                for op in &operands {
                    black_box(pr.select(*op));
                }
            })
        });

        c.bench_with_input(
            BenchmarkId::new("plurals/construct/json", data.langs.len()),
            &data.langs,
            |b, langs| {
                let dtp = DataProvider {};
                b.iter(|| {
                    for lang in langs {
                        black_box(
                            PluralRules::try_new(lang.clone(), PluralRuleType::Ordinal, &dtp)
                                .unwrap(),
                        );
                        black_box(
                            PluralRules::try_new(lang.clone(), PluralRuleType::Cardinal, &dtp)
                                .unwrap(),
                        );
                    }
                });
            },
        );
    }
}

criterion_group!(benches, plurals_bench,);
criterion_main!(benches);
Binary file added components/pluralrules/data/ordinals.dat
Binary file not shown.
Loading

0 comments on commit c4dc724

Please sign in to comment.