Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing Option borrows (follow-up #1550) #1556

Merged
merged 7 commits into from
Jan 31, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ members = [
"tools/benchmark/binsize",
"tools/datagen",
"utils/codepointtrie",
"utils/deduplicating_array",
"utils/serde_utils",
"utils/fixed_decimal",
"utils/litemap",
"utils/pattern",
Expand Down
3 changes: 2 additions & 1 deletion components/datetime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ smallvec = "1.6"
displaydoc = { version = "0.2.3", default-features = false }
either = { version = "1.6.1", default-features = false }
num_enum = { version = "0.5", default_features = false }
serde_utils = { version = "0.5", path = "../../utils/serde_utils", optional = true }

[dev-dependencies]
criterion = "0.3"
Expand All @@ -67,7 +68,7 @@ bench = false # This option is required for Benchmark CI
std = ["icu_provider/std", "icu_locid/std", "icu_calendar/std"]
default = ["provider_serde"]
bench = []
provider_serde = ["serde", "litemap/serde_serialize", "smallvec/serde", "litemap/serde", "zerovec/serde", "tinystr/serde"]
provider_serde = ["serde", "litemap/serde_serialize", "smallvec/serde", "litemap/serde", "zerovec/serde", "tinystr/serde", "serde_utils"]
provider_transform_internals = ["std"]

[[bench]]
Expand Down
242 changes: 180 additions & 62 deletions components/datetime/src/provider/calendar/symbols.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,61 +42,14 @@ pub struct Eras<'data> {
}

macro_rules! symbols {
($name: ident, $expr: ty) => {
pub mod $name {
use super::*;
($name: ident, $symbols: item) => {
pub mod $name {
use super::*;

#[derive(Debug, PartialEq, Clone, Default, ZeroCopyFrom, Yokeable)]
#[cfg_attr(feature="provider_serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SymbolsV1<'data>(#[cfg_attr(feature="provider_serde", serde(borrow))] pub $expr);
#[derive(Debug, PartialEq, Clone, Default, ZeroCopyFrom, Yokeable)]
#[cfg_attr(feature="provider_serde", derive(serde::Serialize, serde::Deserialize))]
$symbols

symbols!();
}
};
($name: ident { $($tokens: tt)* }) => {
symbols!($name { $($tokens)* } -> ());
};
($name: ident { $element: ident: Option<$ty: ty>, $($tokens: tt)+ } -> ($($members:tt)*)) => {
symbols!($name { $($tokens)* } -> (
$($members)*
#[cfg_attr(feature = "provider_serde", serde(borrow))]
pub $element: Option<$ty>,
));
};
($name: ident { $element: ident: $ty: ty, $($tokens: tt)+ } -> ($($members:tt)*)) => {
symbols!($name { $($tokens)* } -> (
$($members)*
#[cfg_attr(feature = "provider_serde", serde(borrow))]
pub $element: $ty,
));
};
($name: ident { $element: ident: Option<$ty: ty> $(,)? } -> ($($members:tt)*)) => {
symbols!($name { } -> (
$($members)*
#[cfg_attr(feature = "provider_serde", serde(borrow))]
pub $element: Option<$ty>,
));
};
($name: ident { $element: ident: $ty: ty $(,)? } -> ($($members:tt)*)) => {
symbols!($name { } -> (
$($members)*
#[cfg_attr(feature = "provider_serde", serde(borrow))]
pub $element: $ty,
));
};
($name: ident { } -> ($($members: tt)*)) => {
pub mod $name {
use super::*;

#[derive(Debug, PartialEq, Clone, Default, Yokeable, ZeroCopyFrom)]
#[cfg_attr(feature="provider_serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SymbolsV1<'data> {
$($members)*
}
symbols!();
}
};
() => {
// UTS 35 specifies that `format` widths are mandatory
// except of `short`.
#[derive(Debug, PartialEq, Clone, Default, Yokeable, ZeroCopyFrom)]
Expand Down Expand Up @@ -134,18 +87,183 @@ macro_rules! symbols {
#[cfg_attr(feature = "provider_serde", serde(borrow))]
pub stand_alone: Option<StandAloneWidthsV1<'data>>,
}
};
}
}
};
}

symbols!(months, [Cow<'data, str>; 12]);
symbols!(
months,
pub struct SymbolsV1<'data>(
#[cfg_attr(
feature = "provider_serde",
serde(borrow)
)]
pub [Cow<'data, str>; 12],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: is this necessary for [Cow; N] as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

);
);

symbols!(weekdays, [Cow<'data, str>; 7]);
// Weekday symbols: expands `symbols!` into a `weekdays` module whose
// `SymbolsV1` is a tuple struct wrapping an array of seven `Cow<'data, str>`.
// The `serde(borrow)` attribute is only applied when the `provider_serde`
// feature is enabled; the `weekdays_borrows` test checks that entries come
// back as `Cow::Borrowed` after a bincode round trip.
symbols!(
weekdays,
pub struct SymbolsV1<'data>(
#[cfg_attr(
feature = "provider_serde",
serde(borrow)
)]
pub [Cow<'data, str>; 7],
);
);

symbols!(
day_periods {
am: Cow<'data, str>,
pm: Cow<'data, str>,
noon: Option<Cow<'data, str>>,
midnight: Option<Cow<'data, str>>,
day_periods,
pub struct SymbolsV1<'data> {
#[cfg_attr(feature = "provider_serde", serde(borrow))]
pub am: Cow<'data, str>,
#[cfg_attr(feature = "provider_serde", serde(borrow))]
pub pm: Cow<'data, str>,
#[cfg_attr(
feature = "provider_serde",
serde(borrow, deserialize_with = "serde_utils::option_of_cow::deserialize")
)]
pub noon: Option<Cow<'data, str>>,
#[cfg_attr(
feature = "provider_serde",
serde(borrow, deserialize_with = "serde_utils::option_of_cow::deserialize")
)]
pub midnight: Option<Cow<'data, str>>,
}
);

#[cfg(all(test, feature = "provider_serde"))]
mod test {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought: a cool test to perform (not in this PR) would be to use Rust's custom per-collection allocator support to write a wrapping allocator that keeps track of memory allocated, and then construct a postcard-based fs/static data provider and deserialize absolutely everything, printing out how much heap memory each component takes.

use super::*;

/// Builds a fully populated `DateSymbolsV1` out of owned strings and returns
/// its bincode encoding, which the borrow tests then deserialize.
fn serialize() -> Vec<u8> {
    /// Wraps a string literal in an owned `Cow`.
    fn owned(text: &str) -> Cow<'static, str> {
        Cow::Owned(text.to_string())
    }

    let month_names = months::SymbolsV1(
        [
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ]
        .map(owned),
    );

    let weekday_names = weekdays::SymbolsV1(
        [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        .map(owned),
    );

    // `midnight` is deliberately left empty so both `Some` and `None` are encoded.
    let period_names = day_periods::SymbolsV1 {
        am: owned("am"),
        pm: owned("pm"),
        noon: Some(owned("noon")),
        midnight: None,
    };

    // Every width of every context reuses the same symbol table.
    let symbols = DateSymbolsV1 {
        months: months::ContextsV1 {
            format: months::FormatWidthsV1 {
                abbreviated: month_names.clone(),
                narrow: month_names.clone(),
                short: Some(month_names.clone()),
                wide: month_names.clone(),
            },
            stand_alone: Some(months::StandAloneWidthsV1 {
                abbreviated: Some(month_names.clone()),
                narrow: Some(month_names.clone()),
                short: Some(month_names.clone()),
                wide: Some(month_names.clone()),
            }),
        },
        weekdays: weekdays::ContextsV1 {
            format: weekdays::FormatWidthsV1 {
                abbreviated: weekday_names.clone(),
                narrow: weekday_names.clone(),
                short: Some(weekday_names.clone()),
                wide: weekday_names.clone(),
            },
            stand_alone: Some(weekdays::StandAloneWidthsV1 {
                abbreviated: Some(weekday_names.clone()),
                narrow: Some(weekday_names.clone()),
                short: Some(weekday_names.clone()),
                wide: Some(weekday_names.clone()),
            }),
        },
        day_periods: day_periods::ContextsV1 {
            format: day_periods::FormatWidthsV1 {
                abbreviated: period_names.clone(),
                narrow: period_names.clone(),
                short: Some(period_names.clone()),
                wide: period_names.clone(),
            },
            stand_alone: Some(day_periods::StandAloneWidthsV1 {
                abbreviated: Some(period_names.clone()),
                narrow: Some(period_names.clone()),
                short: Some(period_names.clone()),
                wide: Some(period_names.clone()),
            }),
        },
        eras: Eras {
            names: ZeroMap::new(),
            abbr: ZeroMap::new(),
            narrow: ZeroMap::new(),
        },
    };

    bincode::serialize(&symbols).unwrap()
}

/// Month names must come back from bincode as borrowed strings, both in a
/// mandatory width (`narrow`) and in the optional `short` width.
#[test]
fn months_borrows() {
    let encoded = serialize();
    let symbols = bincode::deserialize::<DateSymbolsV1>(&encoded).unwrap();
    let format = &symbols.months.format;

    // Mandatory width, third entry ("March" in the fixture).
    assert!(matches!(format.narrow.0[2], Cow::Borrowed(_)));

    // Optional width, last entry ("December" in the fixture).
    let short = format.short.as_ref().unwrap();
    assert!(matches!(short.0[11], Cow::Borrowed(_)));
}

/// Weekday names must come back from bincode as borrowed strings, both in a
/// mandatory width (`narrow`) and in the optional `short` width.
#[test]
fn weekdays_borrows() {
    let encoded = serialize();
    let symbols = bincode::deserialize::<DateSymbolsV1>(&encoded).unwrap();
    let format = &symbols.weekdays.format;

    // Mandatory width, third entry ("Wednesday" in the fixture).
    assert!(matches!(format.narrow.0[2], Cow::Borrowed(_)));

    // Optional width, fifth entry ("Friday" in the fixture).
    let short = format.short.as_ref().unwrap();
    assert!(matches!(short.0[4], Cow::Borrowed(_)));
}

/// Day-period strings must come back from bincode as borrowed, for both the
/// optional `noon` field and the mandatory `am` field.
#[test]
fn day_periods_borrows() {
    let encoded = serialize();
    let symbols = bincode::deserialize::<DateSymbolsV1>(&encoded).unwrap();
    let format = &symbols.day_periods.format;

    // `Option<Cow<str>>` field, first in a mandatory width and then in the optional one.
    assert!(matches!(format.narrow.noon, Some(Cow::Borrowed(_))));
    let short = format.short.as_ref().unwrap();
    assert!(matches!(short.noon, Some(Cow::Borrowed(_))));

    // Plain `Cow<str>` field, same two widths.
    assert!(matches!(format.narrow.am, Cow::Borrowed(_)));
    assert!(matches!(short.am, Cow::Borrowed(_)));
}
}
4 changes: 2 additions & 2 deletions experimental/list_formatter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ icu_locid = { version = "0.4", path = "../../components/locid" }
icu_provider = { version = "0.4", path = "../../provider/core", features = ["macros"] }
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
zerovec = { version = "0.5", path = "../../utils/zerovec", features = ["yoke"] }
deduplicating_array = { version = "0.5", path = "../../utils/deduplicating_array", optional = true }
serde_utils = { version = "0.5", path = "../../utils/serde_utils", optional = true }
regex-automata = { version = "0.1", git = "https://github.com/burntsushi/regex-automata", rev = "d8eee1279fac79514f6e366b6976f97ad7b37174", default-features = false }
writeable = { version = "0.2.1", path = "../../utils/writeable" }

Expand All @@ -43,6 +43,6 @@ path = "src/lib.rs"
[features]
default = ["icu4x_human_readable_de"]
std = ["icu_provider/std", "icu_locid/std", "regex-automata/std"]
provider_serde = ["serde", "zerovec/serde", "deduplicating_array"]
provider_serde = ["serde", "zerovec/serde", "serde_utils"]
provider_transform_internals = ["provider_serde", "std"]
icu4x_human_readable_de = ["provider_serde", "regex-automata/alloc"]
2 changes: 1 addition & 1 deletion experimental/list_formatter/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pub mod key {
pub struct ListFormatterPatternsV1<'data>(
#[cfg_attr(
feature = "provider_serde",
serde(borrow, with = "deduplicating_array")
serde(borrow, with = "serde_utils::deduplicating_array")
)]
/// The patterns in the order start, middle, end, pair, short_start, short_middle,
/// short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

[package]
name = "deduplicating_array"
description = "A serde serialization strategy that uses PartialEq to reduce serialized size."
name = "serde_utils"
robertbastian marked this conversation as resolved.
Show resolved Hide resolved
description = "Various Serde serialization and deserialization strategies."
version = "0.5.0"
authors = ["The ICU4X Project Developers"]
edition = "2018"
Expand Down Expand Up @@ -34,4 +34,4 @@ bench = []
std = []

[[example]]
name = "postcard"
name = "deduplicate"
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#[derive(serde::Serialize, serde::Deserialize)]
struct DataStruct {
#[serde(with = "deduplicating_array")]
#[serde(with = "serde_utils::deduplicating_array")]
coordinates: [(f64, f64); 5],
}

Expand Down
3 changes: 3 additions & 0 deletions utils/serde_utils/src/array_of_cow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
Loading