Skip to content

Commit

Permalink
Using std maps in datagen (unicode-org#2152)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian authored and samchen61661 committed Jul 12, 2022
1 parent b657a15 commit ad7f5f1
Show file tree
Hide file tree
Showing 39 changed files with 5,438 additions and 5,707 deletions.
3 changes: 0 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions components/datetime/src/provider/time_zones.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub struct TimeZoneFormatsV1<'data> {

/// TimeZone ID in BCP47 format
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Eq, Ord, PartialEq, PartialOrd, yoke::Yokeable, ULE)]
#[derive(Debug, Clone, Copy, Eq, Ord, PartialEq, PartialOrd, yoke::Yokeable, ULE, Hash)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct TimeZoneBcp47Id(pub TinyAsciiStr<8>);
Expand Down Expand Up @@ -75,7 +75,7 @@ impl<'a> zerovec::maps::ZeroMapKV<'a> for TimeZoneBcp47Id {

/// MetaZone ID in a compact format
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Eq, Ord, PartialEq, PartialOrd, yoke::Yokeable, ULE)]
#[derive(Debug, Clone, Copy, Eq, Ord, PartialEq, PartialOrd, yoke::Yokeable, ULE, Hash)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct MetaZoneId(pub TinyAsciiStr<4>);
Expand Down
6 changes: 3 additions & 3 deletions provider/adapters/src/filter/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ where
/// use icu_provider::prelude::*;
/// use icu_provider_adapters::filter::Filterable;
///
/// let provider = HelloWorldProvider::new_with_placeholder_data()
/// let provider = HelloWorldProvider
/// .filterable("Demo no-English filter")
/// .filter_by_langid(|langid| langid.language != language!("en"));
///
Expand Down Expand Up @@ -102,7 +102,7 @@ where
/// use icu_provider_adapters::filter::Filterable;
///
/// let allowlist = vec![langid!("de"), langid!("zh")];
/// let provider = HelloWorldProvider::new_with_placeholder_data()
/// let provider = HelloWorldProvider
/// .filterable("Demo German+Chinese filter")
/// .filter_by_langid_allowlist_strict(&allowlist);
///
Expand Down Expand Up @@ -162,7 +162,7 @@ where
/// use icu_provider::prelude::*;
/// use icu_provider_adapters::filter::Filterable;
///
/// let provider = HelloWorldProvider::new_with_placeholder_data()
/// let provider = HelloWorldProvider
/// .filterable("Demo require-langid filter")
/// .require_langid();
///
Expand Down
2 changes: 1 addition & 1 deletion provider/adapters/src/filter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
//! use icu_provider_adapters::filter::Filterable;
//!
//! // Only return German data from a HelloWorldProvider:
//! HelloWorldProvider::new_with_placeholder_data()
//! HelloWorldProvider
//! .filterable("Demo German-only filter")
//! .filter_by_langid(|langid| langid.language == language!("de"));
//! ```
Expand Down
10 changes: 5 additions & 5 deletions provider/adapters/src/fork/by_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use crate::helpers::result_is_err_missing_resource_key;
///
/// let forking_provider = ForkByKeyProvider(
/// DummyBufferProvider,
/// HelloWorldProvider::new_with_placeholder_data().into_json_provider(),
/// HelloWorldProvider.into_json_provider(),
/// );
///
/// let data_provider = forking_provider.as_deserializing();
Expand Down Expand Up @@ -74,11 +74,11 @@ use crate::helpers::result_is_err_missing_resource_key;
/// use icu_provider_adapters::fork::by_key::ForkByKeyProvider;
///
/// let forking_provider = ForkByKeyProvider(
/// HelloWorldProvider::new_with_placeholder_data()
/// HelloWorldProvider
/// .into_json_provider()
/// .filterable("Chinese")
/// .filter_by_langid(|langid| langid.language == language!("zh")),
/// HelloWorldProvider::new_with_placeholder_data()
/// HelloWorldProvider
/// .into_json_provider()
/// .filterable("German")
/// .filter_by_langid(|langid| langid.language == language!("de")),
Expand Down Expand Up @@ -204,11 +204,11 @@ where
///
/// let forking_provider = MultiForkByKeyProvider {
/// providers: vec![
/// HelloWorldProvider::new_with_placeholder_data()
/// HelloWorldProvider
/// .into_json_provider()
/// .filterable("Chinese")
/// .filter_by_langid(|langid| langid.language == language!("zh")),
/// HelloWorldProvider::new_with_placeholder_data()
/// HelloWorldProvider
/// .into_json_provider()
/// .filterable("German")
/// .filter_by_langid(|langid| langid.language == language!("de")),
Expand Down
2 changes: 0 additions & 2 deletions provider/blob/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ zerovec = { version = "0.7", path = "../../utils/zerovec", features = ["serde",

# For the export feature
log = { version = "0.4", optional = true }
litemap = { version = "0.4", path = "../../utils/litemap/", optional = true }

[dev-dependencies]
icu_locid = { version = "0.6", path = "../../components/locid", features = ["serde"] }
Expand All @@ -50,7 +49,6 @@ export = [
"log",
"postcard/alloc",
"std",
"litemap",
"icu_provider/datagen",
"zerovec/serde",
]
Expand Down
1 change: 0 additions & 1 deletion provider/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ displaydoc = { version = "0.2.3", default-features = false }
yoke = { version = "0.6.0", path = "../../utils/yoke", features = ["derive"] }
zerofrom = { version = "0.1.0", path = "../../utils/zerofrom", features = ["derive"] }
zerovec = { version = "0.7.0", path = "../../utils/zerovec", features = ["derive"]}
litemap = { version = "0.4.0", path = "../../utils/litemap" }
stable_deref_trait = { version = "1.2.0", default-features = false }

log = { version = "0.4", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion provider/core/src/buf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ impl DataMarker for BufferMarker {
/// use icu_provider::hello_world::*;
/// use icu_provider::prelude::*;
///
/// let buffer_provider = HelloWorldProvider::new_with_placeholder_data().into_json_provider();
/// let buffer_provider = HelloWorldProvider.into_json_provider();
///
/// let data_provider = buffer_provider.as_deserializing();
///
Expand Down
84 changes: 35 additions & 49 deletions provider/core/src/hello_world.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ use crate::zerofrom::{self, *};
use alloc::borrow::Cow;
use alloc::string::String;
use core::fmt::Debug;
use icu_locid::locale;
use litemap::LiteMap;

/// A struct containing "Hello World" in the requested language.
#[derive(Debug, PartialEq, Clone, Yokeable, ZeroFrom)]
Expand Down Expand Up @@ -61,9 +59,7 @@ impl ResourceMarker for HelloWorldV1Marker {
/// use icu_provider::hello_world::*;
/// use icu_provider::prelude::*;
///
/// let provider = HelloWorldProvider::new_with_placeholder_data();
///
/// let german_hello_world: DataPayload<HelloWorldV1Marker> = provider
/// let german_hello_world: DataPayload<HelloWorldV1Marker> = HelloWorldProvider
/// .load_resource(&DataRequest {
/// options: locale!("de").into(),
/// metadata: Default::default(),
Expand All @@ -75,42 +71,30 @@ impl ResourceMarker for HelloWorldV1Marker {
/// assert_eq!("Hallo Welt", german_hello_world.get().message);
/// ```
#[derive(Debug, PartialEq, Default)]
pub struct HelloWorldProvider {
map: LiteMap<ResourceOptions, Cow<'static, str>>,
}
pub struct HelloWorldProvider;

impl HelloWorldProvider {
pub const DATA: &'static [(icu_locid::Locale, &'static str)] = &[
(locale!("bn"), "ওহে বিশ্ব"),
(locale!("cs"), "Ahoj světe"),
(locale!("de"), "Hallo Welt"),
(locale!("el"), "Καλημέρα κόσμε"),
(locale!("en"), "Hello World"),
(locale!("eo"), "Saluton, Mondo"),
(locale!("fa"), "سلام دنیا‎"),
(locale!("fi"), "hei maailma"),
(locale!("is"), "Halló, heimur"),
(locale!("ja"), "こんにちは世界"),
(locale!("la"), "Ave, munde"),
(locale!("pt"), "Olá, mundo"),
(locale!("ro"), "Salut, lume"),
(locale!("ru"), "Привет, мир"),
(locale!("vi"), "Xin chào thế giới"),
(locale!("zh"), "你好世界"),
// Data from https://en.wiktionary.org/wiki/Hello_World#Translations
// Keep this sorted!
const DATA: &'static [(&'static str, &'static str)] = &[
("bn", "ওহে বিশ্ব"),
("cs", "Ahoj světe"),
("de", "Hallo Welt"),
("el", "Καλημέρα κόσμε"),
("en", "Hello World"),
("eo", "Saluton, Mondo"),
("fa", "سلام دنیا‎"),
("fi", "hei maailma"),
("is", "Halló, heimur"),
("ja", "こんにちは世界"),
("la", "Ave, munde"),
("pt", "Olá, mundo"),
("ro", "Salut, lume"),
("ru", "Привет, мир"),
("vi", "Xin chào thế giới"),
("zh", "你好世界"),
];

/// Creates a [`HelloWorldProvider`] pre-populated with hardcoded data from Wiktionary.
pub fn new_with_placeholder_data() -> HelloWorldProvider {
// Data from https://en.wiktionary.org/wiki/Hello_World#Translations
HelloWorldProvider {
map: Self::DATA
.iter()
.cloned()
.map(|(loc, value)| (loc.into(), value.into()))
.collect(),
}
}

/// Converts this provider into one that serves JSON blobs of the same data.
pub fn into_json_provider(self) -> HelloWorldJsonProvider {
HelloWorldJsonProvider(self)
Expand All @@ -122,11 +106,14 @@ impl ResourceProvider<HelloWorldV1Marker> for HelloWorldProvider {
&self,
req: &DataRequest,
) -> Result<DataResponse<HelloWorldV1Marker>, DataError> {
let data = self
.map
.get(&req.options)
.map(|s| HelloWorldV1 { message: s.clone() })
.ok_or_else(|| DataErrorKind::MissingLocale.with_key(HelloWorldV1Marker::KEY))?;
#[allow(clippy::indexing_slicing)] // binary_search
let data = Self::DATA
.binary_search_by(|(k, _)| req.options.strict_cmp(k.as_bytes()).reverse())
.map(|i| Self::DATA[i].1)
.map(|s| HelloWorldV1 {
message: Cow::Borrowed(s),
})
.map_err(|_| DataErrorKind::MissingLocale.with_req(HelloWorldV1Marker::KEY, req))?;
let metadata = DataResponseMetadata {
data_langid: Some(req.options.get_langid()),
..Default::default()
Expand Down Expand Up @@ -170,20 +157,19 @@ impl BufferProvider for HelloWorldJsonProvider {
#[cfg(feature = "datagen")]
impl IterableResourceProvider<HelloWorldV1Marker> for HelloWorldProvider {
fn supported_options(&self) -> Result<Vec<ResourceOptions>, DataError> {
Ok(self
.map
.iter_keys()
.cloned()
#[allow(clippy::unwrap_used)] // datagen
Ok(Self::DATA
.iter()
.map(|(s, _)| s.parse::<icu_locid::LanguageIdentifier>().unwrap())
.map(ResourceOptions::from)
.collect())
}
}

#[test]
fn test_iter() {
let provider = HelloWorldProvider::new_with_placeholder_data();
let mut supported_langids: Vec<ResourceOptions> = provider.supported_options().unwrap();
supported_langids.sort();
use icu_locid::locale;
let supported_langids: Vec<ResourceOptions> = HelloWorldProvider.supported_options().unwrap();

assert_eq!(
supported_langids,
Expand Down
13 changes: 13 additions & 0 deletions provider/core/src/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,19 @@ pub struct ResourceKey {
hash: ResourceKeyHash,
}

// I don't think this lint makes sense, see https://github.com/rust-lang/rust-clippy/issues/2627.
// It's not possible to violate the hash invariant (k1 == k2 -> hash(k1) == hash(k2)) without
// using non-determinism or unsafe code when PartialEq is derived.
#[allow(clippy::derive_hash_xor_eq)]

// Custom implementation to avoid hashing the path.
impl core::hash::Hash for ResourceKey {
#[inline]
fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
self.hash.hash(state)
}
}

#[cfg(test)]
static_assertions::const_assert_eq!(24, core::mem::size_of::<ResourceKey>());

Expand Down
1 change: 0 additions & 1 deletion provider/datagen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ icu_codepointtrie_builder = { path = "../../utils/codepointtrie/builder" }
icu_locid = { version = "0.6", path = "../../components/locid", features = ["std"]}
icu_uniset = { version = "0.5", path = "../../utils/uniset", features = ["serde"] }
itertools = "0.10"
litemap = { version = "0.4.0", path = "../../utils/litemap" }
log = "0.4"
rayon = "1.5"
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"] }
Expand Down
36 changes: 12 additions & 24 deletions provider/datagen/src/databake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use databake::{quote, CrateEnv, TokenStream};
use icu_provider::datagen::*;
use icu_provider::prelude::*;
use itertools::Itertools;
use litemap::LiteMap;
use rayon::prelude::*;
use std::collections::HashMap;
use std::fs::File;
Expand All @@ -31,8 +30,8 @@ pub(crate) struct BakedDataExporter {
mod_directory: PathBuf,
pretty: bool,
insert_feature_gates: bool,
// Temporary storage for put_payload: key -> (marker path, options -> bake)
data: Mutex<LiteMap<ResourceKey, (SyncTokenStream, LiteMap<ResourceOptions, SyncTokenStream>)>>,
// Temporary storage for put_payload: key -> (marker path, bake -> [options])
data: Mutex<HashMap<ResourceKey, (SyncTokenStream, HashMap<SyncTokenStream, Vec<String>>)>>,
// All mod.rs files in the module tree. Because generation is parallel,
// this will be non-deterministic and have to be sorted later.
mod_files: Mutex<HashMap<PathBuf, Vec<String>>>,
Expand Down Expand Up @@ -117,15 +116,15 @@ impl DataExporter for BakedDataExporter {
payload: &DataPayload<ExportMarker>,
) -> Result<(), DataError> {
let (payload, marker_type) = payload.tokenize(&self.dependencies);
let payload_string = payload.to_string();
let mut map = self.data.lock().expect("poison");
if !map.contains_key(&key) {
map.insert(key, (marker_type.to_string(), LiteMap::new()));
}
map.get_mut(&key)
.unwrap()
self.data
.lock()
.expect("poison")
.entry(key)
.or_insert_with(|| (marker_type.to_string(), Default::default()))
.1
.insert(options.clone(), payload_string);
.entry(payload.to_string())
.or_default()
.push(options.to_string());
Ok(())
}

Expand All @@ -136,21 +135,11 @@ impl DataExporter for BakedDataExporter {
.expect("poison")
.remove(&key)
.ok_or_else(|| DataError::custom("No data").with_key(key))?;
let mut sorted = raw.into_tuple_vec();
// Sort by payload bake so we can deduplicate.
sorted.sort_by(|(_, a), (_, b)| a.cmp(b));

let mut statics = Vec::new();
let mut all_options = Vec::new();

for (payload_bake_string, group) in &sorted
.into_iter()
.group_by(|(_, payload_bake_string)| payload_bake_string.clone())
{
// These are sorted because we stably sorted earlier.
let options = group
.map(|(options, _)| options.to_string())
.collect::<Vec<_>>();
for (payload_bake_string, mut options) in raw {
options.sort();
let ident_string = options
.iter()
.map(|options| {
Expand All @@ -171,7 +160,6 @@ impl DataExporter for BakedDataExporter {
statics.push((ident_string, payload_bake_string));
}

// Not necessary for functionality, but prettier
statics.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));

let statics = statics
Expand Down
4 changes: 2 additions & 2 deletions provider/datagen/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ macro_rules! create_datagen_provider {
icu_provider_adapters::make_forking_provider!(
icu_provider_adapters::fork::by_key::ForkByKeyProvider,
[
icu_provider::hello_world::HelloWorldProvider::new_with_placeholder_data(),
icu_provider::hello_world::HelloWorldProvider,
$(<$constructor>::from(__source)),+,
]
)
Expand Down Expand Up @@ -202,7 +202,7 @@ macro_rules! create_datagen_provider {
icu_provider_adapters::make_forking_provider!(
icu_provider_adapters::fork::by_key::ForkByKeyProvider,
[
icu_provider::hello_world::HelloWorldProvider::new_with_placeholder_data(),
icu_provider::hello_world::HelloWorldProvider,
$(<$constructor>::from(__source)),+,
]
)
Expand Down
Loading

0 comments on commit ad7f5f1

Please sign in to comment.