Skip to content

Commit

Permalink
rm AliasesV1
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian committed Aug 7, 2024
1 parent 03382fc commit b573311
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 1,444 deletions.
168 changes: 18 additions & 150 deletions components/locale/src/canonicalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,24 +213,15 @@ impl LocaleCanonicalizer {
Self::new_with_expander(LocaleExpander::new_extended())
}

// Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed
#[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)]
pub fn try_new_with_any_provider(
    provider: &(impl AnyProvider + ?Sized),
) -> Result<Self, DataError> {
    // Build the expander from the same provider first; if that fails the
    // error is returned unchanged and the alias data is never requested.
    LocaleExpander::try_new_with_any_provider(provider).and_then(|expander| {
        Self::try_new_with_expander_compat(&provider.as_downcasting(), expander)
    })
}

// Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed
#[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)]
#[cfg(feature = "serde")]
pub fn try_new_with_buffer_provider(
    provider: &(impl BufferProvider + ?Sized),
) -> Result<Self, DataError> {
    // Build the expander from the same provider first; if that fails the
    // error is returned unchanged and the alias data is never requested.
    LocaleExpander::try_new_with_buffer_provider(provider).and_then(|expander| {
        Self::try_new_with_expander_compat(&provider.as_deserializing(), expander)
    })
}
// Macro-generated constructors for the argument-less case, reporting failures as `DataError`.
// The list names `new` (marked `skip`), the any-provider and buffer-provider constructors,
// and `try_new_unstable`.
// NOTE(review): presumably the generated any/buffer constructors delegate to
// `try_new_unstable` and `skip` suppresses generating `new` — confirm against the
// `icu_provider` macro documentation.
icu_provider::gen_any_buffer_data_constructors!(() -> error: DataError,
functions: [
new: skip,
try_new_with_any_provider,
try_new_with_buffer_provider,
try_new_unstable,
Self,
]
);

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
Expand Down Expand Up @@ -259,26 +250,6 @@ impl LocaleCanonicalizer {
}
}

// Builds a canonicalizer from a provider that may carry either alias format.
//
// The `AliasesV2Marker` payload is requested first. If that load fails for any
// reason, the `AliasesV1Marker` payload is loaded instead and converted with
// `try_into`; errors from that fallback load or conversion are returned.
fn try_new_with_expander_compat<P>(
    provider: &P,
    expander: LocaleExpander,
) -> Result<Self, DataError>
where
    P: DataProvider<AliasesV2Marker> + DataProvider<AliasesV1Marker> + ?Sized,
{
    let aliases = match DataProvider::<AliasesV2Marker>::load(provider, Default::default()) {
        Ok(v2_response) => v2_response.payload,
        // The V2 error itself is discarded; only the fallback's outcome matters.
        Err(_) => DataProvider::<AliasesV1Marker>::load(provider, Default::default())?
            .payload
            .try_map_project(|v1, _| v1.try_into())?,
    };

    Ok(Self { aliases, expander })
}

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)]
pub fn try_new_with_expander_unstable<P>(
provider: &P,
Expand All @@ -292,22 +263,15 @@ impl LocaleCanonicalizer {
Ok(Self { aliases, expander })
}

#[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new_with_expander)]
pub fn try_new_with_expander_with_any_provider(
    provider: &(impl AnyProvider + ?Sized),
    options: LocaleExpander,
) -> Result<Self, DataError> {
    // `options` is the caller-supplied expander; it is passed through as-is.
    let downcasting = provider.as_downcasting();
    Self::try_new_with_expander_compat(&downcasting, options)
}

#[cfg(feature = "serde")]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER,Self::new_with_expander)]
pub fn try_new_with_expander_with_buffer_provider(
    provider: &(impl BufferProvider + ?Sized),
    options: LocaleExpander,
) -> Result<Self, DataError> {
    // `options` is the caller-supplied expander; it is passed through as-is.
    let deserializing = provider.as_deserializing();
    Self::try_new_with_expander_compat(&deserializing, options)
}
// Macro-generated constructors taking one extra argument (`options: LocaleExpander`),
// reporting failures as `DataError`. The list names `new_with_expander` (marked `skip`),
// the any-provider and buffer-provider constructors, and `try_new_with_expander_unstable`.
// NOTE(review): presumably the generated any/buffer constructors delegate to
// `try_new_with_expander_unstable` — confirm against the `icu_provider` macro documentation.
icu_provider::gen_any_buffer_data_constructors!((options: LocaleExpander) -> error: DataError,
functions: [
new_with_expander: skip,
try_new_with_expander_with_any_provider,
try_new_with_expander_with_buffer_provider,
try_new_with_expander_unstable,
Self,
]
);

/// The canonicalize method potentially updates a passed in locale in place
/// depending upon the results of running the canonicalization algorithm
Expand Down Expand Up @@ -623,99 +587,3 @@ mod test {
}
}
}

#[cfg(feature = "serde")]
#[cfg(test)]
mod tests {
    use super::*;
    use icu_locale_core::locale;

    /// Test provider that serves alias and likely-subtags data, except for
    /// the markers listed in `markers`, which are rejected with
    /// `MarkerNotFound`.
    ///
    /// This lets each test simulate a data source that only carries the old
    /// (V1) alias key, only the new (V2) alias key, or neither.
    struct RejectByKeyProvider {
        markers: Vec<DataMarkerInfo>,
    }

    impl AnyProvider for RejectByKeyProvider {
        /// Returns the payload for `marker`, or `MarkerNotFound` if the marker
        /// is either explicitly rejected or not one of the markers handled below.
        fn load_any(
            &self,
            marker: DataMarkerInfo,
            _: DataRequest,
        ) -> Result<AnyResponse, DataError> {
            use alloc::borrow::Cow;

            if self.markers.contains(&marker) {
                return Err(DataErrorKind::MarkerNotFound.with_str_context("rejected"));
            }

            let aliases_v2 = crate::provider::Baked::SINGLETON_ALIASES_V2_MARKER;
            let l = crate::provider::Baked::SINGLETON_LIKELY_SUBTAGS_FOR_LANGUAGE_V1_MARKER;
            let ext = crate::provider::Baked::SINGLETON_LIKELY_SUBTAGS_EXTENDED_V1_MARKER;
            let sr = crate::provider::Baked::SINGLETON_LIKELY_SUBTAGS_FOR_SCRIPT_REGION_V1_MARKER;

            let payload = if marker.path.hashed() == AliasesV1Marker::INFO.path.hashed() {
                // There is no baked V1 data, so a minimal V1 struct is built by
                // hand with just the one alias the tests look up.
                let aliases_v1 = AliasesV1 {
                    language_variants: zerovec::VarZeroVec::from(&[StrStrPair(
                        Cow::Borrowed("aa-saaho"),
                        Cow::Borrowed("ssy"),
                    )]),
                    ..Default::default()
                };
                DataPayload::<AliasesV1Marker>::from_owned(aliases_v1).wrap_into_any_payload()
            } else if marker.path.hashed() == AliasesV2Marker::INFO.path.hashed() {
                DataPayload::<AliasesV2Marker>::from_static_ref(aliases_v2).wrap_into_any_payload()
            } else if marker.path.hashed() == LikelySubtagsForLanguageV1Marker::INFO.path.hashed() {
                DataPayload::<LikelySubtagsForLanguageV1Marker>::from_static_ref(l)
                    .wrap_into_any_payload()
            } else if marker.path.hashed() == LikelySubtagsExtendedV1Marker::INFO.path.hashed() {
                DataPayload::<LikelySubtagsExtendedV1Marker>::from_static_ref(ext)
                    .wrap_into_any_payload()
            } else if marker.path.hashed()
                == LikelySubtagsForScriptRegionV1Marker::INFO.path.hashed()
            {
                DataPayload::<LikelySubtagsForScriptRegionV1Marker>::from_static_ref(sr)
                    .wrap_into_any_payload()
            } else {
                return Err(DataErrorKind::MarkerNotFound.into_error());
            };

            Ok(AnyResponse {
                payload,
                metadata: Default::default(),
            })
        }
    }

    /// Only the V1 alias key is available: construction must fall back to it.
    #[test]
    fn test_old_keys() {
        let provider = RejectByKeyProvider {
            markers: vec![AliasesV2Marker::INFO],
        };
        let lc = LocaleCanonicalizer::try_new_with_any_provider(&provider)
            .expect("should create with old keys");
        let mut locale = locale!("aa-saaho");
        assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
        assert_eq!(locale, locale!("ssy"));
    }

    /// Only the V2 alias key is available: construction must use it directly.
    #[test]
    fn test_new_keys() {
        let provider = RejectByKeyProvider {
            markers: vec![AliasesV1Marker::INFO],
        };
        let lc = LocaleCanonicalizer::try_new_with_any_provider(&provider)
            .expect("should create with new keys");
        let mut locale = locale!("aa-saaho");
        assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
        assert_eq!(locale, locale!("ssy"));
    }

    /// Neither alias key is available: construction must fail.
    #[test]
    fn test_no_keys() {
        let provider = RejectByKeyProvider {
            markers: vec![AliasesV1Marker::INFO, AliasesV2Marker::INFO],
        };
        assert!(
            LocaleCanonicalizer::try_new_with_any_provider(&provider).is_err(),
            "should not create: no data present"
        );
    }
}
139 changes: 0 additions & 139 deletions components/locale/src/provider/canonicalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,145 +8,6 @@ use icu_provider::prelude::*;
use tinystr::UnvalidatedTinyAsciiStr;
use zerovec::{VarZeroVec, ZeroMap, ZeroSlice};

#[icu_provider::data_struct(marker(AliasesV1Marker, "locid_transform/aliases@1", singleton))]
#[derive(PartialEq, Clone, Default)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_locale::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
/// This alias data is used for locale canonicalization. Each field defines a
/// mapping from an old identifier to a new identifier, based upon the rules
/// from <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>. The data
/// is stored in sorted order, allowing for binary search to identify rules to
/// apply. It is broken down into smaller vectors based upon some characteristic
/// of the data, to help avoid unnecessary searches. For example, the `sgn_region`
/// field contains aliases for sign language and region, so that it is not
/// necessary to search the data unless the input is a sign language.
///
/// The algorithm in tr35 is not guaranteed to terminate on data other than what
/// is currently in CLDR. For this reason, it is not a good idea to attempt to add
/// or modify aliases for use in this structure.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
// TODO: Use validated types as value types
#[derive(Debug)]
pub struct AliasesV1<'data> {
/// `[language(-variant)+\] -> [langid]`
/// This is not a map as it's searched linearly according to the canonicalization rules.
#[cfg_attr(feature = "serde", serde(borrow))]
pub language_variants: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>,
/// `sgn-[region] -> [language]`
#[cfg_attr(feature = "serde", serde(borrow))]
pub sgn_region: ZeroMap<'data, UnvalidatedRegion, Language>,
/// `[language{2}] -> [langid]`
#[cfg_attr(feature = "serde", serde(borrow))]
pub language_len2: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, UnvalidatedLanguageIdentifier>,
/// `[language{3}] -> [langid]`
#[cfg_attr(feature = "serde", serde(borrow))]
pub language_len3: ZeroMap<'data, UnvalidatedLanguage, UnvalidatedLanguageIdentifier>,
/// `[langid] -> [langid]`
/// This is not a map as it's searched linearly according to the canonicalization rules.
#[cfg_attr(feature = "serde", serde(borrow))]
pub language: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>,

/// `[script] -> [script]`
#[cfg_attr(feature = "serde", serde(borrow))]
pub script: ZeroMap<'data, UnvalidatedScript, Script>,

/// `[region{2}] -> [region]`
#[cfg_attr(feature = "serde", serde(borrow))]
pub region_alpha: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, Region>,
/// `[region{3}] -> [region]`
#[cfg_attr(feature = "serde", serde(borrow))]
pub region_num: ZeroMap<'data, UnvalidatedRegion, Region>,

/// `[region] -> [region]+`
#[cfg_attr(feature = "serde", serde(borrow))]
pub complex_region: ZeroMap<'data, UnvalidatedRegion, ZeroSlice<Region>>,

/// `[variant] -> [variant]`
#[cfg_attr(feature = "serde", serde(borrow))]
pub variant: ZeroMap<'data, UnvalidatedVariant, Variant>,

/// `[value{7}] -> [value{7}]`
#[cfg_attr(feature = "serde", serde(borrow))]
pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>,
}

#[cfg(feature = "datagen")]
impl<'data> From<AliasesV2<'data>> for AliasesV1<'data> {
    /// Converts V2 alias data into the V1 layout.
    ///
    /// Only `language_variants` changes shape: each V2 entry's first two
    /// components are joined with `-` into the single V1 key string, and the
    /// third component is carried over as the value. Every other field is
    /// moved across unchanged.
    fn from(value: AliasesV2<'data>) -> Self {
        let mut joined_variants: alloc::vec::Vec<StrStrPair> = alloc::vec::Vec::new();
        for ule in value.language_variants.iter() {
            let entry: LanguageStrStrPair = zerofrom::ZeroFrom::zero_from(ule);
            let key = alloc::format!("{0}-{1}", entry.0, entry.1);
            joined_variants.push(StrStrPair(key.into(), entry.2));
        }

        Self {
            language_variants: VarZeroVec::from(&joined_variants),
            sgn_region: value.sgn_region,
            language_len2: value.language_len2,
            language_len3: value.language_len3,
            language: value.language,
            script: value.script,
            region_alpha: value.region_alpha,
            region_num: value.region_num,
            complex_region: value.complex_region,
            variant: value.variant,
            subdivision: value.subdivision,
        }
    }
}

impl<'data> TryFrom<AliasesV1<'data>> for AliasesV2<'data> {
    type Error = icu_provider::DataError;

    /// Converts V1 alias data into the V2 layout.
    ///
    /// Only `language_variants` changes shape: each V1 key is split at its
    /// first `-` into a language part, which must parse as a `Language`, and
    /// a variant part. Every other field is moved across unchanged.
    ///
    /// # Errors
    ///
    /// Returns a custom `DataError` for the first entry whose key contains no
    /// `-`, or whose language part does not parse.
    fn try_from(value: AliasesV1<'data>) -> Result<Self, Self::Error> {
        #[allow(unused_imports)]
        use alloc::borrow::ToOwned;

        let mut split_variants = alloc::vec::Vec::new();
        for ule in value.language_variants.iter() {
            let entry: StrStrPair = zerofrom::ZeroFrom::zero_from(ule);

            let (lang, variant) = match entry.0.split_once('-') {
                Some(parts) => parts,
                None => return Err(DataError::custom("Each pair should be language-variant")),
            };
            let lang: Language = match lang.parse() {
                Ok(parsed) => parsed,
                Err(_) => {
                    return Err(DataError::custom(
                        "Language should be a valid language subtag",
                    ))
                }
            };

            // The variant slice borrows from the local `entry`, so it has to be
            // copied into an owned string before being stored.
            split_variants.push(LanguageStrStrPair(lang, variant.to_owned().into(), entry.1));
        }

        Ok(Self {
            language_variants: VarZeroVec::from(&split_variants),
            sgn_region: value.sgn_region,
            language_len2: value.language_len2,
            language_len3: value.language_len3,
            language: value.language,
            script: value.script,
            region_alpha: value.region_alpha,
            region_num: value.region_num,
            complex_region: value.complex_region,
            variant: value.variant,
            subdivision: value.subdivision,
        })
    }
}

#[icu_provider::data_struct(marker(AliasesV2Marker, "locid_transform/aliases@2", singleton))]
#[derive(PartialEq, Clone, Default)]
#[cfg_attr(
Expand Down
1 change: 0 additions & 1 deletion provider/registry/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ macro_rules! registry(
icu::list::provider::AndListV2Marker = "list/and@2",
icu::list::provider::OrListV2Marker = "list/or@2",
icu::list::provider::UnitListV2Marker = "list/unit@2",
icu::locale::provider::AliasesV1Marker = "locid_transform/aliases@1",
icu::locale::provider::AliasesV2Marker = "locid_transform/aliases@2",
icu::locale::provider::LikelySubtagsV1Marker = "locid_transform/likelysubtags@1",
icu::locale::provider::LikelySubtagsExtendedV1Marker = "locid_transform/likelysubtags_ext@1",
Expand Down
Loading

0 comments on commit b573311

Please sign in to comment.