From b5733111cdfa3f58fde8fe8ba93185131f29da2b Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Wed, 7 Aug 2024 13:57:40 +0200 Subject: [PATCH] rm AliasesV1 --- components/locale/src/canonicalizer.rs | 168 +-- .../locale/src/provider/canonicalizer.rs | 139 -- provider/registry/src/lib.rs | 1 - .../debug/locid_transform/aliases@1/und.json | 1134 ----------------- .../src/locale_canonicalizer/aliases.rs | 20 - 5 files changed, 18 insertions(+), 1444 deletions(-) delete mode 100644 provider/source/data/debug/locid_transform/aliases@1/und.json diff --git a/components/locale/src/canonicalizer.rs b/components/locale/src/canonicalizer.rs index 943efd0d6d6..cd4c0fafbde 100644 --- a/components/locale/src/canonicalizer.rs +++ b/components/locale/src/canonicalizer.rs @@ -213,24 +213,15 @@ impl LocaleCanonicalizer { Self::new_with_expander(LocaleExpander::new_extended()) } - // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed - #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)] - pub fn try_new_with_any_provider( - provider: &(impl AnyProvider + ?Sized), - ) -> Result { - let expander = LocaleExpander::try_new_with_any_provider(provider)?; - Self::try_new_with_expander_compat(&provider.as_downcasting(), expander) - } - - // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed - #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)] - #[cfg(feature = "serde")] - pub fn try_new_with_buffer_provider( - provider: &(impl BufferProvider + ?Sized), - ) -> Result { - let expander = LocaleExpander::try_new_with_buffer_provider(provider)?; - Self::try_new_with_expander_compat(&provider.as_deserializing(), expander) - } + icu_provider::gen_any_buffer_data_constructors!(() -> error: DataError, + functions: [ + new: skip, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] pub fn try_new_unstable

(provider: &P) -> Result @@ -259,26 +250,6 @@ impl LocaleCanonicalizer { } } - fn try_new_with_expander_compat

( - provider: &P, - expander: LocaleExpander, - ) -> Result - where - P: DataProvider + DataProvider + ?Sized, - { - let aliases = if let Ok(response) = - DataProvider::::load(provider, Default::default()) - { - response.payload - } else { - DataProvider::::load(provider, Default::default())? - .payload - .try_map_project(|st, _| st.try_into())? - }; - - Ok(Self { aliases, expander }) - } - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)] pub fn try_new_with_expander_unstable

( provider: &P, @@ -292,22 +263,15 @@ impl LocaleCanonicalizer { Ok(Self { aliases, expander }) } - #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new_with_expander)] - pub fn try_new_with_expander_with_any_provider( - provider: &(impl AnyProvider + ?Sized), - options: LocaleExpander, - ) -> Result { - Self::try_new_with_expander_compat(&provider.as_downcasting(), options) - } - - #[cfg(feature = "serde")] - #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER,Self::new_with_expander)] - pub fn try_new_with_expander_with_buffer_provider( - provider: &(impl BufferProvider + ?Sized), - options: LocaleExpander, - ) -> Result { - Self::try_new_with_expander_compat(&provider.as_deserializing(), options) - } + icu_provider::gen_any_buffer_data_constructors!((options: LocaleExpander) -> error: DataError, + functions: [ + new_with_expander: skip, + try_new_with_expander_with_any_provider, + try_new_with_expander_with_buffer_provider, + try_new_with_expander_unstable, + Self, + ] + ); /// The canonicalize method potentially updates a passed in locale in place /// depending up the results of running the canonicalization algorithm @@ -623,99 +587,3 @@ mod test { } } } - -#[cfg(feature = "serde")] -#[cfg(test)] -mod tests { - use super::*; - use icu_locale_core::locale; - - struct RejectByKeyProvider { - markers: Vec, - } - - impl AnyProvider for RejectByKeyProvider { - fn load_any( - &self, - marker: DataMarkerInfo, - _: DataRequest, - ) -> Result { - use alloc::borrow::Cow; - - println!("{:#?}", marker); - if self.markers.contains(&marker) { - return Err(DataErrorKind::MarkerNotFound.with_str_context("rejected")); - } - - let aliases_v2 = crate::provider::Baked::SINGLETON_ALIASES_V2_MARKER; - let l = crate::provider::Baked::SINGLETON_LIKELY_SUBTAGS_FOR_LANGUAGE_V1_MARKER; - let ext = crate::provider::Baked::SINGLETON_LIKELY_SUBTAGS_EXTENDED_V1_MARKER; - let sr = crate::provider::Baked::SINGLETON_LIKELY_SUBTAGS_FOR_SCRIPT_REGION_V1_MARKER; - - let payload = if marker.path.hashed() == AliasesV1Marker::INFO.path.hashed() { - let aliases_v1 = AliasesV1 { - language_variants: zerovec::VarZeroVec::from(&[StrStrPair( - Cow::Borrowed("aa-saaho"), - Cow::Borrowed("ssy"), - )]), - ..Default::default() - }; - DataPayload::::from_owned(aliases_v1).wrap_into_any_payload() - } else if marker.path.hashed() == AliasesV2Marker::INFO.path.hashed() { - DataPayload::::from_static_ref(aliases_v2).wrap_into_any_payload() - } else if marker.path.hashed() == LikelySubtagsForLanguageV1Marker::INFO.path.hashed() { - DataPayload::::from_static_ref(l) - .wrap_into_any_payload() - } else if marker.path.hashed() == LikelySubtagsExtendedV1Marker::INFO.path.hashed() { - DataPayload::::from_static_ref(ext) - .wrap_into_any_payload() - } else if marker.path.hashed() - == LikelySubtagsForScriptRegionV1Marker::INFO.path.hashed() - { - DataPayload::::from_static_ref(sr) - .wrap_into_any_payload() - } else { - return Err(DataErrorKind::MarkerNotFound.into_error()); - }; - - Ok(AnyResponse { - payload, - metadata: Default::default(), - }) - } - } - - #[test] - fn test_old_keys() { - let provider = RejectByKeyProvider { - markers: vec![AliasesV2Marker::INFO], - }; - let lc = LocaleCanonicalizer::try_new_with_any_provider(&provider) - .expect("should create with old keys"); - let mut locale = locale!("aa-saaho"); - assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); - assert_eq!(locale, locale!("ssy")); - } - - #[test] - fn test_new_keys() { - let provider = RejectByKeyProvider { - markers: vec![AliasesV1Marker::INFO], - }; - let lc = LocaleCanonicalizer::try_new_with_any_provider(&provider) - .expect("should create with old keys"); - let mut locale = locale!("aa-saaho"); - assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); - assert_eq!(locale, locale!("ssy")); - } - - #[test] - fn test_no_keys() { - let provider = RejectByKeyProvider { - markers: vec![AliasesV1Marker::INFO, AliasesV2Marker::INFO], - }; - if LocaleCanonicalizer::try_new_with_any_provider(&provider).is_ok() { - panic!("should not create: no data present") - }; - } -} diff --git a/components/locale/src/provider/canonicalizer.rs b/components/locale/src/provider/canonicalizer.rs index b459d67dd6b..d84fe6db983 100644 --- a/components/locale/src/provider/canonicalizer.rs +++ b/components/locale/src/provider/canonicalizer.rs @@ -8,145 +8,6 @@ use icu_provider::prelude::*; use tinystr::UnvalidatedTinyAsciiStr; use zerovec::{VarZeroVec, ZeroMap, ZeroSlice}; -#[icu_provider::data_struct(marker(AliasesV1Marker, "locid_transform/aliases@1", singleton))] -#[derive(PartialEq, Clone, Default)] -#[cfg_attr( - feature = "datagen", - derive(serde::Serialize, databake::Bake), - databake(path = icu_locale::provider), -)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize))] -#[yoke(prove_covariance_manually)] -/// This alias data is used for locale canonicalization. Each field defines a -/// mapping from an old identifier to a new identifier, based upon the rules in -/// from . The data -/// is stored in sorted order, allowing for binary search to identify rules to -/// apply. It is broken down into smaller vectors based upon some characteristic -/// of the data, to help avoid unnecessary searches. For example, the `sgn_region` -/// field contains aliases for sign language and region, so that it is not -/// necessary to search the data unless the input is a sign language. -/// -/// The algorithm in tr35 is not guaranteed to terminate on data other than what -/// is currently in CLDR. For this reason, it is not a good idea to attempt to add -/// or modify aliases for use in this structure. -/// -///

-/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, -/// including in SemVer minor releases. While the serde representation of data structs is guaranteed -/// to be stable, their Rust representation might not be. Use with caution. -///
-// TODO: Use validated types as value types -#[derive(Debug)] -pub struct AliasesV1<'data> { - /// `[language(-variant)+\] -> [langid]` - /// This is not a map as it's searched linearly according to the canonicalization rules. - #[cfg_attr(feature = "serde", serde(borrow))] - pub language_variants: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>, - /// `sgn-[region] -> [language]` - #[cfg_attr(feature = "serde", serde(borrow))] - pub sgn_region: ZeroMap<'data, UnvalidatedRegion, Language>, - /// `[language{2}] -> [langid]` - #[cfg_attr(feature = "serde", serde(borrow))] - pub language_len2: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, UnvalidatedLanguageIdentifier>, - /// `[language{3}] -> [langid]` - #[cfg_attr(feature = "serde", serde(borrow))] - pub language_len3: ZeroMap<'data, UnvalidatedLanguage, UnvalidatedLanguageIdentifier>, - /// `[langid] -> [langid]` - /// This is not a map as it's searched linearly according to the canonicalization rules. - #[cfg_attr(feature = "serde", serde(borrow))] - pub language: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>, - - /// `[script] -> [script]` - #[cfg_attr(feature = "serde", serde(borrow))] - pub script: ZeroMap<'data, UnvalidatedScript, Script>, - - /// `[region{2}] -> [region]` - #[cfg_attr(feature = "serde", serde(borrow))] - pub region_alpha: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, Region>, - /// `[region{3}] -> [region]` - #[cfg_attr(feature = "serde", serde(borrow))] - pub region_num: ZeroMap<'data, UnvalidatedRegion, Region>, - - /// `[region] -> [region]+` - #[cfg_attr(feature = "serde", serde(borrow))] - pub complex_region: ZeroMap<'data, UnvalidatedRegion, ZeroSlice>, - - /// `[variant] -> [variant]` - #[cfg_attr(feature = "serde", serde(borrow))] - pub variant: ZeroMap<'data, UnvalidatedVariant, Variant>, - - /// `[value{7}] -> [value{7}]` - #[cfg_attr(feature = "serde", serde(borrow))] - pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>, -} - -#[cfg(feature = "datagen")] -impl<'data> From> for AliasesV1<'data> { - fn from(value: AliasesV2<'data>) -> Self { - let language_variants = value - .language_variants - .iter() - .map(zerofrom::ZeroFrom::zero_from) - .map(|v: LanguageStrStrPair| { - let langid = alloc::format!("{0}-{1}", v.0, v.1); - StrStrPair(langid.into(), v.2) - }) - .collect::>(); - - Self { - language_variants: VarZeroVec::from(&language_variants), - sgn_region: value.sgn_region, - language_len2: value.language_len2, - language_len3: value.language_len3, - language: value.language, - script: value.script, - region_alpha: value.region_alpha, - region_num: value.region_num, - complex_region: value.complex_region, - variant: value.variant, - subdivision: value.subdivision, - } - } -} - -impl<'data> TryFrom> for AliasesV2<'data> { - type Error = icu_provider::DataError; - - fn try_from(value: AliasesV1<'data>) -> Result { - #[allow(unused_imports)] - use alloc::borrow::ToOwned; - - let language_variants = value - .language_variants - .iter() - .map(zerofrom::ZeroFrom::zero_from) - .map(|v: StrStrPair| -> Result { - let (lang, variant) = - v.0.split_once('-') - .ok_or_else(|| DataError::custom("Each pair should be language-variant"))?; - let lang: Language = lang - .parse() - .map_err(|_| DataError::custom("Language should be a valid language subtag"))?; - Ok(LanguageStrStrPair(lang, variant.to_owned().into(), v.1)) - }) - .collect::, _>>()?; - - Ok(Self { - language_variants: VarZeroVec::from(&language_variants), - sgn_region: value.sgn_region, - language_len2: value.language_len2, - language_len3: value.language_len3, - language: value.language, - script: value.script, - region_alpha: value.region_alpha, - region_num: value.region_num, - complex_region: value.complex_region, - variant: value.variant, - subdivision: value.subdivision, - }) - } -} - #[icu_provider::data_struct(marker(AliasesV2Marker, "locid_transform/aliases@2", singleton))] #[derive(PartialEq, Clone, Default)] #[cfg_attr( diff --git a/provider/registry/src/lib.rs b/provider/registry/src/lib.rs index 828be62842a..b1ed2cebaee 100644 --- a/provider/registry/src/lib.rs +++ b/provider/registry/src/lib.rs @@ -75,7 +75,6 @@ macro_rules! registry( icu::list::provider::AndListV2Marker = "list/and@2", icu::list::provider::OrListV2Marker = "list/or@2", icu::list::provider::UnitListV2Marker = "list/unit@2", - icu::locale::provider::AliasesV1Marker = "locid_transform/aliases@1", icu::locale::provider::AliasesV2Marker = "locid_transform/aliases@2", icu::locale::provider::LikelySubtagsV1Marker = "locid_transform/likelysubtags@1", icu::locale::provider::LikelySubtagsExtendedV1Marker = "locid_transform/likelysubtags_ext@1", diff --git a/provider/source/data/debug/locid_transform/aliases@1/und.json b/provider/source/data/debug/locid_transform/aliases@1/und.json deleted file mode 100644 index f6b946136ad..00000000000 --- a/provider/source/data/debug/locid_transform/aliases@1/und.json +++ /dev/null @@ -1,1134 +0,0 @@ -{ - "language_variants": [ - [ - "aa-saaho", - "ssy" - ], - [ - "art-lojban", - "jbo" - ], - [ - "cel-gaulish", - "xtg" - ], - [ - "hy-arevmda", - "hyw" - ], - [ - "no-bokmal", - "nb" - ], - [ - "no-nynorsk", - "nn" - ], - [ - "und-hepburn-heploc", - "und-alalc97" - ], - [ - "zh-guoyu", - "zh" - ], - [ - "zh-hakka", - "hak" - ], - [ - "zh-xiang", - "hsn" - ], - [ - "und-aaland", - "und-AX" - ], - [ - "und-arevela", - "und" - ], - [ - "und-arevmda", - "und" - ], - [ - "und-bokmal", - "und" - ], - [ - "und-hakka", - "und" - ], - [ - "und-lojban", - "und" - ], - [ - "und-nynorsk", - "und" - ], - [ - "und-saaho", - "und" - ], - [ - "und-xiang", - "und" - ] - ], - "sgn_region": { - "BR": "bzs", - "CO": "csn", - "DE": "gsg", - "DK": "dsl", - "ES": "ssp", - "FR": "fsl", - "GB": "bfi", - "GR": "gss", - "IE": "isg", - "IT": "ise", - "JP": "jsl", - "MX": "mfs", - "NI": "ncs", - "NL": "dse", - "NO": "nsi", - "PT": "psr", - "SE": "swl", - "US": "ase", - "ZA": "sfs" - }, - "language_len2": { - "bh": "bho", - "in": "id", - "iw": "he", - "ji": "yi", - "jw": "jv", - "mo": "ro", - "sh": "sr-Latn", - "tl": "fil", - "tw": "ak" - }, - "language_len3": { - "aam": "aas", - "aar": "aa", - "abk": "ab", - "adp": "dz", - "afr": "af", - "agp": "apf", - "ais": "ami", - "ajp": "apc", - "ajt": "aeb", - "aju": "jrb", - "aka": "ak", - "alb": "sq", - "als": "sq", - "amh": "am", - "ara": "ar", - "arb": "ar", - "arg": "an", - "arm": "hy", - "asd": "snz", - "asm": "as", - "aue": "ktz", - "ava": "av", - "ave": "ae", - "aym": "ay", - "ayr": "ay", - "ayx": "nun", - "aze": "az", - "azj": "az", - "bak": "ba", - "bam": "bm", - "baq": "eu", - "baz": "nvo", - "bcc": "bal", - "bcl": "bik", - "bel": "be", - "ben": "bn", - "bgm": "bcg", - "bhk": "fbl", - "bic": "bir", - "bih": "bho", - "bis": "bi", - "bjd": "drl", - "bjq": "bzc", - "bkb": "ebk", - "blg": "iba", - "bod": "bo", - "bos": "bs", - "bre": "br", - "btb": "beb", - "bul": "bg", - "bur": "my", - "bxk": "luy", - "bxr": "bua", - "cat": "ca", - "ccq": "rki", - "ces": "cs", - "cha": "ch", - "che": "ce", - "chi": "zh", - "chu": "cu", - "chv": "cv", - "cjr": "mom", - "cka": "cmr", - "cld": "syr", - "cmk": "xch", - "cmn": "zh", - "cnr": "sr-ME", - "cor": "kw", - "cos": "co", - "coy": "pij", - "cqu": "quh", - "cre": "cr", - "cwd": "cr", - "cym": "cy", - "cze": "cs", - "daf": "dnj", - "dan": "da", - "dap": "njz", - "deu": "de", - "dgo": "doi", - "dhd": "mwr", - "dik": "din", - "diq": "zza", - "dit": "dif", - "div": "dv", - "djl": "dze", - "dkl": "aqd", - "drh": "mn", - "drr": "kzk", - "drw": "fa-AF", - "dud": "uth", - "duj": "dwu", - "dut": "nl", - "dwl": "dbt", - "dzo": "dz", - "ekk": "et", - "ell": "el", - "elp": "amq", - "emk": "man", - "eng": "en", - "epo": "eo", - "esk": "ik", - "est": "et", - "eus": "eu", - "ewe": "ee", - "fao": "fo", - "fas": "fa", - "fat": "ak", - "fij": "fj", - "fin": "fi", - "fra": "fr", - "fre": "fr", - "fry": "fy", - "fuc": "ff", - "ful": "ff", - "gav": "dev", - "gaz": "om", - "gbc": "wny", - "gbo": "grb", - "geo": "ka", - "ger": "de", - "gfx": "vaj", - "ggn": "gvr", - "ggo": "esg", - "ggr": "gtu", - "gio": "aou", - "gla": "gd", - "gle": "ga", - "glg": "gl", - "gli": "kzk", - "glv": "gv", - "gno": "gon", - "gre": "el", - "grn": "gn", - "gti": "nyc", - "gug": "gn", - "guj": "gu", - "guv": "duz", - "gya": "gba", - "hat": "ht", - "hau": "ha", - "hbs": "sr-Latn", - "hdn": "hai", - "hea": "hmn", - "heb": "he", - "her": "hz", - "him": "srx", - "hin": "hi", - "hmo": "ho", - "hrr": "jal", - "hrv": "hr", - "hun": "hu", - "hye": "hy", - "ibi": "opa", - "ibo": "ig", - "ice": "is", - "ido": "io", - "iii": "ii", - "ike": "iu", - "iku": "iu", - "ile": "ie", - "ill": "ilm", - "ilw": "gal", - "ina": "ia", - "ind": "id", - "ipk": "ik", - "isl": "is", - "ita": "it", - "izi": "eza", - "jar": "jgk", - "jav": "jv", - "jeg": "oyb", - "jpn": "ja", - "kal": "kl", - "kan": "kn", - "kas": "ks", - "kat": "ka", - "kau": "kr", - "kaz": "kk", - "kdv": "zkd", - "kgc": "tdf", - "kgd": "ncq", - "kgh": "kml", - "kgm": "plu", - "khk": "mn", - "khm": "km", - "kik": "ki", - "kin": "rw", - "kir": "ky", - "kmr": "ku", - "knc": "kr", - "kng": "kg", - "knn": "kok", - "koj": "kwv", - "kom": "kv", - "kon": "kg", - "kor": "ko", - "kpp": "jkm", - "kpv": "kv", - "krm": "bmf", - "ktr": "dtp", - "kua": "kj", - "kur": "ku", - "kvs": "gdj", - "kwq": "yam", - "kxe": "tvd", - "kxl": "kru", - "kzh": "dgl", - "kzj": "dtp", - "kzt": "dtp", - "lak": "ksp", - "lao": "lo", - "lat": "la", - "lav": "lv", - "lbk": "bnc", - "leg": "enl", - "lii": "raq", - "lim": "li", - "lin": "ln", - "lit": "lt", - "llo": "ngt", - "lmm": "rmx", - "ltz": "lb", - "lub": "lu", - "lug": "lg", - "lvs": "lv", - "mac": "mk", - "mah": "mh", - "mal": "ml", - "mao": "mi", - "mar": "mr", - "may": "ms", - "meg": "cir", - "mgx": "jbk", - "mhr": "chm", - "mkd": "mk", - "mlg": "mg", - "mlt": "mt", - "mnk": "man", - "mnt": "wnn", - "mof": "xnt", - "mol": "ro", - "mon": "mn", - "mri": "mi", - "msa": "ms", - "mst": "mry", - "mup": "raj", - "mwd": "dmw", - "mwj": "vaj", - "mya": "my", - "myd": "aog", - "myt": "mry", - "nad": "xny", - "nau": "na", - "nav": "nv", - "nbf": "nru", - "nbl": "nr", - "nbx": "ekc", - "ncp": "kdz", - "nde": "nd", - "ndo": "ng", - "nep": "ne", - "nld": "nl", - "nln": "azd", - "nlr": "nrk", - "nno": "nn", - "nns": "nbr", - "nnx": "ngv", - "nob": "nb", - "nom": "cbr", - "noo": "dtd", - "nor": "no", - "npi": "ne", - "nts": "pij", - "nxu": "bpp", - "nya": "ny", - "oci": "oc", - "ojg": "oj", - "oji": "oj", - "ori": "or", - "orm": "om", - "ory": "or", - "oss": "os", - "oun": "vaj", - "pan": "pa", - "pat": "kxr", - "pbu": "ps", - "pcr": "adx", - "per": "fa", - "pes": "fa", - "pli": "pi", - "plt": "mg", - "pmc": "huw", - "pmk": "crr", - "pmu": "phr", - "pnb": "lah", - "pol": "pl", - "por": "pt", - "ppa": "bfy", - "ppr": "lcq", - "prp": "gu", - "prs": "fa-AF", - "pry": "prt", - "pus": "ps", - "puz": "pub", - "que": "qu", - "quz": "qu", - "rmr": "emx", - "rmy": "rom", - "roh": "rm", - "ron": "ro", - "rum": "ro", - "run": "rn", - "rus": "ru", - "sag": "sg", - "san": "sa", - "sap": "aqt", - "sca": "hle", - "scc": "sr", - "scr": "hr", - "sgl": "isk", - "sin": "si", - "skk": "oyb", - "slk": "sk", - "slo": "sk", - "slv": "sl", - "smd": "kmb", - "sme": "se", - "smo": "sm", - "sna": "sn", - "snb": "iba", - "snd": "sd", - "som": "so", - "sot": "st", - "spa": "es", - "spy": "kln", - "sqi": "sq", - "src": "sc", - "srd": "sc", - "srp": "sr", - "ssw": "ss", - "sul": "sgd", - "sum": "ulw", - "sun": "su", - "swa": "sw", - "swc": "sw-CD", - "swe": "sv", - "swh": "sw", - "szd": "umi", - "tah": "ty", - "tam": "ta", - "tat": "tt", - "tdu": "dtp", - "tel": "te", - "tgg": "bjp", - "tgk": "tg", - "tgl": "fil", - "tha": "th", - "thc": "tpo", - "thw": "ola", - "thx": "oyb", - "tib": "bo", - "tid": "itd", - "tie": "ras", - "tir": "ti", - "tkk": "twm", - "tlw": "weo", - "tmk": "tdg", - "tmp": "tyj", - "tne": "kak", - "tnf": "fa-AF", - "ton": "to", - "tpw": "tpn", - "tsf": "taj", - "tsn": "tn", - "tso": "ts", - "ttq": "tmh", - "tuk": "tk", - "tur": "tr", - "twi": "ak", - "uig": "ug", - "ukr": "uk", - "umu": "del", - "unp": "wro", - "uok": "ema", - "urd": "ur", - "uzb": "uz", - "uzn": "uz", - "ven": "ve", - "vie": "vi", - "vol": "vo", - "wel": "cy", - "wgw": "wgb", - "wit": "nol", - "wiw": "nwo", - "wln": "wa", - "wol": "wo", - "xba": "cax", - "xho": "xh", - "xia": "acn", - "xkh": "waw", - "xpe": "kpe", - "xrq": "dmw", - "xsj": "suj", - "xsl": "den", - "xss": "zko", - "ybd": "rki", - "ydd": "yi", - "yen": "ynq", - "yid": "yi", - "yiy": "yrm", - "yma": "lrr", - "ymt": "mtm", - "yor": "yo", - "yos": "zom", - "yuu": "yug", - "zai": "zap", - "zha": "za", - "zho": "zh", - "zir": "scv", - "zkb": "kjh", - "zsm": "ms", - "zul": "zu", - "zyb": "za" - }, - "language": [], - "script": { - "Qaai": "Zinh" - }, - "region_alpha": { - "BU": "MM", - "CT": "KI", - "DD": "DE", - "DY": "BJ", - "FX": "FR", - "HV": "BF", - "JT": "UM", - "MI": "UM", - "NH": "VU", - "NQ": "AQ", - "PU": "UM", - "PZ": "PA", - "QU": "EU", - "RH": "ZW", - "TP": "TL", - "UK": "GB", - "VD": "VN", - "WK": "UM", - "YD": "YE", - "ZR": "CD" - }, - "region_num": { - "004": "AF", - "008": "AL", - "010": "AQ", - "012": "DZ", - "016": "AS", - "020": "AD", - "024": "AO", - "028": "AG", - "031": "AZ", - "032": "AR", - "036": "AU", - "040": "AT", - "044": "BS", - "048": "BH", - "050": "BD", - "051": "AM", - "052": "BB", - "056": "BE", - "060": "BM", - "064": "BT", - "068": "BO", - "070": "BA", - "072": "BW", - "074": "BV", - "076": "BR", - "084": "BZ", - "086": "IO", - "090": "SB", - "092": "VG", - "096": "BN", - "100": "BG", - "104": "MM", - "108": "BI", - "112": "BY", - "116": "KH", - "120": "CM", - "124": "CA", - "132": "CV", - "136": "KY", - "140": "CF", - "144": "LK", - "148": "TD", - "152": "CL", - "156": "CN", - "158": "TW", - "162": "CX", - "166": "CC", - "170": "CO", - "174": "KM", - "175": "YT", - "178": "CG", - "180": "CD", - "184": "CK", - "188": "CR", - "191": "HR", - "192": "CU", - "196": "CY", - "203": "CZ", - "204": "BJ", - "208": "DK", - "212": "DM", - "214": "DO", - "218": "EC", - "222": "SV", - "226": "GQ", - "230": "ET", - "231": "ET", - "232": "ER", - "233": "EE", - "234": "FO", - "238": "FK", - "239": "GS", - "242": "FJ", - "246": "FI", - "248": "AX", - "249": "FR", - "250": "FR", - "254": "GF", - "258": "PF", - "260": "TF", - "262": "DJ", - "266": "GA", - "268": "GE", - "270": "GM", - "275": "PS", - "276": "DE", - "278": "DE", - "280": "DE", - "288": "GH", - "292": "GI", - "296": "KI", - "300": "GR", - "304": "GL", - "308": "GD", - "312": "GP", - "316": "GU", - "320": "GT", - "324": "GN", - "328": "GY", - "332": "HT", - "334": "HM", - "336": "VA", - "340": "HN", - "344": "HK", - "348": "HU", - "352": "IS", - "356": "IN", - "360": "ID", - "364": "IR", - "368": "IQ", - "372": "IE", - "376": "IL", - "380": "IT", - "384": "CI", - "388": "JM", - "392": "JP", - "398": "KZ", - "400": "JO", - "404": "KE", - "408": "KP", - "410": "KR", - "414": "KW", - "417": "KG", - "418": "LA", - "422": "LB", - "426": "LS", - "428": "LV", - "430": "LR", - "434": "LY", - "438": "LI", - "440": "LT", - "442": "LU", - "446": "MO", - "450": "MG", - "454": "MW", - "458": "MY", - "462": "MV", - "466": "ML", - "470": "MT", - "474": "MQ", - "478": "MR", - "480": "MU", - "484": "MX", - "492": "MC", - "496": "MN", - "498": "MD", - "499": "ME", - "500": "MS", - "504": "MA", - "508": "MZ", - "512": "OM", - "516": "NA", - "520": "NR", - "524": "NP", - "528": "NL", - "531": "CW", - "533": "AW", - "534": "SX", - "535": "BQ", - "540": "NC", - "548": "VU", - "554": "NZ", - "558": "NI", - "562": "NE", - "566": "NG", - "570": "NU", - "574": "NF", - "578": "NO", - "580": "MP", - "581": "UM", - "583": "FM", - "584": "MH", - "585": "PW", - "586": "PK", - "591": "PA", - "598": "PG", - "600": "PY", - "604": "PE", - "608": "PH", - "612": "PN", - "616": "PL", - "620": "PT", - "624": "GW", - "626": "TL", - "630": "PR", - "634": "QA", - "638": "RE", - "642": "RO", - "643": "RU", - "646": "RW", - "652": "BL", - "654": "SH", - "659": "KN", - "660": "AI", - "662": "LC", - "663": "MF", - "666": "PM", - "670": "VC", - "674": "SM", - "678": "ST", - "682": "SA", - "686": "SN", - "688": "RS", - "690": "SC", - "694": "SL", - "702": "SG", - "703": "SK", - "704": "VN", - "705": "SI", - "706": "SO", - "710": "ZA", - "716": "ZW", - "720": "YE", - "724": "ES", - "728": "SS", - "729": "SD", - "732": "EH", - "736": "SD", - "740": "SR", - "744": "SJ", - "748": "SZ", - "752": "SE", - "756": "CH", - "760": "SY", - "762": "TJ", - "764": "TH", - "768": "TG", - "772": "TK", - "776": "TO", - "780": "TT", - "784": "AE", - "788": "TN", - "792": "TR", - "795": "TM", - "796": "TC", - "798": "TV", - "800": "UG", - "804": "UA", - "807": "MK", - "818": "EG", - "826": "GB", - "831": "GG", - "832": "JE", - "833": "IM", - "834": "TZ", - "840": "US", - "850": "VI", - "854": "BF", - "858": "UY", - "860": "UZ", - "862": "VE", - "876": "WF", - "882": "WS", - "886": "YE", - "887": "YE", - "894": "ZM", - "958": "AA", - "959": "QM", - "960": "QN", - "962": "QP", - "963": "QQ", - "964": "QR", - "965": "QS", - "966": "QT", - "967": "EU", - "968": "QV", - "969": "QW", - "970": "QX", - "971": "QY", - "972": "QZ", - "973": "XA", - "974": "XB", - "975": "XC", - "976": "XD", - "977": "XE", - "978": "XF", - "979": "XG", - "980": "XH", - "981": "XI", - "982": "XJ", - "983": "XK", - "984": "XL", - "985": "XM", - "986": "XN", - "987": "XO", - "988": "XP", - "989": "XQ", - "990": "XR", - "991": "XS", - "992": "XT", - "993": "XU", - "994": "XV", - "995": "XW", - "996": "XX", - "997": "XY", - "998": "XZ", - "999": "ZZ" - }, - "complex_region": { - "062": [ - "034", - "143" - ], - "172": [ - "RU", - "AM", - "AZ", - "BY", - "GE", - "KG", - "KZ", - "MD", - "TJ", - "TM", - "UA", - "UZ" - ], - "200": [ - "CZ", - "SK" - ], - "530": [ - "CW", - "SX", - "BQ" - ], - "532": [ - "CW", - "SX", - "BQ" - ], - "536": [ - "SA", - "IQ" - ], - "582": [ - "FM", - "MH", - "MP", - "PW" - ], - "810": [ - "RU", - "AM", - "AZ", - "BY", - "EE", - "GE", - "KZ", - "KG", - "LV", - "LT", - "MD", - "TJ", - "TM", - "UA", - "UZ" - ], - "830": [ - "JE", - "GG" - ], - "890": [ - "RS", - "ME", - "SI", - "HR", - "MK", - "BA" - ], - "891": [ - "RS", - "ME" - ], - "AN": [ - "CW", - "SX", - "BQ" - ], - "CS": [ - "RS", - "ME" - ], - "FQ": [ - "AQ", - "TF" - ], - "NT": [ - "SA", - "IQ" - ], - "PC": [ - "FM", - "MH", - "MP", - "PW" - ], - "SU": [ - "RU", - "AM", - "AZ", - "BY", - "EE", - "GE", - "KZ", - "KG", - "LV", - "LT", - "MD", - "TJ", - "TM", - "UA", - "UZ" - ], - "YU": [ - "RS", - "ME" - ] - }, - "variant": { - "heploc": "alalc97", - "polytoni": "polyton" - }, - "subdivision": { - "cn11": "cnbj", - "cn12": "cntj", - "cn13": "cnhe", - "cn14": "cnsx", - "cn15": "cnmn", - "cn21": "cnln", - "cn22": "cnjl", - "cn23": "cnhl", - "cn31": "cnsh", - "cn32": "cnjs", - "cn33": "cnzj", - "cn34": "cnah", - "cn35": "cnfj", - "cn36": "cnjx", - "cn37": "cnsd", - "cn41": "cnha", - "cn42": "cnhb", - "cn43": "cnhn", - "cn44": "cngd", - "cn45": "cngx", - "cn46": "cnhi", - "cn50": "cncq", - "cn51": "cnsc", - "cn52": "cngz", - "cn53": "cnyn", - "cn54": "cnxz", - "cn61": "cnsn", - "cn62": "cngs", - "cn63": "cnqh", - "cn64": "cnnx", - "cn65": "cnxj", - "cn71": "twzzzz", - "cn91": "hkzzzz", - "cn92": "mozzzz", - "cz10a": "cz110", - "cz10b": "cz111", - "cz10c": "cz112", - "cz10d": "cz113", - "cz10e": "cz114", - "cz10f": "cz115", - "cz611": "cz663", - "cz612": "cz632", - "cz613": "cz633", - "cz614": "cz634", - "cz615": "cz635", - "cz621": "cz641", - "cz622": "cz642", - "cz623": "cz643", - "cz624": "cz644", - "cz626": "cz646", - "cz627": "cz647", - "czjc": "cz31", - "czjm": "cz64", - "czka": "cz41", - "czkr": "cz52", - "czli": "cz51", - "czmo": "cz80", - "czol": "cz71", - "czpa": "cz53", - "czpl": "cz32", - "czpr": "cz10", - "czst": "cz20", - "czus": "cz42", - "czvy": "cz63", - "czzl": "cz72", - "fi01": "axzzzz", - "fra": "frges", - "frb": "frnaq", - "frbl": "blzzzz", - "frc": "frara", - "frcp": "cpzzzz", - "frd": "frbfc", - "fre": "frbre", - "frf": "frcvl", - "frg": "frges", - "frgf": "gfzzzz", - "frgp": "gpzzzz", - "frh": "frcor", - "fri": "frbfc", - "frj": "fridf", - "frk": "frocc", - "frl": "frnaq", - "frm": "frges", - "frmf": "mfzzzz", - "frmq": "mqzzzz", - "frn": "frocc", - "frnc": "nczzzz", - "fro": "frhdf", - "frp": "frnor", - "frpf": "pfzzzz", - "frpm": "pmzzzz", - "frq": "frnor", - "frr": "frpdl", - "frre": "rezzzz", - "frs": "frhdf", - "frt": "frnaq", - "frtf": "tfzzzz", - "fru": "frpac", - "frv": "frara", - "frwf": "wfzzzz", - "fryt": "ytzzzz", - "laxn": "laxs", - "lud": "lucl", - "lug": "luec", - "lul": "luca", - "mrnkc": "mr13", - "nlaw": "awzzzz", - "nlcw": "cwzzzz", - "nlsx": "sxzzzz", - "no23": "no50", - "nzn": "nzauk", - "nzs": "nzcan", - "omba": "ombj", - "omsh": "omsj", - "plds": "pl02", - "plkp": "pl04", - "pllb": "pl08", - "plld": "pl10", - "pllu": "pl06", - "plma": "pl12", - "plmz": "pl14", - "plop": "pl16", - "plpd": "pl20", - "plpk": "pl18", - "plpm": "pl22", - "plsk": "pl26", - "plsl": "pl24", - "plwn": "pl28", - "plwp": "pl30", - "plzp": "pl32", - "shta": "tazzzz", - "tteto": "tttob", - "ttrcm": "ttmrc", - "ttwto": "tttob", - "twkhq": "twkhh", - "twtnq": "twtnn", - "twtpq": "twnwt", - "twtxq": "twtxg", - "usas": "aszzzz", - "usgu": "guzzzz", - "usmp": "mpzzzz", - "uspr": "przzzz", - "usum": "umzzzz", - "usvi": "vizzzz" - } -} diff --git a/provider/source/src/locale_canonicalizer/aliases.rs b/provider/source/src/locale_canonicalizer/aliases.rs index 32921adc919..b718fefbee5 100644 --- a/provider/source/src/locale_canonicalizer/aliases.rs +++ b/provider/source/src/locale_canonicalizer/aliases.rs @@ -14,26 +14,6 @@ use std::collections::{BTreeMap, HashSet}; use tinystr::TinyAsciiStr; use zerovec::ZeroSlice; -impl DataProvider for SourceDataProvider { - fn load(&self, req: DataRequest) -> Result, DataError> { - self.check_req::(req)?; - let data: &cldr_serde::aliases::Resource = self - .cldr()? - .core() - .read_and_parse("supplemental/aliases.json")?; - Ok(DataResponse { - metadata: Default::default(), - payload: DataPayload::from_owned(AliasesV2::from(data).into()), - }) - } -} - -impl crate::IterableDataProviderCached for SourceDataProvider { - fn iter_ids_cached(&self) -> Result>, DataError> { - Ok(HashSet::from_iter([Default::default()])) - } -} - impl DataProvider for SourceDataProvider { fn load(&self, req: DataRequest) -> Result, DataError> { self.check_req::(req)?;