diff --git a/components/collator/src/provider.rs b/components/collator/src/provider.rs index 5e658e331ee..d394bebba72 100644 --- a/components/collator/src/provider.rs +++ b/components/collator/src/provider.rs @@ -117,7 +117,8 @@ fn data_ce_to_primary(data_ce: u64, c: char) -> u32 { #[icu_provider::data_struct(marker( CollationDataV1Marker, "collator/data@1", - fallback_by = "collation", + // TODO(#3867): Use script fallback + fallback_by = "language", ))] #[derive(Debug, PartialEq, Clone)] #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))] @@ -231,7 +232,7 @@ impl<'data> CollationDataV1<'data> { #[icu_provider::data_struct(marker( CollationDiacriticsV1Marker, "collator/dia@1", - fallback_by = "collation", + fallback_by = "language", ))] #[derive(Debug, PartialEq, Clone)] #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))] @@ -273,7 +274,7 @@ pub struct CollationJamoV1<'data> { #[icu_provider::data_struct(marker( CollationReorderingV1Marker, "collator/reord@1", - fallback_by = "collation", + fallback_by = "language", ))] #[derive(Debug, PartialEq, Clone)] #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))] @@ -361,7 +362,7 @@ impl<'data> CollationReorderingV1<'data> { #[icu_provider::data_struct(marker( CollationMetadataV1Marker, "collator/meta@1", - fallback_by = "collation", + fallback_by = "language", ))] #[derive(Debug, PartialEq, Clone, Copy)] #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))] diff --git a/components/experimental/src/personnames/formatter.rs b/components/experimental/src/personnames/formatter.rs index b14edf8e366..2899d153f31 100644 --- a/components/experimental/src/personnames/formatter.rs +++ b/components/experimental/src/personnames/formatter.rs @@ -47,9 +47,7 @@ impl PersonNamesFormatter 
{ + DataProvider + DataProvider + DataProvider - + DataProvider - // TODO: We shouldn't need the collation supplement here - + DataProvider, + + DataProvider, { let swe = icu_properties::script::load_script_with_extensions_unstable(provider)?; let scripts = icu_properties::Script::get_enum_to_short_name_mapper(provider)?; diff --git a/components/experimental/tests/personnames/tests.rs b/components/experimental/tests/personnames/tests.rs index fb9b75ec707..446b742fd28 100644 --- a/components/experimental/tests/personnames/tests.rs +++ b/components/experimental/tests/personnames/tests.rs @@ -26,7 +26,6 @@ const _: () = { impl_person_names_format_v1_marker!(TestingProvider); icu_locale_data::impl_locale_fallback_likely_subtags_v1_marker!(TestingProvider); icu_locale_data::impl_locale_fallback_parents_v1_marker!(TestingProvider); - icu_locale_data::impl_collation_fallback_supplement_v1_marker!(TestingProvider); icu_properties_data::impl_script_value_to_short_name_v1_marker!(TestingProvider); icu_properties_data::impl_script_with_extensions_property_v1_marker!(TestingProvider); }; diff --git a/components/locale/src/fallback/algorithms.rs b/components/locale/src/fallback/algorithms.rs index 7b5e76e9612..ee923e54c5d 100644 --- a/components/locale/src/fallback/algorithms.rs +++ b/components/locale/src/fallback/algorithms.rs @@ -63,16 +63,7 @@ impl<'a> LocaleFallbackerWithConfig<'a> { } } // 3. Remove irrelevant extension subtags - locale.retain_unicode_ext(|key| { - match *key { - // Always retain -u-sd - SUBDIVISION_KEY => true, - // Retain the query-specific keyword - _ if Some(*key) == self.config.extension_key => true, - // Drop all others - _ => false, - } - }); + locale.retain_unicode_ext(|&key| matches!(key, SUBDIVISION_KEY)); // 4. 
If there is an invalid "sd" subtag, drop it // For now, ignore it, and let fallback do it for us } @@ -83,9 +74,6 @@ impl<'a> LocaleFallbackIteratorInner<'a> { match self.config.priority { LocaleFallbackPriority::Language => self.step_language(locale), LocaleFallbackPriority::Region => self.step_region(locale), - // TODO(#1964): Change the collation fallback rules to be different - // from the language fallback fules. - LocaleFallbackPriority::Collation => self.step_language(locale), // This case should not normally happen, but `LocaleFallbackPriority` is non_exhaustive. // Make it go directly to `und`. _ => { @@ -100,13 +88,6 @@ impl<'a> LocaleFallbackIteratorInner<'a> { } fn step_language(&mut self, locale: &mut DataLocale) { - // 1. Remove the extension fallback keyword - if let Some(extension_key) = self.config.extension_key { - if let Some(value) = locale.remove_unicode_ext(&extension_key) { - self.backup_extension = Some(value); - return; - } - } // 2. Remove the subdivision keyword if let Some(value) = locale.remove_unicode_ext(&SUBDIVISION_KEY) { self.backup_subdivision = Some(value); @@ -122,7 +103,7 @@ impl<'a> LocaleFallbackIteratorInner<'a> { // 5. Check for parent override if let Some(parent) = self.get_explicit_parent(locale) { locale.set_langid(parent); - self.restore_extensions_variants(locale); + self.restore_subdivision_variants(locale); return; } // 6. Add the script subtag if necessary @@ -134,7 +115,7 @@ impl<'a> LocaleFallbackIteratorInner<'a> { &region.into_tinystr().to_unvalidated(), ) { locale.set_script(Some(script)); - self.restore_extensions_variants(locale); + self.restore_subdivision_variants(locale); return; } } @@ -142,7 +123,7 @@ impl<'a> LocaleFallbackIteratorInner<'a> { // 7. Remove region if locale.region().is_some() { locale.set_region(None); - self.restore_extensions_variants(locale); + self.restore_subdivision_variants(locale); return; } // 8. 
Remove language+script @@ -152,13 +133,6 @@ impl<'a> LocaleFallbackIteratorInner<'a> { } fn step_region(&mut self, locale: &mut DataLocale) { - // 1. Remove the extension fallback keyword - if let Some(extension_key) = self.config.extension_key { - if let Some(value) = locale.remove_unicode_ext(&extension_key) { - self.backup_extension = Some(value); - return; - } - } // 2. Remove the subdivision keyword if let Some(value) = locale.remove_unicode_ext(&SUBDIVISION_KEY) { self.backup_subdivision = Some(value); @@ -175,7 +149,7 @@ impl<'a> LocaleFallbackIteratorInner<'a> { if !locale.language().is_empty() || locale.script().is_some() { locale.set_script(None); locale.set_language(Language::UND); - self.restore_extensions_variants(locale); + self.restore_subdivision_variants(locale); return; } // 6. Remove region @@ -183,11 +157,7 @@ impl<'a> LocaleFallbackIteratorInner<'a> { locale.set_region(None); } - fn restore_extensions_variants(&mut self, locale: &mut DataLocale) { - if let Some(value) = self.backup_extension.take() { - #[allow(clippy::unwrap_used)] // not reachable unless extension_key is present - locale.set_unicode_ext(self.config.extension_key.unwrap(), value); - } + fn restore_subdivision_variants(&mut self, locale: &mut DataLocale) { if let Some(value) = self.backup_subdivision.take() { locale.set_unicode_ext(SUBDIVISION_KEY, value); } @@ -197,17 +167,9 @@ impl<'a> LocaleFallbackIteratorInner<'a> { } fn get_explicit_parent(&self, locale: &DataLocale) -> Option { - self.supplement - .and_then(|supplement| { - supplement - .parents - .get_copied_by(|uvstr| locale.strict_cmp(uvstr).reverse()) - }) - .or_else(|| { - self.parents - .parents - .get_copied_by(|uvstr| locale.strict_cmp(uvstr).reverse()) - }) + self.parents + .parents + .get_copied_by(|uvstr| locale.strict_cmp(uvstr).reverse()) .map(LanguageIdentifier::from) } } @@ -220,8 +182,6 @@ mod tests { struct TestCase { input: &'static str, requires_data: bool, - extension_key: Option, - 
fallback_supplement: Option, // Note: The first entry in the chain is the normalized locale expected_language_chain: &'static [&'static str], expected_region_chain: &'static [&'static str], @@ -232,40 +192,30 @@ mod tests { TestCase { input: "en-u-hc-h12-sd-usca", requires_data: false, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["en-u-sd-usca", "en"], expected_region_chain: &["en-u-sd-usca", "en", "und-u-sd-usca"], }, TestCase { input: "en-US-u-hc-h12-sd-usca", requires_data: false, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["en-US-u-sd-usca", "en-US", "en-u-sd-usca", "en"], expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"], }, TestCase { input: "en-US-fonipa-u-hc-h12-sd-usca", requires_data: false, - extension_key: Some(key!("hc")), - fallback_supplement: None, expected_language_chain: &[ - "en-US-fonipa-u-hc-h12-sd-usca", "en-US-fonipa-u-sd-usca", "en-US-fonipa", "en-US", - "en-fonipa-u-hc-h12-sd-usca", "en-fonipa-u-sd-usca", "en-fonipa", "en", ], expected_region_chain: &[ - "en-US-fonipa-u-hc-h12-sd-usca", "en-US-fonipa-u-sd-usca", "en-US-fonipa", "en-US", - "und-US-fonipa-u-hc-h12-sd-usca", "und-US-fonipa-u-sd-usca", "und-US-fonipa", "und-US", @@ -274,24 +224,18 @@ mod tests { TestCase { input: "en-u-hc-h12-sd-usca", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["en-u-sd-usca", "en"], expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"], }, TestCase { input: "en-Latn-u-sd-usca", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["en-u-sd-usca", "en"], expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"], }, TestCase { input: "en-Latn-US-u-sd-usca", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["en-US-u-sd-usca", "en-US", "en-u-sd-usca", "en"], 
expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"], }, @@ -299,32 +243,24 @@ mod tests { // TODO(#4413): -u-rg is not yet supported; when it is, this test should be updated input: "en-u-rg-gbxxxx", requires_data: false, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["en"], expected_region_chain: &["en"], }, TestCase { input: "sr-ME", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["sr-ME", "sr-Latn-ME", "sr-Latn"], expected_region_chain: &["sr-ME", "und-ME"], }, TestCase { input: "sr-Latn-ME", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["sr-ME", "sr-Latn-ME", "sr-Latn"], expected_region_chain: &["sr-ME", "und-ME"], }, TestCase { input: "sr-ME-fonipa", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &[ "sr-ME-fonipa", "sr-ME", @@ -338,72 +274,54 @@ mod tests { TestCase { input: "sr-RS", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["sr-RS", "sr"], expected_region_chain: &["sr-RS", "und-RS"], }, TestCase { input: "sr-Cyrl-RS", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["sr-RS", "sr"], expected_region_chain: &["sr-RS", "und-RS"], }, TestCase { input: "sr-Latn-RS", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["sr-Latn-RS", "sr-Latn"], expected_region_chain: &["sr-Latn-RS", "und-RS"], }, TestCase { input: "de-Latn-LI", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["de-LI", "de"], expected_region_chain: &["de-LI", "und-LI"], }, TestCase { input: "ca-ES-valencia", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["ca-ES-valencia", "ca-ES", "ca-valencia", "ca"], expected_region_chain: 
&["ca-ES-valencia", "ca-ES", "und-ES-valencia", "und-ES"], }, TestCase { input: "es-AR", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["es-AR", "es-419", "es"], expected_region_chain: &["es-AR", "und-AR"], }, TestCase { input: "hi-IN", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["hi-IN", "hi"], expected_region_chain: &["hi-IN", "und-IN"], }, TestCase { input: "hi-Latn-IN", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["hi-Latn-IN", "hi-Latn", "en-IN", "en-001", "en"], expected_region_chain: &["hi-Latn-IN", "und-IN"], }, TestCase { input: "zh-CN", requires_data: true, - extension_key: None, - fallback_supplement: None, // Note: "zh-Hans" is not reachable because it is the default script for "zh". // The fallback algorithm does not visit the language-script bundle when the // script is the default for the language @@ -413,26 +331,21 @@ mod tests { TestCase { input: "zh-TW", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["zh-TW", "zh-Hant-TW", "zh-Hant"], expected_region_chain: &["zh-TW", "und-TW"], }, TestCase { input: "yue-HK", requires_data: true, - extension_key: None, - fallback_supplement: None, expected_language_chain: &["yue-HK", "yue"], expected_region_chain: &["yue-HK", "und-HK"], }, TestCase { input: "yue-HK", requires_data: true, - extension_key: None, - fallback_supplement: Some(LocaleFallbackSupplement::Collation), - expected_language_chain: &["yue-HK", "yue", "zh-Hant", "zh"], + expected_language_chain: &["yue-HK", "yue"], expected_region_chain: &["yue-HK", "und-HK"], + // TODO(#3867): script fallback should do zh-Hant or und-Hant as well }, ]; @@ -451,8 +364,6 @@ mod tests { ] { let mut config = LocaleFallbackConfig::default(); config.priority = priority; - config.extension_key = cas.extension_key; - config.fallback_supplement = 
cas.fallback_supplement; let fallbacker = if cas.requires_data { fallbacker_with_data } else { diff --git a/components/locale/src/fallback/mod.rs b/components/locale/src/fallback/mod.rs index 5954af8b19c..69f4b852eb4 100644 --- a/components/locale/src/fallback/mod.rs +++ b/components/locale/src/fallback/mod.rs @@ -11,9 +11,7 @@ use icu_locale_core::subtags::Variants; use icu_provider::prelude::*; #[doc(inline)] -pub use icu_provider::_internal::{ - LocaleFallbackConfig, LocaleFallbackPriority, LocaleFallbackSupplement, -}; +pub use icu_provider::_internal::{LocaleFallbackConfig, LocaleFallbackPriority}; mod algorithms; @@ -35,7 +33,7 @@ mod algorithms; /// let fallbacker = LocaleFallbacker::new(); /// /// // Create a LocaleFallbackerIterator with a default configuration. -/// // By default, uses language priority with no additional extension keywords. +/// // By default, uses language priority. /// let mut fallback_iterator = fallbacker /// .for_config(Default::default()) /// .fallback_for(locale!("hi-Latn-IN").into()); @@ -62,7 +60,6 @@ mod algorithms; pub struct LocaleFallbacker { likely_subtags: DataPayload, parents: DataPayload, - collation_supplement: Option>, } /// Borrowed version of [`LocaleFallbacker`]. @@ -70,7 +67,6 @@ pub struct LocaleFallbacker { pub struct LocaleFallbackerBorrowed<'a> { likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, parents: &'a LocaleFallbackParentsV1<'a>, - collation_supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, } /// A [`LocaleFallbackerBorrowed`] with an associated [`LocaleFallbackConfig`]. 
@@ -78,7 +74,6 @@ pub struct LocaleFallbackerBorrowed<'a> { pub struct LocaleFallbackerWithConfig<'a> { likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, parents: &'a LocaleFallbackParentsV1<'a>, - supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, config: LocaleFallbackConfig, } @@ -87,9 +82,7 @@ pub struct LocaleFallbackerWithConfig<'a> { struct LocaleFallbackIteratorInner<'a> { likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, parents: &'a LocaleFallbackParentsV1<'a>, - supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, config: LocaleFallbackConfig, - backup_extension: Option, backup_subdivision: Option, backup_variants: Option, } @@ -118,9 +111,6 @@ impl LocaleFallbacker { likely_subtags: crate::provider::Baked::SINGLETON_LOCALE_FALLBACK_LIKELY_SUBTAGS_V1_MARKER, parents: crate::provider::Baked::SINGLETON_LOCALE_FALLBACK_PARENTS_V1_MARKER, - collation_supplement: Some( - crate::provider::Baked::SINGLETON_COLLATION_FALLBACK_SUPPLEMENT_V1_MARKER, - ), }; // Safety: we're transmuting down from LocaleFallbackerBorrowed<'static> to LocaleFallbackerBorrowed<'a> // ZeroMaps use associated types in a way that confuse the compiler which gives up and marks them @@ -144,27 +134,13 @@ impl LocaleFallbacker { where P: DataProvider + DataProvider - + DataProvider + ?Sized, { let likely_subtags = provider.load(Default::default())?.payload; let parents = provider.load(Default::default())?.payload; - let collation_supplement = match DataProvider::::load( - provider, - Default::default(), - ) { - Ok(response) => Some(response.payload), - // It is expected that not all keys are present - Err(DataError { - kind: DataErrorKind::MarkerNotFound, - .. 
- }) => None, - Err(e) => return Err(e), - }; Ok(LocaleFallbacker { likely_subtags, parents, - collation_supplement, }) } @@ -174,7 +150,6 @@ impl LocaleFallbacker { LocaleFallbacker { likely_subtags: DataPayload::from_owned(Default::default()), parents: DataPayload::from_owned(Default::default()), - collation_supplement: None, } } @@ -189,7 +164,6 @@ impl LocaleFallbacker { LocaleFallbackerBorrowed { likely_subtags: self.likely_subtags.get(), parents: self.parents.get(), - collation_supplement: self.collation_supplement.as_ref().map(|p| p.get()), } } } @@ -201,10 +175,6 @@ impl<'a> LocaleFallbackerBorrowed<'a> { LocaleFallbackerWithConfig { likely_subtags: self.likely_subtags, parents: self.parents, - supplement: match config.fallback_supplement { - Some(LocaleFallbackSupplement::Collation) => self.collation_supplement, - _ => None, - }, config, } } @@ -219,10 +189,6 @@ impl LocaleFallbackerBorrowed<'static> { LocaleFallbacker { likely_subtags: DataPayload::from_static_ref(self.likely_subtags), parents: DataPayload::from_static_ref(self.parents), - collation_supplement: match self.collation_supplement { - None => None, - Some(x) => Some(DataPayload::from_static_ref(x)), - }, } } } @@ -240,9 +206,7 @@ impl<'a> LocaleFallbackerWithConfig<'a> { inner: LocaleFallbackIteratorInner { likely_subtags: self.likely_subtags, parents: self.parents, - supplement: self.supplement, config: self.config, - backup_extension: None, backup_subdivision: None, backup_variants: None, }, diff --git a/components/locale/src/provider/fallback.rs b/components/locale/src/provider/fallback.rs index dcd5f7bbf73..4fc50473779 100644 --- a/components/locale/src/provider/fallback.rs +++ b/components/locale/src/provider/fallback.rs @@ -3,7 +3,6 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use super::*; -use icu_locale_core::extensions::unicode::Key; use icu_locale_core::subtags::{region, script, Language, Region, Script}; use icu_provider::prelude::*; use zerovec::ule::UnvalidatedStr; @@ -77,26 +76,3 @@ pub struct LocaleFallbackParentsV1<'data> { #[cfg_attr(feature = "serde", serde(borrow))] pub parents: ZeroMap<'data, UnvalidatedStr, (Language, Option