From 901390ecb93ee83fe68faf71e238e48c53cd8881 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 5 Sep 2024 10:21:11 -0700 Subject: [PATCH] Add RuleCollection::register_aliases and bigger, working Transliterator docs test (#5483) A more docs-friendly version of the test introduced in #5469 Related: #3991 --- .../src/transliterate/compile/mod.rs | 17 +++-- .../src/transliterate/transliterator/mod.rs | 70 +++++++++++++++++++ 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/components/experimental/src/transliterate/compile/mod.rs b/components/experimental/src/transliterate/compile/mod.rs index e37a9d7b440..8bc3e6527ad 100644 --- a/components/experimental/src/transliterate/compile/mod.rs +++ b/components/experimental/src/transliterate/compile/mod.rs @@ -113,11 +113,20 @@ impl RuleCollection { id.to_string().to_ascii_lowercase(), (source, reverse, visible), ); + self.register_aliases(id, aliases) + } - for alias in aliases.into_iter() { - self.id_mapping - .insert(alias.to_ascii_lowercase(), id.clone()); - } + /// Add transliteration ID aliases without registering a source. + pub fn register_aliases<'a>( + &mut self, + id: &icu_locale_core::Locale, + aliases: impl IntoIterator<Item = &'a str>, + ) { + self.id_mapping.extend( + aliases + .into_iter() + .map(|alias| (alias.to_ascii_lowercase(), id.clone())), + ) } /// Returns a provider that is usable by [`Transliterator::try_new_unstable`](crate::transliterate::Transliterator::try_new_unstable). 
diff --git a/components/experimental/src/transliterate/transliterator/mod.rs b/components/experimental/src/transliterate/transliterator/mod.rs index 2508691d5db..300f48a9774 100644 --- a/components/experimental/src/transliterate/transliterator/mod.rs +++ b/components/experimental/src/transliterate/transliterator/mod.rs @@ -153,6 +153,76 @@ type Env = LiteMap; /// A `Transliterator` allows transliteration based on [UTS #35 transform rules](https://unicode.org/reports/tr35/tr35-general.html#Transforms), /// including overrides with custom implementations. +/// +/// # Examples +/// +/// A transliterator with a custom alias referenced by another: +/// +/// ``` +/// use icu::experimental::transliterate::{Transliterator, CustomTransliterator, RuleCollection}; +/// use icu::locale::Locale; +/// +/// // Set up a transliterator with 3 custom rules. +/// // Note: These rules are for demonstration purposes only! Do not use. +/// +/// // 1. Main entrypoint: a chain of several transliterators +/// let mut collection = RuleCollection::default(); +/// collection.register_source( +/// &"und-t-und-x0-custom".parse().unwrap(), +/// "::NFD; ::FlattenLowerUmlaut; ::[:Nonspacing Mark:] Remove; ::AsciiUpper; ::NFC;".to_string(), +/// [], +/// false, +/// true, +/// ); +/// +/// // 2. A custom ruleset that expands lowercase umlauts +/// collection.register_source( +/// &"und-t-und-x0-dep1".parse().unwrap(), +/// r#" +/// [ä {a \u0308}] → ae; +/// [ö {o \u0308}] → oe; +/// [ü {u \u0308}] → ue; +/// "#.to_string(), +/// ["Any-FlattenLowerUmlaut"], +/// false, +/// true, +/// ); +/// +/// // 3. 
A custom transliterator that uppercases all ASCII characters +/// #[derive(Debug)] +/// struct AsciiUpperTransliterator; +/// impl CustomTransliterator for AsciiUpperTransliterator { +/// fn transliterate(&self, input: &str, range: std::ops::Range<usize>) -> String { +/// input.to_ascii_uppercase() +/// } +/// } +/// collection.register_aliases( +/// &"und-t-und-x0-dep2".parse().unwrap(), +/// ["Any-AsciiUpper"], +/// ); +/// +/// // Create a transliterator from the main entrypoint: +/// let provider = collection.as_provider(); +/// let t = Transliterator::try_new_with_override_unstable( +/// "und-t-und-x0-custom".parse().unwrap(), +/// |locale| { +/// if locale.normalizing_eq("und-t-und-x0-dep2") { +/// Some(Box::new(AsciiUpperTransliterator)) +/// } else { +/// None +/// } +/// }, +/// &provider, +/// ) +/// .unwrap(); +/// +/// // Test the behavior: +/// // - The uppercase 'Ü' is stripped of its umlaut +/// // - The lowercase 'ä' is expanded to "ae" +/// // - All ASCII characters are uppercased: not 'ß', which is not ASCII +/// let r = t.transliterate("Übermäßig".to_string()); +/// assert_eq!(r, "UBERMAEßIG"); +/// ``` #[derive(Debug)] pub struct Transliterator { transliterator: DataPayload,