Skip to content

Commit

Permalink
Add RuleCollection::register_aliases and bigger, working Transliterat…
Browse files Browse the repository at this point in the history
…or docs test (#5483)

A more docs-friendly version of the test introduced in #5469

Related: #3991
  • Loading branch information
sffc authored Sep 5, 2024
1 parent 130d2f7 commit 901390e
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 4 deletions.
17 changes: 13 additions & 4 deletions components/experimental/src/transliterate/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,20 @@ impl RuleCollection {
id.to_string().to_ascii_lowercase(),
(source, reverse, visible),
);
self.register_aliases(id, aliases)
}

for alias in aliases.into_iter() {
self.id_mapping
.insert(alias.to_ascii_lowercase(), id.clone());
}
/// Registers extra transliteration ID aliases for `id` without registering a source.
///
/// Every alias is ASCII-lowercased before it is stored in the ID mapping,
/// where it points at a clone of `id`.
pub fn register_aliases<'a>(
    &mut self,
    id: &icu_locale_core::Locale,
    aliases: impl IntoIterator<Item = &'a str>,
) {
    for alias in aliases {
        let key = alias.to_ascii_lowercase();
        self.id_mapping.insert(key, id.clone());
    }
}

/// Returns a provider that is usable by [`Transliterator::try_new_unstable`](crate::transliterate::Transliterator::try_new_unstable).
Expand Down
70 changes: 70 additions & 0 deletions components/experimental/src/transliterate/transliterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,76 @@ type Env = LiteMap<String, InternalTransliterator>;

/// A `Transliterator` allows transliteration based on [UTS #35 transform rules](https://unicode.org/reports/tr35/tr35-general.html#Transforms),
/// including overrides with custom implementations.
///
/// # Examples
///
/// A transliterator with a custom alias referenced by another:
///
/// ```
/// use icu::experimental::transliterate::{Transliterator, CustomTransliterator, RuleCollection};
/// use icu::locale::Locale;
///
/// // Set up a transliterator with 3 custom rules.
/// // Note: These rules are for demonstration purposes only! Do not use.
///
/// // 1. Main entrypoint: a chain of several transliterators
/// let mut collection = RuleCollection::default();
/// collection.register_source(
/// &"und-t-und-x0-custom".parse().unwrap(),
/// "::NFD; ::FlattenLowerUmlaut; ::[:Nonspacing Mark:] Remove; ::AsciiUpper; ::NFC;".to_string(),
/// [],
/// false,
/// true,
/// );
///
/// // 2. A custom ruleset that expands lowercase umlauts
/// collection.register_source(
/// &"und-t-und-x0-dep1".parse().unwrap(),
/// r#"
/// [ä {a \u0308}] → ae;
/// [ö {o \u0308}] → oe;
/// [ü {u \u0308}] → ue;
/// "#.to_string(),
/// ["Any-FlattenLowerUmlaut"],
/// false,
/// true,
/// );
///
/// // 3. A custom transliterator that uppercases all ASCII characters
/// #[derive(Debug)]
/// struct AsciiUpperTransliterator;
/// impl CustomTransliterator for AsciiUpperTransliterator {
/// fn transliterate(&self, input: &str, range: std::ops::Range<usize>) -> String {
/// input.to_ascii_uppercase()
/// }
/// }
/// collection.register_aliases(
/// &"und-t-und-x0-dep2".parse().unwrap(),
/// ["Any-AsciiUpper"],
/// );
///
/// // Create a transliterator from the main entrypoint:
/// let provider = collection.as_provider();
/// let t = Transliterator::try_new_with_override_unstable(
/// "und-t-und-x0-custom".parse().unwrap(),
/// |locale| {
/// if locale.normalizing_eq("und-t-und-x0-dep2") {
/// Some(Box::new(AsciiUpperTransliterator))
/// } else {
/// None
/// }
/// },
/// &provider,
/// )
/// .unwrap();
///
/// // Test the behavior:
/// // - The uppercase 'Ü' is stripped of its umlaut
/// // - The lowercase 'ä' is expanded to "ae"
/// // - All ASCII characters are uppercased: not 'ß', which is not ASCII
/// let r = t.transliterate("Übermäßig".to_string());
/// assert_eq!(r, "UBERMAEßIG");
/// ```
#[derive(Debug)]
pub struct Transliterator {
transliterator: DataPayload<TransliteratorRulesV1Marker>,
Expand Down

0 comments on commit 901390e

Please sign in to comment.