Merge branch 'main' into exemplars

unicode-org · Sep 3, 2024 · 1426a7e
2 parents 58c13d9 + e80a609
commit 1426a7e
Show file tree

Hide file tree

Showing 500 changed files with 121,458 additions and 223,066 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -209,10 +209,10 @@ icu_benchmark_macros = { path = "tools/benchmark/macros" }
 
 # The version here can either be a `version = ".."` spec or `git = "https://github.com/rust-diplomat/diplomat", rev = ".."`
 # Diplomat must be published preceding a new ICU4X release but may use git versions in between
-diplomat = { git = "https://github.com/rust-diplomat/diplomat", rev = "291371b020d9c3cd5abbd1f96f6dfe4f8fac3b5c" }
-diplomat-runtime = { git = "https://github.com/rust-diplomat/diplomat", rev = "291371b020d9c3cd5abbd1f96f6dfe4f8fac3b5c" }
-diplomat_core = { git = "https://github.com/rust-diplomat/diplomat", rev = "291371b020d9c3cd5abbd1f96f6dfe4f8fac3b5c" }
-diplomat-tool = { git = "https://github.com/rust-diplomat/diplomat", rev = "291371b020d9c3cd5abbd1f96f6dfe4f8fac3b5c" }
+diplomat = { git = "https://github.com/rust-diplomat/diplomat", rev = "0183677b4bc25fc762ec06a4665c4f384998fb50" }
+diplomat-runtime = { git = "https://github.com/rust-diplomat/diplomat", rev = "0183677b4bc25fc762ec06a4665c4f384998fb50" }
+diplomat_core = { git = "https://github.com/rust-diplomat/diplomat", rev = "0183677b4bc25fc762ec06a4665c4f384998fb50" }
+diplomat-tool = { git = "https://github.com/rust-diplomat/diplomat", rev = "0183677b4bc25fc762ec06a4665c4f384998fb50" }
 
 # EXTERNAL DEPENDENCIES
 #

diff --git a/README.md b/README.md
@@ -86,3 +86,5 @@ Copyright © 2020-2024 Unicode, Inc. Unicode and the Unicode Logo are registered
 The project is released under [LICENSE](./LICENSE), the free and open-source [Unicode License](https://www.unicode.org/license.txt), which is based on the well-known MIT license, with the primary difference being that the Unicode License expressly covers data and data files, as well as code. For further information please see [The Unicode Consortium Intellectual Property, Licensing, and Technical Contribution Policies](https://www.unicode.org/policies/licensing_policy.html).
 
 A CLA is required to contribute to this project - please refer to the [CONTRIBUTING.md](./CONTRIBUTING.md) file (or start a Pull Request) for more information.
+
+The contents of this repository are governed by the Unicode [Terms of Use](https://www.unicode.org/copyright.html).
diff --git a/components/calendar/src/provider.rs b/components/calendar/src/provider.rs
@@ -83,11 +83,8 @@ pub const MARKERS: &[DataMarkerInfo] = &[
 #[derive(
     Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Debug, yoke::Yokeable, zerofrom::ZeroFrom,
 )]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_calendar::provider),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct EraStartDate {
     /// The year the era started in
@@ -111,11 +108,8 @@ pub struct EraStartDate {
     marker(JapaneseExtendedErasV1Marker, "calendar/japanext@1", singleton)
 )]
 #[derive(Debug, PartialEq, Clone, Default)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_calendar::provider),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct JapaneseErasV1<'data> {
     /// A map from era start dates to their era codes
@@ -137,11 +131,8 @@ pub struct JapaneseErasV1<'data> {
     fallback_by = "region"
 ))]
 #[derive(Clone, Copy, Debug, PartialEq)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_calendar::provider),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 #[allow(clippy::exhaustive_structs)] // used in data provider
 pub struct WeekDataV1 {
@@ -165,7 +156,8 @@ pub struct WeekDataV1 {
     fallback_by = "region"
 ))]
 #[derive(Clone, Copy, Debug, PartialEq)]
-#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_calendar::provider))]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 #[allow(clippy::exhaustive_structs)] // used in data provider
 pub struct WeekDataV2 {

diff --git a/components/calendar/src/provider/chinese_based.rs b/components/calendar/src/provider/chinese_based.rs
@@ -30,11 +30,8 @@ use zerovec::ZeroVec;
     marker(DangiCacheV1Marker, "calendar/dangicache@1", singleton)
 )]
 #[derive(Debug, PartialEq, Clone, Default)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_calendar::provider::chinese_based),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider::chinese_based))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct ChineseBasedCacheV1<'data> {
     /// The extended year corresponding to the first data entry for this year
@@ -138,11 +135,8 @@ impl<'data> ChineseBasedCacheV1<'data> {
 /// to be stable, their Rust representation might not be. Use with caution.
 /// </div>
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ULE)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(databake::Bake),
-    databake(path = icu_calendar::provider),
-)]
+#[cfg_attr(feature = "datagen", derive(databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
 #[repr(C, packed)]
 pub struct PackedChineseBasedYearInfo(pub u8, pub u8, pub u8);
 

diff --git a/components/calendar/src/provider/islamic.rs b/components/calendar/src/provider/islamic.rs
@@ -36,11 +36,8 @@ use zerovec::ZeroVec;
     )
 )]
 #[derive(Debug, PartialEq, Clone, Default)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_calendar::provider::islamic),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider::islamic))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct IslamicCacheV1<'data> {
     /// The extended year corresponding to the first data entry for this year
@@ -150,11 +147,8 @@ impl<'data> IslamicCacheV1<'data> {
 /// to be stable, their Rust representation might not be. Use with caution.
 /// </div>
 #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, ULE)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_calendar::provider),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 #[repr(C, packed)]
 pub struct PackedIslamicYearInfo(pub u8, pub u8);

diff --git a/components/calendar/src/types.rs b/components/calendar/src/types.rs
@@ -84,11 +84,8 @@ impl FormattableYear {
 /// [era-proposal]: https://tc39.es/proposal-intl-era-monthcode/
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 #[allow(clippy::exhaustive_structs)] // this is a newtype
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_calendar::types),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::types))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct MonthCode(pub TinyStr4);
 
@@ -712,11 +709,8 @@ fn test_from_minute_with_remainder_days() {
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 #[allow(missing_docs)] // The weekday variants should be self-obvious.
 #[repr(i8)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_calendar::types),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_calendar::types))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 #[allow(clippy::exhaustive_enums)] // This is stable
 pub enum IsoWeekday {

diff --git a/components/casemap/benches/casemap.rs b/components/casemap/benches/casemap.rs
@@ -6,7 +6,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use icu_casemap::CaseMapper;
 use icu_locale_core::langid;
 #[cfg(feature = "bench")]
-use icu_normalizer::DecomposingNormalizer;
+use icu_normalizer::DecomposingNormalizerBorrowed;
 
 const TEST_STRING_EN: &str = "One of the key design principles of ICU4X is to make locale data small and portable, allowing it to be pulled from multiple sources depending on the needs of the application.  This document explains how that goal can be achieved.";
 
@@ -59,7 +59,7 @@ fn greek_uppercasing(_c: &mut Criterion) {
         let el = langid!("el");
 
         let iliad_lowercase = casemapper.lowercase_to_string(ILIAD, &root);
-        let decomposer = DecomposingNormalizer::new_nfd();
+        let decomposer = DecomposingNormalizerBorrowed::new_nfd();
         let nfd = decomposer.normalize_utf8(ILIAD.as_bytes());
         let nfd_lowercase = decomposer.normalize_utf8(iliad_lowercase.as_bytes());
 

diff --git a/components/casemap/src/provider/exceptions.rs b/components/casemap/src/provider/exceptions.rs
@@ -34,11 +34,8 @@ const SURROGATES_LEN: u32 = 0xDFFF - SURROGATES_START + 1;
 /// to be stable, their Rust representation might not be. Use with caution.
 /// </div>
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
-#[cfg_attr(
-    feature = "datagen", 
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_casemap::provider::exceptions),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::exceptions))]
 #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
 pub struct CaseMapExceptions<'data> {
     #[cfg_attr(feature = "serde", serde(borrow))]

diff --git a/components/casemap/src/provider/mod.rs b/components/casemap/src/provider/mod.rs
@@ -73,11 +73,8 @@ pub use self::unfold::{CaseMapUnfoldV1, CaseMapUnfoldV1Marker};
 /// </div>
 #[icu_provider::data_struct(marker(CaseMapV1Marker, "props/casemap@1", singleton))]
 #[derive(Debug, PartialEq, Clone)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_casemap::provider),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider))]
 #[yoke(prove_covariance_manually)]
 /// CaseMapper provides low-level access to the data necessary to
 /// convert characters and strings to upper, lower, or title case.

diff --git a/components/casemap/src/provider/unfold.rs b/components/casemap/src/provider/unfold.rs
@@ -20,11 +20,8 @@ use zerovec::ZeroMap;
 /// </div>
 #[icu_provider::data_struct(marker(CaseMapUnfoldV1Marker, "props/casemap_unfold@1", singleton))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_casemap::provider),
-)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider))]
 #[derive(Debug, PartialEq, Clone)]
 #[yoke(prove_covariance_manually)]
 pub struct CaseMapUnfoldV1<'data> {

diff --git a/components/casemap/tests/conversions.rs b/components/casemap/tests/conversions.rs
@@ -346,8 +346,8 @@ fn test_dutch() {
 
 #[test]
 fn test_greek_upper() {
-    let nfc = icu_normalizer::ComposingNormalizer::new_nfc();
-    let nfd = icu_normalizer::DecomposingNormalizer::new_nfd();
+    let nfc = icu_normalizer::ComposingNormalizerBorrowed::new_nfc();
+    let nfd = icu_normalizer::DecomposingNormalizerBorrowed::new_nfd();
 
     let cm = CaseMapper::new();
     let modern_greek = &langid!("el");

diff --git a/components/casemap/tests/gen_greek_to_me.rs b/components/casemap/tests/gen_greek_to_me.rs
@@ -6,13 +6,13 @@ use icu_casemap::greek_to_me::{
     self, GreekDiacritics, GreekPrecomposedLetterData, GreekVowel, PackedGreekPrecomposedLetterData,
 };
 use icu_casemap::CaseMapper;
-use icu_normalizer::DecomposingNormalizer;
+use icu_normalizer::DecomposingNormalizerBorrowed;
 use icu_properties::{maps, GeneralCategoryGroup, Script};
 use std::collections::BTreeMap;
 use std::fmt::Write;
 
 fn main() {
-    let decomposer = DecomposingNormalizer::new_nfd();
+    let decomposer = DecomposingNormalizerBorrowed::new_nfd();
     let script = maps::script();
     let gc = maps::general_category();
     let cm = CaseMapper::new();

diff --git a/components/collator/src/provider.rs b/components/collator/src/provider.rs
@@ -122,7 +122,8 @@ fn data_ce_to_primary(data_ce: u64, c: char) -> u32 {
     attributes_domain = "collator",
 ))]
 #[derive(Debug, PartialEq, Clone)]
-#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct CollationDataV1<'data> {
     /// Mapping from `char` to `CollationElement32` (represented
@@ -237,7 +238,8 @@ impl<'data> CollationDataV1<'data> {
     attributes_domain = "collator",
 ))]
 #[derive(Debug, PartialEq, Clone)]
-#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct CollationDiacriticsV1<'data> {
     /// Secondary weights for characters starting from U+0300 up
@@ -257,7 +259,8 @@ pub struct CollationDiacriticsV1<'data> {
 /// </div>
 #[icu_provider::data_struct(marker(CollationJamoV1Marker, "collator/jamo@1", singleton))]
 #[derive(Debug, PartialEq, Clone)]
-#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct CollationJamoV1<'data> {
     /// `CollationElement32`s (as `u32`s) for the Hangul Jamo Unicode Block.
@@ -280,7 +283,8 @@ pub struct CollationJamoV1<'data> {
     attributes_domain = "collator",
 ))]
 #[derive(Debug, PartialEq, Clone)]
-#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct CollationReorderingV1<'data> {
     /// Limit of last reordered range. 0 if no reordering or no split bytes.
@@ -369,7 +373,8 @@ impl<'data> CollationReorderingV1<'data> {
     attributes_domain = "collator",
 ))]
 #[derive(Debug, PartialEq, Clone, Copy)]
-#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct CollationMetadataV1 {
     /// See the mask constants in the `impl` block for the
@@ -463,7 +468,8 @@ impl CollationMetadataV1 {
     singleton
 ))]
 #[derive(Debug, PartialEq, Clone)]
-#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
+#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
 pub struct CollationSpecialPrimariesV1<'data> {
     /// The primaries corresponding to `MaxVariable`

diff --git a/components/collections/src/char16trie/trie.rs b/components/collections/src/char16trie/trie.rs
@@ -76,7 +76,8 @@ fn skip_node_value(pos: usize, lead: u16) -> usize {
 /// - [ICU4C UCharsTrie](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1UCharsTrie.html)
 /// - [ICU4J CharsTrie](https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/util/CharsTrie.html) API.
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
-#[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = icu_collections::char16trie))]
+#[cfg_attr(feature = "databake", derive(databake::Bake))]
+#[cfg_attr(feature = "databake", databake(path = icu_collections::char16trie))]
 #[derive(Clone, Debug, PartialEq, Eq, ZeroFrom)]
 pub struct Char16Trie<'data> {
     /// An array of u16 containing the trie data.

diff --git a/components/collections/src/codepointtrie/cptrie.rs b/components/collections/src/codepointtrie/cptrie.rs
@@ -36,7 +36,8 @@ use zerovec::ZeroVec;
 /// Also see [`UCPTrieType`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
 #[derive(Clone, Copy, PartialEq, Debug, Eq)]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
-#[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = icu_collections::codepointtrie))]
+#[cfg_attr(feature = "databake", derive(databake::Bake))]
+#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
 pub enum TrieType {
     /// Represents the "fast" type code point tries for the
     /// [`TrieType`] trait. The "fast max" limit is set to `0xffff`.
@@ -135,7 +136,8 @@ pub struct CodePointTrie<'trie, T: TrieValue> {
 
 /// This struct contains the fixed-length header fields of a [`CodePointTrie`].
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
-#[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = icu_collections::codepointtrie))]
+#[cfg_attr(feature = "databake", derive(databake::Bake))]
+#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Yokeable, ZeroFrom)]
 pub struct CodePointTrieHeader {
     /// The code point of the start of the last range of the trie. A