Skip to content

Commit

Permalink
Merge branch 'main' into exemplars
Browse files (browse the repository at this point in the history)
  • Loading branch information
robertbastian committed Sep 3, 2024
2 parents 58c13d9 + e80a609 commit 1426a7e
Show file tree
Hide file tree
Showing 500 changed files with 121,458 additions and 223,066 deletions.
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,10 @@ icu_benchmark_macros = { path = "tools/benchmark/macros" }

# The version here can either be a `version = ".."` spec or `git = "https://github.com/rust-diplomat/diplomat", rev = ".."`
# Diplomat must be published before each new ICU4X release, but ICU4X may depend on git revisions in between releases
diplomat = { git = "https://github.com/rust-diplomat/diplomat", rev = "291371b020d9c3cd5abbd1f96f6dfe4f8fac3b5c" }
diplomat-runtime = { git = "https://github.com/rust-diplomat/diplomat", rev = "291371b020d9c3cd5abbd1f96f6dfe4f8fac3b5c" }
diplomat_core = { git = "https://github.com/rust-diplomat/diplomat", rev = "291371b020d9c3cd5abbd1f96f6dfe4f8fac3b5c" }
diplomat-tool = { git = "https://github.com/rust-diplomat/diplomat", rev = "291371b020d9c3cd5abbd1f96f6dfe4f8fac3b5c" }
diplomat = { git = "https://github.com/rust-diplomat/diplomat", rev = "0183677b4bc25fc762ec06a4665c4f384998fb50" }
diplomat-runtime = { git = "https://github.com/rust-diplomat/diplomat", rev = "0183677b4bc25fc762ec06a4665c4f384998fb50" }
diplomat_core = { git = "https://github.com/rust-diplomat/diplomat", rev = "0183677b4bc25fc762ec06a4665c4f384998fb50" }
diplomat-tool = { git = "https://github.com/rust-diplomat/diplomat", rev = "0183677b4bc25fc762ec06a4665c4f384998fb50" }

# EXTERNAL DEPENDENCIES
#
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,5 @@ Copyright © 2020-2024 Unicode, Inc. Unicode and the Unicode Logo are registered
The project is released under [LICENSE](./LICENSE), the free and open-source [Unicode License](https://www.unicode.org/license.txt), which is based on the well-known MIT license, with the primary difference being that the Unicode License expressly covers data and data files, as well as code. For further information please see [The Unicode Consortium Intellectual Property, Licensing, and Technical Contribution Policies](https://www.unicode.org/policies/licensing_policy.html).

A CLA is required to contribute to this project - please refer to the [CONTRIBUTING.md](./CONTRIBUTING.md) file (or start a Pull Request) for more information.

The contents of this repository are governed by the Unicode [Terms of Use](https://www.unicode.org/copyright.html).
24 changes: 8 additions & 16 deletions components/calendar/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,8 @@ pub const MARKERS: &[DataMarkerInfo] = &[
#[derive(
Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Debug, yoke::Yokeable, zerofrom::ZeroFrom,
)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_calendar::provider),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct EraStartDate {
/// The year the era started in
Expand All @@ -111,11 +108,8 @@ pub struct EraStartDate {
marker(JapaneseExtendedErasV1Marker, "calendar/japanext@1", singleton)
)]
#[derive(Debug, PartialEq, Clone, Default)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_calendar::provider),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct JapaneseErasV1<'data> {
/// A map from era start dates to their era codes
Expand All @@ -137,11 +131,8 @@ pub struct JapaneseErasV1<'data> {
fallback_by = "region"
))]
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_calendar::provider),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // used in data provider
pub struct WeekDataV1 {
Expand All @@ -165,7 +156,8 @@ pub struct WeekDataV1 {
fallback_by = "region"
))]
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_calendar::provider))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // used in data provider
pub struct WeekDataV2 {
Expand Down
14 changes: 4 additions & 10 deletions components/calendar/src/provider/chinese_based.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,8 @@ use zerovec::ZeroVec;
marker(DangiCacheV1Marker, "calendar/dangicache@1", singleton)
)]
#[derive(Debug, PartialEq, Clone, Default)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_calendar::provider::chinese_based),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider::chinese_based))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct ChineseBasedCacheV1<'data> {
/// The extended year corresponding to the first data entry for this year
Expand Down Expand Up @@ -138,11 +135,8 @@ impl<'data> ChineseBasedCacheV1<'data> {
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ULE)]
#[cfg_attr(
feature = "datagen",
derive(databake::Bake),
databake(path = icu_calendar::provider),
)]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
#[repr(C, packed)]
pub struct PackedChineseBasedYearInfo(pub u8, pub u8, pub u8);

Expand Down
14 changes: 4 additions & 10 deletions components/calendar/src/provider/islamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,8 @@ use zerovec::ZeroVec;
)
)]
#[derive(Debug, PartialEq, Clone, Default)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_calendar::provider::islamic),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider::islamic))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct IslamicCacheV1<'data> {
/// The extended year corresponding to the first data entry for this year
Expand Down Expand Up @@ -150,11 +147,8 @@ impl<'data> IslamicCacheV1<'data> {
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, ULE)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_calendar::provider),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[repr(C, packed)]
pub struct PackedIslamicYearInfo(pub u8, pub u8);
Expand Down
14 changes: 4 additions & 10 deletions components/calendar/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,8 @@ impl FormattableYear {
/// [era-proposal]: https://tc39.es/proposal-intl-era-monthcode/
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(clippy::exhaustive_structs)] // this is a newtype
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_calendar::types),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::types))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct MonthCode(pub TinyStr4);

Expand Down Expand Up @@ -712,11 +709,8 @@ fn test_from_minute_with_remainder_days() {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(missing_docs)] // The weekday variants should be self-obvious.
#[repr(i8)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_calendar::types),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::types))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[allow(clippy::exhaustive_enums)] // This is stable
pub enum IsoWeekday {
Expand Down
4 changes: 2 additions & 2 deletions components/casemap/benches/casemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};
use icu_casemap::CaseMapper;
use icu_locale_core::langid;
#[cfg(feature = "bench")]
use icu_normalizer::DecomposingNormalizer;
use icu_normalizer::DecomposingNormalizerBorrowed;

const TEST_STRING_EN: &str = "One of the key design principles of ICU4X is to make locale data small and portable, allowing it to be pulled from multiple sources depending on the needs of the application. This document explains how that goal can be achieved.";

Expand Down Expand Up @@ -59,7 +59,7 @@ fn greek_uppercasing(_c: &mut Criterion) {
let el = langid!("el");

let iliad_lowercase = casemapper.lowercase_to_string(ILIAD, &root);
let decomposer = DecomposingNormalizer::new_nfd();
let decomposer = DecomposingNormalizerBorrowed::new_nfd();
let nfd = decomposer.normalize_utf8(ILIAD.as_bytes());
let nfd_lowercase = decomposer.normalize_utf8(iliad_lowercase.as_bytes());

Expand Down
7 changes: 2 additions & 5 deletions components/casemap/src/provider/exceptions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,8 @@ const SURROGATES_LEN: u32 = 0xDFFF - SURROGATES_START + 1;
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_casemap::provider::exceptions),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::exceptions))]
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
pub struct CaseMapExceptions<'data> {
#[cfg_attr(feature = "serde", serde(borrow))]
Expand Down
7 changes: 2 additions & 5 deletions components/casemap/src/provider/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,8 @@ pub use self::unfold::{CaseMapUnfoldV1, CaseMapUnfoldV1Marker};
/// </div>
#[icu_provider::data_struct(marker(CaseMapV1Marker, "props/casemap@1", singleton))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_casemap::provider),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider))]
#[yoke(prove_covariance_manually)]
/// CaseMapper provides low-level access to the data necessary to
/// convert characters and strings to upper, lower, or title case.
Expand Down
7 changes: 2 additions & 5 deletions components/casemap/src/provider/unfold.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@ use zerovec::ZeroMap;
/// </div>
#[icu_provider::data_struct(marker(CaseMapUnfoldV1Marker, "props/casemap_unfold@1", singleton))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_casemap::provider),
)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider))]
#[derive(Debug, PartialEq, Clone)]
#[yoke(prove_covariance_manually)]
pub struct CaseMapUnfoldV1<'data> {
Expand Down
4 changes: 2 additions & 2 deletions components/casemap/tests/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ fn test_dutch() {

#[test]
fn test_greek_upper() {
let nfc = icu_normalizer::ComposingNormalizer::new_nfc();
let nfd = icu_normalizer::DecomposingNormalizer::new_nfd();
let nfc = icu_normalizer::ComposingNormalizerBorrowed::new_nfc();
let nfd = icu_normalizer::DecomposingNormalizerBorrowed::new_nfd();

let cm = CaseMapper::new();
let modern_greek = &langid!("el");
Expand Down
4 changes: 2 additions & 2 deletions components/casemap/tests/gen_greek_to_me.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ use icu_casemap::greek_to_me::{
self, GreekDiacritics, GreekPrecomposedLetterData, GreekVowel, PackedGreekPrecomposedLetterData,
};
use icu_casemap::CaseMapper;
use icu_normalizer::DecomposingNormalizer;
use icu_normalizer::DecomposingNormalizerBorrowed;
use icu_properties::{maps, GeneralCategoryGroup, Script};
use std::collections::BTreeMap;
use std::fmt::Write;

fn main() {
let decomposer = DecomposingNormalizer::new_nfd();
let decomposer = DecomposingNormalizerBorrowed::new_nfd();
let script = maps::script();
let gc = maps::general_category();
let cm = CaseMapper::new();
Expand Down
18 changes: 12 additions & 6 deletions components/collator/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ fn data_ce_to_primary(data_ce: u64, c: char) -> u32 {
attributes_domain = "collator",
))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct CollationDataV1<'data> {
/// Mapping from `char` to `CollationElement32` (represented
Expand Down Expand Up @@ -237,7 +238,8 @@ impl<'data> CollationDataV1<'data> {
attributes_domain = "collator",
))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct CollationDiacriticsV1<'data> {
/// Secondary weights for characters starting from U+0300 up
Expand All @@ -257,7 +259,8 @@ pub struct CollationDiacriticsV1<'data> {
/// </div>
#[icu_provider::data_struct(marker(CollationJamoV1Marker, "collator/jamo@1", singleton))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct CollationJamoV1<'data> {
/// `CollationElement32`s (as `u32`s) for the Hangul Jamo Unicode Block.
Expand All @@ -280,7 +283,8 @@ pub struct CollationJamoV1<'data> {
attributes_domain = "collator",
))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct CollationReorderingV1<'data> {
/// Limit of last reordered range. 0 if no reordering or no split bytes.
Expand Down Expand Up @@ -369,7 +373,8 @@ impl<'data> CollationReorderingV1<'data> {
attributes_domain = "collator",
))]
#[derive(Debug, PartialEq, Clone, Copy)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct CollationMetadataV1 {
/// See the mask constants in the `impl` block for the
Expand Down Expand Up @@ -463,7 +468,8 @@ impl CollationMetadataV1 {
singleton
))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_collator::provider))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct CollationSpecialPrimariesV1<'data> {
/// The primaries corresponding to `MaxVariable`
Expand Down
3 changes: 2 additions & 1 deletion components/collections/src/char16trie/trie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ fn skip_node_value(pos: usize, lead: u16) -> usize {
/// - [ICU4C UCharsTrie](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1UCharsTrie.html)
/// - [ICU4J CharsTrie](https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/util/CharsTrie.html) API.
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = icu_collections::char16trie))]
#[cfg_attr(feature = "databake", derive(databake::Bake))]
#[cfg_attr(feature = "databake", databake(path = icu_collections::char16trie))]
#[derive(Clone, Debug, PartialEq, Eq, ZeroFrom)]
pub struct Char16Trie<'data> {
/// An array of u16 containing the trie data.
Expand Down
6 changes: 4 additions & 2 deletions components/collections/src/codepointtrie/cptrie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ use zerovec::ZeroVec;
/// Also see [`UCPTrieType`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = icu_collections::codepointtrie))]
#[cfg_attr(feature = "databake", derive(databake::Bake))]
#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
pub enum TrieType {
/// Represents the "fast" type code point tries for the
/// [`TrieType`] trait. The "fast max" limit is set to `0xffff`.
Expand Down Expand Up @@ -135,7 +136,8 @@ pub struct CodePointTrie<'trie, T: TrieValue> {

/// This struct contains the fixed-length header fields of a [`CodePointTrie`].
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = icu_collections::codepointtrie))]
#[cfg_attr(feature = "databake", derive(databake::Bake))]
#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Yokeable, ZeroFrom)]
pub struct CodePointTrieHeader {
/// The code point of the start of the last range of the trie. A
Expand Down
Loading

0 comments on commit 1426a7e

Please sign in to comment.