Skip to content

Commit

Permalink
Merge branch 'main' into tz
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian committed Mar 21, 2023
2 parents ea482e9 + c7567d4 commit 0a0800b
Show file tree
Hide file tree
Showing 138 changed files with 16,331 additions and 15,547 deletions.
7 changes: 4 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions components/calendar/src/any_calendar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ use core::fmt;
/// // This is a DateTime<AnyCalendar>
/// let any_japanese_datetime = japanese_datetime.to_any();
/// ```
#[derive(Debug)]
#[non_exhaustive]
pub enum AnyCalendar {
/// A [`Gregorian`] calendar
Expand Down
2 changes: 1 addition & 1 deletion components/calendar/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
clippy::panic,
clippy::exhaustive_structs,
clippy::exhaustive_enums,
// TODO(#2266): enable missing_debug_implementations,
missing_debug_implementations,
)
)]
#![warn(missing_docs)]
Expand Down
1 change: 1 addition & 0 deletions components/collator/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ impl AnyQuaternaryAccumulator {
}

/// Compares strings according to culturally-relevant ordering.
#[derive(Debug)]
pub struct Collator {
special_primaries: Option<DataPayload<CollationSpecialPrimariesV1Marker>>,
root: DataPayload<CollationDataV1Marker>,
Expand Down
2 changes: 1 addition & 1 deletion components/collator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
clippy::panic,
clippy::exhaustive_structs,
clippy::exhaustive_enums,
// TODO(#2266): enable missing_debug_implementations,
missing_debug_implementations,
)
)]
#![warn(missing_docs)]
Expand Down
1 change: 1 addition & 0 deletions components/collections/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ criterion = "0.3.4"
icu_benchmark_macros = { path = "../../tools/benchmark/macros" }
iai = "0.1.1"
icu = { path = "../icu", default-features = false }
icu_testdata = { path = "../../provider/testdata", default-features = false, features = ["icu_properties"] }

[features]
std = []
Expand Down
5 changes: 4 additions & 1 deletion components/collections/codepointtrie_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
clippy::panic,
clippy::exhaustive_structs,
clippy::exhaustive_enums,
// TODO(#2266): enable missing_debug_implementations,
missing_debug_implementations,
)
)]
// This is a build tool with many invariants being enforced
Expand All @@ -96,6 +96,7 @@ mod native;
///
/// There is currently only one variant, but more may be added in the future.
#[non_exhaustive]
#[derive(Debug)]
pub enum CodePointTrieBuilderData<'a, T> {
/// A list of values for each code point, starting from code point 0.
///
Expand All @@ -106,6 +107,7 @@ pub enum CodePointTrieBuilderData<'a, T> {

/// Settings for building a CodePointTrie.
#[allow(clippy::exhaustive_structs)]
#[derive(Debug)]
pub struct CodePointTrieBuilder<'a, T> {
/// The data to be encoded.
pub data: CodePointTrieBuilderData<'a, T>,
Expand Down Expand Up @@ -149,6 +151,7 @@ where
}

#[test]
#[cfg(any(feature = "wasm", feature = "icu4c"))]
fn test_cpt_builder() {
// Buckets of ten characters for 0 to 100, and then some default values, and then heterogenous "last hex digit" for 0x100 to 0x200
let values: Vec<u32> = (0..100)
Expand Down
51 changes: 51 additions & 0 deletions components/collections/src/codepointinvlist/cpinvlist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,57 @@ impl<'data> CodePointInversionList<'data> {
})
}

/// Yields an [`Iterator`] returning the ranges of the code points that are
/// *not* included in the [`CodePointInversionList`]
///
/// Ranges are returned as [`RangeInclusive`], which is inclusive of its
/// `end` bound value. An end-inclusive behavior matches the ICU4C/J
/// behavior of ranges, ex: `CodePointInversionList::contains(UChar32 start, UChar32 end)`.
///
/// The inversion list stores `[start, limit)` pairs, so the last stored
/// value is an *exclusive* limit: a list whose last value is
/// `char::MAX as u32 + 1` has no trailing complement range, while any
/// smaller last value `l` produces the trailing range `l..=char::MAX`.
/// An empty list excludes every code point, so its complement is the
/// single range `0..=char::MAX`.
///
/// # Example
///
/// ```
/// use icu_collections::codepointinvlist::CodePointInversionList;
/// let example_list = [0x41, 0x44, 0x45, 0x46];
/// let example =
///     CodePointInversionList::try_from_inversion_list_slice(&example_list)
///         .unwrap();
/// let mut example_iter_ranges = example.iter_ranges_complemented();
/// assert_eq!(Some(0..=0x40), example_iter_ranges.next());
/// assert_eq!(Some(0x44..=0x44), example_iter_ranges.next());
/// assert_eq!(Some(0x46..=char::MAX as u32), example_iter_ranges.next());
/// assert_eq!(None, example_iter_ranges.next());
/// ```
pub fn iter_ranges_complemented(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
    let inv_ule = self.inv_list.as_ule_slice();
    // Everything between the first start and the last limit. `saturating_sub`
    // keeps an empty list from underflowing; `get` then returns `None`
    // because the range start (1) exceeds its end (0).
    let middle = inv_ule
        .get(1..inv_ule.len().saturating_sub(1))
        .unwrap_or(&[]);
    // Leading gap: code points before the first included range.
    let beginning = match self.inv_list.first() {
        // Empty list: nothing is included, so everything is in the complement.
        None => Some(0..=char::MAX as u32),
        Some(0) => None,
        Some(first) => Some(0..=first - 1),
    };
    // Trailing gap: the last stored value is an exclusive limit, so a gap
    // exists exactly when that limit is itself still a valid code point.
    let end = match self.inv_list.last() {
        Some(last) if last <= char::MAX as u32 => Some(last..=char::MAX as u32),
        _ => None,
    };
    // In `middle`, each pair is (limit of one included range, start of the
    // next), i.e. the half-open gap between two included ranges.
    #[allow(clippy::indexing_slicing)] // chunks
    let chunks = middle.chunks(2).map(|pair| {
        let range_start: u32 = AsULE::from_unaligned(pair[0]);
        let range_limit: u32 = AsULE::from_unaligned(pair[1]);
        RangeInclusive::new(range_start, range_limit - 1)
    });
    beginning.into_iter().chain(chunks).chain(end)
}

/// Returns the number of ranges contained in this [`CodePointInversionList`]
pub fn get_range_count(&self) -> usize {
self.inv_list.len() / 2
Expand Down
131 changes: 101 additions & 30 deletions components/collections/src/codepointtrie/cptrie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use crate::codepointtrie::error::Error;
use crate::codepointtrie::impl_const::*;

use crate::codepointinvlist::CodePointInversionList;
use core::char::CharTryFromError;
use core::convert::Infallible;
use core::convert::TryFrom;
use core::fmt::Display;
use core::iter::FromIterator;
Expand Down Expand Up @@ -64,33 +66,24 @@ pub trait TrieValue: Copy + Eq + PartialEq + zerovec::ule::AsULE + 'static {
fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>;
}

/// `u8` trie values: a `u32` converts only when it fits in eight bits.
impl TrieValue for u8 {
    type TryFromU32Error = TryFromIntError;

    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        // Checked narrowing; out-of-range input is reported, never truncated.
        <u8 as TryFrom<u32>>::try_from(i)
    }
}

/// `u16` trie values: a `u32` converts only when it fits in sixteen bits.
impl TrieValue for u16 {
    type TryFromU32Error = TryFromIntError;

    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        // Checked narrowing; out-of-range input is reported, never truncated.
        <u16 as TryFrom<u32>>::try_from(i)
    }
}

impl TrieValue for u32 {
type TryFromU32Error = TryFromIntError;
fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
Ok(i)
}
/// Implements [`TrieValue`] for a primitive type by forwarding
/// `try_from_u32` to that type's `TryFrom<u32>` implementation.
///
/// The first argument is the implementing type; the second must be the
/// `Error` type of its `TryFrom<u32>` implementation.
macro_rules! impl_primitive_trie_value {
    ($ty:ty, $err:ty) => {
        impl TrieValue for $ty {
            type TryFromU32Error = $err;

            fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
                <$ty as TryFrom<u32>>::try_from(i)
            }
        }
    };
}

/// `char` trie values: a `u32` converts only when it is a Unicode scalar
/// value (surrogates and values above `char::MAX` are rejected).
impl TrieValue for char {
    type TryFromU32Error = core::char::CharTryFromError;

    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        <char as TryFrom<u32>>::try_from(i)
    }
}
// The macro body forwards to `Self::try_from(i)`, so the second argument of
// each invocation has to be the error type of that primitive's
// `TryFrom<u32>` conversion.
impl_primitive_trie_value!(u8, TryFromIntError);
impl_primitive_trie_value!(u16, TryFromIntError);
// `u32` -> `u32` is the identity conversion and cannot fail, hence `Infallible`.
impl_primitive_trie_value!(u32, Infallible);
impl_primitive_trie_value!(i8, TryFromIntError);
impl_primitive_trie_value!(i16, TryFromIntError);
impl_primitive_trie_value!(i32, TryFromIntError);
// Only Unicode scalar values convert to `char`; anything else is an error.
impl_primitive_trie_value!(char, CharTryFromError);

/// Helper function used by [`get_range`]. Converts occurrences of trie's null
/// value into the provided null_value.
Expand Down Expand Up @@ -402,6 +395,9 @@ impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
///
/// Borrowed data remains borrowed, and owned data remains owned.
///
/// If the old and new types are not the same size, use
/// [`CodePointTrie::try_alloc_map_value`].
///
/// # Panics
///
/// Panics if `T` and `P` are different sizes.
Expand All @@ -413,10 +409,14 @@ impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
///
/// ```no_run
/// use icu_collections::codepointtrie::CodePointTrie;
/// use icu_collections::codepointtrie::planes;
///
/// let cpt1: CodePointTrie<char> = unimplemented!();
/// let cpt2: CodePointTrie<u32> =
/// cpt1.try_into_converted().expect("infallible");
/// let planes_trie_u8: CodePointTrie<u8> = planes::get_planes_trie();
/// let planes_trie_i8: CodePointTrie<i8> = planes_trie_u8
/// .try_into_converted()
/// .expect("infallible");
///
/// assert_eq!(planes_trie_i8.get32(0x30000), 3);
/// ```
pub fn try_into_converted<P>(self) -> Result<CodePointTrie<'trie, P>, ZeroVecError>
where
Expand All @@ -438,6 +438,44 @@ impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
})
}

/// Builds a new [`CodePointTrie`] whose values are produced by applying a
/// fallible mapping function to every value of this trie.
///
/// The result always holds freshly allocated value data, regardless of
/// whether `self` was borrowed or owned. The header and index are carried
/// over unchanged. The first error returned by `f` aborts the conversion
/// and is returned to the caller.
///
/// If the old and new types are the same size, use the more efficient
/// [`CodePointTrie::try_into_converted`].
///
/// # Examples
///
/// ```
/// use icu_collections::codepointtrie::CodePointTrie;
/// use icu_collections::codepointtrie::planes;
/// use core::convert::Infallible;
///
/// let planes_trie_u8: CodePointTrie<u8> = planes::get_planes_trie();
/// let planes_trie_u16: CodePointTrie<u16> = planes_trie_u8
///     .try_alloc_map_value(TryFrom::try_from)
///     .expect("infallible");
///
/// assert_eq!(planes_trie_u16.get32(0x30000), 3);
/// ```
pub fn try_alloc_map_value<P, E>(
    &self,
    mut f: impl FnMut(T) -> Result<P, E>,
) -> Result<CodePointTrie<'trie, P>, E>
where
    P: TrieValue,
{
    // The error value is mapped before the data, so a stateful `f` observes
    // it first.
    let error_value = f(self.error_value)?;
    // Collecting into `Result` stops at the first failing element.
    let data: ZeroVec<P> = self.data.iter().map(f).collect::<Result<_, E>>()?;
    Ok(CodePointTrie {
        header: self.header,
        index: self.index.clone(),
        data,
        error_value,
    })
}

/// Returns a [`CodePointMapRange`] struct which represents a range of code
/// points associated with the same trie value. The returned range will be
/// the longest stretch of consecutive code points starting at `start` that
Expand Down Expand Up @@ -861,6 +899,39 @@ impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
.map(|cpm_range| cpm_range.range)
}

/// Yields an [`Iterator`] returning the ranges of the code points after passing
/// the value through a mapping function.
///
/// This is preferable to calling `.get_ranges().map()` since it will coalesce
/// adjacent ranges into one.
///
/// # Examples
///
/// ```
/// use icu_collections::codepointtrie::planes;
///
/// let trie = planes::get_planes_trie();
///
/// let plane_val = 2;
/// let mut sip_range_iter = trie
///     .iter_ranges_mapped(|value| value != plane_val as u8)
///     .filter(|range| range.value);
///
/// let end = plane_val * 0x1_0000 - 1;
///
/// let sip_range = sip_range_iter.next()
///     .expect("Complemented planes data should have at least one entry");
/// assert_eq!(0..=end, sip_range.range);
/// ```
pub fn iter_ranges_mapped<'a, U: Eq + 'a>(
    &'a self,
    mut map: impl FnMut(T) -> U + Copy + 'a,
) -> impl Iterator<Item = CodePointMapRange<U>> + 'a {
    // Map each range's value first, then let the coalescer process the
    // mapped ranges.
    crate::iterator_utils::RangeListIteratorCoalescer::new(self.iter_ranges().map(
        move |range| CodePointMapRange {
            range: range.range,
            value: map(range.value),
        },
    ))
}

/// Returns a [`CodePointInversionList`] for the code points that have the given
/// [`TrieValue`] in the trie.
///
Expand Down Expand Up @@ -944,9 +1015,9 @@ where

/// Represents a range of consecutive code points sharing the same value in a
/// code point map. The start and end of the interval is represented as a
/// `RangeInclusive<u32>`, and the value is represented as a [`TrieValue`].
/// `RangeInclusive<u32>`, and the value is represented as `T`.
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct CodePointMapRange<T: TrieValue> {
pub struct CodePointMapRange<T> {
/// Range of code points from start to end (inclusive).
pub range: RangeInclusive<u32>,
/// Trie value associated with this range.
Expand Down
Loading

0 comments on commit 0a0800b

Please sign in to comment.