Skip to content

Commit

Permalink
Move unvalidated types to new crate (#5364)
Browse files Browse the repository at this point in the history
Part of #3546
  • Loading branch information
robertbastian authored Aug 15, 2024
1 parent fe3fb81 commit 3a96d5f
Show file tree
Hide file tree
Showing 41 changed files with 849 additions and 662 deletions.
22 changes: 22 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ members = [
"utils/env_preferences",
"utils/tinystr",
"utils/tzif",
"utils/potential_utf",
"utils/writeable",
"utils/yoke",
"utils/yoke/derive",
Expand Down Expand Up @@ -159,8 +160,8 @@ icu_provider_macros = { version = "~1.5.0", path = "provider/core/macros", defau
icu_provider_adapters = { version = "~1.5.0", path = "provider/adapters", default-features = false }
icu_provider_baked = { version = "~1.5.0", path = "provider/baked", default-features = false }
icu_provider_blob = { version = "~1.5.0", path = "provider/blob", default-features = false }
icu_provider_fs = { version = "~1.5.0", path = "provider/fs/", default-features = false }
icu_provider_registry = { version = "~1.5.0", path = "provider/registry/", default-features = false }
icu_provider_fs = { version = "~1.5.0", path = "provider/fs", default-features = false }
icu_provider_registry = { version = "~1.5.0", path = "provider/registry", default-features = false }

# Baked data
icu_calendar_data = { version = "~1.5.0", path = "provider/data/calendar", default-features = false }
Expand Down Expand Up @@ -190,7 +191,8 @@ ixdtf = { version = "0.2.0", path = "utils/ixdtf", default-features = false }
litemap = { version = "0.7.3", path = "utils/litemap", default-features = false }
tinystr = { version = "0.7.5", path = "utils/tinystr", default-features = false }
tzif = { version = "0.2.3", path = "utils/tzif", default-features = false }
writeable = { version = "0.5.5", path = "utils/writeable/", default-features = false }
potential_utf = { version = "0.0.0", path = "utils/potential_utf", default-features = false }
writeable = { version = "0.5.5", path = "utils/writeable", default-features = false }
yoke = { version = "0.7.4", path = "utils/yoke", default-features = false }
yoke-derive = { version = "0.7.4", path = "utils/yoke/derive", default-features = false }
zerofrom = { version = "0.1.3", path = "utils/zerofrom", default-features = false }
Expand Down
5 changes: 3 additions & 2 deletions components/casemap/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ icu_collections = { workspace = true }
icu_locale_core = { workspace = true }
icu_properties = { workspace = true }
icu_provider = { workspace = true, features = ["macros"] }
zerovec = { workspace = true, features = ["yoke"] }
potential_utf = { workspace = true, features = ["zerovec"] }
writeable = { workspace = true }
zerovec = { workspace = true, features = ["yoke"] }

databake = { workspace = true, features = ["derive"], optional = true}
serde = { workspace = true, features = ["derive", "alloc"], optional = true }
Expand All @@ -46,7 +47,7 @@ criterion = { workspace = true }
default = ["compiled_data"]
std = ["icu_collections/std", "icu_provider/std"]
bench = []
serde = ["dep:serde", "zerovec/serde", "icu_collections/serde", "icu_provider/serde", "icu_properties/serde"]
serde = ["dep:serde", "zerovec/serde", "icu_collections/serde", "icu_provider/serde", "icu_properties/serde", "potential_utf/serde"]
datagen = ["serde", "dep:databake", "zerovec/databake", "icu_collections/databake"]
compiled_data = ["dep:icu_casemap_data", "icu_properties/compiled_data"]

Expand Down
8 changes: 4 additions & 4 deletions components/casemap/src/provider/unfold.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#[cfg(feature = "datagen")]
use alloc::string::String;
use icu_provider::prelude::*;
use zerovec::ule::UnvalidatedStr;
use potential_utf::PotentialUtf8;
use zerovec::ZeroMap;

/// Reverse case folding data. Maps from multi-character strings back
Expand All @@ -30,7 +30,7 @@ use zerovec::ZeroMap;
pub struct CaseMapUnfoldV1<'data> {
#[cfg_attr(feature = "serde", serde(borrow))]
/// The actual map. Maps from strings to a list of codepoints, stored as a contiguous UTF-8 string
pub map: ZeroMap<'data, UnvalidatedStr, str>,
pub map: ZeroMap<'data, PotentialUtf8, str>,
}

impl<'data> CaseMapUnfoldV1<'data> {
Expand Down Expand Up @@ -80,7 +80,7 @@ impl<'data> CaseMapUnfoldV1<'data> {
let val = Self::decode_string(&row[string_width..])
.ok_or(DataError::custom("Unfold: unpaired surrogate in value"))?;
if map
.try_append(UnvalidatedStr::from_str(&key), val.as_ref())
.try_append(PotentialUtf8::from_str(&key), val.as_ref())
.is_some()
{
return Err(DataError::custom("Unfold: keys not sorted/unique"));
Expand All @@ -99,6 +99,6 @@ impl<'data> CaseMapUnfoldV1<'data> {
// Given a string, returns another string representing the set of characters
// that case fold to that string.
pub(crate) fn get(&self, key: &str) -> Option<&str> {
self.map.get(UnvalidatedStr::from_str(key))
self.map.get(PotentialUtf8::from_str(key))
}
}
2 changes: 2 additions & 0 deletions components/datetime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ icu_provider = { workspace = true, features = ["macros"] }
icu_timezone = { workspace = true }
smallvec = { workspace = true }
tinystr = { workspace = true, features = ["alloc", "zerovec"] }
potential_utf = { workspace = true, features = ["zerovec"] }
writeable = { workspace = true }
zerovec = { workspace = true, features = ["yoke"] }

Expand Down Expand Up @@ -78,6 +79,7 @@ serde = [
"litemap?/serde",
"smallvec/serde",
"tinystr/serde",
"potential_utf/serde",
"zerovec/serde",
]
datagen = [
Expand Down
9 changes: 5 additions & 4 deletions components/datetime/src/provider/calendar/symbols.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
use alloc::borrow::Cow;
use icu_calendar::types::MonthCode;
use icu_provider::prelude::*;
use potential_utf::PotentialUtf8;
use tinystr::{tinystr, TinyStr4};
use zerovec::{ule::UnvalidatedStr, ZeroMap};
use zerovec::ZeroMap;

size_test!(DateSymbolsV1, date_symbols_v1_size, 3792);

Expand Down Expand Up @@ -123,17 +124,17 @@ pub struct Eras<'data> {
///
/// Keys are era codes, and values are display names. See [`Eras`].
#[cfg_attr(feature = "serde", serde(borrow))]
pub names: ZeroMap<'data, UnvalidatedStr, str>,
pub names: ZeroMap<'data, PotentialUtf8, str>,
/// Symbol data for era abbreviations.
///
/// Keys are era codes, and values are display names. See [`Eras`].
#[cfg_attr(feature = "serde", serde(borrow))]
pub abbr: ZeroMap<'data, UnvalidatedStr, str>,
pub abbr: ZeroMap<'data, PotentialUtf8, str>,
/// Symbol data for era narrow forms.
///
/// Keys are era codes, and values are display names. See [`Eras`].
#[cfg_attr(feature = "serde", serde(borrow))]
pub narrow: ZeroMap<'data, UnvalidatedStr, str>,
pub narrow: ZeroMap<'data, PotentialUtf8, str>,
}

// Note: the SymbolsV* struct doc strings metadata are attached to `$name` in the macro invocation to
Expand Down
9 changes: 6 additions & 3 deletions components/datetime/src/provider/neo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ mod adapter;
use crate::pattern::runtime::{self, PatternULE};
use alloc::borrow::Cow;
use icu_provider::prelude::*;
use zerovec::ule::{AsULE, UnvalidatedStr, ULE};
use zerovec::{VarZeroVec, ZeroMap};
use potential_utf::PotentialUtf8;
use zerovec::{
ule::{AsULE, ULE},
VarZeroVec, ZeroMap,
};

#[cfg(feature = "experimental")]
use crate::neo_skeleton::NeoSkeletonLength;
Expand Down Expand Up @@ -356,7 +359,7 @@ size_test!(YearNamesV1, year_names_v1_size, 48);
pub enum YearNamesV1<'data> {
/// This calendar uses eras with numeric years; this variant stores the era names,
/// mapped from era code to name
Eras(#[cfg_attr(feature = "serde", serde(borrow))] ZeroMap<'data, UnvalidatedStr, str>),
Eras(#[cfg_attr(feature = "serde", serde(borrow))] ZeroMap<'data, PotentialUtf8, str>),
/// This calendar is cyclic (Chinese, Dangi), so it uses cyclic year names without any eras
Cyclic(#[cfg_attr(feature = "serde", serde(borrow))] VarZeroVec<'data, str>),
}
Expand Down
3 changes: 2 additions & 1 deletion components/experimental/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ fixed_decimal = { workspace = true }
icu_pattern = { workspace = true , features = ["alloc", "yoke", "zerofrom"]}
litemap = { workspace = true }
tinystr = { workspace = true, features = ["alloc", "zerovec"] }
potential_utf = { workspace = true, features = ["zerovec"] }
writeable = { workspace = true }
zerotrie = { workspace = true, features = ["yoke", "zerofrom"] }
zerovec = { workspace = true, features = ["derive", "yoke"] }
Expand Down Expand Up @@ -70,7 +71,7 @@ default = ["compiled_data"]
compiled_data = ["dep:icu_experimental_data", "icu_decimal/compiled_data", "icu_list/compiled_data", "icu_plurals/compiled_data", "icu_properties/compiled_data", "icu_normalizer/compiled_data"]
datagen = ["serde", "std", "dep:databake", "zerovec/databake", "zerotrie/databake", "tinystr/databake", "icu_collections/databake", "std", "log", "icu_pattern/databake"]
ryu = ["fixed_decimal/ryu"]
serde = ["dep:serde", "zerovec/serde", "tinystr/serde", "icu_collections/serde", "icu_decimal/serde", "icu_list/serde", "icu_pattern/serde", "icu_plurals/serde", "icu_provider/serde", "zerotrie/serde"]
serde = ["dep:serde", "zerovec/serde", "potential_utf/serde", "tinystr/serde", "icu_collections/serde", "icu_decimal/serde", "icu_list/serde", "icu_pattern/serde", "icu_plurals/serde", "icu_provider/serde", "zerotrie/serde"]
std = ["fixed_decimal/std", "icu_decimal/std", "icu_pattern/std", "icu_plurals/std", "icu_provider/std", "icu_locale_core/std"]

bench = []
Expand Down
6 changes: 3 additions & 3 deletions components/experimental/src/displaynames/displaynames.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use icu_locale_core::{
Locale,
};
use icu_provider::prelude::*;
use zerovec::ule::UnvalidatedStr;
use potential_utf::PotentialUtf8;

/// Lookup of the locale-specific display names by region code.
///
Expand Down Expand Up @@ -411,7 +411,7 @@ impl LocaleDisplayNamesFormatter {
if let Some(script) = locale.id.script {
let data = self.locale_data.get();
let id = LanguageIdentifier::from((locale.id.language, Some(script), None));
let cmp = |uvstr: &UnvalidatedStr| id.strict_cmp(uvstr).reverse();
let cmp = |uvstr: &PotentialUtf8| id.strict_cmp(uvstr).reverse();
if let Some(x) = match self.options.style {
Some(Style::Short) => data.short_names.get_by(cmp),
Some(Style::Long) => data.long_names.get_by(cmp),
Expand All @@ -429,7 +429,7 @@ impl LocaleDisplayNamesFormatter {
if let Some(region) = locale.id.region {
let data = self.locale_data.get();
let id = LanguageIdentifier::from((locale.id.language, None, Some(region)));
let cmp = |uvstr: &UnvalidatedStr| id.strict_cmp(uvstr).reverse();
let cmp = |uvstr: &PotentialUtf8| id.strict_cmp(uvstr).reverse();
if let Some(x) = match self.options.style {
Some(Style::Short) => data.short_names.get_by(cmp),
Some(Style::Long) => data.long_names.get_by(cmp),
Expand Down
4 changes: 2 additions & 2 deletions components/experimental/src/displaynames/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
//! Read more about data providers: [`icu_provider`]

use icu_provider::prelude::*;
use potential_utf::PotentialUtf8;
use tinystr::UnvalidatedTinyAsciiStr;
use zerovec::ule::UnvalidatedStr;
use zerovec::ZeroMap;

// We use raw TinyAsciiStrs for map keys, as we then don't have to
Expand All @@ -21,7 +21,7 @@ use zerovec::ZeroMap;
type UnvalidatedRegion = UnvalidatedTinyAsciiStr<3>;
type UnvalidatedLanguage = UnvalidatedTinyAsciiStr<3>;
type UnvalidatedScript = UnvalidatedTinyAsciiStr<4>;
type UnvalidatedLocale = UnvalidatedStr;
type UnvalidatedLocale = PotentialUtf8;
type UnvalidatedVariant = UnvalidatedTinyAsciiStr<8>;

#[cfg(feature = "compiled_data")]
Expand Down
7 changes: 4 additions & 3 deletions components/locale/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@ denylist = ["bench"]
all-features = true

[dependencies]
databake = { workspace = true, optional = true, features = ["derive"] }
displaydoc = { workspace = true }
icu_locale_core = { workspace = true, features = ["zerovec"] }
icu_provider = { workspace = true, features = ["macros"] }
serde = { workspace = true, features = ["derive", "alloc"], optional = true }
tinystr = { workspace = true, features = ["alloc", "zerovec"] }
potential_utf = { workspace = true, features = ["zerovec"] }
zerovec = { workspace = true, features = ["yoke"] }
databake = { workspace = true, optional = true, features = ["derive"] }
displaydoc = { workspace = true }

icu_locale_data = { workspace = true, optional = true }

Expand All @@ -51,7 +52,7 @@ bench = false # This option is required for Benchmark CI
default = ["compiled_data"]
std = []
bench = ["serde"]
serde = ["dep:serde", "icu_locale_core/serde", "tinystr/serde", "zerovec/serde", "icu_provider/serde"]
serde = ["dep:serde", "icu_locale_core/serde", "tinystr/serde", "zerovec/serde", "icu_provider/serde", "potential_utf/serde"]
datagen = ["serde", "dep:databake", "zerovec/databake", "icu_locale_core/databake", "tinystr/databake"]
compiled_data = ["dep:icu_locale_data"]

Expand Down
5 changes: 3 additions & 2 deletions components/locale/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ pub const MARKERS: &[DataMarkerInfo] = &[
use alloc::borrow::Cow;
use icu_locale_core::subtags::{Language, Region, Script, Variant};
use icu_provider::prelude::*;
use potential_utf::PotentialUtf8;
use tinystr::{TinyAsciiStr, UnvalidatedTinyAsciiStr};
use zerovec::{ule::UnvalidatedStr, VarZeroVec, ZeroMap, ZeroSlice, ZeroVec};
use zerovec::{VarZeroVec, ZeroMap, ZeroSlice, ZeroVec};

// We use raw TinyAsciiStrs for map keys, as we then don't have to
// validate them as subtags on deserialization. Map lookup can be
Expand Down Expand Up @@ -347,7 +348,7 @@ pub struct ParentsV1<'data> {
/// Map from language identifier to language identifier, indicating that the language on the
/// left should inherit from the language on the right.
#[cfg_attr(feature = "serde", serde(borrow))]
pub parents: ZeroMap<'data, UnvalidatedStr, (Language, Option<Script>, Option<Region>)>,
pub parents: ZeroMap<'data, PotentialUtf8, (Language, Option<Script>, Option<Region>)>,
}

#[icu_provider::data_struct(marker(ScriptDirectionV1Marker, "locale/script_dir@1", singleton))]
Expand Down
1 change: 1 addition & 0 deletions components/locale_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ icu = { path = "../../components/icu", default-features = false }
icu_benchmark_macros = { path = "../../tools/benchmark/macros" }
litemap = { path = "../../utils/litemap", features = ["testing"]}
postcard = { workspace = true, features = ["use-std"] }
potential_utf = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }

Expand Down
Loading

0 comments on commit 3a96d5f

Please sign in to comment.