Skip to content

Commit

Permalink
Move collator and normalizer from experimental to components
Browse files Browse the repository at this point in the history
  • Loading branch information
hsivonen committed Jun 14, 2022
1 parent ae819ae commit a4b1e04
Show file tree
Hide file tree
Showing 44 changed files with 42 additions and 50 deletions.
4 changes: 2 additions & 2 deletions CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,19 @@

# List of components with at least two owners as specified in docs/crate-ownership.

components/collator/ @hsivonen @echeran
components/datetime/ @zbraniecki @gregtatum @nordzilla
components/decimal/ @sffc
components/icu/ @unicode-org/icu4x-owners
components/icu4x/ @unicode-org/icu4x-owners
components/locale_canonicalizer/ @dminor @zbraniecki
components/locid/ @zbraniecki @nciric
components/normalizer/ @hsivonen @echeran
components/plurals/ @zbraniecki @sffc
components/uniset/ @echeran @iainireland
experimental/bies/ @sffc
experimental/calendar/ @Manishearth @sffc
experimental/codepointtrie/ @echeran
experimental/collator/ @hsivonen @echeran
experimental/normalizer/ @hsivonen @echeran
experimental/provider_ppucd/ @echeran
experimental/segmenter/ @aethanyc @makotokato
experimental/segmenter_lstm/ @aethanyc @sffc
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,22 @@
resolver = "2"
members = [
"components/calendar",
"components/collator",
"components/datetime",
"components/decimal",
"components/icu",
"components/icu4x",
"components/list",
"components/locale_canonicalizer",
"components/locid",
"components/normalizer",
"components/plurals",
"components/properties",
"experimental/bies",
"experimental/casemapping",
"experimental/char16trie",
"experimental/crabbake",
"experimental/crabbake/derive",
"experimental/collator",
"experimental/normalizer",
"experimental/segmenter",
"experimental/segmenter_lstm",
"ffi/capi_cdylib",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ icu_codepointtrie = { version = "0.4", path = "../../utils/codepointtrie" }
icu_char16trie = { version = "0.1", path = "../../experimental/char16trie" }
icu_provider = { version = "0.6", path = "../../provider/core", features = ["macros"] }
icu_locid = { version = "0.6", path = "../../components/locid" }
icu_normalizer = { version = "0.6", path = "../../experimental/normalizer" }
icu_normalizer = { version = "0.6", path = "../../components/normalizer" }
icu_properties = { version = "0.6", path = "../../components/properties" }
icu_uniset = { version = "0.5", path = "../../utils/uniset" }
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Clone `rust_icu` from <https://github.com/google/rust_icu> to `$PROJECTS/rust_ic

In `$PROJECTS/icu-build` run `make install`.

`cd $PROJECTS/icu4x/experimental/collator`
`cd $PROJECTS/icu4x/components/collator`

Run the fuzzer until a panic:

Expand All @@ -52,7 +52,7 @@ Once there is a panic, recompile with debug symbols by adding `--dev`:

Record with

`LD_LIBRARY_PATH="$PROJECTS/localicu/lib" rr fuzz/target/x86_64-unknown-linux-gnu/debug/compare_utf16 -artifact_prefix=$PROJECTS/icu4x/experimental/collator/fuzz/artifacts/compare_utf16/ fuzz/artifacts/compare_utf16/crash-$ARTIFACTHASH`
`LD_LIBRARY_PATH="$PROJECTS/localicu/lib" rr fuzz/target/x86_64-unknown-linux-gnu/debug/compare_utf16 -artifact_prefix=$PROJECTS/icu4x/components/collator/fuzz/artifacts/compare_utf16/ fuzz/artifacts/compare_utf16/crash-$ARTIFACTHASH`

## Design notes

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
//!
//! In `$PROJECTS/icu-build` run `make install`.
//!
//! `cd $PROJECTS/icu4x/experimental/collator`
//! `cd $PROJECTS/icu4x/components/collator`
//!
//! Run the fuzzer until a panic:
//!
Expand All @@ -71,7 +71,7 @@
//!
//! Record with
//!
//! `LD_LIBRARY_PATH="$PROJECTS/localicu/lib" rr fuzz/target/x86_64-unknown-linux-gnu/debug/compare_utf16 -artifact_prefix=$PROJECTS/icu4x/experimental/collator/fuzz/artifacts/compare_utf16/ fuzz/artifacts/compare_utf16/crash-$ARTIFACTHASH`
//! `LD_LIBRARY_PATH="$PROJECTS/localicu/lib" rr fuzz/target/x86_64-unknown-linux-gnu/debug/compare_utf16 -artifact_prefix=$PROJECTS/icu4x/components/collator/fuzz/artifacts/compare_utf16/ fuzz/artifacts/compare_utf16/crash-$ARTIFACTHASH`
//!
//! # Design notes
//!
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@ bench = false # This option is required for Benchmark CI
default = []
serde = ["dep:serde", "icu_codepointtrie/serde", "zerovec/serde", "icu_uniset/serde", "icu_properties/serde", "icu_char16trie/serde"]
datagen = ["serde", "crabbake", "icu_codepointtrie/crabbake", "zerovec/crabbake", "icu_uniset/crabbake", "icu_properties/crabbake", "icu_char16trie/crabbake"]
experimental = []
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ use crate::error::NormalizerError;
use crate::provider::CanonicalDecompositionDataV1Marker;
use crate::provider::CompatibilityDecompositionSupplementV1Marker;
use crate::provider::DecompositionDataV1;
#[cfg(any(test, feature = "experimental"))]
use crate::provider::Uts46DecompositionSupplementV1Marker;
use alloc::string::String;
use alloc::vec::Vec;
Expand All @@ -93,6 +94,7 @@ use provider::CompatibilityDecompositionTablesV1Marker;
use provider::CompositionPassthroughV1;
use provider::DecompositionSupplementV1;
use provider::DecompositionTablesV1;
#[cfg(any(test, feature = "experimental"))]
use provider::Uts46CompositionPassthroughV1Marker;
use smallvec::SmallVec;
use u24::EMPTY_U24;
Expand All @@ -105,13 +107,15 @@ use zerovec::ZeroSlice;

/// Holds one decomposition-supplement data payload, with one variant per
/// marker type the payload may have been loaded under.
enum SupplementPayloadHolder {
/// Payload typed by `CompatibilityDecompositionSupplementV1Marker`.
Compatibility(DataPayload<CompatibilityDecompositionSupplementV1Marker>),
/// Payload typed by `Uts46DecompositionSupplementV1Marker`.
/// This variant is compiled only in test builds or when the
/// `experimental` Cargo feature is enabled.
#[cfg(any(test, feature = "experimental"))]
Uts46(DataPayload<Uts46DecompositionSupplementV1Marker>),
}

impl SupplementPayloadHolder {
fn get(&self) -> &DecompositionSupplementV1 {
match self {
SupplementPayloadHolder::Compatibility(d) => d.get(),
#[cfg(any(test, feature = "experimental"))]
SupplementPayloadHolder::Uts46(d) => d.get(),
}
}
Expand All @@ -120,6 +124,7 @@ impl SupplementPayloadHolder {
/// Holds one composition-passthrough data payload, with one variant per
/// marker type the payload may have been loaded under.
enum PassthroughPayloadHolder {
/// Payload typed by `CanonicalCompositionPassthroughV1Marker`.
Canonical(DataPayload<CanonicalCompositionPassthroughV1Marker>),
/// Payload typed by `CompatibilityCompositionPassthroughV1Marker`.
Compatibility(DataPayload<CompatibilityCompositionPassthroughV1Marker>),
/// Payload typed by `Uts46CompositionPassthroughV1Marker`.
/// This variant is compiled only in test builds or when the
/// `experimental` Cargo feature is enabled.
#[cfg(any(test, feature = "experimental"))]
Uts46(DataPayload<Uts46CompositionPassthroughV1Marker>),
}

Expand All @@ -128,6 +133,7 @@ impl PassthroughPayloadHolder {
match self {
PassthroughPayloadHolder::Canonical(d) => d.get(),
PassthroughPayloadHolder::Compatibility(d) => d.get(),
#[cfg(any(test, feature = "experimental"))]
PassthroughPayloadHolder::Uts46(d) => d.get(),
}
}
Expand Down Expand Up @@ -1147,6 +1153,7 @@ macro_rules! normalizer_methods {
}

/// Normalize a string slice into a `Write` sink.
#[cfg(feature = "experimental")]
pub fn normalize_to<W: core::fmt::Write + ?Sized>(
&self,
text: &str,
Expand Down Expand Up @@ -1181,6 +1188,7 @@ macro_rules! normalizer_methods {
///
/// Unpaired surrogates are mapped to the REPLACEMENT CHARACTER
/// before normalizing.
#[cfg(feature = "experimental")]
pub fn normalize_utf16_to<W: core::fmt::Write + ?Sized>(
&self,
text: &[u16],
Expand Down Expand Up @@ -1211,6 +1219,7 @@ macro_rules! normalizer_methods {
///
/// Errors are mapped to the REPLACEMENT CHARACTER according
/// to the WHATWG Encoding Standard.
#[cfg(feature = "experimental")]
pub fn normalize_utf8_to<W: core::fmt::Write + ?Sized>(
&self,
text: &[u8],
Expand Down Expand Up @@ -1353,6 +1362,7 @@ impl DecomposingNormalizer {
/// to other reorderable characters.
///
/// Deliberately private and not available outside the crate.
#[cfg(any(test, feature = "experimental"))]
fn try_new_uts46_decomposed_without_ignored_and_disallowed<D>(
data_provider: &D,
) -> Result<Self, NormalizerError>
Expand Down Expand Up @@ -1518,9 +1528,9 @@ impl ComposingNormalizer {
/// canonically equivalent with each other if they differ by how U+0345 is ordered relative
/// to other reorderable characters.
///
/// NOTE: This method should probably remain experimental when this crate moves to
/// `components` until suitability of this feature as part of IDNA processing has
/// been demonstrated.
/// NOTE: This method remains experimental until suitability of this feature as part of
/// IDNA processing has been demonstrated.
#[cfg(any(test, feature = "experimental"))]
pub fn try_new_uts46_without_ignored_and_disallowed<D>(
data_provider: &D,
) -> Result<Self, NormalizerError>
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
9 changes: 4 additions & 5 deletions provider/datagen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,17 @@ all-features = true

# ICU components
icu_calendar = { version = "0.6", path = "../../components/calendar", features = ["datagen"] }
icu_collator = { version = "0.6", path = "../../components/collator", features = ["datagen"] }
icu_datetime = { version = "0.6", path = "../../components/datetime", features = ["datagen"] }
icu_decimal = { version = "0.6", path = "../../components/decimal", features = ["datagen"] }
icu_list = { version = "0.6", path = "../../components/list", features = ["datagen"]}
icu_locale_canonicalizer = { version = "0.6", path = "../../components/locale_canonicalizer", features = ["datagen"] }
icu_normalizer = { version = "0.6", path = "../../components/normalizer", features = ["datagen"] }
icu_plurals = { version = "0.6", path = "../../components/plurals", features = ["datagen"] }
icu_properties = { version = "0.6", path = "../../components/properties", features = ["datagen"]}
# (experimental)
icu_casemapping = { version = "0.1", path = "../../experimental/casemapping", features = ["datagen"], optional = true }
icu_segmenter = { version = "0.6", path = "../../experimental/segmenter", features = ["datagen"], optional = true }
icu_collator = { version = "0.6", path = "../../experimental/collator", features = ["datagen"], optional = true }
icu_normalizer = { version = "0.6", path = "../../experimental/normalizer", features = ["datagen"], optional = true }

# ICU provider infrastructure
icu_provider = { version = "0.6", path = "../core", features = ["std", "log_error_context", "datagen"]}
Expand Down Expand Up @@ -76,7 +76,7 @@ crabbake = { version = "0.4", path = "../../experimental/crabbake"}
proc-macro2 = "1.0"
crlify = { version = "1", path = "../../utils/crlify"}
syn = {version = "1.0", features = ["parsing"] }
writeable = { version = "0.4", path = "../../utils/writeable", optional = true }
writeable = { version = "0.4", path = "../../utils/writeable" }
zip = "0.6"

# Dependencies for "bin" feature
Expand All @@ -90,12 +90,11 @@ walkdir = { version = "2.3.2", optional = true }

[dev-dependencies]
dhat = "0.3.0"
writeable = { path = "../../utils/writeable" }
icu_testdata = { path = "../testdata", features = ["metadata"] }

[features]
default = []
experimental = ["icu_casemapping", "icu_segmenter", "icu_collator", "icu_normalizer", "writeable"]
experimental = ["icu_casemapping", "icu_segmenter"]
bin = ["clap", "cached-path", "eyre", "pathdiff","sha2", "simple_logger", "walkdir"]

[[bin]]
Expand Down
24 changes: 8 additions & 16 deletions provider/datagen/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,21 @@ pub fn get_all_keys() -> Vec<ResourceKey> {
icu_plurals::provider::OrdinalV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_casemapping::provider::CaseMappingV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CanonicalDecompositionDataV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CompatibilityDecompositionSupplementV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::Uts46DecompositionSupplementV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CanonicalDecompositionTablesV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CompatibilityDecompositionTablesV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CanonicalCompositionsV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CanonicalCompositionPassthroughV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CompatibilityCompositionPassthroughV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::Uts46CompositionPassthroughV1Marker::KEY,
];
v.extend(icu_properties::provider::ALL_KEYS);
#[cfg(feature = "experimental")]
v.extend(icu_segmenter::ALL_KEYS);
#[cfg(feature = "experimental")]
v.extend(crate::transform::collator::ALL_KEYS);
v
}
Expand Down Expand Up @@ -107,6 +99,14 @@ macro_rules! create_datagen_provider {
$crate::transform::uprops::EnumeratedPropertyCodePointTrieProvider,
$crate::transform::uprops::ScriptWithExtensionsPropertyProvider,
$crate::transform::uprops::BinaryPropertyUnicodeSetDataProvider,
$crate::transform::collator::CollationProvider,
$crate::transform::uprops::CanonicalDecompositionTablesProvider,
$crate::transform::uprops::CompatibilityDecompositionTablesProvider,
$crate::transform::uprops::CanonicalCompositionsProvider,
$crate::transform::uprops::CanonicalCompositionPassthroughProvider,
$crate::transform::uprops::CompatibilityCompositionPassthroughProvider,
$crate::transform::uprops::CanonicalDecompositionDataProvider,
$crate::transform::uprops::CompatibilityDecompositionSupplementProvider,
]
)
};
Expand Down Expand Up @@ -176,16 +176,8 @@ macro_rules! create_datagen_provider {
$crate::transform::uprops::ScriptWithExtensionsPropertyProvider,
$crate::transform::uprops::BinaryPropertyUnicodeSetDataProvider,
$crate::transform::segmenter::SegmenterRuleProvider,
$crate::transform::uprops::CanonicalDecompositionDataProvider,
$crate::transform::uprops::CompatibilityDecompositionSupplementProvider,
$crate::transform::uprops::Uts46DecompositionSupplementProvider,
$crate::transform::uprops::CanonicalDecompositionTablesProvider,
$crate::transform::uprops::CompatibilityDecompositionTablesProvider,
$crate::transform::uprops::CanonicalCompositionsProvider,
$crate::transform::uprops::CanonicalCompositionPassthroughProvider,
$crate::transform::uprops::CompatibilityCompositionPassthroughProvider,
$crate::transform::uprops::Uts46CompositionPassthroughProvider,
$crate::transform::collator::CollationProvider,
]
)
};
Expand Down
1 change: 0 additions & 1 deletion provider/datagen/src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
//! Exposes all available data transformers

pub mod cldr;
#[cfg(feature = "experimental")]
pub mod collator;
#[cfg(feature = "experimental")]
pub mod segmenter;
Expand Down
17 changes: 4 additions & 13 deletions provider/datagen/src/transform/uprops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,35 +21,26 @@

mod bin_uniset;
#[cfg(feature = "experimental")]
mod normalizer;
#[cfg(feature = "experimental")]
mod casemapping;
#[cfg(feature = "experimental")]
mod normalizer_serde;
mod enum_codepointtrie;
mod normalizer;
mod normalizer_serde;
mod script;
mod uprops_serde;

pub use bin_uniset::BinaryPropertyUnicodeSetDataProvider;
#[cfg(feature = "experimental")]
pub use casemapping::CaseMappingDataProvider;
pub use enum_codepointtrie::EnumeratedPropertyCodePointTrieProvider;
pub use normalizer::CanonicalCompositionPassthroughProvider;
#[cfg(feature = "experimental")]
pub use normalizer::CanonicalCompositionsProvider;
#[cfg(feature = "experimental")]
pub use normalizer::CanonicalDecompositionDataProvider;
#[cfg(feature = "experimental")]
pub use normalizer::CanonicalDecompositionTablesProvider;
#[cfg(feature = "experimental")]
pub use normalizer::CompatibilityCompositionPassthroughProvider;
#[cfg(feature = "experimental")]
pub use normalizer::CompatibilityDecompositionSupplementProvider;
#[cfg(feature = "experimental")]
pub use normalizer::CompatibilityDecompositionTablesProvider;
#[cfg(feature = "experimental")]
pub use normalizer::Uts46CompositionPassthroughProvider;
#[cfg(feature = "experimental")]
pub use normalizer::Uts46DecompositionSupplementProvider;
#[cfg(feature = "experimental")]
pub use casemapping::CaseMappingDataProvider;
pub use enum_codepointtrie::EnumeratedPropertyCodePointTrieProvider;
pub use script::ScriptWithExtensionsPropertyProvider;
4 changes: 2 additions & 2 deletions provider/testdata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -170,18 +170,18 @@ writeable = { version = "0.4", path = "../../utils/writeable", optional = true }
# crabbake deps
icu_calendar = { version = "0.6", path = "../../components/calendar", optional = true }
icu_casemapping = { version = "0.1", path = "../../experimental/casemapping", optional = true }
icu_collator = { version = "0.6", path = "../../components/collator", optional = true }
icu_datetime = { version = "0.6", path = "../../components/datetime", optional = true }
icu_decimal = { version = "0.6", path = "../../components/decimal", optional = true }
icu_list = { version = "0.6", path = "../../components/list", optional = true }
icu_locale_canonicalizer = { version = "0.6", path = "../../components/locale_canonicalizer", optional = true }
icu_normalizer = { version = "0.6", path = "../../components/normalizer", optional = true }
icu_plurals = { version = "0.6", path = "../../components/plurals", optional = true }
icu_properties = { version = "0.6", path = "../../components/properties", optional = true }
icu_segmenter = { version = "0.6", path = "../../experimental/segmenter", optional = true }
icu_char16trie = { version = "0.1", path = "../../experimental/char16trie", optional = true }
icu_codepointtrie = { version = "0.4", path = "../../utils/codepointtrie", optional = true }
icu_uniset = { version = "0.5", path = "../../utils/uniset", optional = true }
icu_normalizer = { version = "0.6", path = "../../experimental/normalizer", optional = true }
icu_collator = { version = "0.6", path = "../../experimental/collator", optional = true }
tinystr = { version = "0.6", path = "../../utils/tinystr", optional = true }
zerovec = { version = "0.7", path = "../../utils/zerovec", optional = true }

Expand Down
2 changes: 1 addition & 1 deletion tools/scripts/tidy.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ script = '''
exit_on_error true
glob_pattern = set "./**/Cargo.toml"
skip_paths = set_new "experimental/collator/fuzz/Cargo.toml" "experimental/normalizer/fuzz/Cargo.toml"
skip_paths = set_new "components/collator/fuzz/Cargo.toml" "components/normalizer/fuzz/Cargo.toml"
template = canonicalize README.tpl
Expand Down

0 comments on commit a4b1e04

Please sign in to comment.