From fc07aa631a4366a6f34c09d24714749e0d58c7f9 Mon Sep 17 00:00:00 2001 From: rodrimati1992 Date: Mon, 4 Jul 2022 18:14:33 -0300 Subject: [PATCH] Added char pattern support to `str_split` and `str_replace`. Replaced both internal `*Pattern` types used by `str_split` and `str_replace` with a shared `Pattern` enum. Added tests to ensure that both macros handle char patterns correctly. --- const_format/Cargo.toml | 5 +- const_format/src/__str_methods.rs | 3 + const_format/src/__str_methods/pattern.rs | 52 +++++++++++++++ const_format/src/__str_methods/str_replace.rs | 56 +++++++---------- const_format/src/__str_methods/str_split.rs | 63 +++++++------------ const_format/src/char_encoding.rs | 8 +++ const_format/src/char_encoding/tests.rs | 3 + const_format/src/macros/str_methods.rs | 6 ++ .../tests/str_methods_modules/str_replace.rs | 40 ++++++++++++ .../str_methods_modules/str_split_tests.rs | 45 +++++++++++++ 10 files changed, 208 insertions(+), 73 deletions(-) create mode 100644 const_format/src/__str_methods/pattern.rs diff --git a/const_format/Cargo.toml b/const_format/Cargo.toml index 7d0b5da..e0ef83f 100644 --- a/const_format/Cargo.toml +++ b/const_format/Cargo.toml @@ -43,7 +43,10 @@ all = [ "assert", ] -# "private" features +############## +### "private" features + +# __debug = ["const_format_proc_macros/debug"] __test = [] __only_new_tests = ["__test"] diff --git a/const_format/src/__str_methods.rs b/const_format/src/__str_methods.rs index 45b234a..24c3390 100644 --- a/const_format/src/__str_methods.rs +++ b/const_format/src/__str_methods.rs @@ -1,3 +1,6 @@ +mod pattern; +use pattern::{Pattern, PatternCtor, PatternNorm}; + #[cfg(feature = "const_generics")] mod str_replace; diff --git a/const_format/src/__str_methods/pattern.rs b/const_format/src/__str_methods/pattern.rs new file mode 100644 index 0000000..e2258d3 --- /dev/null +++ b/const_format/src/__str_methods/pattern.rs @@ -0,0 +1,52 @@ +use super::AsciiByte; + +pub(crate) struct PatternCtor(pub(crate) T); + +impl PatternCtor { + pub(crate) const fn conv(self) -> Pattern { + Pattern::AsciiByte(AsciiByte::new(self.0)) + } +} + +impl PatternCtor<&'static str> { + pub(crate) const fn conv(self) -> Pattern { + if let [b @ 0..=127] = *self.0.as_bytes() { + Pattern::AsciiByte(AsciiByte::new(b)) + } else { + Pattern::Str(self.0) + } + } +} + +impl PatternCtor { + pub(crate) const fn conv(self) -> Pattern { + let code = self.0 as u32; + if let c @ 0..=127 = code { + Pattern::AsciiByte(AsciiByte::new(c as u8)) + } else { + Pattern::Char(crate::char_encoding::char_to_display(self.0)) + } + } +} + +#[derive(Copy, Clone)] +pub(crate) enum Pattern { + AsciiByte(AsciiByte), + Str(&'static str), + Char(crate::char_encoding::FmtChar), +} + +pub(crate) enum PatternNorm<'a> { + AsciiByte(AsciiByte), + Str(&'a str), +} + +impl Pattern { + pub(crate) const fn normalize(&self) -> PatternNorm<'_> { + match self { + Pattern::AsciiByte(ab) => PatternNorm::AsciiByte(*ab), + Pattern::Str(str) => PatternNorm::Str(*str), + Pattern::Char(char) => PatternNorm::Str(char.as_str()), + } + } +} diff --git a/const_format/src/__str_methods/str_replace.rs b/const_format/src/__str_methods/str_replace.rs index 3858ce0..a0d382a 100644 --- a/const_format/src/__str_methods/str_replace.rs +++ b/const_format/src/__str_methods/str_replace.rs @@ -1,39 +1,31 @@ -use super::{bytes_find, AsciiByte}; +use super::{bytes_find, Pattern, PatternCtor, PatternNorm}; pub struct ReplaceInputConv(pub &'static str, pub T, pub &'static str); -impl ReplaceInputConv { - pub const fn conv(self) -> ReplaceInput { - ReplaceInput { - str: self.0, - pattern: ReplacePattern::AsciiByte(AsciiByte::new(self.1)), - replaced_with: self.2, +macro_rules! ctor { + ($ty:ty) => { + impl ReplaceInputConv<$ty> { + pub const fn conv(self) -> ReplaceInput { + ReplaceInput { + str: self.0, + pattern: PatternCtor(self.1).conv(), + replaced_with: self.2, + } + } } - } + }; } -impl ReplaceInputConv<&'static str> { - pub const fn conv(self) -> ReplaceInput { - ReplaceInput { - str: self.0, - pattern: ReplacePattern::Str(self.1), - replaced_with: self.2, - } - } -} +ctor! {u8} +ctor! {&'static str} +ctor! {char} pub struct ReplaceInput { str: &'static str, - pattern: ReplacePattern, + pattern: Pattern, replaced_with: &'static str, } -#[derive(Copy, Clone)] -pub enum ReplacePattern { - AsciiByte(AsciiByte), - Str(&'static str), -} - impl ReplaceInput { pub const fn replace_length(&self) -> usize { str_replace_length(self.str, self.pattern, self.replaced_with) @@ -43,20 +35,20 @@ impl ReplaceInput { } } -const fn str_replace_length(inp: &str, r: ReplacePattern, replaced_with: &str) -> usize { +const fn str_replace_length(inp: &str, r: Pattern, replaced_with: &str) -> usize { let inp = inp.as_bytes(); let replaced_len = replaced_with.len(); let mut out_len = 0; - match r { - ReplacePattern::AsciiByte(byte) => { + match r.normalize() { + PatternNorm::AsciiByte(byte) => { let byte = byte.get(); iter_copy_slice! {b in inp => out_len += if b == byte { replaced_len } else { 1 }; } } - ReplacePattern::Str(str) => { + PatternNorm::Str(str) => { if str.is_empty() { return inp.len(); } @@ -74,7 +66,7 @@ const fn str_replace_length(inp: &str, r: ReplacePattern, replaced_with: &str) - out_len } -const fn str_replace(inp: &str, r: ReplacePattern, replaced_with: &str) -> [u8; L] { +const fn str_replace(inp: &str, r: Pattern, replaced_with: &str) -> [u8; L] { let inp = inp.as_bytes(); let replaced_with_bytes = replaced_with.as_bytes(); @@ -96,8 +88,8 @@ const fn str_replace(inp: &str, r: ReplacePattern, replaced_with }; } - match r { - ReplacePattern::AsciiByte(byte) => { + match r.normalize() { + PatternNorm::AsciiByte(byte) => { let byte = byte.get(); iter_copy_slice! {b in inp => if b == byte { @@ -107,7 +99,7 @@ const fn str_replace(inp: &str, r: ReplacePattern, replaced_with } } } - ReplacePattern::Str(str) => { + PatternNorm::Str(str) => { if str.is_empty() { iter_copy_slice! {b in inp => write_byte!(b); diff --git a/const_format/src/__str_methods/str_split.rs b/const_format/src/__str_methods/str_split.rs index d889eeb..b6482f8 100644 --- a/const_format/src/__str_methods/str_split.rs +++ b/const_format/src/__str_methods/str_split.rs @@ -1,41 +1,30 @@ -use super::AsciiByte; +use super::{Pattern, PatternCtor, PatternNorm}; pub struct SplitInputConv(pub &'static str, pub T); -impl SplitInputConv { - pub const fn conv(self) -> SplitInput { - SplitInput { - str: self.0, - pattern: SplitPattern::AsciiByte(AsciiByte::new(self.1)), - length: usize::MAX, +macro_rules! ctor { + ($ty:ty) => { + impl SplitInputConv<$ty> { + pub const fn conv(self) -> SplitInput { + SplitInput { + str: self.0, + pattern: PatternCtor(self.1).conv(), + length: usize::MAX, + } + .compute_length() + } } - .compute_length() - } + }; } -impl SplitInputConv<&'static str> { - pub const fn conv(self) -> SplitInput { - let str = self.1; - - let pattern = if let [b @ 0..=127] = *str.as_bytes() { - SplitPattern::AsciiByte(AsciiByte::new(b)) - } else { - SplitPattern::Str(str) - }; - - SplitInput { - str: self.0, - pattern, - length: usize::MAX, - } - .compute_length() - } -} +ctor! {u8} +ctor! {&'static str} +ctor! {char} #[derive(Copy, Clone)] pub struct SplitInput { str: &'static str, - pattern: SplitPattern, + pattern: Pattern, length: usize, } @@ -54,12 +43,6 @@ impl SplitInput { } } -#[derive(Copy, Clone)] -pub enum SplitPattern { - AsciiByte(AsciiByte), - Str(&'static str), -} - pub const fn count_splits( SplitInput { mut str, pattern, .. @@ -67,8 +50,8 @@ pub const fn count_splits( ) -> usize { let mut count = 1; - match pattern { - SplitPattern::AsciiByte(ascii_c) => { + match pattern.normalize() { + PatternNorm::AsciiByte(ascii_c) => { let mut bytes = str.as_bytes(); let ascii_c = ascii_c.get(); @@ -80,7 +63,7 @@ pub const fn count_splits( } } } - SplitPattern::Str(str_pat) => { + PatternNorm::Str(str_pat) => { if str_pat.is_empty() { let mut char_i = 0; count += 1; @@ -143,8 +126,8 @@ pub const fn split_it(args: SplitInput) -> [&'static str; LEN] }; } - match pattern { - SplitPattern::AsciiByte(ascii_c) => { + match pattern.normalize() { + PatternNorm::AsciiByte(ascii_c) => { let ascii_c = ascii_c.get(); while let Some(found_at) = find_u8(str.as_bytes(), ascii_c) { @@ -152,7 +135,7 @@ pub const fn split_it(args: SplitInput) -> [&'static str; LEN] str = konst::string::str_from(str, found_at + 1); } } - SplitPattern::Str(str_pat) => { + PatternNorm::Str(str_pat) => { if str_pat.is_empty() { out_i += 1; while let Some(next) = find_next_char_boundary(str, 0) { diff --git a/const_format/src/char_encoding.rs b/const_format/src/char_encoding.rs index 0d0115d..afdeed2 100644 --- a/const_format/src/char_encoding.rs +++ b/const_format/src/char_encoding.rs @@ -101,6 +101,14 @@ impl FmtChar { fn as_bytes(&self) -> &[u8] { &self.encoded[..self.len()] } + + #[cfg(feature = "more_str_macros")] + pub(crate) const fn as_str(&self) -> &str { + let bytes = konst::slice::slice_up_to(&self.encoded, self.len()); + + // safety: the tests ensure that all possible chars are encoded correctly + unsafe { core::str::from_utf8_unchecked(bytes) } + } } #[cfg(all(test, not(miri)))] diff --git a/const_format/src/char_encoding/tests.rs b/const_format/src/char_encoding/tests.rs index fbd26fd..64a0fc2 100644 --- a/const_format/src/char_encoding/tests.rs +++ b/const_format/src/char_encoding/tests.rs @@ -23,6 +23,9 @@ fn char_to_utf8_display_test() { assert_eq!(utf8_here.len(), char_display_len(c)); assert_eq!(utf8_std.as_bytes(), utf8_here.as_bytes()); + + #[cfg(feature = "more_str_macros")] + assert_eq!(utf8_std, utf8_here.as_str(), "{:?}", c); } } diff --git a/const_format/src/macros/str_methods.rs b/const_format/src/macros/str_methods.rs index 65cb184..250a715 100644 --- a/const_format/src/macros/str_methods.rs +++ b/const_format/src/macros/str_methods.rs @@ -17,6 +17,8 @@ /// /// - `&'static str` /// +/// - `char` +/// /// - `u8`: required to be ascii (`0` up to `127` inclusive). /// /// # Example @@ -502,6 +504,8 @@ macro_rules! str_get { /// /// - `&'static str` /// +/// - `char` +/// /// - `u8`: only ascii values (0 up to 127 inclusive) are allowed /// /// The value of `LEN` depends on the `string` and `splitter` arguments. @@ -512,6 +516,8 @@ macro_rules! str_get { /// ```rust /// use const_format::str_split; /// +/// assert_eq!(str_split!("this is nice", ' '), ["this", "is", "nice"]); +/// /// assert_eq!(str_split!("Hello, world!", ", "), ["Hello", "world!"]); /// /// // A `""` splitter outputs all chars individually (`str::split` does the same) diff --git a/const_format/tests/str_methods_modules/str_replace.rs b/const_format/tests/str_methods_modules/str_replace.rs index ec7dd8f..cfbf2ad 100644 --- a/const_format/tests/str_methods_modules/str_replace.rs +++ b/const_format/tests/str_methods_modules/str_replace.rs @@ -31,6 +31,46 @@ fn test_small_pattern() { assert_case! {"hequx", "qu", "XYZ", "heXYZx"} } +#[test] +fn test_char_pattern() { + { + const C: char = 'q'; + assert_eq!(C.len_utf8(), 1); + + assert_case! {"hequ", C, "XY", "heXYu"} + assert_case! {"hequx", C, "XYZ", "heXYZux"} + assert_case! {"hequq", C, "XY", "heXYuXY"} + assert_case! {"hequxq", C, "XYZ", "heXYZuxXYZ"} + } + { + const C: char = 'ñ'; + assert_eq!(C.len_utf8(), 2); + + assert_case! {"heñu", C, "XY", "heXYu"} + assert_case! {"heñux", C, "XYZ", "heXYZux"} + assert_case! {"heñuñ", C, "XY", "heXYuXY"} + assert_case! {"heñuxñ", C, "XYZ", "heXYZuxXYZ"} + } + { + const C: char = '₀'; + assert_eq!(C.len_utf8(), 3); + + assert_case! {"he₀u", C, "XY", "heXYu"} + assert_case! {"he₀ux", C, "XYZ", "heXYZux"} + assert_case! {"he₀u₀", C, "XY", "heXYuXY"} + assert_case! {"he₀ux₀", C, "XYZ", "heXYZuxXYZ"} + } + { + const C: char = '🧡'; + assert_eq!(C.len_utf8(), 4); + + assert_case! {"he🧡u", C, "XY", "heXYu"} + assert_case! {"he🧡ux", C, "XYZ", "heXYZux"} + assert_case! {"he🧡u🧡", C, "XY", "heXYuXY"} + assert_case! {"he🧡ux🧡", C, "XYZ", "heXYZuxXYZ"} + } +} + #[test] fn test_replace_overlapping() { assert_case! {"helololololol", "lol", "XY", "heXYoXYoXY"} diff --git a/const_format/tests/str_methods_modules/str_split_tests.rs b/const_format/tests/str_methods_modules/str_split_tests.rs index 174709a..69be15e 100644 --- a/const_format/tests/str_methods_modules/str_split_tests.rs +++ b/const_format/tests/str_methods_modules/str_split_tests.rs @@ -39,3 +39,48 @@ fn test_str_split_with_word_arg() { assert_eq!(str_split!("fooXYbarXYbaz", "XY"), ["foo", "bar", "baz"]); assert_eq!(str_split!("fooXY bar XYbaz", "XY"), ["foo", " bar ", "baz"]); } + +#[test] +fn test_str_split_with_ascii_char_arg() { + assert_eq!(str_split!("fob", '-'), ["fob"]); + assert_eq!(str_split!("-fob", '-'), ["", "fob"]); + assert_eq!(str_split!("-fob-", '-'), ["", "fob", ""]); + assert_eq!(str_split!("foo-bar-baz", '-'), ["foo", "bar", "baz"]); + assert_eq!(str_split!("foo- bar -baz", '-'), ["foo", " bar ", "baz"]); +} + +#[test] +fn test_str_split_with_non_ascii_char_arg() { + { + assert_eq!(''.len_utf8(), 1); + assert_eq!(str_split!("fob", ''), ["fob"]); + assert_eq!(str_split!("fob", ''), ["", "fob"]); + assert_eq!(str_split!("fob", ''), ["", "fob", ""]); + assert_eq!(str_split!("foobarbaz", ''), ["foo", "bar", "baz"]); + assert_eq!(str_split!("foo bar baz", ''), ["foo", " bar ", "baz"]); + } + { + assert_eq!('ñ'.len_utf8(), 2); + assert_eq!(str_split!("fob", 'ñ'), ["fob"]); + assert_eq!(str_split!("ñfob", 'ñ'), ["", "fob"]); + assert_eq!(str_split!("ñfobñ", 'ñ'), ["", "fob", ""]); + assert_eq!(str_split!("fooñbarñbaz", 'ñ'), ["foo", "bar", "baz"]); + assert_eq!(str_split!("fooñ bar ñbaz", 'ñ'), ["foo", " bar ", "baz"]); + } + { + assert_eq!('₀'.len_utf8(), 3); + assert_eq!(str_split!("fob", '₀'), ["fob"]); + assert_eq!(str_split!("₀fob", '₀'), ["", "fob"]); + assert_eq!(str_split!("₀fob₀", '₀'), ["", "fob", ""]); + assert_eq!(str_split!("foo₀bar₀baz", '₀'), ["foo", "bar", "baz"]); + assert_eq!(str_split!("foo₀ bar ₀baz", '₀'), ["foo", " bar ", "baz"]); + } + { + assert_eq!('🧡'.len_utf8(), 4); + assert_eq!(str_split!("fob", '🧡'), ["fob"]); + assert_eq!(str_split!("🧡fob", '🧡'), ["", "fob"]); + assert_eq!(str_split!("🧡fob🧡", '🧡'), ["", "fob", ""]); + assert_eq!(str_split!("foo🧡bar🧡baz", '🧡'), ["foo", "bar", "baz"]); + assert_eq!(str_split!("foo🧡 bar 🧡baz", '🧡'), ["foo", " bar ", "baz"]); + } +}