Added char pattern support to str_split and str_replace.

Replaced the two internal pattern types used by `str_split` and `str_replace` (`SplitPattern` and `ReplacePattern`) with a shared `Pattern` enum. Added tests to ensure that both macros handle char patterns correctly.
rodrimati1992 · Jul 4, 2022 · fc07aa6
1 parent 57e8893
commit fc07aa6
Show file tree

Hide file tree

Showing 10 changed files with 208 additions and 73 deletions.
diff --git a/const_format/Cargo.toml b/const_format/Cargo.toml
@@ -43,7 +43,10 @@ all = [
     "assert",
 ]
 
-# "private" features
+##############
+### "private" features
+
+# 
 __debug = ["const_format_proc_macros/debug"]
 __test = []
 __only_new_tests = ["__test"]

diff --git a/const_format/src/__str_methods.rs b/const_format/src/__str_methods.rs
@@ -1,3 +1,6 @@
+mod pattern;
+use pattern::{Pattern, PatternCtor, PatternNorm};
+
 #[cfg(feature = "const_generics")]
 mod str_replace;
 

diff --git a/const_format/src/__str_methods/pattern.rs b/const_format/src/__str_methods/pattern.rs
@@ -0,0 +1,52 @@
+use super::AsciiByte;
+
+pub(crate) struct PatternCtor<T>(pub(crate) T);
+
+impl PatternCtor<u8> {
+    pub(crate) const fn conv(self) -> Pattern {
+        Pattern::AsciiByte(AsciiByte::new(self.0))
+    }
+}
+
+impl PatternCtor<&'static str> {
+    pub(crate) const fn conv(self) -> Pattern {
+        if let [b @ 0..=127] = *self.0.as_bytes() {
+            Pattern::AsciiByte(AsciiByte::new(b))
+        } else {
+            Pattern::Str(self.0)
+        }
+    }
+}
+
+impl PatternCtor<char> {
+    pub(crate) const fn conv(self) -> Pattern {
+        let code = self.0 as u32;
+        if let c @ 0..=127 = code {
+            Pattern::AsciiByte(AsciiByte::new(c as u8))
+        } else {
+            Pattern::Char(crate::char_encoding::char_to_display(self.0))
+        }
+    }
+}
+
+#[derive(Copy, Clone)]
+pub(crate) enum Pattern {
+    AsciiByte(AsciiByte),
+    Str(&'static str),
+    Char(crate::char_encoding::FmtChar),
+}
+
+pub(crate) enum PatternNorm<'a> {
+    AsciiByte(AsciiByte),
+    Str(&'a str),
+}
+
+impl Pattern {
+    pub(crate) const fn normalize(&self) -> PatternNorm<'_> {
+        match self {
+            Pattern::AsciiByte(ab) => PatternNorm::AsciiByte(*ab),
+            Pattern::Str(str) => PatternNorm::Str(*str),
+            Pattern::Char(char) => PatternNorm::Str(char.as_str()),
+        }
+    }
+}
diff --git a/const_format/src/__str_methods/str_replace.rs b/const_format/src/__str_methods/str_replace.rs
@@ -1,39 +1,31 @@
-use super::{bytes_find, AsciiByte};
+use super::{bytes_find, Pattern, PatternCtor, PatternNorm};
 
 pub struct ReplaceInputConv<T>(pub &'static str, pub T, pub &'static str);
 
-impl ReplaceInputConv<u8> {
-    pub const fn conv(self) -> ReplaceInput {
-        ReplaceInput {
-            str: self.0,
-            pattern: ReplacePattern::AsciiByte(AsciiByte::new(self.1)),
-            replaced_with: self.2,
+macro_rules! ctor {
+    ($ty:ty) => {
+        impl ReplaceInputConv<$ty> {
+            pub const fn conv(self) -> ReplaceInput {
+                ReplaceInput {
+                    str: self.0,
+                    pattern: PatternCtor(self.1).conv(),
+                    replaced_with: self.2,
+                }
+            }
         }
-    }
+    };
 }
 
-impl ReplaceInputConv<&'static str> {
-    pub const fn conv(self) -> ReplaceInput {
-        ReplaceInput {
-            str: self.0,
-            pattern: ReplacePattern::Str(self.1),
-            replaced_with: self.2,
-        }
-    }
-}
+ctor! {u8}
+ctor! {&'static str}
+ctor! {char}
 
 pub struct ReplaceInput {
     str: &'static str,
-    pattern: ReplacePattern,
+    pattern: Pattern,
     replaced_with: &'static str,
 }
 
-#[derive(Copy, Clone)]
-pub enum ReplacePattern {
-    AsciiByte(AsciiByte),
-    Str(&'static str),
-}
-
 impl ReplaceInput {
     pub const fn replace_length(&self) -> usize {
         str_replace_length(self.str, self.pattern, self.replaced_with)
@@ -43,20 +35,20 @@ impl ReplaceInput {
     }
 }
 
-const fn str_replace_length(inp: &str, r: ReplacePattern, replaced_with: &str) -> usize {
+const fn str_replace_length(inp: &str, r: Pattern, replaced_with: &str) -> usize {
     let inp = inp.as_bytes();
 
     let replaced_len = replaced_with.len();
     let mut out_len = 0;
 
-    match r {
-        ReplacePattern::AsciiByte(byte) => {
+    match r.normalize() {
+        PatternNorm::AsciiByte(byte) => {
             let byte = byte.get();
             iter_copy_slice! {b in inp =>
                 out_len += if b == byte { replaced_len } else { 1 };
             }
         }
-        ReplacePattern::Str(str) => {
+        PatternNorm::Str(str) => {
             if str.is_empty() {
                 return inp.len();
             }
@@ -74,7 +66,7 @@ const fn str_replace_length(inp: &str, r: ReplacePattern, replaced_with: &str) -
     out_len
 }
 
-const fn str_replace<const L: usize>(inp: &str, r: ReplacePattern, replaced_with: &str) -> [u8; L] {
+const fn str_replace<const L: usize>(inp: &str, r: Pattern, replaced_with: &str) -> [u8; L] {
     let inp = inp.as_bytes();
 
     let replaced_with_bytes = replaced_with.as_bytes();
@@ -96,8 +88,8 @@ const fn str_replace<const L: usize>(inp: &str, r: ReplacePattern, replaced_with
         };
     }
 
-    match r {
-        ReplacePattern::AsciiByte(byte) => {
+    match r.normalize() {
+        PatternNorm::AsciiByte(byte) => {
             let byte = byte.get();
             iter_copy_slice! {b in inp =>
                 if b == byte {
@@ -107,7 +99,7 @@ const fn str_replace<const L: usize>(inp: &str, r: ReplacePattern, replaced_with
                 }
             }
         }
-        ReplacePattern::Str(str) => {
+        PatternNorm::Str(str) => {
             if str.is_empty() {
                 iter_copy_slice! {b in inp =>
                     write_byte!(b);

diff --git a/const_format/src/__str_methods/str_split.rs b/const_format/src/__str_methods/str_split.rs
@@ -1,41 +1,30 @@
-use super::AsciiByte;
+use super::{Pattern, PatternCtor, PatternNorm};
 
 pub struct SplitInputConv<T>(pub &'static str, pub T);
 
-impl SplitInputConv<u8> {
-    pub const fn conv(self) -> SplitInput {
-        SplitInput {
-            str: self.0,
-            pattern: SplitPattern::AsciiByte(AsciiByte::new(self.1)),
-            length: usize::MAX,
+macro_rules! ctor {
+    ($ty:ty) => {
+        impl SplitInputConv<$ty> {
+            pub const fn conv(self) -> SplitInput {
+                SplitInput {
+                    str: self.0,
+                    pattern: PatternCtor(self.1).conv(),
+                    length: usize::MAX,
+                }
+                .compute_length()
+            }
         }
-        .compute_length()
-    }
+    };
 }
 
-impl SplitInputConv<&'static str> {
-    pub const fn conv(self) -> SplitInput {
-        let str = self.1;
-
-        let pattern = if let [b @ 0..=127] = *str.as_bytes() {
-            SplitPattern::AsciiByte(AsciiByte::new(b))
-        } else {
-            SplitPattern::Str(str)
-        };
-
-        SplitInput {
-            str: self.0,
-            pattern,
-            length: usize::MAX,
-        }
-        .compute_length()
-    }
-}
+ctor! {u8}
+ctor! {&'static str}
+ctor! {char}
 
 #[derive(Copy, Clone)]
 pub struct SplitInput {
     str: &'static str,
-    pattern: SplitPattern,
+    pattern: Pattern,
     length: usize,
 }
 
@@ -54,21 +43,15 @@ impl SplitInput {
     }
 }
 
-#[derive(Copy, Clone)]
-pub enum SplitPattern {
-    AsciiByte(AsciiByte),
-    Str(&'static str),
-}
-
 pub const fn count_splits(
     SplitInput {
         mut str, pattern, ..
     }: SplitInput,
 ) -> usize {
     let mut count = 1;
 
-    match pattern {
-        SplitPattern::AsciiByte(ascii_c) => {
+    match pattern.normalize() {
+        PatternNorm::AsciiByte(ascii_c) => {
             let mut bytes = str.as_bytes();
             let ascii_c = ascii_c.get();
 
@@ -80,7 +63,7 @@ pub const fn count_splits(
                 }
             }
         }
-        SplitPattern::Str(str_pat) => {
+        PatternNorm::Str(str_pat) => {
             if str_pat.is_empty() {
                 let mut char_i = 0;
                 count += 1;
@@ -143,16 +126,16 @@ pub const fn split_it<const LEN: usize>(args: SplitInput) -> [&'static str; LEN]
         };
     }
 
-    match pattern {
-        SplitPattern::AsciiByte(ascii_c) => {
+    match pattern.normalize() {
+        PatternNorm::AsciiByte(ascii_c) => {
             let ascii_c = ascii_c.get();
 
             while let Some(found_at) = find_u8(str.as_bytes(), ascii_c) {
                 write_out! {konst::string::str_up_to(str, found_at)}
                 str = konst::string::str_from(str, found_at + 1);
             }
         }
-        SplitPattern::Str(str_pat) => {
+        PatternNorm::Str(str_pat) => {
             if str_pat.is_empty() {
                 out_i += 1;
                 while let Some(next) = find_next_char_boundary(str, 0) {

diff --git a/const_format/src/char_encoding.rs b/const_format/src/char_encoding.rs
@@ -101,6 +101,14 @@ impl FmtChar {
     fn as_bytes(&self) -> &[u8] {
         &self.encoded[..self.len()]
     }
+
+    #[cfg(feature = "more_str_macros")]
+    pub(crate) const fn as_str(&self) -> &str {
+        let bytes = konst::slice::slice_up_to(&self.encoded, self.len());
+
+        // safety: the tests ensure that all possible chars are encoded correctly
+        unsafe { core::str::from_utf8_unchecked(bytes) }
+    }
 }
 
 #[cfg(all(test, not(miri)))]

diff --git a/const_format/src/char_encoding/tests.rs b/const_format/src/char_encoding/tests.rs
@@ -23,6 +23,9 @@ fn char_to_utf8_display_test() {
         assert_eq!(utf8_here.len(), char_display_len(c));
 
         assert_eq!(utf8_std.as_bytes(), utf8_here.as_bytes());
+
+        #[cfg(feature = "more_str_macros")]
+        assert_eq!(utf8_std, utf8_here.as_str(), "{:?}", c);
     }
 }
 

diff --git a/const_format/src/macros/str_methods.rs b/const_format/src/macros/str_methods.rs
@@ -17,6 +17,8 @@
 ///
 /// - `&'static str`
 ///
+/// - `char`
+///
 /// - `u8`: required to be ascii (`0` up to `127` inclusive).
 ///
 /// # Example
@@ -502,6 +504,8 @@ macro_rules! str_get {
 ///
 /// - `&'static str`
 ///
+/// - `char`
+///
 /// - `u8`: only ascii values (0 up to 127 inclusive) are allowed
 ///
 /// The value of `LEN` depends on the `string` and `splitter` arguments.
@@ -512,6 +516,8 @@ macro_rules! str_get {
 /// ```rust
 /// use const_format::str_split;
 ///
+/// assert_eq!(str_split!("this is nice", ' '), ["this", "is", "nice"]);
+///
 /// assert_eq!(str_split!("Hello, world!", ", "), ["Hello", "world!"]);
 ///
 /// // A `""` splitter outputs all chars individually (`str::split` does the same)

diff --git a/const_format/tests/str_methods_modules/str_replace.rs b/const_format/tests/str_methods_modules/str_replace.rs
@@ -31,6 +31,46 @@ fn test_small_pattern() {
     assert_case! {"hequx", "qu", "XYZ", "heXYZx"}
 }
 
+#[test]
+fn test_char_pattern() {
+    {
+        const C: char = 'q';
+        assert_eq!(C.len_utf8(), 1);
+
+        assert_case! {"hequ", C, "XY", "heXYu"}
+        assert_case! {"hequx", C, "XYZ", "heXYZux"}
+        assert_case! {"hequq", C, "XY", "heXYuXY"}
+        assert_case! {"hequxq", C, "XYZ", "heXYZuxXYZ"}
+    }
+    {
+        const C: char = 'ñ';
+        assert_eq!(C.len_utf8(), 2);
+
+        assert_case! {"heñu", C, "XY", "heXYu"}
+        assert_case! {"heñux", C, "XYZ", "heXYZux"}
+        assert_case! {"heñuñ", C, "XY", "heXYuXY"}
+        assert_case! {"heñuxñ", C, "XYZ", "heXYZuxXYZ"}
+    }
+    {
+        const C: char = '₀';
+        assert_eq!(C.len_utf8(), 3);
+
+        assert_case! {"he₀u", C, "XY", "heXYu"}
+        assert_case! {"he₀ux", C, "XYZ", "heXYZux"}
+        assert_case! {"he₀u₀", C, "XY", "heXYuXY"}
+        assert_case! {"he₀ux₀", C, "XYZ", "heXYZuxXYZ"}
+    }
+    {
+        const C: char = '🧡';
+        assert_eq!(C.len_utf8(), 4);
+
+        assert_case! {"he🧡u", C, "XY", "heXYu"}
+        assert_case! {"he🧡ux", C, "XYZ", "heXYZux"}
+        assert_case! {"he🧡u🧡", C, "XY", "heXYuXY"}
+        assert_case! {"he🧡ux🧡", C, "XYZ", "heXYZuxXYZ"}
+    }
+}
+
 #[test]
 fn test_replace_overlapping() {
     assert_case! {"helololololol", "lol", "XY", "heXYoXYoXY"}