From fc07aa631a4366a6f34c09d24714749e0d58c7f9 Mon Sep 17 00:00:00 2001
From: rodrimati1992 <rodrimatt1985@gmail.com>
Date: Mon, 4 Jul 2022 18:14:33 -0300
Subject: [PATCH] Added char pattern support to `str_split` and `str_replace`.

Replaced both internal `*Pattern` types used by `str_split` and `str_replace`
with a shared `Pattern` enum.

Added tests to ensure that both macros handle char patterns correctly.
---
 const_format/Cargo.toml                       |  5 +-
 const_format/src/__str_methods.rs             |  3 +
 const_format/src/__str_methods/pattern.rs     | 52 +++++++++++++++
 const_format/src/__str_methods/str_replace.rs | 56 +++++++----------
 const_format/src/__str_methods/str_split.rs   | 63 +++++++------------
 const_format/src/char_encoding.rs             |  8 +++
 const_format/src/char_encoding/tests.rs       |  3 +
 const_format/src/macros/str_methods.rs        |  6 ++
 .../tests/str_methods_modules/str_replace.rs  | 40 ++++++++++++
 .../str_methods_modules/str_split_tests.rs    | 45 +++++++++++++
 10 files changed, 208 insertions(+), 73 deletions(-)
 create mode 100644 const_format/src/__str_methods/pattern.rs
diff --git a/const_format/Cargo.toml b/const_format/Cargo.toml
index 7d0b5da..e0ef83f 100644
--- a/const_format/Cargo.toml
+++ b/const_format/Cargo.toml
@@ -43,7 +43,10 @@ all = [
     "assert",
 ]
 
-# "private" features
+##############
+### "private" features
+
+# 
 __debug = ["const_format_proc_macros/debug"]
 __test = []
 __only_new_tests = ["__test"]
diff --git a/const_format/src/__str_methods.rs b/const_format/src/__str_methods.rs
index 45b234a..24c3390 100644
--- a/const_format/src/__str_methods.rs
+++ b/const_format/src/__str_methods.rs
@@ -1,3 +1,6 @@
+mod pattern;
+use pattern::{Pattern, PatternCtor, PatternNorm};
+
 #[cfg(feature = "const_generics")]
 mod str_replace;
 
diff --git a/const_format/src/__str_methods/pattern.rs b/const_format/src/__str_methods/pattern.rs
new file mode 100644
index 0000000..e2258d3
--- /dev/null
+++ b/const_format/src/__str_methods/pattern.rs
@@ -0,0 +1,52 @@
+use super::AsciiByte;
+
+pub(crate) struct PatternCtor<T>(pub(crate) T); // newtype whose `conv` builds a `Pattern`
+
+impl PatternCtor<u8> {
+    pub(crate) const fn conv(self) -> Pattern {
+        Pattern::AsciiByte(AsciiByte::new(self.0)) // `new` presumably rejects non-ASCII; confirm
+    }
+}
+
+impl PatternCtor<&'static str> {
+    pub(crate) const fn conv(self) -> Pattern {
+        if let [b @ 0..=127] = *self.0.as_bytes() {
+            Pattern::AsciiByte(AsciiByte::new(b)) // exactly one ASCII byte: use the byte pattern
+        } else {
+            Pattern::Str(self.0) // empty or longer than one byte: keep the string as-is
+        }
+    }
+}
+
+impl PatternCtor<char> {
+    pub(crate) const fn conv(self) -> Pattern {
+        let code = self.0 as u32; // scalar value of the char
+        if let c @ 0..=127 = code {
+            Pattern::AsciiByte(AsciiByte::new(c as u8)) // ASCII: `c <= 127`, so the cast is lossless
+        } else {
+            Pattern::Char(crate::char_encoding::char_to_display(self.0)) // stored as a `FmtChar`
+        }
+    }
+}
+
+#[derive(Copy, Clone)]
+pub(crate) enum Pattern {
+    AsciiByte(AsciiByte), // a single ASCII byte
+    Str(&'static str), // a string pattern (may be empty)
+    Char(crate::char_encoding::FmtChar), // a char kept in its encoded `FmtChar` form
+}
+
+pub(crate) enum PatternNorm<'a> {
+    AsciiByte(AsciiByte), // from `Pattern::AsciiByte`
+    Str(&'a str), // from `Pattern::Str` or `Pattern::Char` (see `Pattern::normalize`)
+}
+
+impl Pattern {
+    pub(crate) const fn normalize(&self) -> PatternNorm<'_> {
+        match self {
+            Pattern::AsciiByte(ab) => PatternNorm::AsciiByte(*ab),
+            Pattern::Str(str) => PatternNorm::Str(*str),
+            Pattern::Char(char) => PatternNorm::Str(char.as_str()), // borrows the text from `self`
+        }
+    }
+}
diff --git a/const_format/src/__str_methods/str_replace.rs b/const_format/src/__str_methods/str_replace.rs
index 3858ce0..a0d382a 100644
--- a/const_format/src/__str_methods/str_replace.rs
+++ b/const_format/src/__str_methods/str_replace.rs
@@ -1,39 +1,31 @@
-use super::{bytes_find, AsciiByte};
+use super::{bytes_find, Pattern, PatternCtor, PatternNorm};
 
 pub struct ReplaceInputConv<T>(pub &'static str, pub T, pub &'static str);
 
-impl ReplaceInputConv<u8> {
-    pub const fn conv(self) -> ReplaceInput {
-        ReplaceInput {
-            str: self.0,
-            pattern: ReplacePattern::AsciiByte(AsciiByte::new(self.1)),
-            replaced_with: self.2,
+macro_rules! ctor {
+    ($ty:ty) => {
+        impl ReplaceInputConv<$ty> {
+            pub const fn conv(self) -> ReplaceInput {
+                ReplaceInput {
+                    str: self.0,
+                    pattern: PatternCtor(self.1).conv(), // uses the `PatternCtor<$ty>` impl
+                    replaced_with: self.2,
+                }
+            }
         }
-    }
+    };
 }
 
-impl ReplaceInputConv<&'static str> {
-    pub const fn conv(self) -> ReplaceInput {
-        ReplaceInput {
-            str: self.0,
-            pattern: ReplacePattern::Str(self.1),
-            replaced_with: self.2,
-        }
-    }
-}
+ctor! {u8}
+ctor! {&'static str}
+ctor! {char}
 
 pub struct ReplaceInput {
     str: &'static str,
-    pattern: ReplacePattern,
+    pattern: Pattern,
     replaced_with: &'static str,
 }
 
-#[derive(Copy, Clone)]
-pub enum ReplacePattern {
-    AsciiByte(AsciiByte),
-    Str(&'static str),
-}
-
 impl ReplaceInput {
     pub const fn replace_length(&self) -> usize {
         str_replace_length(self.str, self.pattern, self.replaced_with)
@@ -43,20 +35,20 @@ impl ReplaceInput {
     }
 }
 
-const fn str_replace_length(inp: &str, r: ReplacePattern, replaced_with: &str) -> usize {
+const fn str_replace_length(inp: &str, r: Pattern, replaced_with: &str) -> usize {
     let inp = inp.as_bytes();
 
     let replaced_len = replaced_with.len();
     let mut out_len = 0;
 
-    match r {
-        ReplacePattern::AsciiByte(byte) => {
+    match r.normalize() {
+        PatternNorm::AsciiByte(byte) => {
             let byte = byte.get();
             iter_copy_slice! {b in inp =>
                 out_len += if b == byte { replaced_len } else { 1 };
             }
         }
-        ReplacePattern::Str(str) => {
+        PatternNorm::Str(str) => {
             if str.is_empty() {
                 return inp.len();
             }
@@ -74,7 +66,7 @@ const fn str_replace_length(inp: &str, r: ReplacePattern, replaced_with: &str) -
     out_len
 }
 
-const fn str_replace<const L: usize>(inp: &str, r: ReplacePattern, replaced_with: &str) -> [u8; L] {
+const fn str_replace<const L: usize>(inp: &str, r: Pattern, replaced_with: &str) -> [u8; L] {
     let inp = inp.as_bytes();
 
     let replaced_with_bytes = replaced_with.as_bytes();
@@ -96,8 +88,8 @@ const fn str_replace<const L: usize>(inp: &str, r: ReplacePattern, replaced_with
         };
     }
 
-    match r {
-        ReplacePattern::AsciiByte(byte) => {
+    match r.normalize() {
+        PatternNorm::AsciiByte(byte) => {
             let byte = byte.get();
             iter_copy_slice! {b in inp =>
                 if b == byte {
@@ -107,7 +99,7 @@ const fn str_replace<const L: usize>(inp: &str, r: ReplacePattern, replaced_with
                 }
             }
         }
-        ReplacePattern::Str(str) => {
+        PatternNorm::Str(str) => {
             if str.is_empty() {
                 iter_copy_slice! {b in inp =>
                     write_byte!(b);
diff --git a/const_format/src/__str_methods/str_split.rs b/const_format/src/__str_methods/str_split.rs
index d889eeb..b6482f8 100644
--- a/const_format/src/__str_methods/str_split.rs
+++ b/const_format/src/__str_methods/str_split.rs
@@ -1,41 +1,30 @@
-use super::AsciiByte;
+use super::{Pattern, PatternCtor, PatternNorm};
 
 pub struct SplitInputConv<T>(pub &'static str, pub T);
 
-impl SplitInputConv<u8> {
-    pub const fn conv(self) -> SplitInput {
-        SplitInput {
-            str: self.0,
-            pattern: SplitPattern::AsciiByte(AsciiByte::new(self.1)),
-            length: usize::MAX,
+macro_rules! ctor {
+    ($ty:ty) => {
+        impl SplitInputConv<$ty> {
+            pub const fn conv(self) -> SplitInput {
+                SplitInput {
+                    str: self.0,
+                    pattern: PatternCtor(self.1).conv(), // uses the `PatternCtor<$ty>` impl
+                    length: usize::MAX, // presumably overwritten by `compute_length()`; confirm
+                }
+                .compute_length()
+            }
         }
-        .compute_length()
-    }
+    };
 }
 
-impl SplitInputConv<&'static str> {
-    pub const fn conv(self) -> SplitInput {
-        let str = self.1;
-
-        let pattern = if let [b @ 0..=127] = *str.as_bytes() {
-            SplitPattern::AsciiByte(AsciiByte::new(b))
-        } else {
-            SplitPattern::Str(str)
-        };
-
-        SplitInput {
-            str: self.0,
-            pattern,
-            length: usize::MAX,
-        }
-        .compute_length()
-    }
-}
+ctor! {u8}
+ctor! {&'static str}
+ctor! {char}
 
 #[derive(Copy, Clone)]
 pub struct SplitInput {
     str: &'static str,
-    pattern: SplitPattern,
+    pattern: Pattern,
     length: usize,
 }
 
@@ -54,12 +43,6 @@ impl SplitInput {
     }
 }
 
-#[derive(Copy, Clone)]
-pub enum SplitPattern {
-    AsciiByte(AsciiByte),
-    Str(&'static str),
-}
-
 pub const fn count_splits(
     SplitInput {
         mut str, pattern, ..
@@ -67,8 +50,8 @@ pub const fn count_splits(
 ) -> usize {
     let mut count = 1;
 
-    match pattern {
-        SplitPattern::AsciiByte(ascii_c) => {
+    match pattern.normalize() {
+        PatternNorm::AsciiByte(ascii_c) => {
             let mut bytes = str.as_bytes();
             let ascii_c = ascii_c.get();
 
@@ -80,7 +63,7 @@ pub const fn count_splits(
                 }
             }
         }
-        SplitPattern::Str(str_pat) => {
+        PatternNorm::Str(str_pat) => {
             if str_pat.is_empty() {
                 let mut char_i = 0;
                 count += 1;
@@ -143,8 +126,8 @@ pub const fn split_it<const LEN: usize>(args: SplitInput) -> [&'static str; LEN]
         };
     }
 
-    match pattern {
-        SplitPattern::AsciiByte(ascii_c) => {
+    match pattern.normalize() {
+        PatternNorm::AsciiByte(ascii_c) => {
             let ascii_c = ascii_c.get();
 
             while let Some(found_at) = find_u8(str.as_bytes(), ascii_c) {
@@ -152,7 +135,7 @@ pub const fn split_it<const LEN: usize>(args: SplitInput) -> [&'static str; LEN]
                 str = konst::string::str_from(str, found_at + 1);
             }
         }
-        SplitPattern::Str(str_pat) => {
+        PatternNorm::Str(str_pat) => {
             if str_pat.is_empty() {
                 out_i += 1;
                 while let Some(next) = find_next_char_boundary(str, 0) {
diff --git a/const_format/src/char_encoding.rs b/const_format/src/char_encoding.rs
index 0d0115d..afdeed2 100644
--- a/const_format/src/char_encoding.rs
+++ b/const_format/src/char_encoding.rs
@@ -101,6 +101,14 @@ impl FmtChar {
     fn as_bytes(&self) -> &[u8] {
         &self.encoded[..self.len()]
     }
+
+    #[cfg(feature = "more_str_macros")]
+    pub(crate) const fn as_str(&self) -> &str {
+        let bytes = konst::slice::slice_up_to(&self.encoded, self.len());
+
+        // SAFETY: the tests ensure that every possible char is encoded as valid UTF-8
+        unsafe { core::str::from_utf8_unchecked(bytes) }
+    }
 }
 
 #[cfg(all(test, not(miri)))]
diff --git a/const_format/src/char_encoding/tests.rs b/const_format/src/char_encoding/tests.rs
index fbd26fd..64a0fc2 100644
--- a/const_format/src/char_encoding/tests.rs
+++ b/const_format/src/char_encoding/tests.rs
@@ -23,6 +23,9 @@ fn char_to_utf8_display_test() {
         assert_eq!(utf8_here.len(), char_display_len(c));
 
         assert_eq!(utf8_std.as_bytes(), utf8_here.as_bytes());
+
+        #[cfg(feature = "more_str_macros")]
+        assert_eq!(utf8_std, utf8_here.as_str(), "{:?}", c);
     }
 }
 
diff --git a/const_format/src/macros/str_methods.rs b/const_format/src/macros/str_methods.rs
index 65cb184..250a715 100644
--- a/const_format/src/macros/str_methods.rs
+++ b/const_format/src/macros/str_methods.rs
@@ -17,6 +17,8 @@
 ///
 /// - `&'static str`
 ///
+/// - `char`
+///
 /// - `u8`: required to be ascii (`0` up to `127` inclusive).
 ///
 /// # Example
@@ -502,6 +504,8 @@ macro_rules! str_get {
 ///
 /// - `&'static str`
 ///
+/// - `char`
+///
 /// - `u8`: only ascii values (0 up to 127 inclusive) are allowed
 ///
 /// The value of `LEN` depends on the `string` and `splitter` arguments.
@@ -512,6 +516,8 @@ macro_rules! str_get {
 /// ```rust
 /// use const_format::str_split;
 ///
+/// assert_eq!(str_split!("this is nice", ' '), ["this", "is", "nice"]);
+///
 /// assert_eq!(str_split!("Hello, world!", ", "), ["Hello", "world!"]);
 ///
 /// // A `""` splitter outputs all chars individually (`str::split` does the same)
diff --git a/const_format/tests/str_methods_modules/str_replace.rs b/const_format/tests/str_methods_modules/str_replace.rs
index ec7dd8f..cfbf2ad 100644
--- a/const_format/tests/str_methods_modules/str_replace.rs
+++ b/const_format/tests/str_methods_modules/str_replace.rs
@@ -31,6 +31,46 @@ fn test_small_pattern() {
     assert_case! {"hequx", "qu", "XYZ", "heXYZx"}
 }
 
+#[test]
+fn test_char_pattern() {
+    {
+        const C: char = 'q'; // 1-byte (ASCII) pattern
+        assert_eq!(C.len_utf8(), 1);
+
+        assert_case! {"hequ", C, "XY", "heXYu"}
+        assert_case! {"hequx", C, "XYZ", "heXYZux"}
+        assert_case! {"hequq", C, "XY", "heXYuXY"}
+        assert_case! {"hequxq", C, "XYZ", "heXYZuxXYZ"}
+    }
+    {
+        const C: char = 'ñ'; // 2-byte pattern
+        assert_eq!(C.len_utf8(), 2);
+
+        assert_case! {"heñu", C, "XY", "heXYu"}
+        assert_case! {"heñux", C, "XYZ", "heXYZux"}
+        assert_case! {"heñuñ", C, "XY", "heXYuXY"}
+        assert_case! {"heñuxñ", C, "XYZ", "heXYZuxXYZ"}
+    }
+    {
+        const C: char = '₀'; // 3-byte pattern
+        assert_eq!(C.len_utf8(), 3);
+
+        assert_case! {"he₀u", C, "XY", "heXYu"}
+        assert_case! {"he₀ux", C, "XYZ", "heXYZux"}
+        assert_case! {"he₀u₀", C, "XY", "heXYuXY"}
+        assert_case! {"he₀ux₀", C, "XYZ", "heXYZuxXYZ"}
+    }
+    {
+        const C: char = '🧡'; // 4-byte pattern
+        assert_eq!(C.len_utf8(), 4);
+
+        assert_case! {"he🧡u", C, "XY", "heXYu"}
+        assert_case! {"he🧡ux", C, "XYZ", "heXYZux"}
+        assert_case! {"he🧡u🧡", C, "XY", "heXYuXY"}
+        assert_case! {"he🧡ux🧡", C, "XYZ", "heXYZuxXYZ"}
+    }
+}
+
 #[test]
 fn test_replace_overlapping() {
     assert_case! {"helololololol", "lol", "XY", "heXYoXYoXY"}
diff --git a/const_format/tests/str_methods_modules/str_split_tests.rs b/const_format/tests/str_methods_modules/str_split_tests.rs
index 174709a..69be15e 100644
--- a/const_format/tests/str_methods_modules/str_split_tests.rs
+++ b/const_format/tests/str_methods_modules/str_split_tests.rs
@@ -39,3 +39,48 @@ fn test_str_split_with_word_arg() {
     assert_eq!(str_split!("fooXYbarXYbaz", "XY"), ["foo", "bar", "baz"]);
     assert_eq!(str_split!("fooXY bar XYbaz", "XY"), ["foo", " bar ", "baz"]);
 }
+
+#[test]
+fn test_str_split_with_ascii_char_arg() {
+    assert_eq!(str_split!("fob", '-'), ["fob"]); // no separator: the input comes back whole
+    assert_eq!(str_split!("-fob", '-'), ["", "fob"]); // leading separator: leading ""
+    assert_eq!(str_split!("-fob-", '-'), ["", "fob", ""]); // separators at both ends
+    assert_eq!(str_split!("foo-bar-baz", '-'), ["foo", "bar", "baz"]);
+    assert_eq!(str_split!("foo- bar -baz", '-'), ["foo", " bar ", "baz"]); // spaces are kept
+}
+
+#[test]
+fn test_str_split_with_non_ascii_char_arg() {
+    {
+        assert_eq!('\u{7f}'.len_utf8(), 1); // NOTE(review): unprintable original; DEL assumed
+        assert_eq!(str_split!("fob", '\u{7f}'), ["fob"]);
+        assert_eq!(str_split!("\u{7f}fob", '\u{7f}'), ["", "fob"]);
+        assert_eq!(str_split!("\u{7f}fob\u{7f}", '\u{7f}'), ["", "fob", ""]);
+        assert_eq!(str_split!("foo\u{7f}bar\u{7f}baz", '\u{7f}'), ["foo", "bar", "baz"]);
+        assert_eq!(str_split!("foo\u{7f} bar \u{7f}baz", '\u{7f}'), ["foo", " bar ", "baz"]);
+    }
+    {
+        assert_eq!('ñ'.len_utf8(), 2);
+        assert_eq!(str_split!("fob", 'ñ'), ["fob"]);
+        assert_eq!(str_split!("ñfob", 'ñ'), ["", "fob"]);
+        assert_eq!(str_split!("ñfobñ", 'ñ'), ["", "fob", ""]);
+        assert_eq!(str_split!("fooñbarñbaz", 'ñ'), ["foo", "bar", "baz"]);
+        assert_eq!(str_split!("fooñ bar ñbaz", 'ñ'), ["foo", " bar ", "baz"]);
+    }
+    {
+        assert_eq!('₀'.len_utf8(), 3);
+        assert_eq!(str_split!("fob", '₀'), ["fob"]);
+        assert_eq!(str_split!("₀fob", '₀'), ["", "fob"]);
+        assert_eq!(str_split!("₀fob₀", '₀'), ["", "fob", ""]);
+        assert_eq!(str_split!("foo₀bar₀baz", '₀'), ["foo", "bar", "baz"]);
+        assert_eq!(str_split!("foo₀ bar ₀baz", '₀'), ["foo", " bar ", "baz"]);
+    }
+    {
+        assert_eq!('🧡'.len_utf8(), 4);
+        assert_eq!(str_split!("fob", '🧡'), ["fob"]);
+        assert_eq!(str_split!("🧡fob", '🧡'), ["", "fob"]);
+        assert_eq!(str_split!("🧡fob🧡", '🧡'), ["", "fob", ""]);
+        assert_eq!(str_split!("foo🧡bar🧡baz", '🧡'), ["foo", "bar", "baz"]);
+        assert_eq!(str_split!("foo🧡 bar 🧡baz", '🧡'), ["foo", " bar ", "baz"]);
+    }
+}