Skip to content

Commit

Permalink
Added char pattern support to str_split and str_replace.
Browse files Browse the repository at this point in the history
Replaced both internal `*Pattern` types used by `str_split` and `str_replace`
with a shared `Pattern` enum.

Added tests to ensure that both macros handle char patterns correctly.
  • Loading branch information
rodrimati1992 committed Jul 4, 2022
1 parent 57e8893 commit fc07aa6
Show file tree
Hide file tree
Showing 10 changed files with 208 additions and 73 deletions.
5 changes: 4 additions & 1 deletion const_format/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ all = [
"assert",
]

# "private" features
##############
### "private" features

#
__debug = ["const_format_proc_macros/debug"]
__test = []
__only_new_tests = ["__test"]
Expand Down
3 changes: 3 additions & 0 deletions const_format/src/__str_methods.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
mod pattern;
use pattern::{Pattern, PatternCtor, PatternNorm};

#[cfg(feature = "const_generics")]
mod str_replace;

Expand Down
52 changes: 52 additions & 0 deletions const_format/src/__str_methods/pattern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use super::AsciiByte;

/// Wrapper around a pattern argument of type `T`, used to turn it into a [`Pattern`].
///
/// The conversion is exposed as an inherent `conv` method that is implemented
/// separately for `PatternCtor<u8>`, `PatternCtor<&'static str>` and `PatternCtor<char>`.
pub(crate) struct PatternCtor<T>(pub(crate) T);

impl PatternCtor<u8> {
    /// Converts the wrapped byte into a `Pattern::AsciiByte`.
    ///
    /// The byte is passed through `AsciiByte::new`; any range validation
    /// happens there (NOTE(review): not visible from this module — confirm).
    pub(crate) const fn conv(self) -> Pattern {
        let byte: u8 = self.0;
        let ascii = AsciiByte::new(byte);
        Pattern::AsciiByte(ascii)
    }
}

impl PatternCtor<&'static str> {
    /// Converts the wrapped string into a `Pattern`.
    ///
    /// A string that consists of exactly one byte in `0..=127` becomes
    /// `Pattern::AsciiByte`; every other string (including the empty one)
    /// is kept as `Pattern::Str`.
    pub(crate) const fn conv(self) -> Pattern {
        let pat: &'static str = self.0;

        match *pat.as_bytes() {
            // single ASCII byte: use the byte-based pattern
            [byte @ 0..=127] => Pattern::AsciiByte(AsciiByte::new(byte)),
            _ => Pattern::Str(pat),
        }
    }
}

impl PatternCtor<char> {
    /// Converts the wrapped `char` into a `Pattern`.
    ///
    /// Code points in `0..=127` become `Pattern::AsciiByte`; all other chars
    /// are passed to `char_encoding::char_to_display` and stored as `Pattern::Char`.
    pub(crate) const fn conv(self) -> Pattern {
        let ch: char = self.0;

        match ch as u32 {
            // the range pattern guarantees the value fits in a `u8`
            ascii @ 0..=127 => Pattern::AsciiByte(AsciiByte::new(ascii as u8)),
            _ => Pattern::Char(crate::char_encoding::char_to_display(ch)),
        }
    }
}

/// A search pattern, as produced by the `PatternCtor::conv` constructors.
#[derive(Copy, Clone)]
pub(crate) enum Pattern {
/// A single-byte pattern: built from a `u8`, from a string that is exactly one
/// byte in `0..=127`, or from a `char` whose code point is in `0..=127`.
AsciiByte(AsciiByte),
/// A string pattern: used for every string that is not exactly one ASCII byte.
Str(&'static str),
/// A `char` whose code point is above `127`, stored as a `FmtChar`.
Char(crate::char_encoding::FmtChar),
}

/// Borrowed view of a [`Pattern`] in which the `Char` case is folded into `Str`.
///
/// This is the type returned by `Pattern::normalize`.
pub(crate) enum PatternNorm<'a> {
/// Same payload as `Pattern::AsciiByte`.
AsciiByte(AsciiByte),
/// Either the string of a `Pattern::Str`, or the `as_str()` view of a `Pattern::Char`.
Str(&'a str),
}

impl Pattern {
    /// Reduces this pattern to the two cases in `PatternNorm`.
    ///
    /// `AsciiByte` and `Str` are carried over unchanged, while `Char` is exposed
    /// as a string slice borrowed from `self` through `FmtChar::as_str`.
    pub(crate) const fn normalize(&self) -> PatternNorm<'_> {
        match *self {
            Pattern::AsciiByte(byte) => PatternNorm::AsciiByte(byte),
            Pattern::Str(string) => PatternNorm::Str(string),
            // borrow the `FmtChar` in place so the returned slice lives as long as `self`
            Pattern::Char(ref fmt_char) => PatternNorm::Str(fmt_char.as_str()),
        }
    }
}
56 changes: 24 additions & 32 deletions const_format/src/__str_methods/str_replace.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,31 @@
use super::{bytes_find, AsciiByte};
use super::{bytes_find, Pattern, PatternCtor, PatternNorm};

pub struct ReplaceInputConv<T>(pub &'static str, pub T, pub &'static str);

impl ReplaceInputConv<u8> {
pub const fn conv(self) -> ReplaceInput {
ReplaceInput {
str: self.0,
pattern: ReplacePattern::AsciiByte(AsciiByte::new(self.1)),
replaced_with: self.2,
macro_rules! ctor {
($ty:ty) => {
impl ReplaceInputConv<$ty> {
pub const fn conv(self) -> ReplaceInput {
ReplaceInput {
str: self.0,
pattern: PatternCtor(self.1).conv(),
replaced_with: self.2,
}
}
}
}
};
}

impl ReplaceInputConv<&'static str> {
pub const fn conv(self) -> ReplaceInput {
ReplaceInput {
str: self.0,
pattern: ReplacePattern::Str(self.1),
replaced_with: self.2,
}
}
}
ctor! {u8}
ctor! {&'static str}
ctor! {char}

pub struct ReplaceInput {
str: &'static str,
pattern: ReplacePattern,
pattern: Pattern,
replaced_with: &'static str,
}

#[derive(Copy, Clone)]
pub enum ReplacePattern {
AsciiByte(AsciiByte),
Str(&'static str),
}

impl ReplaceInput {
pub const fn replace_length(&self) -> usize {
str_replace_length(self.str, self.pattern, self.replaced_with)
Expand All @@ -43,20 +35,20 @@ impl ReplaceInput {
}
}

const fn str_replace_length(inp: &str, r: ReplacePattern, replaced_with: &str) -> usize {
const fn str_replace_length(inp: &str, r: Pattern, replaced_with: &str) -> usize {
let inp = inp.as_bytes();

let replaced_len = replaced_with.len();
let mut out_len = 0;

match r {
ReplacePattern::AsciiByte(byte) => {
match r.normalize() {
PatternNorm::AsciiByte(byte) => {
let byte = byte.get();
iter_copy_slice! {b in inp =>
out_len += if b == byte { replaced_len } else { 1 };
}
}
ReplacePattern::Str(str) => {
PatternNorm::Str(str) => {
if str.is_empty() {
return inp.len();
}
Expand All @@ -74,7 +66,7 @@ const fn str_replace_length(inp: &str, r: ReplacePattern, replaced_with: &str) -
out_len
}

const fn str_replace<const L: usize>(inp: &str, r: ReplacePattern, replaced_with: &str) -> [u8; L] {
const fn str_replace<const L: usize>(inp: &str, r: Pattern, replaced_with: &str) -> [u8; L] {
let inp = inp.as_bytes();

let replaced_with_bytes = replaced_with.as_bytes();
Expand All @@ -96,8 +88,8 @@ const fn str_replace<const L: usize>(inp: &str, r: ReplacePattern, replaced_with
};
}

match r {
ReplacePattern::AsciiByte(byte) => {
match r.normalize() {
PatternNorm::AsciiByte(byte) => {
let byte = byte.get();
iter_copy_slice! {b in inp =>
if b == byte {
Expand All @@ -107,7 +99,7 @@ const fn str_replace<const L: usize>(inp: &str, r: ReplacePattern, replaced_with
}
}
}
ReplacePattern::Str(str) => {
PatternNorm::Str(str) => {
if str.is_empty() {
iter_copy_slice! {b in inp =>
write_byte!(b);
Expand Down
63 changes: 23 additions & 40 deletions const_format/src/__str_methods/str_split.rs
Original file line number Diff line number Diff line change
@@ -1,41 +1,30 @@
use super::AsciiByte;
use super::{Pattern, PatternCtor, PatternNorm};

pub struct SplitInputConv<T>(pub &'static str, pub T);

impl SplitInputConv<u8> {
pub const fn conv(self) -> SplitInput {
SplitInput {
str: self.0,
pattern: SplitPattern::AsciiByte(AsciiByte::new(self.1)),
length: usize::MAX,
macro_rules! ctor {
($ty:ty) => {
impl SplitInputConv<$ty> {
pub const fn conv(self) -> SplitInput {
SplitInput {
str: self.0,
pattern: PatternCtor(self.1).conv(),
length: usize::MAX,
}
.compute_length()
}
}
.compute_length()
}
};
}

impl SplitInputConv<&'static str> {
pub const fn conv(self) -> SplitInput {
let str = self.1;

let pattern = if let [b @ 0..=127] = *str.as_bytes() {
SplitPattern::AsciiByte(AsciiByte::new(b))
} else {
SplitPattern::Str(str)
};

SplitInput {
str: self.0,
pattern,
length: usize::MAX,
}
.compute_length()
}
}
ctor! {u8}
ctor! {&'static str}
ctor! {char}

#[derive(Copy, Clone)]
pub struct SplitInput {
str: &'static str,
pattern: SplitPattern,
pattern: Pattern,
length: usize,
}

Expand All @@ -54,21 +43,15 @@ impl SplitInput {
}
}

#[derive(Copy, Clone)]
pub enum SplitPattern {
AsciiByte(AsciiByte),
Str(&'static str),
}

pub const fn count_splits(
SplitInput {
mut str, pattern, ..
}: SplitInput,
) -> usize {
let mut count = 1;

match pattern {
SplitPattern::AsciiByte(ascii_c) => {
match pattern.normalize() {
PatternNorm::AsciiByte(ascii_c) => {
let mut bytes = str.as_bytes();
let ascii_c = ascii_c.get();

Expand All @@ -80,7 +63,7 @@ pub const fn count_splits(
}
}
}
SplitPattern::Str(str_pat) => {
PatternNorm::Str(str_pat) => {
if str_pat.is_empty() {
let mut char_i = 0;
count += 1;
Expand Down Expand Up @@ -143,16 +126,16 @@ pub const fn split_it<const LEN: usize>(args: SplitInput) -> [&'static str; LEN]
};
}

match pattern {
SplitPattern::AsciiByte(ascii_c) => {
match pattern.normalize() {
PatternNorm::AsciiByte(ascii_c) => {
let ascii_c = ascii_c.get();

while let Some(found_at) = find_u8(str.as_bytes(), ascii_c) {
write_out! {konst::string::str_up_to(str, found_at)}
str = konst::string::str_from(str, found_at + 1);
}
}
SplitPattern::Str(str_pat) => {
PatternNorm::Str(str_pat) => {
if str_pat.is_empty() {
out_i += 1;
while let Some(next) = find_next_char_boundary(str, 0) {
Expand Down
8 changes: 8 additions & 0 deletions const_format/src/char_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,14 @@ impl FmtChar {
fn as_bytes(&self) -> &[u8] {
&self.encoded[..self.len()]
}

#[cfg(feature = "more_str_macros")]
pub(crate) const fn as_str(&self) -> &str {
    // Only the first `self.len()` bytes of `encoded` are part of the char.
    let used = self.len();
    let utf8 = konst::slice::slice_up_to(&self.encoded, used);

    // SAFETY: the tests ensure that all possible chars are encoded correctly,
    // so `utf8` is expected to be the valid UTF-8 encoding of a single char.
    unsafe { core::str::from_utf8_unchecked(utf8) }
}
}

#[cfg(all(test, not(miri)))]
Expand Down
3 changes: 3 additions & 0 deletions const_format/src/char_encoding/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ fn char_to_utf8_display_test() {
assert_eq!(utf8_here.len(), char_display_len(c));

assert_eq!(utf8_std.as_bytes(), utf8_here.as_bytes());

#[cfg(feature = "more_str_macros")]
assert_eq!(utf8_std, utf8_here.as_str(), "{:?}", c);
}
}

Expand Down
6 changes: 6 additions & 0 deletions const_format/src/macros/str_methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
///
/// - `&'static str`
///
/// - `char`
///
/// - `u8`: required to be ascii (`0` up to `127` inclusive).
///
/// # Example
Expand Down Expand Up @@ -502,6 +504,8 @@ macro_rules! str_get {
///
/// - `&'static str`
///
/// - `char`
///
/// - `u8`: only ascii values (0 up to 127 inclusive) are allowed
///
/// The value of `LEN` depends on the `string` and `splitter` arguments.
Expand All @@ -512,6 +516,8 @@ macro_rules! str_get {
/// ```rust
/// use const_format::str_split;
///
/// assert_eq!(str_split!("this is nice", ' '), ["this", "is", "nice"]);
///
/// assert_eq!(str_split!("Hello, world!", ", "), ["Hello", "world!"]);
///
/// // A `""` splitter outputs all chars individually (`str::split` does the same)
Expand Down
40 changes: 40 additions & 0 deletions const_format/tests/str_methods_modules/str_replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,46 @@ fn test_small_pattern() {
assert_case! {"hequx", "qu", "XYZ", "heXYZx"}
}

// Checks replacement with `char` patterns, one group per UTF-8 encoded length (1 to 4 bytes).
// Each group uses the same four input shapes: the char once in the middle,
// and the char in the middle plus at the end, each with a 2-byte and a 3-byte replacement.
#[test]
fn test_char_pattern() {
{
// 1-byte char
const C: char = 'q';
assert_eq!(C.len_utf8(), 1);

assert_case! {"hequ", C, "XY", "heXYu"}
assert_case! {"hequx", C, "XYZ", "heXYZux"}
assert_case! {"hequq", C, "XY", "heXYuXY"}
assert_case! {"hequxq", C, "XYZ", "heXYZuxXYZ"}
}
{
// 2-byte char
const C: char = 'ñ';
assert_eq!(C.len_utf8(), 2);

assert_case! {"heñu", C, "XY", "heXYu"}
assert_case! {"heñux", C, "XYZ", "heXYZux"}
assert_case! {"heñuñ", C, "XY", "heXYuXY"}
assert_case! {"heñuxñ", C, "XYZ", "heXYZuxXYZ"}
}
{
// 3-byte char
const C: char = '₀';
assert_eq!(C.len_utf8(), 3);

assert_case! {"he₀u", C, "XY", "heXYu"}
assert_case! {"he₀ux", C, "XYZ", "heXYZux"}
assert_case! {"he₀u₀", C, "XY", "heXYuXY"}
assert_case! {"he₀ux₀", C, "XYZ", "heXYZuxXYZ"}
}
{
// 4-byte char
const C: char = '🧡';
assert_eq!(C.len_utf8(), 4);

assert_case! {"he🧡u", C, "XY", "heXYu"}
assert_case! {"he🧡ux", C, "XYZ", "heXYZux"}
assert_case! {"he🧡u🧡", C, "XY", "heXYuXY"}
assert_case! {"he🧡ux🧡", C, "XYZ", "heXYZuxXYZ"}
}
}

#[test]
fn test_replace_overlapping() {
assert_case! {"helololololol", "lol", "XY", "heXYoXYoXY"}
Expand Down
Loading

0 comments on commit fc07aa6

Please sign in to comment.