diff --git a/src/regex/bytes.rs b/src/regex/bytes.rs
index 3de4022a8..39af6e71c 100644
--- a/src/regex/bytes.rs
+++ b/src/regex/bytes.rs
@@ -1555,18 +1555,15 @@ impl<'h> Match<'h> {
 
 impl<'h> core::fmt::Debug for Match<'h> {
     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        // `DebugHaystack` renders the matched bytes as a quoted string and
+        // shows bytes that are not valid UTF-8 as hex escapes (e.g. `\xff`).
+        use regex_automata::util::escape::DebugHaystack;
+
         let mut fmt = f.debug_struct("Match");
-        fmt.field("start", &self.start).field("end", &self.end);
-        if let Ok(s) = core::str::from_utf8(self.as_bytes()) {
-            fmt.field("bytes", &s);
-        } else {
-            // FIXME: It would be nice if this could be printed as a string
-            // with invalid UTF-8 replaced with hex escapes. A alloc would
-            // probably okay if that makes it easier, but regex-automata does
-            // (at time of writing) have internal routines that do this. So
-            // maybe we should expose them.
-            fmt.field("bytes", &self.as_bytes());
-        }
+        fmt.field("start", &self.start)
+            .field("end", &self.end)
+            .field("bytes", &DebugHaystack(self.as_bytes()));
+
         fmt.finish()
     }
 }
@@ -2620,3 +2617,95 @@ fn no_expansion<T: AsRef<[u8]>>(replacement: &T) -> Option<Cow<'_, [u8]>> {
         None => Some(Cow::Borrowed(replacement)),
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::format;
+
+    // The accessors report the span and the matched slice of the haystack.
+    #[test]
+    fn test_match_properties() {
+        let haystack = b"Hello, world!";
+        let m = Match::new(haystack, 7, 12);
+
+        assert_eq!(m.start(), 7);
+        assert_eq!(m.end(), 12);
+        assert!(!m.is_empty());
+        assert_eq!(m.len(), 5);
+        assert_eq!(m.as_bytes(), b"world");
+    }
+
+    // A zero-width match is empty and has length zero.
+    #[test]
+    fn test_empty_match() {
+        let haystack = b"";
+        let m = Match::new(haystack, 0, 0);
+
+        assert!(m.is_empty());
+        assert_eq!(m.len(), 0);
+    }
+
+    // Valid UTF-8 is printed as a plain quoted string.
+    #[test]
+    fn test_debug_output_valid_utf8() {
+        let haystack = b"Hello, world!";
+        let m = Match::new(haystack, 7, 12);
+        let debug_str = format!("{:?}", m);
+
+        assert_eq!(
+            debug_str,
+            r#"Match { start: 7, end: 12, bytes: "world" }"#
+        );
+    }
+
+    // 0xFF can never appear in valid UTF-8, so it is shown as a hex escape.
+    #[test]
+    fn test_debug_output_invalid_utf8() {
+        let haystack = b"Hello, \xFFworld!";
+        let m = Match::new(haystack, 7, 13);
+        let debug_str = format!("{:?}", m);
+
+        assert_eq!(
+            debug_str,
+            r#"Match { start: 7, end: 13, bytes: "\xffworld" }"#
+        );
+    }
+
+    // Non-ASCII text that is valid UTF-8 is printed as-is, not escaped.
+    #[test]
+    fn test_debug_output_various_unicode() {
+        let haystack =
+            "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!".as_bytes();
+        let m = Match::new(haystack, 0, haystack.len());
+        let debug_str = format!("{:?}", m);
+
+        assert_eq!(
+            debug_str,
+            r#"Match { start: 0, end: 62, bytes: "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!" }"#
+        );
+    }
+
+    // Tab, newline and ESC (0x1B) are printed as escape sequences.
+    #[test]
+    fn test_debug_output_ascii_escape() {
+        let haystack = b"Hello,\tworld!\nThis is a \x1b[31mtest\x1b[0m.";
+        let m = Match::new(haystack, 0, haystack.len());
+        let debug_str = format!("{:?}", m);
+
+        assert_eq!(
+            debug_str,
+            r#"Match { start: 0, end: 38, bytes: "Hello,\tworld!\nThis is a \u{1b}[31mtest\u{1b}[0m." }"#
+        );
+    }
+
+    // Only the matched span is printed, not the surrounding haystack.
+    #[test]
+    fn test_debug_output_match_in_middle() {
+        let haystack = b"The quick brown fox jumps over the lazy dog.";
+        let m = Match::new(haystack, 16, 19);
+        let debug_str = format!("{:?}", m);
+
+        assert_eq!(debug_str, r#"Match { start: 16, end: 19, bytes: "fox" }"#);
+    }
+}