@@ -4,9 +4,18 @@ use memchr::memchr2_iter;
4
4
use std:: borrow:: Cow ;
5
5
use std:: ops:: Range ;
6
6
7
+ use jetscii:: bytes;
8
+ use memchr;
9
+ use once_cell:: sync:: Lazy ;
10
+
7
11
#[ cfg( test) ]
8
12
use pretty_assertions:: assert_eq;
9
13
14
+
15
+ static XML_ESCAPE_BYTES : Lazy < jetscii:: BytesConst > =
16
+ Lazy :: new ( || bytes ! ( b'<' , b'>' , b'&' , b'\'' , b'"' ) ) ;
17
+ static XML_PARTIAL_ESCAPE_BYTES : Lazy < jetscii:: BytesConst > = Lazy :: new ( || bytes ! ( b'<' , b'>' , b'&' ) ) ;
18
+
10
19
/// Error for XML escape / unescape.
11
20
#[ derive( Clone , Debug ) ]
12
21
pub enum EscapeError {
@@ -72,7 +81,8 @@ impl std::error::Error for EscapeError {}
72
81
/// | `'` | `&apos;`
73
82
/// | `"` | `&quot;`
74
83
pub fn escape ( raw : & str ) -> Cow < str > {
75
- _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' | b'\'' | b'\"' ) )
84
+ // _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"'))
85
+ simd_escape ( raw, & XML_ESCAPE_BYTES )
76
86
}
77
87
78
88
/// Escapes an `&str` and replaces xml special characters (`<`, `>`, `&`)
@@ -89,9 +99,11 @@ pub fn escape(raw: &str) -> Cow<str> {
89
99
/// | `>` | `&gt;`
90
100
/// | `&` | `&amp;`
91
101
pub fn partial_escape ( raw : & str ) -> Cow < str > {
92
- _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' ) )
102
+ // _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&'))
103
+ simd_escape ( raw, & XML_PARTIAL_ESCAPE_BYTES )
93
104
}
94
105
106
+
95
107
/// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`,
96
108
/// `&`, `'`, `"`) with their corresponding xml escaped value.
97
109
pub ( crate ) fn _escape < F : Fn ( u8 ) -> bool > ( raw : & str , escape_chars : F ) -> Cow < str > {
@@ -121,7 +133,47 @@ pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str>
121
133
b'\r' => escaped. extend_from_slice ( b" " ) ,
122
134
b' ' => escaped. extend_from_slice ( b" " ) ,
123
135
_ => unreachable ! (
124
- "Only '<', '>','\' , '&', '\" ', '\\ t', '\\ r', '\\ n', and ' ' are escaped"
136
+ "Only '<', '>','\' , '&', '\" ', '\\ t', '\\ r', '\\ n', and ' ' are escaped" ) ,
137
+ }
138
+ pos = new_pos + 1 ;
139
+ }
140
+
141
+ if let Some ( mut escaped) = escaped {
142
+ if let Some ( raw) = bytes. get ( pos..) {
143
+ escaped. extend_from_slice ( raw) ;
144
+ }
145
+ // SAFETY: we operate on UTF-8 input and search only for one-byte chars,
146
+ // so all slices that were put into `escaped` are valid UTF-8 encoded strings
147
+ // TODO: Can be replaced with `unsafe { String::from_utf8_unchecked() }`
148
+ // if unsafe code will be allowed
149
+ Cow :: Owned ( String :: from_utf8 ( escaped) . unwrap ( ) )
150
+ } else {
151
+ Cow :: Borrowed ( raw)
152
+ }
153
+ }
154
+
155
+ /// Escapes a `&str`, replacing the xml special characters matched by `escape_matcher`
156
+ /// (a subset of `<`, `>`, `&`, `'`, `"`) with their corresponding xml escaped value.
157
+ pub fn simd_escape < ' a > ( raw : & ' a str , escape_matcher : & jetscii:: BytesConst ) -> Cow < ' a , str > {
158
+ let bytes = raw. as_bytes ( ) ;
159
+ let mut escaped = None ;
160
+ let mut pos = 0 ;
161
+ while let Some ( i) = escape_matcher. find ( & bytes[ pos..] ) {
162
+ if escaped. is_none ( ) {
163
+ escaped = Some ( Vec :: with_capacity ( raw. len ( ) ) ) ;
164
+ }
165
+ let escaped = escaped. as_mut ( ) . expect ( "initialized" ) ;
166
+ let new_pos = pos + i;
167
+ escaped. extend_from_slice ( & bytes[ pos..new_pos] ) ;
168
+ match bytes[ new_pos] {
169
+ b'<' => escaped. extend_from_slice ( b"<" ) ,
170
+ b'>' => escaped. extend_from_slice ( b">" ) ,
171
+ b'\'' => escaped. extend_from_slice ( b"'" ) ,
172
+ b'&' => escaped. extend_from_slice ( b"&" ) ,
173
+ b'"' => escaped. extend_from_slice ( b""" ) ,
174
+ c @ _ => unreachable ! (
175
+ "Found {} but only '<', '>', ', '&' and '\" ' are escaped" ,
176
+ c as char
125
177
) ,
126
178
}
127
179
pos = new_pos + 1 ;
0 commit comments