Skip to content

Commit b6d989a

Browse files
phdavis1027 and Mingun
authored and committed
No longer resolve predefined entities in unescape_with
1 parent 10d1ff8 commit b6d989a

File tree

8 files changed

+76
-26
lines changed

8 files changed

+76
-26
lines changed

Cargo.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,12 @@ async-tokio = ["tokio"]
111111
## [#158]: https://github.com/tafia/quick-xml/issues/158
112112
encoding = ["encoding_rs"]
113113

114-
## Enables support for recognizing all [HTML 5 entities] in [`unescape`] and
115-
## [`unescape_with`] functions. The full list of entities also can be found in
114+
## Enables support for recognizing all [HTML 5 entities] in the [`unescape`]
115+
## function. The full list of entities can also be found in
116116
## <https://html.spec.whatwg.org/entities.json>.
117117
##
118118
## [HTML 5 entities]: https://dev.w3.org/html5/html-author/charref
119119
## [`unescape`]: crate::escape::unescape
120-
## [`unescape_with`]: crate::escape::unescape_with
121120
escape-html = []
122121

123122
## This feature is for the Serde deserializer that enables support for deserializing

Changelog.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ The method of reporting positions of errors has changed - use `error_position()`
1818
to get an offset of the error position. For `SyntaxError`s the range
1919
`error_position()..buffer_position()` also will represent a span of error.
2020

21+
The way entities are resolved with `unescape_with` has changed. Those methods no longer
22+
resolve predefined entities.
23+
2124
### New Features
2225

2326
- [#513]: Allow to continue parsing after getting new `Error::IllFormed`.
@@ -73,6 +76,10 @@ to get an offset of the error position. For `SyntaxError`s the range
7376
- [#738]: Add an example of how to deserialize XML elements into Rust enums using an
7477
intermediate custom deserializer.
7578
- [#748]: Implement `Clone` for [`DeEvent`], [`PayloadEvent`] and [`Text`].
79+
- [#734]: Rename `NoEntityResolver` to `PredefinedEntityResolver`.
80+
- [#734]: No longer resolve predefined entities (`lt`, `gt`, `apos`, `quot`, `amp`)
81+
in the `unescape_with` family of methods. You should do that yourself using the methods
82+
listed above.
7683

7784
[#275]: https://github.com/tafia/quick-xml/issues/275
7885
[#362]: https://github.com/tafia/quick-xml/issues/362

examples/custom_entities.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
1010
use std::collections::HashMap;
1111

12+
use quick_xml::escape::resolve_predefined_entity;
1213
use quick_xml::events::Event;
1314
use quick_xml::reader::Reader;
1415
use regex::bytes::Regex;
@@ -59,8 +60,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
5960
Ok(Event::Text(ref e)) => {
6061
println!(
6162
"text value: {}",
62-
e.unescape_with(|ent| custom_entities.get(ent).map(|s| s.as_str()))
63-
.unwrap()
63+
e.unescape_with(|ent| match custom_entities.get(ent) {
64+
Some(s) => Some(s.as_str()),
65+
None => resolve_predefined_entity(ent),
66+
})
67+
.unwrap()
6468
);
6569
}
6670
Ok(Event::Eof) => break,

src/de/mod.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1997,7 +1997,7 @@ mod text;
19971997
mod var;
19981998

19991999
pub use crate::errors::serialize::DeError;
2000-
pub use resolver::{EntityResolver, NoEntityResolver};
2000+
pub use resolver::{EntityResolver, PredefinedEntityResolver};
20012001

20022002
use crate::{
20032003
de::map::ElementMapAccess,
@@ -2125,7 +2125,7 @@ impl<'a> PayloadEvent<'a> {
21252125
/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
21262126
/// [`PayloadEvent::Text`] events that are followed by any event except
21272127
/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`], are trimmed from the end.
2128-
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = NoEntityResolver> {
2128+
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> {
21292129
/// A source of low-level XML events
21302130
reader: R,
21312131
/// Intermediate event, that could be returned by the next call to `next()`.
@@ -2356,7 +2356,7 @@ where
23562356
////////////////////////////////////////////////////////////////////////////////////////////////////
23572357

23582358
/// A structure that deserializes XML into Rust values.
2359-
pub struct Deserializer<'de, R, E: EntityResolver = NoEntityResolver>
2359+
pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver>
23602360
where
23612361
R: XmlRead<'de>,
23622362
{
@@ -2799,7 +2799,7 @@ impl<'de> Deserializer<'de, SliceReader<'de>> {
27992799
/// Deserializer created with this method will not resolve custom entities.
28002800
#[allow(clippy::should_implement_trait)]
28012801
pub fn from_str(source: &'de str) -> Self {
2802-
Self::from_str_with_resolver(source, NoEntityResolver)
2802+
Self::from_str_with_resolver(source, PredefinedEntityResolver)
28032803
}
28042804
}
28052805

@@ -2837,7 +2837,7 @@ where
28372837
///
28382838
/// Deserializer created with this method will not resolve custom entities.
28392839
pub fn from_reader(reader: R) -> Self {
2840-
Self::with_resolver(reader, NoEntityResolver)
2840+
Self::with_resolver(reader, PredefinedEntityResolver)
28412841
}
28422842
}
28432843

src/de/resolver.rs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
use std::convert::Infallible;
44
use std::error::Error;
55

6+
use crate::escape::resolve_predefined_entity;
67
use crate::events::BytesText;
78

89
/// Used to resolve unknown entities while parsing
@@ -87,18 +88,28 @@ pub trait EntityResolver {
8788
fn resolve(&self, entity: &str) -> Option<&str>;
8889
}
8990

90-
/// An `EntityResolver` that does nothing and always returns `None`.
91+
/// An [`EntityResolver`] that resolves only predefined entities:
92+
///
93+
/// | Entity | Resolution
94+
/// |--------|------------
95+
/// |`&lt;` | `<`
96+
/// |`&gt;` | `>`
97+
/// |`&amp;` | `&`
98+
/// |`&apos;`| `'`
99+
/// |`&quot;`| `"`
91100
#[derive(Default, Copy, Clone)]
92-
pub struct NoEntityResolver;
101+
pub struct PredefinedEntityResolver;
93102

94-
impl EntityResolver for NoEntityResolver {
103+
impl EntityResolver for PredefinedEntityResolver {
95104
type Error = Infallible;
96105

106+
#[inline]
97107
fn capture(&mut self, _doctype: BytesText) -> Result<(), Self::Error> {
98108
Ok(())
99109
}
100110

101-
fn resolve(&self, _entity: &str) -> Option<&str> {
102-
None
111+
#[inline]
112+
fn resolve(&self, entity: &str) -> Option<&str> {
113+
resolve_predefined_entity(entity)
103114
}
104115
}

src/escape.rs

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -184,16 +184,48 @@ pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str>
184184
/// [`escape-html`]: ../index.html#escape-html
185185
/// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref
186186
pub fn unescape(raw: &str) -> Result<Cow<str>, EscapeError> {
187-
unescape_with(raw, |_| None)
187+
unescape_with(raw, resolve_predefined_entity)
188188
}
189189

190190
/// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into
191191
/// their corresponding value, using a resolver function for custom entities.
192192
///
193193
/// If feature [`escape-html`] is enabled, then recognizes all [HTML5 escapes].
194194
///
195+
/// Predefined entities are no longer resolved automatically: `resolve_entity` must handle them
196+
/// itself (e.g. via `resolve_predefined_entity`), so it can override them as some XML dialects require.
197+
///
198+
/// Character references (`&#hh;`) cannot be overridden, they are resolved before
199+
/// calling `resolve_entity`.
200+
///
201+
/// Note that entities will not be resolved recursively. In order to satisfy the
202+
/// XML [requirements] you should unescape nested entities yourself.
203+
///
204+
/// # Example
205+
///
206+
/// ```
207+
/// use quick_xml::escape::resolve_xml_entity;
208+
/// # use quick_xml::escape::unescape_with;
209+
/// # use pretty_assertions::assert_eq;
210+
/// let override_named_entities = |entity: &str| match entity {
211+
/// // Override standard entities
212+
/// "lt" => Some("FOO"),
213+
/// "gt" => Some("BAR"),
214+
/// // Resolve custom entities
215+
/// "baz" => Some("&lt;"),
216+
/// // Delegate other entities to the default implementation
217+
/// _ => resolve_xml_entity(entity),
218+
/// };
219+
///
220+
/// assert_eq!(
221+
/// unescape_with("&amp;&lt;test&gt;&baz;", override_named_entities).unwrap(),
222+
/// "&FOOtestBAR&lt;"
223+
/// );
224+
/// ```
225+
///
195226
/// [`escape-html`]: ../index.html#escape-html
196227
/// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref
228+
/// [requirements]: https://www.w3.org/TR/xml11/#intern-replacement
197229
pub fn unescape_with<'input, 'entity, F>(
198230
raw: &'input str,
199231
mut resolve_entity: F,
@@ -221,8 +253,6 @@ where
221253
if let Some(entity) = pat.strip_prefix('#') {
222254
let codepoint = parse_number(entity, start..end)?;
223255
unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
224-
} else if let Some(value) = resolve_predefined_entity(pat) {
225-
unescaped.push_str(value);
226256
} else if let Some(value) = resolve_entity(pat) {
227257
unescaped.push_str(value);
228258
} else {
@@ -1840,10 +1870,7 @@ fn test_unescape_with() {
18401870
assert_eq!(unchanged, Cow::Borrowed("test"));
18411871
assert!(matches!(unchanged, Cow::Borrowed(_)));
18421872

1843-
assert_eq!(
1844-
unescape_with("&lt;test&gt;", custom_entities).unwrap(),
1845-
"<test>"
1846-
);
1873+
assert!(unescape_with("&lt;", custom_entities).is_err());
18471874
assert_eq!(unescape_with("&#x30;", custom_entities).unwrap(), "0");
18481875
assert_eq!(unescape_with("&#48;", custom_entities).unwrap(), "0");
18491876
assert_eq!(unescape_with("&foo;", custom_entities).unwrap(), "BAR");

src/events/attributes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
//! Provides an iterator over attributes key/value pairs
44
55
use crate::errors::Result as XmlResult;
6-
use crate::escape::{escape, unescape_with};
6+
use crate::escape::{escape, resolve_predefined_entity, unescape_with};
77
use crate::name::QName;
88
use crate::reader::{is_whitespace, Reader};
99
use crate::utils::{write_byte_string, write_cow_string, Bytes};
@@ -85,7 +85,7 @@ impl<'a> Attribute<'a> {
8585
/// This will allocate if the value contains any escape sequences or in
8686
/// non-UTF-8 encoding.
8787
pub fn decode_and_unescape_value<B>(&self, reader: &Reader<B>) -> XmlResult<Cow<'a, str>> {
88-
self.decode_and_unescape_value_with(reader, |_| None)
88+
self.decode_and_unescape_value_with(reader, resolve_predefined_entity)
8989
}
9090

9191
/// Decodes then unescapes the value with custom entities.

src/events/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ use std::str::from_utf8;
4646

4747
use crate::encoding::Decoder;
4848
use crate::errors::{Error, IllFormedError, Result};
49-
use crate::escape::{escape, minimal_escape, partial_escape, unescape_with};
49+
use crate::escape::{
50+
escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
51+
};
5052
use crate::name::{LocalName, QName};
5153
use crate::reader::is_whitespace;
5254
use crate::utils::write_cow_string;
@@ -748,7 +750,7 @@ impl<'a> BytesText<'a> {
748750
/// This will allocate if the value contains any escape sequences or in
749751
/// non-UTF-8 encoding.
750752
pub fn unescape(&self) -> Result<Cow<'a, str>> {
751-
self.unescape_with(|_| None)
753+
self.unescape_with(resolve_predefined_entity)
752754
}
753755

754756
/// Decodes then unescapes the content of the event with custom entities.

0 commit comments

Comments
 (0)