Skip to content

Commit 4c8100d

Browse files
committed
Replace BytesText::unescape and unescape_with by decode
Text events produced by the Reader cannot contain escaped data anymore; all such data is represented by Event::GeneralRef
1 parent 6bc98fc commit 4c8100d

File tree

15 files changed

+28
-48
lines changed

15 files changed

+28
-48
lines changed

Changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ XML specification. See the updated `custom_entities` example!
3434

3535
- [#863]: Remove `From<QName<'a>> for BytesStart<'a>` because now `BytesStart` stores the
3636
encoding in which its data is encoded, but `QName` is a simple wrapper around byte slice.
37+
- [#766]: `BytesText::unescape` and `BytesText::unescape_with` replaced by `BytesText::decode`.
38+
Now Text events do not contain escaped parts; those parts are reported as `Event::GeneralRef` instead.
3739

3840
[#766]: https://github.com/tafia/quick-xml/pull/766
3941
[#863]: https://github.com/tafia/quick-xml/pull/863

benches/macrobenches.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
5454
}
5555
}
5656
Event::Text(e) => {
57-
criterion::black_box(e.unescape()?);
57+
criterion::black_box(e.decode()?);
5858
}
5959
Event::CData(e) => {
6060
criterion::black_box(e.into_inner());
@@ -79,7 +79,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
7979
}
8080
}
8181
Event::Text(e) => {
82-
criterion::black_box(e.unescape()?);
82+
criterion::black_box(e.decode()?);
8383
}
8484
Event::CData(e) => {
8585
criterion::black_box(e.into_inner());
@@ -105,7 +105,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
105105
}
106106
}
107107
(resolved_ns, Event::Text(e)) => {
108-
criterion::black_box(e.unescape()?);
108+
criterion::black_box(e.decode()?);
109109
criterion::black_box(resolved_ns);
110110
}
111111
(resolved_ns, Event::CData(e)) => {
@@ -133,7 +133,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
133133
}
134134
}
135135
(resolved_ns, Event::Text(e)) => {
136-
criterion::black_box(e.unescape()?);
136+
criterion::black_box(e.decode()?);
137137
criterion::black_box(resolved_ns);
138138
}
139139
(resolved_ns, Event::CData(e)) => {

benches/microbenches.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ fn one_event(c: &mut Criterion) {
145145
config.trim_text(true);
146146
config.check_end_names = false;
147147
match r.read_event() {
148-
Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
148+
Ok(Event::Comment(e)) => nbtxt += e.decode().unwrap().len(),
149149
something_else => panic!("Did not expect {:?}", something_else),
150150
};
151151

fuzz/fuzz_targets/fuzz_target_1.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ where
4343
| Ok(Event::Comment(ref e))
4444
| Ok(Event::DocType(ref e)) => {
4545
debug_format!(e);
46-
if let Err(err) = e.unescape() {
46+
if let Err(err) = e.decode() {
4747
debug_format!(err);
4848
break;
4949
}

src/de/mod.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2298,9 +2298,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22982298
// FIXME: Actually, we should trim after decoding text, but now we trim before
22992299
e.inplace_trim_end();
23002300
}
2301-
result
2302-
.to_mut()
2303-
.push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
2301+
result.to_mut().push_str(&e.decode()?);
23042302
}
23052303
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
23062304

@@ -2322,7 +2320,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
23222320
// FIXME: Actually, we should trim after decoding text, but now we trim before
23232321
continue;
23242322
}
2325-
self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2323+
self.drain_text(e.decode()?)
23262324
}
23272325
PayloadEvent::CData(e) => self.drain_text(e.decode()?),
23282326
PayloadEvent::DocType(e) => {

src/events/mod.rs

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,7 @@ use std::str::from_utf8;
4848

4949
use crate::encoding::{Decoder, EncodingError};
5050
use crate::errors::{Error, IllFormedError};
51-
use crate::escape::{
52-
escape, minimal_escape, parse_number, partial_escape, resolve_predefined_entity, unescape_with,
53-
EscapeError,
54-
};
51+
use crate::escape::{escape, minimal_escape, parse_number, partial_escape, EscapeError};
5552
use crate::name::{LocalName, QName};
5653
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
5754
use attributes::{AttrError, Attribute, Attributes};
@@ -578,29 +575,12 @@ impl<'a> BytesText<'a> {
578575
}
579576
}
580577

581-
/// Decodes then unescapes the content of the event.
582-
///
583-
/// This will allocate if the value contains any escape sequences or in
584-
/// non-UTF-8 encoding.
585-
pub fn unescape(&self) -> Result<Cow<'a, str>, Error> {
586-
self.unescape_with(resolve_predefined_entity)
587-
}
588-
589-
/// Decodes then unescapes the content of the event with custom entities.
578+
/// Decodes the content of the event.
590579
///
591580
/// This will allocate if the value is in a
592581
/// non-UTF-8 encoding.
593-
pub fn unescape_with<'entity>(
594-
&self,
595-
resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
596-
) -> Result<Cow<'a, str>, Error> {
597-
let decoded = self.decoder.decode_cow(&self.content)?;
598-
599-
match unescape_with(&decoded, resolve_entity)? {
600-
// Because result is borrowed, no replacements was done and we can use original string
601-
Cow::Borrowed(_) => Ok(decoded),
602-
Cow::Owned(s) => Ok(s.into()),
603-
}
582+
pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
583+
self.decoder.decode_cow(&self.content)
604584
}
605585

606586
/// Removes leading XML whitespace bytes from text content.

src/reader/async_tokio.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
103103
/// loop {
104104
/// match reader.read_event_into_async(&mut buf).await {
105105
/// Ok(Event::Start(_)) => count += 1,
106-
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
106+
/// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
107107
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
108108
/// Ok(Event::Eof) => break,
109109
/// _ => (),
@@ -247,7 +247,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
247247
/// }
248248
/// }
249249
/// Event::Text(e) => {
250-
/// txt.push(e.unescape().unwrap().into_owned())
250+
/// txt.push(e.decode().unwrap().into_owned())
251251
/// }
252252
/// Event::Eof => break,
253253
/// _ => (),
@@ -383,7 +383,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
383383
/// (_, Event::Start(_)) => unreachable!(),
384384
///
385385
/// (_, Event::Text(e)) => {
386-
/// txt.push(e.unescape().unwrap().into_owned())
386+
/// txt.push(e.decode().unwrap().into_owned())
387387
/// }
388388
/// (_, Event::Eof) => break,
389389
/// _ => (),

src/reader/buffered_reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ impl<R: BufRead> Reader<R> {
372372
/// loop {
373373
/// match reader.read_event_into(&mut buf) {
374374
/// Ok(Event::Start(_)) => count += 1,
375-
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
375+
/// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
376376
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
377377
/// Ok(Event::Eof) => break,
378378
/// _ => (),

src/reader/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ where
718718
/// _ => (),
719719
/// }
720720
/// }
721-
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
721+
/// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
722722
///
723723
/// // There are several other `Event`s we do not consider here
724724
/// _ => (),

src/reader/ns_reader.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ impl<R: BufRead> NsReader<R> {
420420
/// }
421421
/// }
422422
/// Event::Text(e) => {
423-
/// txt.push(e.unescape().unwrap().into_owned())
423+
/// txt.push(e.decode().unwrap().into_owned())
424424
/// }
425425
/// Event::Eof => break,
426426
/// _ => (),
@@ -479,7 +479,7 @@ impl<R: BufRead> NsReader<R> {
479479
/// (_, Event::Start(_)) => unreachable!(),
480480
///
481481
/// (_, Event::Text(e)) => {
482-
/// txt.push(e.unescape().unwrap().into_owned())
482+
/// txt.push(e.decode().unwrap().into_owned())
483483
/// }
484484
/// (_, Event::Eof) => break,
485485
/// _ => (),
@@ -665,7 +665,7 @@ impl<'i> NsReader<&'i [u8]> {
665665
/// }
666666
/// }
667667
/// Event::Text(e) => {
668-
/// txt.push(e.unescape().unwrap().into_owned())
668+
/// txt.push(e.decode().unwrap().into_owned())
669669
/// }
670670
/// Event::Eof => break,
671671
/// _ => (),
@@ -727,7 +727,7 @@ impl<'i> NsReader<&'i [u8]> {
727727
/// (_, Event::Start(_)) => unreachable!(),
728728
///
729729
/// (_, Event::Text(e)) => {
730-
/// txt.push(e.unescape().unwrap().into_owned())
730+
/// txt.push(e.decode().unwrap().into_owned())
731731
/// }
732732
/// (_, Event::Eof) => break,
733733
/// _ => (),

0 commit comments

Comments
 (0)