Skip to content

Commit 9a354d7

Browse files
committed
Made read_string_impl more strict. Now <tag>text<trail/></tag> will return an error instead of returning text
when deserializing into primitive types (e.g. `u32`, but not struct S { #[serde(rename = "$text")] field: u32 })
1 parent 14a63d7 commit 9a354d7

File tree

2 files changed

+34
-18
lines changed

2 files changed

+34
-18
lines changed

Changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition.
2323
- [#660]: Fixed incorrect deserialization of `xs:list`s from empty tags (`<tag/>`
2424
or `<tag></tag>`). Previously a `DeError::UnexpectedEof` was returned in that case
2525
- [#580]: Fixed incorrect deserialization of vectors of newtypes from sequences of tags.
26+
- [#661]: More strict handling of serialized primitive values (booleans, numbers, strings,
27+
unit structs, unit variants). `<int>123<something-else/></int>` is no longer valid
28+
content. Previously all data after `123` up to the closing tag would be silently skipped.
2629

2730
### Misc Changes
2831

@@ -43,6 +46,7 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition.
4346
[#649]: https://github.com/tafia/quick-xml/pull/646
4447
[#651]: https://github.com/tafia/quick-xml/pull/651
4548
[#660]: https://github.com/tafia/quick-xml/pull/660
49+
[#661]: https://github.com/tafia/quick-xml/pull/661
4650

4751

4852
## 0.30.0 -- 2023-07-23

src/de/mod.rs

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2581,11 +2581,15 @@ where
25812581
/// events, merge them into one string. If there are no such events, returns
25822582
/// an empty string.
25832583
///
2584-
/// If `allow_start` is `false`, then only text events is consumed, for other
2584+
/// If `allow_start` is `false`, then only text events are consumed, for other
25852585
/// events an error is returned (see table below).
25862586
///
2587-
/// If `allow_start` is `true`, then first [`DeEvent::Text`] event is returned
2588-
/// and all other content is skipped until corresponding end tag will be consumed.
2587+
/// If `allow_start` is `true`, then two or three events are expected:
2588+
/// - [`DeEvent::Start`];
2589+
/// - _(optional)_ [`DeEvent::Text`] whose content is returned;
2590+
/// - [`DeEvent::End`]. If the text event is missing, an empty string is returned.
2591+
///
2592+
/// Corresponding events are consumed.
25892593
///
25902594
/// # Handling events
25912595
///
@@ -2603,9 +2607,8 @@ where
26032607
/// |Event |XML |Handling
26042608
/// |------------------|---------------------------|----------------------------------------------------------------------------------
26052609
/// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
2606-
/// |[`DeEvent::End`] |`</tag>` |Returns an empty slice, if close tag matched the open one
2607-
/// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
2608-
/// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, consumes events up to `</tag>`
2610+
/// |[`DeEvent::End`] |`</tag>` |Returns an empty slice. The reader guarantees that the tag will match the open one
2611+
/// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
26092612
/// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
26102613
///
26112614
/// [`Text`]: Event::Text
@@ -2614,23 +2617,32 @@ where
26142617
match self.next()? {
26152618
DeEvent::Text(e) => Ok(e.text),
26162619
// allow one nested level
2617-
DeEvent::Start(e) if allow_start => match self.next()? {
2618-
DeEvent::Text(t) => {
2619-
self.read_to_end(e.name())?;
2620-
Ok(t.text)
2621-
}
2622-
DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2623-
// We can get End event in case of `<tag></tag>` or `<tag/>` input
2624-
// Return empty text in that case
2625-
DeEvent::End(end) if end.name() == e.name() => Ok("".into()),
2626-
DeEvent::End(end) => Err(DeError::UnexpectedEnd(end.name().as_ref().to_owned())),
2627-
DeEvent::Eof => Err(DeError::UnexpectedEof),
2628-
},
2620+
DeEvent::Start(_) if allow_start => self.read_text(),
26292621
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
26302622
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
26312623
DeEvent::Eof => Err(DeError::UnexpectedEof),
26322624
}
26332625
}
2626+
/// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the
2627+
/// [`DeEvent::End`] event.
2628+
fn read_text(&mut self) -> Result<Cow<'de, str>, DeError> {
2629+
match self.next()? {
2630+
DeEvent::Text(e) => match self.next()? {
2631+
// The matching tag name is guaranteed by the reader
2632+
DeEvent::End(_) => Ok(e.text),
2633+
// SAFETY: There cannot be two consecutive Text events, they would be merged into one
2634+
DeEvent::Text(_) => unreachable!(),
2635+
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2636+
DeEvent::Eof => Err(DeError::UnexpectedEof),
2637+
},
2638+
// We can get End event in case of `<tag></tag>` or `<tag/>` input
2639+
// Return empty text in that case
2640+
// The matching tag name is guaranteed by the reader
2641+
DeEvent::End(_) => Ok("".into()),
2642+
DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2643+
DeEvent::Eof => Err(DeError::UnexpectedEof),
2644+
}
2645+
}
26342646

26352647
/// Drops all events until event with [name](BytesEnd::name()) `name` won't be
26362648
/// dropped. This method should be called after [`Self::next()`]

0 commit comments

Comments
 (0)