Skip to content

Commit 9b220f1

Browse files
authored
Merge pull request #571 from Mingun/borrow-element-names
Borrow element names when deserialize using serde
2 parents b6e376b + 980d8ae commit 9b220f1

File tree

9 files changed

+150
-49
lines changed

9 files changed

+150
-49
lines changed

Changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
- [#568]: Rename `Writter::inner` into `Writter::get_mut`
2424
- [#568]: Add method `Writter::get_ref`
2525
- [#569]: Rewrite the `Reader::read_event_into_async` as an async fn, making the future `Send` if possible.
26+
- [#571]: Borrow element names (`<element>`) when deserializing with serde.
27+
This change allows deserializing into `HashMap<&str, T>`, for example
2628

2729
### Bug Fixes
2830

@@ -55,6 +57,7 @@
5557
[#565]: https://github.com/tafia/quick-xml/pull/565
5658
[#568]: https://github.com/tafia/quick-xml/pull/568
5759
[#569]: https://github.com/tafia/quick-xml/pull/569
60+
[#571]: https://github.com/tafia/quick-xml/pull/571
5861

5962
## 0.27.1 -- 2022-12-28
6063

src/de/key.rs

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::de::str2bool;
22
use crate::encoding::Decoder;
33
use crate::errors::serialize::DeError;
44
use crate::name::QName;
5+
use crate::utils::CowRef;
56
use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor};
67
use serde::{forward_to_deserialize_any, serde_if_integer128};
78
use std::borrow::Cow;
@@ -60,18 +61,19 @@ fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result<Cow<'n, str>, De
6061
///
6162
/// `deserialize_any()` returns the same result as `deserialize_identifier()`.
6263
///
63-
/// # Lifetime
64+
/// # Lifetimes
6465
///
66+
/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
6567
/// - `'d`: lifetime of a deserializer that holds a buffer with content of events
6668
///
6769
/// [`attribute`]: Self::from_attr
6870
/// [`local_name()`]: QName::local_name
6971
/// [`Deserialize`]: serde::Deserialize
70-
pub struct QNameDeserializer<'d> {
71-
name: Cow<'d, str>,
72+
pub struct QNameDeserializer<'i, 'd> {
73+
name: CowRef<'i, 'd, str>,
7274
}
7375

74-
impl<'d> QNameDeserializer<'d> {
76+
impl<'i, 'd> QNameDeserializer<'i, 'd> {
7577
/// Creates deserializer from name of an attribute
7678
pub fn from_attr(name: QName<'d>, decoder: Decoder) -> Result<Self, DeError> {
7779
// https://github.com/tafia/quick-xml/issues/537
@@ -83,19 +85,34 @@ impl<'d> QNameDeserializer<'d> {
8385
};
8486

8587
Ok(Self {
86-
name: Cow::Owned(format!("@{field}")),
88+
name: CowRef::Owned(format!("@{field}")),
8789
})
8890
}
8991

9092
/// Creates deserializer from name of an element
91-
pub fn from_elem(name: QName<'d>, decoder: Decoder) -> Result<Self, DeError> {
92-
let local = decode_name(name, decoder)?;
93+
pub fn from_elem(name: CowRef<'i, 'd, [u8]>, decoder: Decoder) -> Result<Self, DeError> {
94+
let local = match name {
95+
CowRef::Input(borrowed) => match decode_name(QName(borrowed), decoder)? {
96+
Cow::Borrowed(borrowed) => CowRef::Input(borrowed),
97+
Cow::Owned(owned) => CowRef::Owned(owned),
98+
},
99+
CowRef::Slice(borrowed) => match decode_name(QName(borrowed), decoder)? {
100+
Cow::Borrowed(borrowed) => CowRef::Slice(borrowed),
101+
Cow::Owned(owned) => CowRef::Owned(owned),
102+
},
103+
CowRef::Owned(owned) => match decode_name(QName(&owned), decoder)? {
104+
// SAFETY: Because the result is borrowed, no changes were made
105+
// and we can safely unwrap here
106+
Cow::Borrowed(_) => CowRef::Owned(String::from_utf8(owned).unwrap()),
107+
Cow::Owned(owned) => CowRef::Owned(owned),
108+
},
109+
};
93110

94111
Ok(Self { name: local })
95112
}
96113
}
97114

98-
impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'d> {
115+
impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {
99116
type Error = DeError;
100117

101118
forward_to_deserialize_any! {
@@ -202,8 +219,9 @@ impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'d> {
202219
V: Visitor<'de>,
203220
{
204221
match self.name {
205-
Cow::Borrowed(name) => visitor.visit_str(name),
206-
Cow::Owned(name) => visitor.visit_string(name),
222+
CowRef::Input(name) => visitor.visit_borrowed_str(name),
223+
CowRef::Slice(name) => visitor.visit_str(name),
224+
CowRef::Owned(name) => visitor.visit_string(name),
207225
}
208226
}
209227

@@ -220,7 +238,7 @@ impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'d> {
220238
}
221239
}
222240

223-
impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'d> {
241+
impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'de, 'd> {
224242
type Error = DeError;
225243
type Variant = QNameUnitOnly;
226244

@@ -337,7 +355,7 @@ mod tests {
337355
#[test]
338356
fn $name() {
339357
let de = QNameDeserializer {
340-
name: Cow::Borrowed($input),
358+
name: CowRef::Input($input),
341359
};
342360
let data: $type = Deserialize::deserialize(de).unwrap();
343361

@@ -352,7 +370,7 @@ mod tests {
352370
#[test]
353371
fn $name() {
354372
let de = QNameDeserializer {
355-
name: Cow::Borrowed($input),
373+
name: CowRef::Input($input),
356374
};
357375
let data: $type = Deserialize::deserialize(de).unwrap();
358376

@@ -377,7 +395,7 @@ mod tests {
377395
#[test]
378396
fn $name() {
379397
let de = QNameDeserializer {
380-
name: Cow::Borrowed($input),
398+
name: CowRef::Input($input),
381399
};
382400
let err = <$type as Deserialize>::deserialize(de).unwrap_err();
383401

@@ -420,8 +438,7 @@ mod tests {
420438
=> Custom("invalid value: string \"&lt;\", expected a character"));
421439

422440
deserialized_to!(string: String = "&lt;escaped&#x20;string" => "&lt;escaped&#x20;string");
423-
err!(borrowed_str: &str = "name"
424-
=> Custom("invalid type: string \"name\", expected a borrowed string"));
441+
deserialized_to!(borrowed_str: &str = "name" => "name");
425442

426443
err!(byte_buf: ByteBuf = "&lt;escaped&#x20;string"
427444
=> Custom("invalid type: string \"&lt;escaped&#x20;string\", expected byte data"));

src/de/map.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ where
281281
DeEvent::Start(e) => {
282282
self.source = ValueSource::Nested;
283283

284-
let de = QNameDeserializer::from_elem(e.name(), decoder)?;
284+
let de = QNameDeserializer::from_elem(e.raw_name(), decoder)?;
285285
seed.deserialize(de).map(Some)
286286
}
287287
// Stop iteration after reaching a closing tag

src/de/simple_type.rs

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@ use crate::de::{deserialize_bool, str2bool};
77
use crate::encoding::Decoder;
88
use crate::errors::serialize::DeError;
99
use crate::escape::unescape;
10+
use crate::utils::CowRef;
1011
use memchr::memchr;
1112
use serde::de::{DeserializeSeed, Deserializer, EnumAccess, SeqAccess, VariantAccess, Visitor};
1213
use serde::{self, serde_if_integer128};
1314
use std::borrow::Cow;
14-
use std::ops::{Deref, Range};
15+
use std::ops::Range;
1516

1617
macro_rules! deserialize_num {
1718
($method:ident, $visit:ident) => {
@@ -467,32 +468,6 @@ impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> {
467468

468469
////////////////////////////////////////////////////////////////////////////////////////////////////
469470

470-
/// A version of [`Cow`] that can borrow from two different buffers, one of them
471-
/// is a deserializer input.
472-
///
473-
/// # Lifetimes
474-
/// - `'de` -- lifetime of the data that deserializer borrow from the parsed input
475-
/// - `'a` -- lifetime of the data that owned by a deserializer
476-
enum CowRef<'de, 'a> {
477-
/// An input borrowed from the parsed data
478-
Input(&'de [u8]),
479-
/// An input borrowed from the buffer owned by another deserializer
480-
Slice(&'a [u8]),
481-
/// An input taken from an external deserializer, owned by that deserializer
482-
Owned(Vec<u8>),
483-
}
484-
impl<'de, 'a> Deref for CowRef<'de, 'a> {
485-
type Target = [u8];
486-
487-
fn deref(&self) -> &[u8] {
488-
match self {
489-
Self::Input(slice) => slice,
490-
Self::Slice(slice) => slice,
491-
Self::Owned(ref v) => v,
492-
}
493-
}
494-
}
495-
496471
/// A deserializer for an xml probably escaped and encoded value of XSD [simple types].
497472
/// This deserializer will borrow from the input as much as possible.
498473
///
@@ -510,7 +485,7 @@ impl<'de, 'a> Deref for CowRef<'de, 'a> {
510485
pub struct SimpleTypeDeserializer<'de, 'a> {
511486
/// - In case of attribute contains escaped attribute value
512487
/// - In case of text contains unescaped text value
513-
content: CowRef<'de, 'a>,
488+
content: CowRef<'de, 'a, [u8]>,
514489
/// If `true`, `content` in escaped form and should be unescaped before use
515490
escaped: bool,
516491
/// Decoder used to deserialize string data, numeric and boolean data.
@@ -545,7 +520,7 @@ impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> {
545520

546521
/// Constructor for tests
547522
#[inline]
548-
fn new(content: CowRef<'de, 'a>, escaped: bool, decoder: Decoder) -> Self {
523+
fn new(content: CowRef<'de, 'a, [u8]>, escaped: bool, decoder: Decoder) -> Self {
549524
Self {
550525
content,
551526
escaped,

src/de/var.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ where
3838
let decoder = self.de.reader.decoder();
3939
let (name, is_text) = match self.de.peek()? {
4040
DeEvent::Start(e) => (
41-
seed.deserialize(QNameDeserializer::from_elem(e.name(), decoder)?)?,
41+
seed.deserialize(QNameDeserializer::from_elem(e.raw_name(), decoder)?)?,
4242
false,
4343
),
4444
DeEvent::Text(_) => (

src/events/mod.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ use crate::errors::{Error, Result};
4949
use crate::escape::{escape, partial_escape, unescape_with};
5050
use crate::name::{LocalName, QName};
5151
use crate::reader::is_whitespace;
52-
use crate::utils::write_cow_string;
52+
use crate::utils::{write_cow_string, CowRef};
5353
use attributes::{Attribute, Attributes};
5454
use std::mem::replace;
5555

@@ -189,6 +189,21 @@ impl<'a> BytesStart<'a> {
189189
self.name_len = name.len();
190190
self
191191
}
192+
193+
/// Gets the undecoded raw tag name, as present in the input stream, which
194+
/// is borrowed either from the input, or from the event.
195+
///
196+
/// # Lifetimes
197+
///
198+
/// - `'a`: Lifetime of the input data from which this event is borrowed
199+
/// - `'e`: Lifetime of the concrete event instance
200+
// TODO: We should make this a part of the public API, but with a safe wrapper for the name
201+
pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
202+
match self.buf {
203+
Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
204+
Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
205+
}
206+
}
192207
}
193208

194209
/// Attribute-related methods

src/utils.rs

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
use std::borrow::Cow;
1+
use std::borrow::{Borrow, Cow};
22
use std::fmt::{self, Debug, Formatter};
3+
use std::ops::Deref;
34

45
#[cfg(feature = "serialize")]
56
use serde::de::{Deserialize, Deserializer, Error, Visitor};
@@ -36,6 +37,56 @@ pub fn write_byte_string(f: &mut Formatter, byte_string: &[u8]) -> fmt::Result {
3637

3738
////////////////////////////////////////////////////////////////////////////////////////////////////
3839

40+
/// A version of [`Cow`] that can borrow from two different buffers, one of them
41+
/// is a deserializer input.
42+
///
43+
/// # Lifetimes
44+
///
45+
/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
46+
/// - `'s`: lifetime of the data that is owned by a deserializer
47+
pub enum CowRef<'i, 's, B>
48+
where
49+
B: ToOwned + ?Sized,
50+
{
51+
/// An input borrowed from the parsed data
52+
Input(&'i B),
53+
/// An input borrowed from the buffer owned by another deserializer
54+
Slice(&'s B),
55+
/// An input taken from an external deserializer, owned by that deserializer
56+
Owned(<B as ToOwned>::Owned),
57+
}
58+
impl<'i, 's, B> Deref for CowRef<'i, 's, B>
59+
where
60+
B: ToOwned + ?Sized,
61+
B::Owned: Borrow<B>,
62+
{
63+
type Target = B;
64+
65+
fn deref(&self) -> &B {
66+
match *self {
67+
Self::Input(borrowed) => borrowed,
68+
Self::Slice(borrowed) => borrowed,
69+
Self::Owned(ref owned) => owned.borrow(),
70+
}
71+
}
72+
}
73+
74+
impl<'i, 's, B> Debug for CowRef<'i, 's, B>
75+
where
76+
B: ToOwned + ?Sized + Debug,
77+
B::Owned: Debug,
78+
{
79+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
80+
match *self {
81+
Self::Input(borrowed) => Debug::fmt(borrowed, f),
82+
Self::Slice(borrowed) => Debug::fmt(borrowed, f),
83+
Self::Owned(ref owned) => Debug::fmt(owned, f),
84+
}
85+
}
86+
}
87+
88+
////////////////////////////////////////////////////////////////////////////////////////////////////
89+
3990
/// Wrapper around `Vec<u8>` that has a human-readable debug representation:
4091
/// printable ASCII symbols output as is, all other output in HEX notation.
4192
///

tests/serde-de.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6285,6 +6285,9 @@ fn from_str_should_ignore_encoding() {
62856285
/// Checks that deserializer is able to borrow data from the input
62866286
mod borrow {
62876287
use super::*;
6288+
use pretty_assertions::assert_eq;
6289+
use std::collections::BTreeMap;
6290+
use std::iter::FromIterator;
62886291

62896292
/// Struct that should borrow input to be able to deserialize successfully.
62906293
/// serde implicitly borrows `&str` and `&[u8]` even without `#[serde(borrow)]`
@@ -6403,4 +6406,24 @@ mod borrow {
64036406
}
64046407
}
64056408
}
6409+
6410+
#[test]
6411+
fn element_name() {
6412+
let data: BTreeMap<&str, &str> = from_str(
6413+
r#"
6414+
<root>
6415+
<element>element content</element>
6416+
text content
6417+
</root>"#,
6418+
)
6419+
.unwrap();
6420+
assert_eq!(
6421+
data,
6422+
BTreeMap::from_iter([
6423+
// Comment to prevent formatting in one line
6424+
("element", "element content"),
6425+
("$text", "text content"),
6426+
])
6427+
);
6428+
}
64066429
}

tests/serde-issues.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,23 @@ fn issue349() {
142142
);
143143
}
144144

145+
/// Regression test for https://github.com/tafia/quick-xml/issues/352.
146+
#[test]
147+
fn issue352() {
148+
use std::borrow::Cow;
149+
150+
#[derive(Deserialize)]
151+
struct Root<'a> {
152+
#[serde(borrow)]
153+
#[serde(rename = "@attribute")]
154+
attribute: Cow<'a, str>,
155+
}
156+
157+
let r: Root = from_str("<Root attribute='borrowed value'></Root>").unwrap();
158+
159+
assert!(matches!(r.attribute, Cow::Borrowed(_)));
160+
}
161+
145162
/// Regression test for https://github.com/tafia/quick-xml/issues/429.
146163
#[test]
147164
fn issue429() {

0 commit comments

Comments
 (0)