77
77
//! ```xml
78
78
//! <...>text<![CDATA[cdata]]>text</...>
79
79
//! ```
80
- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
81
- //!
82
- //! Merging of the text / CDATA content is tracked in the issue [#474] and
83
- //! will be available in the next release.
84
- //! </div>
80
+ //! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
85
81
//! </td>
86
82
//! <td>
87
83
//!
90
86
//! - [`Cow<str>`]
91
87
//! - [`u32`], [`f32`] and other numeric types
92
88
//! - `enum`s, like
93
- //! ```ignore
94
- //! // FIXME: #474, merging mixed text / CDATA
95
- //! // content does not work yet
89
+ //! ```
96
90
//! # use pretty_assertions::assert_eq;
97
91
//! # use serde::Deserialize;
98
92
//! # #[derive(Debug, PartialEq)]
149
143
//! ...
150
144
//! ]]></...>
151
145
//! ```
152
- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
153
- //!
154
- //! Merging of the text / CDATA content is tracked in the issue [#474] and
155
- //! will be available in the next release.
156
- //! </div>
157
146
//!
158
147
//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
159
148
//! </td>
162
151
//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
163
152
//!
164
153
//! ```
165
- //! // FIXME: #474, merging mixed text / CDATA
166
- //! // content does not work yet
167
154
//! type List = Vec<u32>;
168
155
//! ```
169
156
//!
520
507
//! }
521
508
//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
522
509
//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
523
- //! # assert_eq!(AnyName::Text("text".into()), quick_xml::de::from_str(r#"text"#).unwrap());
524
- //! # // TODO: After #474 parse mixed content
510
+ //! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
525
511
//! ```
526
512
//! ```
527
513
//! # use pretty_assertions::assert_eq;
544
530
//! }
545
531
//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
546
532
//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
547
- //! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text"#).unwrap());
548
- //! # // TODO: After #474 parse mixed content
533
+ //! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
549
534
//! ```
550
535
//! ```
551
536
//! # use pretty_assertions::assert_eq;
561
546
//! }
562
547
//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
563
548
//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
564
- //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text"#).unwrap());
565
- //! # // TODO: After #474 parse mixed content
549
+ //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
566
550
//! ```
567
551
//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
568
552
//!
643
627
//! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
644
628
//! # );
645
629
//! # assert_eq!(
646
- //! # AnyName { field: (), any_name: Choice::Text("text".into()) },
647
- //! # // TODO: After #474 parse mixed content
648
- //! # quick_xml::de::from_str(r#"<any-tag field="...">text</any-tag>"#).unwrap(),
630
+ //! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) },
631
+ //! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
649
632
//! # );
650
633
//! ```
651
634
//! </td>
967
950
//! from the full element (`<one>...</one>`), so they could use the element name
968
951
//! to choose the right variant:
969
952
//!
970
- //! ```ignore
971
- //! // FIXME: #474
953
+ //! ```
972
954
//! # use pretty_assertions::assert_eq;
973
955
//! # use serde::Deserialize;
974
956
//! # type One = ();
985
967
//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
986
968
//! # );
987
969
//! ```
988
- //! ```ignore
989
- //! // FIXME: #474, Custom("unknown variant `two`,
990
- //! // expected `one`")
970
+ //! ```
991
971
//! # use pretty_assertions::assert_eq;
992
972
//! # use serde::Deserialize;
993
973
//! # #[derive(Debug, PartialEq)]
1011
991
//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1012
992
//! so you cannot have two adjacent string types in your sequence.
1013
993
//! </div>
1014
- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
1015
- //!
1016
- //! Merging of the text / CDATA content is tracked in the issue [#474] and
1017
- //! will be available in the next release.
1018
- //! </div>
1019
994
//! </td>
1020
995
//! </tr>
1021
996
//! <!-- 15 ==================================================================================== -->
1040
1015
//! <td>
1041
1016
//! A homogeneous sequence of elements with a fixed or dynamic size:
1042
1017
//!
1043
- //! ```ignore
1044
- //! // FIXME: #474
1018
+ //! ```
1045
1019
//! # use pretty_assertions::assert_eq;
1046
1020
//! # use serde::Deserialize;
1047
1021
//! # #[derive(Debug, PartialEq)]
1059
1033
//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1060
1034
//! # );
1061
1035
//! ```
1062
- //! ```ignore
1063
- //! // FIXME: #474
1036
+ //! ```
1064
1037
//! # use pretty_assertions::assert_eq;
1065
1038
//! # use serde::Deserialize;
1066
1039
//! # #[derive(Debug, PartialEq)]
1088
1061
//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1089
1062
//! so you cannot have two adjacent string types in your sequence.
1090
1063
//! </div>
1091
- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
1092
- //!
1093
- //! Merging of the text / CDATA content is tracked in the issue [#474] and
1094
- //! will be available in the next release.
1095
- //! </div>
1096
1064
//! </td>
1097
1065
//! </tr>
1098
1066
//! <!-- 16 ==================================================================================== -->
1119
1087
//!
1120
1088
//! You MUST specify `#[serde(rename = "$value")]` on that field:
1121
1089
//!
1122
- //! ```ignore
1123
- //! // FIXME: #474, Custom("duplicate field `$value`")
1090
+ //! ```
1124
1091
//! # use pretty_assertions::assert_eq;
1125
1092
//! # use serde::Deserialize;
1126
1093
//! # type One = ();
1157
1124
//! # ).unwrap(),
1158
1125
//! # );
1159
1126
//! ```
1160
- //! ```ignore
1161
- //! // FIXME: #474, Custom("duplicate field `$value`")
1127
+ //! ```
1162
1128
//! # use pretty_assertions::assert_eq;
1163
1129
//! # use serde::Deserialize;
1164
1130
//! # type One = ();
1204
1170
//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1205
1171
//! so you cannot have two adjacent string types in your sequence.
1206
1172
//! </div>
1207
- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
1208
- //!
1209
- //! Merging of the text / CDATA content is tracked in the issue [#474] and
1210
- //! will be available in the next release.
1211
- //! </div>
1212
1173
//! </td>
1213
1174
//! </tr>
1214
1175
//! <!-- 17 ==================================================================================== -->
1237
1198
//!
1238
1199
//! You MUST specify `#[serde(rename = "$value")]` on that field:
1239
1200
//!
1240
- //! ```ignore
1241
- //! // FIXME: #474
1201
+ //! ```
1242
1202
//! # use pretty_assertions::assert_eq;
1243
1203
//! # use serde::Deserialize;
1244
1204
//! # #[derive(Debug, PartialEq)]
1282
1242
//! # ).unwrap(),
1283
1243
//! # );
1284
1244
//! ```
1285
- //! ```ignore
1286
- //! // FIXME: #474
1245
+ //! ```
1287
1246
//! # use pretty_assertions::assert_eq;
1288
1247
//! # use serde::Deserialize;
1289
1248
//! # #[derive(Debug, PartialEq)]
1332
1291
//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1333
1292
//! so you cannot have two adjacent string types in your sequence.
1334
1293
//! </div>
1335
- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
1336
- //!
1337
- //! Merging of the text / CDATA content is tracked in the issue [#474] and
1338
- //! will be available in the next release.
1339
- //! </div>
1340
1294
//! </td>
1341
1295
//! </tr>
1342
1296
//! </tbody>
1720
1674
//!
1721
1675
//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
1722
1676
//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1723
- //! [#474]: https://github.com/tafia/quick-xml/issues/474
1724
1677
//! [#497]: https://github.com/tafia/quick-xml/issues/497
1725
1678
1726
1679
// Macros should be defined before the modules that use them
@@ -2004,6 +1957,53 @@ impl<'i, R: XmlRead<'i>> XmlReader<'i, R> {
2004
1957
)
2005
1958
}
2006
1959
1960
+ /// Read all consequent [`Text`] and [`CData`] events until non-text event
1961
+ /// occurs. Content of all events would be appended to `result` and returned
1962
+ /// as [`DeEvent::Text`].
1963
+ ///
1964
+ /// [`Text`]: PayloadEvent::Text
1965
+ /// [`CData`]: PayloadEvent::CData
1966
+ fn drain_text ( & mut self , mut result : Cow < ' i , str > ) -> Result < DeEvent < ' i > , DeError > {
1967
+ loop {
1968
+ match self . lookahead {
1969
+ Ok ( PayloadEvent :: Text ( _) | PayloadEvent :: CData ( _) ) => {
1970
+ let text = self . next_text ( ) ?;
1971
+
1972
+ let mut s = result. into_owned ( ) ;
1973
+ s += & text;
1974
+ result = Cow :: Owned ( s) ;
1975
+ }
1976
+ _ => break ,
1977
+ }
1978
+ }
1979
+ Ok ( DeEvent :: Text ( result) )
1980
+ }
1981
+
1982
+ /// Read one text event, panics if current event is not a text event
1983
+ ///
1984
+ /// |Event |XML |Handling
1985
+ /// |-----------------------|---------------------------|----------------------------------------
1986
+ /// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
1987
+ /// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
1988
+ /// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
1989
+ /// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
1990
+ /// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
1991
+ #[ inline( always) ]
1992
+ fn next_text ( & mut self ) -> Result < Cow < ' i , str > , DeError > {
1993
+ match self . next_impl ( ) ? {
1994
+ PayloadEvent :: Text ( mut e) => {
1995
+ if self . need_trim_end ( ) {
1996
+ e. inplace_trim_end ( ) ;
1997
+ }
1998
+ Ok ( e. unescape ( ) ?)
1999
+ }
2000
+ PayloadEvent :: CData ( e) => Ok ( e. decode ( ) ?) ,
2001
+
2002
+ // SAFETY: this method is called only when we peeked Text or CData
2003
+ _ => unreachable ! ( "Only `Text` and `CData` events can come here" ) ,
2004
+ }
2005
+ }
2006
+
2007
2007
/// Return an input-borrowing event.
2008
2008
fn next ( & mut self ) -> Result < DeEvent < ' i > , DeError > {
2009
2009
loop {
@@ -2014,9 +2014,9 @@ impl<'i, R: XmlRead<'i>> XmlReader<'i, R> {
2014
2014
if self . need_trim_end ( ) && e. inplace_trim_end ( ) {
2015
2015
continue ;
2016
2016
}
2017
- Ok ( DeEvent :: Text ( e. unescape ( ) ?) )
2017
+ self . drain_text ( e. unescape ( ) ?)
2018
2018
}
2019
- PayloadEvent :: CData ( e) => Ok ( DeEvent :: Text ( e. decode ( ) ?) ) ,
2019
+ PayloadEvent :: CData ( e) => self . drain_text ( e. decode ( ) ?) ,
2020
2020
PayloadEvent :: Eof => Ok ( DeEvent :: Eof ) ,
2021
2021
} ;
2022
2022
}
@@ -2386,11 +2386,12 @@ where
2386
2386
self . read_string_impl ( true )
2387
2387
}
2388
2388
2389
- /// Consumes a one XML element or an XML tree, returns associated text or
2389
+ /// Consumes consecutive [`Text`] and [`CData`] events (both are referred to below as _text_)
2390
+ /// and merges them into one string. If there are no such events, returns
2390
2391
/// an empty string.
2391
2392
///
2392
- /// If `allow_start` is `false`, then only one event is consumed. If that
2393
- /// event is [`DeEvent::Start`], then [`DeError::UnexpectedStart`] is returned.
2393
+ /// If `allow_start` is `false`, then only text events are consumed; for other
2394
+ /// events an error is returned (see table below).
2394
2395
///
2395
2396
/// If `allow_start` is `true`, then first [`DeEvent::Text`] event is returned
2396
2397
/// and all other content is skipped until corresponding end tag will be consumed.
@@ -2415,6 +2416,9 @@ where
2415
2416
/// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
2416
2417
/// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, consumes events up to `</tag>`
2417
2418
/// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2419
+ ///
2420
+ /// [`Text`]: Event::Text
2421
+ /// [`CData`]: Event::CData
2418
2422
fn read_string_impl ( & mut self , allow_start : bool ) -> Result < Cow < ' de , str > , DeError > {
2419
2423
match self . next ( ) ? {
2420
2424
DeEvent :: Text ( e) => Ok ( e) ,
@@ -3002,7 +3006,7 @@ mod tests {
3002
3006
]
3003
3007
) ;
3004
3008
3005
- // Drop all events thet represents <target> tree. Now unconsumed XML looks like:
3009
+ // Drop all events that represent the <target> tree. Now unconsumed XML looks like:
3006
3010
//
3007
3011
// <skip>
3008
3012
// text
0 commit comments