@@ -2089,7 +2089,8 @@ use crate::{
2089
2089
de:: map:: ElementMapAccess ,
2090
2090
encoding:: Decoder ,
2091
2091
errors:: Error ,
2092
- events:: { BytesCData , BytesEnd , BytesStart , BytesText , Event } ,
2092
+ escape:: { parse_number, EscapeError } ,
2093
+ events:: { BytesCData , BytesEnd , BytesRef , BytesStart , BytesText , Event } ,
2093
2094
name:: QName ,
2094
2095
reader:: NsReader ,
2095
2096
utils:: CowRef ,
@@ -2208,6 +2209,8 @@ pub enum PayloadEvent<'a> {
2208
2209
CData ( BytesCData < ' a > ) ,
2209
2210
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
2210
2211
DocType ( BytesText < ' a > ) ,
2212
+ /// Reference `&ref;` in the textual data.
2213
+ GeneralRef ( BytesRef < ' a > ) ,
2211
2214
/// End of XML document.
2212
2215
Eof ,
2213
2216
}
@@ -2222,6 +2225,7 @@ impl<'a> PayloadEvent<'a> {
2222
2225
PayloadEvent :: Text ( e) => PayloadEvent :: Text ( e. into_owned ( ) ) ,
2223
2226
PayloadEvent :: CData ( e) => PayloadEvent :: CData ( e. into_owned ( ) ) ,
2224
2227
PayloadEvent :: DocType ( e) => PayloadEvent :: DocType ( e. into_owned ( ) ) ,
2228
+ PayloadEvent :: GeneralRef ( e) => PayloadEvent :: GeneralRef ( e. into_owned ( ) ) ,
2225
2229
PayloadEvent :: Eof => PayloadEvent :: Eof ,
2226
2230
}
2227
2231
}
@@ -2276,7 +2280,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2276
2280
// If next event is a text or CDATA, we should not trim trailing spaces
2277
2281
!matches ! (
2278
2282
self . lookahead,
2279
- Ok ( PayloadEvent :: Text ( _) ) | Ok ( PayloadEvent :: CData ( _) )
2283
+ Ok ( PayloadEvent :: Text ( _) ) | Ok ( PayloadEvent :: CData ( _) | PayloadEvent :: GeneralRef ( _ ) )
2280
2284
)
2281
2285
}
2282
2286
@@ -2301,9 +2305,10 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2301
2305
result. to_mut ( ) . push_str ( & e. decode ( ) ?) ;
2302
2306
}
2303
2307
PayloadEvent :: CData ( e) => result. to_mut ( ) . push_str ( & e. decode ( ) ?) ,
2308
+ PayloadEvent :: GeneralRef ( e) => self . resolve_reference ( result. to_mut ( ) , e) ?,
2304
2309
2305
- // SAFETY: current_event_is_last_text checks that event is Text or CData
2306
- _ => unreachable ! ( "Only `Text` and `CData` events can come here" ) ,
2310
+ // SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
2311
+ _ => unreachable ! ( "Only `Text`, `CData` or `GeneralRef ` events can come here" ) ,
2307
2312
}
2308
2313
}
2309
2314
Ok ( DeEvent :: Text ( Text { text : result } ) )
@@ -2329,11 +2334,32 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2329
2334
. map_err ( |err| DeError :: Custom ( format ! ( "cannot parse DTD: {}" , err) ) ) ?;
2330
2335
continue ;
2331
2336
}
2337
+ PayloadEvent :: GeneralRef ( e) => {
2338
+ let mut text = String :: new ( ) ;
2339
+ self . resolve_reference ( & mut text, e) ?;
2340
+ self . drain_text ( text. into ( ) )
2341
+ }
2332
2342
PayloadEvent :: Eof => Ok ( DeEvent :: Eof ) ,
2333
2343
} ;
2334
2344
}
2335
2345
}
2336
2346
2347
+ fn resolve_reference ( & mut self , result : & mut String , event : BytesRef ) -> Result < ( ) , DeError > {
2348
+ let len = event. len ( ) ;
2349
+ let reference = self . decoder ( ) . decode ( & event) ?;
2350
+
2351
+ if let Some ( num) = reference. strip_prefix ( '#' ) {
2352
+ let codepoint = parse_number ( num) . map_err ( EscapeError :: InvalidCharRef ) ?;
2353
+ result. push_str ( codepoint. encode_utf8 ( & mut [ 0u8 ; 4 ] ) ) ;
2354
+ return Ok ( ( ) ) ;
2355
+ }
2356
+ if let Some ( value) = self . entity_resolver . resolve ( reference. as_ref ( ) ) {
2357
+ result. push_str ( value) ;
2358
+ return Ok ( ( ) ) ;
2359
+ }
2360
+ Err ( EscapeError :: UnrecognizedEntity ( 0 ..len, reference. to_string ( ) ) . into ( ) )
2361
+ }
2362
+
2337
2363
#[ inline]
2338
2364
fn read_to_end ( & mut self , name : QName ) -> Result < ( ) , DeError > {
2339
2365
match self . lookahead {
@@ -3126,7 +3152,7 @@ impl StartTrimmer {
3126
3152
Event :: End ( e) => ( PayloadEvent :: End ( e) , true ) ,
3127
3153
Event :: Eof => ( PayloadEvent :: Eof , true ) ,
3128
3154
3129
- // Do not trim next text event after Text or CDATA event
3155
+ // Do not trim next text event after Text, CDATA or reference event
3130
3156
Event :: CData ( e) => ( PayloadEvent :: CData ( e) , false ) ,
3131
3157
Event :: Text ( mut e) => {
3132
3158
// If event is empty after trimming, skip it
@@ -3135,6 +3161,7 @@ impl StartTrimmer {
3135
3161
}
3136
3162
( PayloadEvent :: Text ( e) , false )
3137
3163
}
3164
+ Event :: GeneralRef ( e) => ( PayloadEvent :: GeneralRef ( e) , false ) ,
3138
3165
3139
3166
_ => return None ,
3140
3167
} ;
0 commit comments