Skip to content

Commit 0a638b0

Browse files
committed
Rework entity resolution in serde Deserializer
Fixed (18): serde-de (9): borrow::escaped::element borrow::escaped::top_level resolve::resolve_custom_entity trivial::text::byte_buf trivial::text::bytes trivial::text::string::field trivial::text::string::naked trivial::text::string::text xml_schema_lists::element::text::string serde-migrated (1): test_parse_string serde-se (5): with_root::char_amp with_root::char_gt with_root::char_lt with_root::str_escaped with_root::tuple --doc (3): src\de\resolver.rs - de::resolver::EntityResolver (line 13)
1 parent 4c8100d commit 0a638b0

File tree

1 file changed

+32
-5
lines changed

1 file changed

+32
-5
lines changed

src/de/mod.rs

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2089,7 +2089,8 @@ use crate::{
20892089
de::map::ElementMapAccess,
20902090
encoding::Decoder,
20912091
errors::Error,
2092-
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
2092+
escape::{parse_number, EscapeError},
2093+
events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event},
20932094
name::QName,
20942095
reader::NsReader,
20952096
utils::CowRef,
@@ -2208,6 +2209,8 @@ pub enum PayloadEvent<'a> {
22082209
CData(BytesCData<'a>),
22092210
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
22102211
DocType(BytesText<'a>),
2212+
/// Reference `&ref;` in the textual data.
2213+
GeneralRef(BytesRef<'a>),
22112214
/// End of XML document.
22122215
Eof,
22132216
}
@@ -2222,6 +2225,7 @@ impl<'a> PayloadEvent<'a> {
22222225
PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
22232226
PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
22242227
PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2228+
PayloadEvent::GeneralRef(e) => PayloadEvent::GeneralRef(e.into_owned()),
22252229
PayloadEvent::Eof => PayloadEvent::Eof,
22262230
}
22272231
}
@@ -2276,7 +2280,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22762280
// If next event is a text, CDATA or general reference, we should not trim trailing spaces
22772281
!matches!(
22782282
self.lookahead,
2279-
Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2283+
Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_) | PayloadEvent::GeneralRef(_))
22802284
)
22812285
}
22822286

@@ -2301,9 +2305,10 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
23012305
result.to_mut().push_str(&e.decode()?);
23022306
}
23032307
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
2308+
PayloadEvent::GeneralRef(e) => self.resolve_reference(result.to_mut(), e)?,
23042309

2305-
// SAFETY: current_event_is_last_text checks that event is Text or CData
2306-
_ => unreachable!("Only `Text` and `CData` events can come here"),
2310+
// SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
2311+
_ => unreachable!("Only `Text`, `CData` or `GeneralRef` events can come here"),
23072312
}
23082313
}
23092314
Ok(DeEvent::Text(Text { text: result }))
@@ -2329,11 +2334,32 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
23292334
.map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
23302335
continue;
23312336
}
2337+
PayloadEvent::GeneralRef(e) => {
2338+
let mut text = String::new();
2339+
self.resolve_reference(&mut text, e)?;
2340+
self.drain_text(text.into())
2341+
}
23322342
PayloadEvent::Eof => Ok(DeEvent::Eof),
23332343
};
23342344
}
23352345
}
23362346

2347+
fn resolve_reference(&mut self, result: &mut String, event: BytesRef) -> Result<(), DeError> {
2348+
let len = event.len();
2349+
let reference = self.decoder().decode(&event)?;
2350+
2351+
if let Some(num) = reference.strip_prefix('#') {
2352+
let codepoint = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
2353+
result.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
2354+
return Ok(());
2355+
}
2356+
if let Some(value) = self.entity_resolver.resolve(reference.as_ref()) {
2357+
result.push_str(value);
2358+
return Ok(());
2359+
}
2360+
Err(EscapeError::UnrecognizedEntity(0..len, reference.to_string()).into())
2361+
}
2362+
23372363
#[inline]
23382364
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
23392365
match self.lookahead {
@@ -3126,7 +3152,7 @@ impl StartTrimmer {
31263152
Event::End(e) => (PayloadEvent::End(e), true),
31273153
Event::Eof => (PayloadEvent::Eof, true),
31283154

3129-
// Do not trim next text event after Text or CDATA event
3155+
// Do not trim next text event after Text, CDATA or reference event
31303156
Event::CData(e) => (PayloadEvent::CData(e), false),
31313157
Event::Text(mut e) => {
31323158
// If event is empty after trimming, skip it
@@ -3135,6 +3161,7 @@ impl StartTrimmer {
31353161
}
31363162
(PayloadEvent::Text(e), false)
31373163
}
3164+
Event::GeneralRef(e) => (PayloadEvent::GeneralRef(e), false),
31383165

31393166
_ => return None,
31403167
};

0 commit comments

Comments
 (0)