Skip to content

Commit 65d50a5

Browse files
committed
Allow attributes in closing tags (compatibility with the Macromedia Flash parser)
1 parent 45e8be4 commit 65d50a5

File tree

5 files changed

+118
-218
lines changed

5 files changed

+118
-218
lines changed

Changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
### New Features
1717

18+
- [#776]: Allow attributes in the end tag for compatibility with the Macromedia Flash XML parser.
19+
1820
### Bug Fixes
1921

2022
- [#781]: Fix conditions to start CDATA section. Only uppercase `<![CDATA[` can start it.
@@ -25,6 +27,7 @@
2527

2628
- [#780]: `reader::Parser`, `reader::ElementParser` and `reader::PiParser` moved to the new module `parser`.
2729

30+
[#776]: https://github.com/tafia/quick-xml/issues/776
2831
[#780]: https://github.com/tafia/quick-xml/pull/780
2932
[#781]: https://github.com/tafia/quick-xml/pull/781
3033

src/reader/buffered_reader.rs

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -101,54 +101,6 @@ macro_rules! impl_buffered_source {
101101
ReadTextResult::UpToEof(&buf[start..])
102102
}
103103

104-
#[inline]
105-
$($async)? fn read_bytes_until $(<$lf>)? (
106-
&mut self,
107-
byte: u8,
108-
buf: &'b mut Vec<u8>,
109-
position: &mut u64,
110-
) -> io::Result<(&'b [u8], bool)> {
111-
// search byte must be within the ascii range
112-
debug_assert!(byte.is_ascii());
113-
114-
let mut read = 0;
115-
let start = buf.len();
116-
loop {
117-
let available = match self $(.$reader)? .fill_buf() $(.$await)? {
118-
Ok(n) if n.is_empty() => break,
119-
Ok(n) => n,
120-
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
121-
Err(e) => {
122-
*position += read;
123-
return Err(e);
124-
}
125-
};
126-
127-
match memchr::memchr(byte, available) {
128-
Some(i) => {
129-
buf.extend_from_slice(&available[..i]);
130-
131-
let used = i + 1;
132-
self $(.$reader)? .consume(used);
133-
read += used as u64;
134-
135-
*position += read;
136-
return Ok((&buf[start..], true));
137-
}
138-
None => {
139-
buf.extend_from_slice(available);
140-
141-
let used = available.len();
142-
self $(.$reader)? .consume(used);
143-
read += used as u64;
144-
}
145-
}
146-
}
147-
148-
*position += read;
149-
Ok((&buf[start..], false))
150-
}
151-
152104
#[inline]
153105
$($async)? fn read_with<$($lf,)? P: Parser>(
154106
&mut self,

src/reader/mod.rs

Lines changed: 89 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -345,18 +345,27 @@ macro_rules! read_until_close {
345345
}
346346
},
347347
// `</` - closing tag
348+
// #776: We parse using ElementParser which allows us to have attributes
349+
// in close tags. While such tags are not allowed by the specification,
350+
// we allow parsing them anyway because:
351+
// - we do not check constraints during parsing. This is performed by the
352+
// optional validate step which the user should call manually
353+
// - if we just look for `>` we will parse `</tag attr=">" >` as end tag
354+
// `</tag attr=">` and text `" >`, which probably no existing parser
355+
// does. This is still an invalid tag, but it also has no practical meaning
356+
// to parse it in such a way. Such documents exist in the wild and are
357+
// tolerated by some parsers such as the one used by Adobe Flash.
348358
Ok(Some(b'/')) => match $reader
349-
.read_bytes_until(b'>', $buf, &mut $self.state.offset)
359+
.read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
350360
$(.$await)?
351361
{
352-
Ok((bytes, true)) => $self.state.emit_end(bytes),
353-
Ok((_, false)) => {
362+
Ok(bytes) => $self.state.emit_end(bytes),
363+
Err(e) => {
354364
// We want to report error at `<`, but offset was increased,
355365
// so return it back (-1 for `<`)
356366
$self.state.last_error_offset = start - 1;
357-
Err(Error::Syntax(SyntaxError::UnclosedTag))
367+
Err(e)
358368
}
359-
Err(e) => Err(Error::Io(e.into())),
360369
},
361370
// `<?` - processing instruction
362371
Ok(Some(b'?')) => match $reader
@@ -824,39 +833,6 @@ trait XmlSource<'r, B> {
824833
/// [events]: crate::events::Event
825834
fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;
826835

827-
/// Read input until `byte` is found or end of input is reached.
828-
///
829-
/// Returns a slice of data read up to `byte` (exclusive),
830-
/// and a flag noting whether `byte` was found in the input or not.
831-
///
832-
/// # Example
833-
///
834-
/// ```ignore
835-
/// let mut position = 0;
836-
/// let mut input = b"abc*def".as_ref();
837-
/// // ^= 4
838-
///
839-
/// assert_eq!(
840-
/// input.read_bytes_until(b'*', (), &mut position).unwrap(),
841-
/// (b"abc".as_ref(), true)
842-
/// );
843-
/// assert_eq!(position, 4); // position after the symbol matched
844-
/// ```
845-
///
846-
/// # Parameters
847-
/// - `byte`: Byte for search
848-
/// - `buf`: Buffer that could be filled from an input (`Self`) and
849-
/// from which [events] could borrow their data
850-
/// - `position`: Will be increased by amount of bytes consumed
851-
///
852-
/// [events]: crate::events::Event
853-
fn read_bytes_until(
854-
&mut self,
855-
byte: u8,
856-
buf: B,
857-
position: &mut u64,
858-
) -> io::Result<(&'r [u8], bool)>;
859-
860836
/// Read input until processing instruction is finished.
861837
///
862838
/// This method expect that start sequence of a parser already was read.
@@ -1022,115 +998,6 @@ mod test {
1022998
$buf:expr
1023999
$(, $async:ident, $await:ident)?
10241000
) => {
1025-
mod read_bytes_until {
1026-
use super::*;
1027-
// Use Bytes for printing bytes as strings for ASCII range
1028-
use crate::utils::Bytes;
1029-
use pretty_assertions::assert_eq;
1030-
1031-
/// Checks that search in the empty buffer returns `None`
1032-
#[$test]
1033-
$($async)? fn empty() {
1034-
let buf = $buf;
1035-
let mut position = 0;
1036-
let mut input = b"".as_ref();
1037-
// ^= 0
1038-
1039-
let (bytes, found) = $source(&mut input)
1040-
.read_bytes_until(b'*', buf, &mut position)
1041-
$(.$await)?
1042-
.unwrap();
1043-
assert_eq!(
1044-
(Bytes(bytes), found),
1045-
(Bytes(b""), false)
1046-
);
1047-
assert_eq!(position, 0);
1048-
}
1049-
1050-
/// Checks that search in the buffer non-existent value returns entire buffer
1051-
/// as a result and set `position` to `len()`
1052-
#[$test]
1053-
$($async)? fn non_existent() {
1054-
let buf = $buf;
1055-
let mut position = 0;
1056-
let mut input = b"abcdef".as_ref();
1057-
// ^= 6
1058-
1059-
let (bytes, found) = $source(&mut input)
1060-
.read_bytes_until(b'*', buf, &mut position)
1061-
$(.$await)?
1062-
.unwrap();
1063-
assert_eq!(
1064-
(Bytes(bytes), found),
1065-
(Bytes(b"abcdef"), false)
1066-
);
1067-
assert_eq!(position, 6);
1068-
}
1069-
1070-
/// Checks that search in the buffer an element that is located in the front of
1071-
/// buffer returns empty slice as a result and set `position` to one symbol
1072-
/// after match (`1`)
1073-
#[$test]
1074-
$($async)? fn at_the_start() {
1075-
let buf = $buf;
1076-
let mut position = 0;
1077-
let mut input = b"*abcdef".as_ref();
1078-
// ^= 1
1079-
1080-
let (bytes, found) = $source(&mut input)
1081-
.read_bytes_until(b'*', buf, &mut position)
1082-
$(.$await)?
1083-
.unwrap();
1084-
assert_eq!(
1085-
(Bytes(bytes), found),
1086-
(Bytes(b""), true)
1087-
);
1088-
assert_eq!(position, 1); // position after the symbol matched
1089-
}
1090-
1091-
/// Checks that search in the buffer an element that is located in the middle of
1092-
/// buffer returns slice before that symbol as a result and set `position` to one
1093-
/// symbol after match
1094-
#[$test]
1095-
$($async)? fn inside() {
1096-
let buf = $buf;
1097-
let mut position = 0;
1098-
let mut input = b"abc*def".as_ref();
1099-
// ^= 4
1100-
1101-
let (bytes, found) = $source(&mut input)
1102-
.read_bytes_until(b'*', buf, &mut position)
1103-
$(.$await)?
1104-
.unwrap();
1105-
assert_eq!(
1106-
(Bytes(bytes), found),
1107-
(Bytes(b"abc"), true)
1108-
);
1109-
assert_eq!(position, 4); // position after the symbol matched
1110-
}
1111-
1112-
/// Checks that search in the buffer an element that is located in the end of
1113-
/// buffer returns slice before that symbol as a result and set `position` to one
1114-
/// symbol after match (`len()`)
1115-
#[$test]
1116-
$($async)? fn in_the_end() {
1117-
let buf = $buf;
1118-
let mut position = 0;
1119-
let mut input = b"abcdef*".as_ref();
1120-
// ^= 7
1121-
1122-
let (bytes, found) = $source(&mut input)
1123-
.read_bytes_until(b'*', buf, &mut position)
1124-
$(.$await)?
1125-
.unwrap();
1126-
assert_eq!(
1127-
(Bytes(bytes), found),
1128-
(Bytes(b"abcdef"), true)
1129-
);
1130-
assert_eq!(position, 7); // position after the symbol matched
1131-
}
1132-
}
1133-
11341001
mod read_bang_element {
11351002
use super::*;
11361003
use crate::errors::{Error, SyntaxError};
@@ -1693,6 +1560,81 @@ mod test {
16931560
assert_eq!(position, 42);
16941561
}
16951562
}
1563+
1564+
mod close {
1565+
use super::*;
1566+
use pretty_assertions::assert_eq;
1567+
1568+
#[$test]
1569+
$($async)? fn empty_tag() {
1570+
let buf = $buf;
1571+
let mut position = 1;
1572+
let mut input = b"/ >".as_ref();
1573+
// ^= 4
1574+
1575+
assert_eq!(
1576+
Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1577+
Bytes(b"/ ")
1578+
);
1579+
assert_eq!(position, 4);
1580+
}
1581+
1582+
#[$test]
1583+
$($async)? fn normal() {
1584+
let buf = $buf;
1585+
let mut position = 1;
1586+
let mut input = b"/tag>".as_ref();
1587+
// ^= 6
1588+
1589+
assert_eq!(
1590+
Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1591+
Bytes(b"/tag")
1592+
);
1593+
assert_eq!(position, 6);
1594+
}
1595+
1596+
#[$test]
1597+
$($async)? fn empty_ns_empty_tag() {
1598+
let buf = $buf;
1599+
let mut position = 1;
1600+
let mut input = b"/:>".as_ref();
1601+
// ^= 4
1602+
1603+
assert_eq!(
1604+
Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1605+
Bytes(b"/:")
1606+
);
1607+
assert_eq!(position, 4);
1608+
}
1609+
1610+
#[$test]
1611+
$($async)? fn empty_ns() {
1612+
let buf = $buf;
1613+
let mut position = 1;
1614+
let mut input = b"/:tag>".as_ref();
1615+
// ^= 7
1616+
1617+
assert_eq!(
1618+
Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1619+
Bytes(b"/:tag")
1620+
);
1621+
assert_eq!(position, 7);
1622+
}
1623+
1624+
#[$test]
1625+
$($async)? fn with_attributes() {
1626+
let buf = $buf;
1627+
let mut position = 1;
1628+
let mut input = br#"/tag attr-1=">" attr2 = '>' 3attr>"#.as_ref();
1629+
// ^= 40
1630+
1631+
assert_eq!(
1632+
Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1633+
Bytes(br#"/tag attr-1=">" attr2 = '>' 3attr"#)
1634+
);
1635+
assert_eq!(position, 40);
1636+
}
1637+
}
16961638
}
16971639

16981640
/// Ensures, that no empty `Text` events are generated

src/reader/slice_reader.rs

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -284,29 +284,6 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
284284
}
285285
}
286286

287-
#[inline]
288-
fn read_bytes_until(
289-
&mut self,
290-
byte: u8,
291-
_buf: (),
292-
position: &mut u64,
293-
) -> io::Result<(&'a [u8], bool)> {
294-
// search byte must be within the ascii range
295-
debug_assert!(byte.is_ascii());
296-
297-
if let Some(i) = memchr::memchr(byte, self) {
298-
*position += i as u64 + 1;
299-
let bytes = &self[..i];
300-
*self = &self[i + 1..];
301-
Ok((bytes, true))
302-
} else {
303-
*position += self.len() as u64;
304-
let bytes = &self[..];
305-
*self = &[];
306-
Ok((bytes, false))
307-
}
308-
}
309-
310287
#[inline]
311288
fn read_with<P>(&mut self, mut parser: P, _buf: (), position: &mut u64) -> Result<&'a [u8]>
312289
where

0 commit comments

Comments
 (0)