tafia · dralley · Mar 12, 2023 · Mar 4, 2023 · Nov 27, 2022 · Nov 27, 2022
diff --git a/src/de/key.rs b/src/de/key.rs
@@ -123,7 +123,7 @@ impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {
         ignored_any
     }
 
-    /// According to the <https://www.w3.org/TR/xmlschema-2/#boolean>,
+    /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
     /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
     /// and `"0"`. But this method also handles following:
     ///

diff --git a/src/de/mod.rs b/src/de/mod.rs
@@ -2121,7 +2121,7 @@ where
     T::deserialize(&mut de)
 }
 
-// TODO: According to the https://www.w3.org/TR/xmlschema-2/#boolean,
+// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
 // valid boolean representations are only "true", "false", "1", and "0"
 fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
 where

diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs
@@ -163,7 +163,7 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
         self.deserialize_str(visitor)
     }
 
-    /// According to the <https://www.w3.org/TR/xmlschema-2/#boolean>,
+    /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
     /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
     /// and `"0"`. But this method also handles following:
     ///

diff --git a/src/errors.rs b/src/errors.rs
@@ -199,7 +199,7 @@ pub mod serialize {
         /// would occur if map key is a complex type that cannot be serialized as
         /// a primitive type (i.e. string, char, bool, unit struct or unit variant).
         ///
-        /// [XML name]: https://www.w3.org/TR/REC-xml/#sec-common-syn
+        /// [XML name]: https://www.w3.org/TR/xml11/#sec-common-syn
         Unsupported(Cow<'static, str>),
         /// Too many events were skipped while deserializing a sequence, event limit
         /// exceeded. The limit was provided as an argument

diff --git a/src/lib.rs b/src/lib.rs
@@ -16,10 +16,13 @@
 //! Especially for nested XML elements, the user must keep track _where_ (how deep)
 //! in the XML document the current event is located.
 //!
-//! quick-xml contains optional support of asynchronous reading using [tokio].
+//! quick-xml contains optional support of asynchronous reading and writing using [tokio].
+//! To get it, enable the `async-tokio` feature.
 //!
 //! Furthermore, quick-xml also contains optional [Serde] support to directly
 //! serialize and deserialize from structs, without having to deal with the XML events.
+//! To get it, enable the `serialize` feature. Read more about mapping Rust types
+//! to XML in the documentation of the [`de`] module.
 //!
 //! # Examples
 //!
@@ -33,6 +36,7 @@
 //! [StAX]: https://en.wikipedia.org/wiki/StAX
 //! [tokio]: https://tokio.rs/
 //! [Serde]: https://serde.rs/
+//! [`de`]: ./de/index.html
 #![cfg_attr(
     feature = "document-features",
     cfg_attr(doc, doc = ::document_features::document_features!())

diff --git a/src/name.rs b/src/name.rs
@@ -209,7 +209,7 @@ impl<'a> AsRef<[u8]> for Prefix<'a> {
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /// A namespace prefix declaration, `xmlns` or `xmlns:<name>`, as defined in
-/// [XML Schema specification](https://www.w3.org/TR/xml-names/#ns-decl)
+/// [Namespaces in XML 1.1 specification](https://www.w3.org/TR/xml-names11/#ns-decl)
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum PrefixDeclaration<'a> {
     /// XML attribute binds a default namespace. Corresponds to `xmlns` in `xmlns="..."`
@@ -250,7 +250,7 @@ impl<'a> Namespace<'a> {
     /// This is because XML entity references are expanded during attribute value
     /// normalization.
     ///
-    /// [non-normalized]: https://www.w3.org/TR/REC-xml/#AVNormalize
+    /// [non-normalized]: https://www.w3.org/TR/xml11/#AVNormalize
     /// [IRI reference]: https://datatracker.ietf.org/doc/html/rfc3987
     #[inline(always)]
     pub fn into_inner(self) -> &'a [u8] {

diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs
@@ -168,9 +168,11 @@ macro_rules! impl_buffered_source {
                             self $(.$reader)? .consume(used);
                             read += used;
 
+                            // Position now just after the `>` symbol
                             *position += read;
                             break;
                         } else {
+                            // The `>` symbol not yet found, continue reading
                             buf.extend_from_slice(available);
 
                             let used = available.len();

diff --git a/src/reader/mod.rs b/src/reader/mod.rs
@@ -168,7 +168,7 @@ macro_rules! read_event_impl {
     ) => {{
         let event = loop {
             match $self.parser.state {
-                ParseState::Init => {
+                ParseState::Init => { // Go to OpenedTag state
                     // If encoding set explicitly, we not need to detect it. For example,
                     // explicit UTF-8 set automatically if Reader was created using `from_str`.
                     // But we still need to remove BOM for consistency with no encoding
@@ -184,19 +184,21 @@ macro_rules! read_event_impl {
                     #[cfg(not(feature = "encoding"))]
                     $reader.remove_utf8_bom() $(.$await)? ?;
 
+                    // Go to OpenedTag state
                     match $self.$read_until_open($buf) $(.$await)? {
                         Ok(Ok(ev)) => break Ok(ev),
                         Ok(Err(b)) => $buf = b,
                         Err(err)   => break Err(err),
                     }
                 },
-                ParseState::ClosedTag => {
+                ParseState::ClosedTag => { // Go to OpenedTag state
                     match $self.$read_until_open($buf) $(.$await)? {
                         Ok(Ok(ev)) => break Ok(ev),
                         Ok(Err(b)) => $buf = b,
                         Err(err)   => break Err(err),
                     }
                 },
+                // Go to ClosedTag state in next two arms
                 ParseState::OpenedTag => break $self.$read_until_close($buf) $(.$await)?,
                 ParseState::Empty => break $self.parser.close_expanded_empty(),
                 ParseState::Exit => break Ok(Event::Eof),
@@ -210,6 +212,15 @@ macro_rules! read_event_impl {
     }};
 }
 
+/// Read bytes up to `<` and skip it. If current byte (after skipping all space
+/// characters if [`Parser::trim_text_start`] is `true`) is already `<`, hands the
+/// buffer back to read the next event, otherwise returns a `Text` event (or `Eof`).
+///
+/// Moves parser to the `OpenedTag` state.
+///
+/// This code is executed in two cases:
+/// - after start of parsing just after skipping BOM if it is present
+/// - after parsing `</tag>` or `<tag>`
 macro_rules! read_until_open {
     (
         $self:ident, $buf:ident,
@@ -225,20 +236,42 @@ macro_rules! read_until_open {
 
         // If we already at the `<` symbol, do not try to return an empty Text event
         if $reader.skip_one(b'<', &mut $self.parser.offset) $(.$await)? ? {
+            // Pass $buf to the next iteration of parsing loop
             return Ok(Err($buf));
         }
 
         match $reader
             .read_bytes_until(b'<', $buf, &mut $self.parser.offset)
             $(.$await)?
         {
+            // Return Text event with `bytes` content
             Ok(Some(bytes)) => $self.parser.read_text(bytes).map(Ok),
             Ok(None) => Ok(Ok(Event::Eof)),
             Err(e) => Err(e),
         }
     }};
 }
 
+/// Read bytes up to the `>` and skip it. This method is expected to be called
+/// after seeing the `<` symbol and skipping it. Inspects the next (current)
+/// symbol and returns an appropriate [`Event`]:
+///
+/// |Symbol |Event
+/// |-------|-------------------------------------
+/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
+/// |`/`    |[`End`]
+/// |`?`    |[`PI`]
+/// |_other_|[`Start`] or [`Empty`]
+///
+/// Moves parser to the `ClosedTag` state.
+///
+/// [`Comment`]: Event::Comment
+/// [`CData`]: Event::CData
+/// [`DocType`]: Event::DocType
+/// [`End`]: Event::End
+/// [`PI`]: Event::PI
+/// [`Start`]: Event::Start
+/// [`Empty`]: Event::Empty
 macro_rules! read_until_close {
     (
         $self:ident, $buf:ident,
@@ -371,10 +404,12 @@ enum ParseState {
     /// that symbol will be returned in the [`Event::Text`] event. After that
     /// the reader moves to the `OpenedTag` state.
     ClosedTag,
-    /// This state is used only if option `expand_empty_elements` is set to `true`.
+    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
     /// Reader enters to this state when it is in a `ClosedTag` state and emits an
     /// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
     /// after which reader returned to the `ClosedTag` state.
+    ///
+    /// [`expand_empty_elements`]: Parser::expand_empty_elements
     Empty,
     /// Reader enters this state when `Eof` event generated or an error occurred.
     /// This is the last state, the reader stay in it forever.

diff --git a/src/reader/parser.rs b/src/reader/parser.rs
@@ -204,29 +204,36 @@ impl Parser {
         }
     }
 
-    /// reads `BytesElement` starting with any character except `/`, `!` or ``?`
-    /// return `Start` or `Empty` event
-    pub fn read_start<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
-        // TODO: do this directly when reading bufreader ...
-        let len = buf.len();
-        let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len);
-        if let Some(&b'/') = buf.last() {
-            let end = if name_end < len { name_end } else { len - 1 };
+    /// Converts content of a tag to a `Start` or an `Empty` event
+    ///
+    /// # Parameters
+    /// - `content`: Content of a tag between `<` and `>`
+    pub fn read_start<'b>(&mut self, content: &'b [u8]) -> Result<Event<'b>> {
+        let len = content.len();
+        let name_end = content
+            .iter()
+            .position(|&b| is_whitespace(b))
+            .unwrap_or(len);
+        if let Some(&b'/') = content.last() {
+            // This is a self-closed tag `<something/>`
+            let name_len = if name_end < len { name_end } else { len - 1 };
+            let event = BytesStart::wrap(&content[..len - 1], name_len);
+
             if self.expand_empty_elements {
                 self.state = ParseState::Empty;
                 self.opened_starts.push(self.opened_buffer.len());
-                self.opened_buffer.extend(&buf[..end]);
-                Ok(Event::Start(BytesStart::wrap(&buf[..len - 1], end)))
+                self.opened_buffer.extend(&content[..name_len]);
+                Ok(Event::Start(event))
             } else {
-                Ok(Event::Empty(BytesStart::wrap(&buf[..len - 1], end)))
+                Ok(Event::Empty(event))
             }
         } else {
             // #514: Always store names event when .check_end_names == false,
             // because checks can be temporary disabled and when they would be
             // enabled, we should have that information
             self.opened_starts.push(self.opened_buffer.len());
-            self.opened_buffer.extend(&buf[..name_end]);
-            Ok(Event::Start(BytesStart::wrap(buf, name_end)))
+            self.opened_buffer.extend(&content[..name_end]);
+            Ok(Event::Start(BytesStart::wrap(content, name_end)))
         }
     }
 

diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs
@@ -310,6 +310,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
         let mut state = ReadElementState::Elem;
 
         if let Some((bytes, i)) = state.change(self) {
+            // Position now just after the `>` symbol
             *position += i;
             *self = &self[i..];
             return Ok(Some(bytes));

diff --git a/src/se/key.rs b/src/se/key.rs
@@ -10,7 +10,7 @@ use std::fmt::Write;
 /// [not allowed] in XML names, because in some cases it should pass names
 /// that would be filtered on higher level.
 ///
-/// [not allowed]: https://www.w3.org/TR/REC-xml/#sec-common-syn
+/// [not allowed]: https://www.w3.org/TR/xml11/#sec-common-syn
 pub struct QNameSerializer<W: Write> {
     /// Writer to which this serializer writes content
     pub writer: W,

diff --git a/src/se/mod.rs b/src/se/mod.rs
@@ -207,7 +207,7 @@ where
 /// );
 /// ```
 ///
-/// [XML name]: https://www.w3.org/TR/REC-xml/#NT-Name
+/// [XML name]: https://www.w3.org/TR/xml11/#NT-Name
 pub fn to_writer_with_root<W, T>(mut writer: W, root_tag: &str, value: &T) -> Result<(), DeError>
 where
     W: Write,
@@ -249,7 +249,7 @@ where
 /// );
 /// ```
 ///
-/// [XML name]: https://www.w3.org/TR/REC-xml/#NT-Name
+/// [XML name]: https://www.w3.org/TR/xml11/#NT-Name
 pub fn to_string_with_root<T>(root_tag: &str, value: &T) -> Result<String, DeError>
 where
     T: ?Sized + Serialize,
@@ -369,7 +369,7 @@ pub(self) struct XmlName<'n>(&'n str);
 impl<'n> XmlName<'n> {
     /// Checks correctness of the XML name according to [XML 1.1 specification]
     ///
-    /// [XML 1.1 specification]: https://www.w3.org/TR/REC-xml/#NT-Name
+    /// [XML 1.1 specification]: https://www.w3.org/TR/xml11/#NT-Name
     pub fn try_from(name: &'n str) -> Result<XmlName<'n>, DeError> {
         //TODO: Customization point: allow user to decide if he want to reject or encode the name
         match name.chars().next() {
@@ -514,7 +514,7 @@ impl<'w, 'r, W: Write> Serializer<'w, 'r, W> {
     /// );
     /// ```
     ///
-    /// [XML name]: https://www.w3.org/TR/REC-xml/#NT-Name
+    /// [XML name]: https://www.w3.org/TR/xml11/#NT-Name
     pub fn with_root(writer: &'w mut W, root_tag: Option<&'r str>) -> Result<Self, DeError> {
         Ok(Self {
             ser: ContentSerializer {