Skip to content

Commit

Permalink
Handle text/html mime type in content->type
Browse files Browse the repository at this point in the history
The type attribute on a content element is not expected to contain a
MIME type (the spec only defines 'text', 'html' and 'xhtml').

The scattered feed specifies HTML content as a MIME type, which isn't
correct, but we might as well work around this and avoid a parser panic.
  • Loading branch information
markpritchard committed Jan 6, 2024
1 parent 9bbb5af commit 5d38609
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 2 deletions.
18 changes: 18 additions & 0 deletions feed-rs/fixture/atom/atom_scattered.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
<title>Scattered Thoughts</title>
<link href="https://www.scattered-thoughts.net/atom.xml" rel="self" type="application/atom+xml"/>
<link href="https://www.scattered-thoughts.net/"/>
<updated>2023-12-01T00:00:00+00:00</updated>
<author><name>Jamie Brandon</name></author>
<id>https://www.scattered-thoughts.net/atom.xml</id>
<entry xml:lang="en">
<title>0042: consulting lessons, there are no strings on me, buttondown, focus goof, jsfuck, 1ml</title>
<published>2023-12-01T00:00:00+00:00</published>
<updated>2023-12-01T00:00:00+00:00</updated>
<link href="https://www.scattered-thoughts.net/log/0042/" type="text/html" rel="alternate"/>
<id>https://www.scattered-thoughts.net/log/0042/</id>
<content type="text/html"><a href="https://www.scattered-thoughts.net/log/0042/">0042: consulting lessons, there are no strings on me</a></content>
</entry>
</feed>
4 changes: 2 additions & 2 deletions feed-rs/src/parser/atom/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ fn handle_content<R: BufRead>(element: Element<R>) -> ParseFeedResult<Option<Con
// from http://www.atomenabled.org/developers/syndication/#contentElement
match content_type.as_deref() {
// Should be handled as a text element per "In the most common case, the type attribute is either text, html, xhtml, in which case the content element is defined identically to other text constructs"
Some("text") | Some("html") | Some("xhtml") | None => {
Some("text") | Some("html") | Some("xhtml") | Some("text/html") | None => {
handle_text(element)?
.map(|text| {
let mut content = Content::default();
Expand Down Expand Up @@ -283,7 +283,7 @@ pub(crate) fn handle_text<R: BufRead>(element: Element<R>) -> ParseFeedResult<Op

let mime = match type_attr {
"text" => Ok(mime::TEXT_PLAIN),
"html" | "xhtml" => Ok(mime::TEXT_HTML),
"html" | "xhtml" | "text/html" => Ok(mime::TEXT_HTML),

// Unknown content type
_ => Err(ParseFeedError::ParseError(ParseErrorKind::UnknownMimeType(type_attr.into()))),
Expand Down
15 changes: 15 additions & 0 deletions feed-rs/src/parser/atom/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -542,3 +542,18 @@ fn test_reddit() {
let media_obj = &entry.media[0];
assert_eq!(media_obj, &expected);
}

// Regression check: a content element whose type attribute carries the mime type
// "text/html" must still parse, and the entry body must be populated from it
#[test]
fn test_scattered() {
    let test_data = test::fixture_as_string("atom/atom_scattered.xml");
    let actual = parser::parse(test_data.as_bytes()).unwrap().id("");

    // Step through the optional layers one at a time so a failure points at the missing piece
    let content = actual.entries[0].content.as_ref().unwrap();
    let body = content.body.as_ref().unwrap();

    assert!(body.contains("there are no strings on me"));
}

0 comments on commit 5d38609

Please sign in to comment.