Skip to content

Commit

Permalink
Fix handling of content:encoded (#223)
Browse files Browse the repository at this point in the history
PR #220 introduced a failing test for detecting images in the "content"
element. It should instead be testing the "content:encoded" element. But
that uncovered an issue with how extensions were being detected (the
"content" namespace was being detected as an extension namespace).

As a more robust way of checking for the "content" namespace, this PR
exposes `shared.PrefixForNamespace()` as a public function so it can be
used in the rss parser. This should also fix PR #211 (and includes
@JLugagne's test case from that PR).

Once the fixes to xml:base handling in #222 are merged, this should fix
the remaining failing test reported in #210.
  • Loading branch information
cristoper authored Mar 1, 2024
1 parent 454d6a3 commit 8340fbd
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 9 deletions.
11 changes: 4 additions & 7 deletions internal/shared/extparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,15 @@ import (
// non empty prefix)
func IsExtension(p *xpp.XMLPullParser) bool {
space := strings.TrimSpace(p.Space)
if prefix, ok := p.Spaces[space]; ok {
return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content")
}

return p.Space != ""
prefix := PrefixForNamespace(space, p)
return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content")
}

// ParseExtension parses the current element of the
// XMLPullParser as an extension element and updates
// the extension map
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
prefix := prefixForNamespace(p.Space, p)
prefix := PrefixForNamespace(p.Space, p)

result, err := parseExtensionElement(p)
if err != nil {
Expand Down Expand Up @@ -93,7 +90,7 @@ func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
return e, nil
}

func prefixForNamespace(space string, p *xpp.XMLPullParser) string {
func PrefixForNamespace(space string, p *xpp.XMLPullParser) string {
// First we check if the global namespace map
// contains an entry for this namespace/prefix.
// This way we can use the canonical prefix for this
Expand Down
3 changes: 2 additions & 1 deletion rss/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,8 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
item.Description = result
} else if name == "encoded" {
space := strings.TrimSpace(p.Space)
if prefix, ok := p.Spaces[space]; ok && prefix == "content" {
prefix := shared.PrefixForNamespace(space, p)
if prefix == "content" {
result, err := shared.ParseText(p)
if err != nil {
return nil, err
Expand Down
8 changes: 8 additions & 0 deletions testdata/parser/rss/rss_channel_item_content_encoded.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"items": [
{
"content": "Item Description"
}
],
"version": "2.0"
}
10 changes: 10 additions & 0 deletions testdata/parser/rss/rss_channel_item_content_encoded.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!--
Description: rss item content encoded
-->
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<item>
<content:encoded xmlns="http://purl.org/rss/1.0/modules/content/">Item Description</content:encoded>
</item>
</channel>
</rss>
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Description: item image from content
<rss version="0.91">
<channel>
<item>
<content><![CDATA[<img src="http://example.com/content.png">]]></content>
<content:encoded><![CDATA[<img src="http://example.com/content.png">]]></content:encoded>
</item>
</channel>
</rss>

0 comments on commit 8340fbd

Please sign in to comment.