Skip to content

Commit

Permalink
Escape characters that must be escaped in XML (#124)
Browse files Browse the repository at this point in the history
* fixed xml special chars

* doc update

* Update action.yml

* Update CHANGELOG.md
  • Loading branch information
cicirello authored Jun 8, 2024
1 parent 6ecb9dd commit c1f0aea
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 11 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased] - 2024-05-20
## [Unreleased] - 2024-06-08

### Added

Expand All @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Removed

### Fixed
* Escape characters that must be escaped in XML.

### CI/CD

Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ sure to include the following as a step in your workflow:
```yml
steps:
- name: Checkout the repo
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
```
Expand Down Expand Up @@ -242,7 +242,7 @@ you can also use a specific version such as with:

```yml
- name: Generate the sitemap
uses: cicirello/generate-sitemap@v1.10.0
uses: cicirello/generate-sitemap@v1.10.1
with:
base-url-path: https://THE.URL.TO.YOUR.PAGE/
```
Expand All @@ -268,7 +268,7 @@ jobs:
steps:
- name: Checkout the repo
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
Expand Down Expand Up @@ -306,7 +306,7 @@ jobs:
steps:
- name: Checkout the repo
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
Expand Down Expand Up @@ -348,7 +348,7 @@ jobs:
steps:
- name: Checkout the repo
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
Expand Down Expand Up @@ -389,7 +389,7 @@ jobs:
steps:
- name: Checkout the repo
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
Expand Down
2 changes: 1 addition & 1 deletion action.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generate-sitemap: Github action for automating sitemap generation
#
# Copyright (c) 2020-2023 Vincent A Cicirello
# Copyright (c) 2020-2024 Vincent A Cicirello
# https://www.cicirello.org/
#
# MIT License
Expand Down
23 changes: 21 additions & 2 deletions generatesitemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# generate-sitemap: Github action for automating sitemap generation
#
# Copyright (c) 2020-2023 Vincent A Cicirello
# Copyright (c) 2020-2024 Vincent A Cicirello
# https://www.cicirello.org/
#
# MIT License
Expand Down Expand Up @@ -262,6 +262,25 @@ def removeTime(dateString) :
"""
return dateString[:10]

def xmlEscapeCharacters(f):
    """Escapes any characters that XML requires escaped.

    Each of the five reserved characters is replaced by its predefined
    XML entity: & becomes &amp;, < becomes &lt;, > becomes &gt;,
    ' becomes &apos;, and " becomes &quot;. All other characters are
    left as they are.

    Keyword arguments:
    f - the filename

    Returns a new string with the reserved characters escaped.
    """
    # A single translate pass never revisits text it has already emitted,
    # so the "&" that begins each entity cannot itself be escaped again
    # (which a chain of replace calls only avoids by handling "&" first).
    return f.translate(str.maketrans({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "'": "&apos;",
        '"': "&quot;",
    }))

def xmlSitemapEntry(f, baseUrl, dateString, dropExtension=False, dateOnly=False) :
"""Forms a string with an entry formatted for an xml sitemap
including lastmod date.
Expand All @@ -273,7 +292,7 @@ def xmlSitemapEntry(f, baseUrl, dateString, dropExtension=False, dateOnly=False)
dropExtension - true to drop extensions of .html from the filename in urls
"""
return xmlSitemapEntryTemplate.format(
urlstring(f, baseUrl, dropExtension),
urlstring(xmlEscapeCharacters(f), baseUrl, dropExtension),
removeTime(dateString) if dateOnly else dateString
)

Expand Down
52 changes: 51 additions & 1 deletion tests/tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generate-sitemap: Github action for automating sitemap generation
#
# Copyright (c) 2020-2023 Vincent A Cicirello
# Copyright (c) 2020-2024 Vincent A Cicirello
# https://www.cicirello.org/
#
# MIT License
Expand Down Expand Up @@ -590,6 +590,26 @@ def test_removeTime(self) :
date = "2020-09-11T13:35:00-04:00"
expected = "2020-09-11"
self.assertEqual(expected, gs.removeTime(date))

def test_xmlEscapeCharacters(self):
    # Each case pairs a raw input with the escaped text expected back:
    # one case per reserved character, then one mixing all five.
    cases = (
        ("abs&def", "abs&amp;def"),
        ("abs<def", "abs&lt;def"),
        ("abs>def", "abs&gt;def"),
        ("abs'def", "abs&apos;def"),
        ('abs"def', "abs&quot;def"),
        (
            """&<>"'"'><&""",
            "&amp;&lt;&gt;&quot;&apos;&quot;&apos;&gt;&lt;&amp;"
        ),
    )
    for raw, escaped in cases:
        self.assertEqual(escaped, gs.xmlEscapeCharacters(raw))

def test_xmlSitemapEntry(self) :
base = "https://TESTING.FAKE.WEB.ADDRESS.TESTING/"
Expand All @@ -613,6 +633,36 @@ def test_xmlSitemapEntryDateOnly(self) :
expected = "<url>\n<loc>https://TESTING.FAKE.WEB.ADDRESS.TESTING/a</loc>\n<lastmod>2020-09-11</lastmod>\n</url>"
self.assertEqual(actual, expected)

def test_xmlSitemapEntry_withEscapes(self):
    # Checks that reserved XML characters in a filename appear escaped in
    # the generated <loc> element, both when the .html extension is kept
    # and when it is dropped.
    base = "https://TESTING.FAKE.WEB.ADDRESS.TESTING/"
    date = "2020-09-11T13:35:00-04:00"
    # (raw filename fragment, escaped form expected in the output)
    cases = (
        ("abs&def", "abs&amp;def"),
        ("abs<def", "abs&lt;def"),
        ("abs>def", "abs&gt;def"),
        ("abs'def", "abs&apos;def"),
        ('abs"def', "abs&quot;def"),
        (
            """&<>"'"'><&""",
            "&amp;&lt;&gt;&quot;&apos;&quot;&apos;&gt;&lt;&amp;"
        ),
    )
    keepExtension = "<url>\n<loc>https://TESTING.FAKE.WEB.ADDRESS.TESTING/a{0}.html</loc>\n<lastmod>2020-09-11T13:35:00-04:00</lastmod>\n</url>"
    dropExtension = "<url>\n<loc>https://TESTING.FAKE.WEB.ADDRESS.TESTING/a{0}</loc>\n<lastmod>2020-09-11T13:35:00-04:00</lastmod>\n</url>"
    for raw, escaped in cases:
        filename = "./a{0}.html".format(raw)
        self.assertEqual(escaped, gs.xmlEscapeCharacters(raw))
        self.assertEqual(
            gs.xmlSitemapEntry(filename, base, date),
            keepExtension.format(escaped)
        )
        # Fourth positional argument is dropExtension.
        self.assertEqual(
            gs.xmlSitemapEntry(filename, base, date, True),
            dropExtension.format(escaped)
        )

def test_robotsTxtParser(self) :
expected = [ [],
["/"],
Expand Down

0 comments on commit c1f0aea

Please sign in to comment.