Skip to content

Commit 2b57847

Browse files
authored
[2.7] bpo-36742: Fix urlparse.urlsplit() error message for Unicode URL (GH-13937)
If urlparse.urlsplit() detects an invalid netloc according to NFKC normalization, the error message type is now str rather than unicode, and repr() is used to format the URL, to prevent <exception str() failed> when displaying the error message.
1 parent 99b5c94 commit 2b57847

File tree

3 files changed

+15
-2
lines changed

3 files changed

+15
-2
lines changed

Lib/test/test_urlparse.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,15 @@ def test_urlsplit_normalization(self):
656656
with self.assertRaises(ValueError):
657657
urlparse.urlsplit(url)
658658

659+
# check error message: invalid netloc must be formatted with repr()
660+
# to get an ASCII error message
661+
with self.assertRaises(ValueError) as cm:
662+
urlparse.urlsplit(u'http://example.com\uFF03@bing.com')
663+
self.assertEqual(str(cm.exception),
664+
"netloc u'example.com\\uff03@bing.com' contains invalid characters "
665+
"under NFKC normalization")
666+
self.assertIsInstance(cm.exception.args[0], str)
667+
659668
def test_main():
660669
test_support.run_unittest(UrlParseTestCase)
661670

Lib/urlparse.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,9 @@ def _checknetloc(netloc):
180180
return
181181
for c in '/?#@:':
182182
if c in netloc2:
183-
raise ValueError(u"netloc '" + netloc + u"' contains invalid " +
184-
u"characters under NFKC normalization")
183+
raise ValueError("netloc %r contains invalid characters "
184+
"under NFKC normalization"
185+
% netloc)
185186

186187
def urlsplit(url, scheme='', allow_fragments=True):
187188
"""Parse a URL into 5 components:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`urlparse.urlsplit` error message for invalid ``netloc`` according to
2+
NFKC normalization is now a :class:`str` string, rather than a
3+
:class:`unicode` string, to prevent an error when displaying the error message.

0 commit comments

Comments
 (0)