Skip to content

Commit 505b601

Browse files
authored
Revert "bpo-27657: Fix urlparse() with numeric paths (GH-661)" (#18526)
This reverts commit 82b5f6b. The change broke the backwards compatibility of parsing behavior in a patch release of Python (3.7.6). A decision was taken to revert this patch in 3.7.7.

In https://bugs.python.org/issue27657 it was decided that the previous behavior, such as

>>> urlparse('localhost:8080')
ParseResult(scheme='', netloc='', path='localhost:8080', params='', query='', fragment='')
>>> urlparse('undefined:8080')
ParseResult(scheme='', netloc='', path='undefined:8080', params='', query='', fragment='')

needs to be preserved in patch releases, as a number of users rely upon it.

Explicitly mention the releases involved with the revert in NEWS. Adopt the wording suggested by @ned-deily.
1 parent 46cf4fc commit 505b601

File tree

3 files changed

+30
-7
lines changed

3 files changed

+30
-7
lines changed

Lib/test/test_urlparse.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -709,17 +709,15 @@ def test_withoutscheme(self):
709709

710710
def test_portseparator(self):
711711
# Issue 754016 makes changes for port separator ':' from scheme separator
712-
self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
713-
self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
714-
self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
712+
self.assertEqual(urllib.parse.urlparse("path:80"),
713+
('','','path:80','','',''))
715714
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
716715
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
717716
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
718717
('http','www.python.org:80','','','',''))
719718
# As usual, need to check bytes input as well
720-
self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
721-
self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
722-
self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
719+
self.assertEqual(urllib.parse.urlparse(b"path:80"),
720+
(b'',b'',b'path:80',b'',b'',b''))
723721
self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
724722
self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
725723
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),

Lib/urllib/parse.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,11 +426,31 @@ def urlsplit(url, scheme='', allow_fragments=True):
426426
netloc = query = fragment = ''
427427
i = url.find(':')
428428
if i > 0:
429+
if url[:i] == 'http': # optimize the common case
430+
url = url[i+1:]
431+
if url[:2] == '//':
432+
netloc, url = _splitnetloc(url, 2)
433+
if (('[' in netloc and ']' not in netloc) or
434+
(']' in netloc and '[' not in netloc)):
435+
raise ValueError("Invalid IPv6 URL")
436+
if allow_fragments and '#' in url:
437+
url, fragment = url.split('#', 1)
438+
if '?' in url:
439+
url, query = url.split('?', 1)
440+
_checknetloc(netloc)
441+
v = SplitResult('http', netloc, url, query, fragment)
442+
_parse_cache[key] = v
443+
return _coerce_result(v)
429444
for c in url[:i]:
430445
if c not in scheme_chars:
431446
break
432447
else:
433-
scheme, url = url[:i].lower(), url[i+1:]
448+
# make sure "url" is not actually a port number (in which case
449+
# "scheme" is really part of the path)
450+
rest = url[i+1:]
451+
if not rest or any(c not in '0123456789' for c in rest):
452+
# not a port number
453+
scheme, url = url[:i].lower(), rest
434454

435455
if url[:2] == '//':
436456
netloc, url = _splitnetloc(url, 2)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
The original fix for bpo-27657, "Fix urlparse() with numeric paths" (GH-16839)
2+
included in 3.7.6, inadvertently introduced a behavior change that broke
3+
several third-party packages relying on the original undefined parsing
4+
behavior. The change is reverted in 3.7.7, restoring the behavior of 3.7.5 and
5+
earlier releases.

0 commit comments

Comments
 (0)