Skip to content

Commit

Permalink
Adds checks to ensure that bracketed hosts found by urlsplit are of I…
Browse files Browse the repository at this point in the history
…Pv6 or IPvFuture format
  • Loading branch information
JamesJohnUtley committed Apr 26, 2023
1 parent a44568b commit 37bc08c
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 3 deletions.
23 changes: 23 additions & 0 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,29 @@ def test_issue14072(self):
self.assertEqual(p2.scheme, 'tel')
self.assertEqual(p2.path, '+31641044153')

def test_splitting_bracketed_hosts(self):
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
self.assertEqual(p1.hostname, 'v6a.ip')
self.assertEqual(p1.username, 'user')
self.assertEqual(p1.path, '/path')
p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
self.assertEqual(p2.username, 'user')
self.assertEqual(p2.path, '/path')
p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
self.assertEqual(p3.username, 'user')
self.assertEqual(p3.path, '/path')

def test_port_casting_failure_message(self):
message = "Port could not be cast to integer value as 'oracle'"
p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
Expand Down
18 changes: 15 additions & 3 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import re
import types
import warnings
import ipaddress

__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
Expand Down Expand Up @@ -199,7 +200,7 @@ def _hostinfo(self):
_, _, hostinfo = netloc.rpartition('@')
_, have_open_br, bracketed = hostinfo.partition('[')
if have_open_br:
hostname, _, port = bracketed.partition(']')
hostname, _, port = bracketed.rpartition(']')
_, _, port = port.partition(':')
else:
hostname, _, port = hostinfo.partition(':')
Expand Down Expand Up @@ -229,7 +230,7 @@ def _hostinfo(self):
_, _, hostinfo = netloc.rpartition(b'@')
_, have_open_br, bracketed = hostinfo.partition(b'[')
if have_open_br:
hostname, _, port = bracketed.partition(b']')
hostname, _, port = bracketed.rpartition(b']')
_, _, port = port.partition(b':')
else:
hostname, _, port = hostinfo.partition(b':')
Expand Down Expand Up @@ -426,6 +427,15 @@ def _checknetloc(netloc):
if c in netloc2:
raise ValueError("netloc '" + netloc + "' contains invalid " +
"characters under NFKC normalization")

def _check_bracketed_host(hostname):
if hostname.startswith('v'):
if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname):
raise ValueError(f"IPvFuture address is invalid")
else:
ip = ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4
if isinstance(ip, ipaddress.IPv4Address):
raise ValueError(f"An IPv4 address cannot be in brackets")

# typed=True avoids BytesWarnings being emitted during cache key
# comparison since this API supports both bytes and str input.
Expand Down Expand Up @@ -466,12 +476,14 @@ def urlsplit(url, scheme='', allow_fragments=True):
break
else:
scheme, url = url[:i].lower(), url[i+1:]

if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
if '[' in netloc and ']' in netloc:
bracketed_host = netloc.partition('[')[2].rpartition(']')[0]
_check_bracketed_host(bracketed_host)
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add checks to ensure that bracketed hosts found by urlsplit are of IPv6 or
IPvFuture format

0 comments on commit 37bc08c

Please sign in to comment.