diff --git a/CHANGES/9095.misc.rst b/CHANGES/9095.misc.rst
new file mode 100644
index 00000000000..f4a06cb09d6
--- /dev/null
+++ b/CHANGES/9095.misc.rst
@@ -0,0 +1 @@
+Improved performance of checking if a host is an IP Address -- by :user:`bdraco`.
diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py
index 0327d31d961..6abbe74d8cf 100644
--- a/aiohttp/helpers.py
+++ b/aiohttp/helpers.py
@@ -34,7 +34,6 @@
     List,
     Mapping,
     Optional,
-    Pattern,
     Protocol,
     Tuple,
     Type,
@@ -471,44 +470,54 @@ def __set__(self, inst: _TSelf[_T], value: _T) -> None:
 except ImportError:
     pass
 
-_ipv4_pattern = (
-    r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
-    r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
-)
-_ipv6_pattern = (
-    r"^(?:(?:(?:[A-F0-9]{1,4}:){6}|(?=(?:[A-F0-9]{0,4}:){0,6}"
-    r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}$)(([0-9A-F]{1,4}:){0,5}|:)"
-    r"((:[0-9A-F]{1,4}){1,5}:|:)|::(?:[A-F0-9]{1,4}:){5})"
-    r"(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}"
-    r"(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])|(?:[A-F0-9]{1,4}:){7}"
-    r"[A-F0-9]{1,4}|(?=(?:[A-F0-9]{0,4}:){0,7}[A-F0-9]{0,4}$)"
-    r"(([0-9A-F]{1,4}:){1,7}|:)((:[0-9A-F]{1,4}){1,7}|:)|(?:[A-F0-9]{1,4}:){7}"
-    r":|:(:[A-F0-9]{1,4}){7})$"
-)
-_ipv4_regex = re.compile(_ipv4_pattern)
-_ipv6_regex = re.compile(_ipv6_pattern, flags=re.IGNORECASE)
-_ipv4_regexb = re.compile(_ipv4_pattern.encode("ascii"))
-_ipv6_regexb = re.compile(_ipv6_pattern.encode("ascii"), flags=re.IGNORECASE)
 
+def is_ipv4_address(host: Optional[Union[str, bytes, bytearray, memoryview]]) -> bool:
+    """Check if host looks like an IPv4 address.
+
+    This function does not validate that the format is correct, only that
+    the host is a str or bytes-like object, and it's all numeric.
 
-def _is_ip_address(
-    regex: Pattern[str], regexb: Pattern[bytes], host: Optional[Union[str, bytes]]
-) -> bool:
-    if host is None:
+    This check is only meant as a heuristic to ensure that
+    a host is not a domain name.
+    """
+    if not host:
         return False
+    # For a host to be an ipv4 address, it must be all numeric.
+    # Only ASCII digits count; str.isdigit() alone also accepts other scripts.
     if isinstance(host, str):
-        return bool(regex.match(host))
-    elif isinstance(host, (bytes, bytearray, memoryview)):
-        return bool(regexb.match(host))
-    else:
-        raise TypeError(f"{host} [{type(host)}] is not a str or bytes")
+        return host.isascii() and host.replace(".", "").isdigit()
+    if isinstance(host, (bytes, bytearray, memoryview)):
+        # bytes() normalizes memoryview, which has no replace()/isdigit().
+        return bytes(host).replace(b".", b"").isdigit()
+    raise TypeError(f"{host} [{type(host)}] is not a str or bytes")
 
+
+def is_ipv6_address(host: Optional[Union[str, bytes, bytearray, memoryview]]) -> bool:
+    """Check if host looks like an IPv6 address.
 
-is_ipv4_address = functools.partial(_is_ip_address, _ipv4_regex, _ipv4_regexb)
-is_ipv6_address = functools.partial(_is_ip_address, _ipv6_regex, _ipv6_regexb)
+    This function does not validate that the format is correct, only that
+    the host contains a colon and that it is a str or bytes.
+
+    This check is only meant as a heuristic to ensure that
+    a host is not a domain name.
+    """
+    if not host:
+        return False
+    # The host must contain a colon to be an IPv6 address.
+    if isinstance(host, str):
+        return ":" in host
+    if isinstance(host, (bytes, bytearray, memoryview)):
+        # ``in`` on a memoryview compares against its int items, so convert first.
+        return b":" in bytes(host)
+    raise TypeError(f"{host} [{type(host)}] is not a str or bytes")
 
 
 def is_ip_address(host: Optional[Union[str, bytes, bytearray, memoryview]]) -> bool:
+    """Check if host looks like an IP Address.
+
+    This check is only meant as a heuristic to ensure that
+    a host is not a domain name.
+    """
     return is_ipv4_address(host) or is_ipv6_address(host)
 
 
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 827a417c299..2d6e098aae5 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -267,14 +267,6 @@ def test_is_ip_address() -> None:
     assert not helpers.is_ip_address("localhost")
     assert not helpers.is_ip_address("www.example.com")
 
-    # Out of range
-    assert not helpers.is_ip_address("999.999.999.999")
-    # Contain a port
-    assert not helpers.is_ip_address("127.0.0.1:80")
-    assert not helpers.is_ip_address("[2001:db8:0:1]:80")
-    # Too many "::"
-    assert not helpers.is_ip_address("1200::AB00:1234::2552:7777:1313")
-
 
 def test_is_ip_address_bytes() -> None:
     assert helpers.is_ip_address(b"127.0.0.1")
@@ -285,14 +277,6 @@ def test_is_ip_address_bytes() -> None:
     assert not helpers.is_ip_address(b"localhost")
     assert not helpers.is_ip_address(b"www.example.com")
 
-    # Out of range
-    assert not helpers.is_ip_address(b"999.999.999.999")
-    # Contain a port
-    assert not helpers.is_ip_address(b"127.0.0.1:80")
-    assert not helpers.is_ip_address(b"[2001:db8:0:1]:80")
-    # Too many "::"
-    assert not helpers.is_ip_address(b"1200::AB00:1234::2552:7777:1313")
-
 
 def test_ipv4_addresses() -> None:
     ip_addresses = [
@@ -340,6 +324,32 @@ def test_is_ip_address_invalid_type() -> None:
     with pytest.raises(TypeError):
         helpers.is_ip_address(object())
 
+    with pytest.raises(TypeError):
+        helpers.is_ipv4_address(123)  # type: ignore[arg-type]
+
+    with pytest.raises(TypeError):
+        helpers.is_ipv4_address(object())  # type: ignore[arg-type]
+
+    with pytest.raises(TypeError):
+        helpers.is_ipv6_address(123)  # type: ignore[arg-type]
+
+    with pytest.raises(TypeError):
+        helpers.is_ipv6_address(object())  # type: ignore[arg-type]
+
+
+def test_is_ip_address_bytes_like() -> None:
+    assert helpers.is_ip_address(bytearray(b"127.0.0.1"))
+    assert helpers.is_ip_address(memoryview(b"127.0.0.1"))
+    assert helpers.is_ip_address(bytearray(b"::1"))
+    assert helpers.is_ip_address(memoryview(b"::1"))
+    assert not helpers.is_ip_address(bytearray(b"localhost"))
+    assert not helpers.is_ip_address(memoryview(b"localhost"))
+
+
+def test_is_ipv4_address_non_ascii() -> None:
+    assert not helpers.is_ipv4_address(b"\xff.0.0.1")
+    assert not helpers.is_ipv4_address("\u0661.\u0662.\u0663.\u0664")
+
 
 # ----------------------------------- TimeoutHandle -------------------
 