From ffcf9dc4ea157adc5b7b5b31b6cc69f37d533122 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston"
Date: Mon, 9 Sep 2024 14:40:33 -0500
Subject: [PATCH] Reduce overhead to check if a host is an IP Address (#9095)

Replace the IPv4/IPv6 regular expressions with cheap heuristics: a host is
treated as IPv4 when it is all digits once the dots are removed, and as
IPv6 when it contains a colon. The checks only need to tell an IP literal
apart from a domain name; they do not validate the address.

Review note: bytes-like hosts are normalised with bytes() before they are
inspected. memoryview has no .decode() method and its "in" operator
compares integer elements, so without the conversion is_ipv4_address()
raises AttributeError and is_ipv6_address() always returns False for a
memoryview. bytes.isdigit() is ASCII-only, so non-ASCII bytes now yield
False instead of raising UnicodeDecodeError.

---
 CHANGES/9095.misc.rst |  1 +
 aiohttp/helpers.py    | 66 +++++++++++++++++++++++--------------------
 tests/test_helpers.py | 28 ++++++++----------
 3 files changed, 49 insertions(+), 46 deletions(-)
 create mode 100644 CHANGES/9095.misc.rst

diff --git a/CHANGES/9095.misc.rst b/CHANGES/9095.misc.rst
new file mode 100644
index 00000000000..f4a06cb09d6
--- /dev/null
+++ b/CHANGES/9095.misc.rst
@@ -0,0 +1 @@
+Improved performance of checking if a host is an IP Address -- by :user:`bdraco`.
diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py
index cce5d3d281b..abbc628261e 100644
--- a/aiohttp/helpers.py
+++ b/aiohttp/helpers.py
@@ -36,7 +36,6 @@
     List,
     Mapping,
     Optional,
-    Pattern,
     Protocol,
     Tuple,
     Type,
@@ -484,44 +483,51 @@ def __set__(self, inst: _TSelf[_T], value: _T) -> None:
 except ImportError:
     pass
 
-_ipv4_pattern = (
-    r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
-    r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
-)
-_ipv6_pattern = (
-    r"^(?:(?:(?:[A-F0-9]{1,4}:){6}|(?=(?:[A-F0-9]{0,4}:){0,6}"
-    r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}$)(([0-9A-F]{1,4}:){0,5}|:)"
-    r"((:[0-9A-F]{1,4}){1,5}:|:)|::(?:[A-F0-9]{1,4}:){5})"
-    r"(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}"
-    r"(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])|(?:[A-F0-9]{1,4}:){7}"
-    r"[A-F0-9]{1,4}|(?=(?:[A-F0-9]{0,4}:){0,7}[A-F0-9]{0,4}$)"
-    r"(([0-9A-F]{1,4}:){1,7}|:)((:[0-9A-F]{1,4}){1,7}|:)|(?:[A-F0-9]{1,4}:){7}"
-    r":|:(:[A-F0-9]{1,4}){7})$"
-)
-_ipv4_regex = re.compile(_ipv4_pattern)
-_ipv6_regex = re.compile(_ipv6_pattern, flags=re.IGNORECASE)
-_ipv4_regexb = re.compile(_ipv4_pattern.encode("ascii"))
-_ipv6_regexb = re.compile(_ipv6_pattern.encode("ascii"), flags=re.IGNORECASE)
 
+def is_ipv4_address(host: Optional[Union[str, bytes]]) -> bool:
+    """Check if host looks like an IPv4 address.
 
-def _is_ip_address(
-    regex: Pattern[str], regexb: Pattern[bytes], host: Optional[Union[str, bytes]]
-) -> bool:
-    if host is None:
+    This function does not validate that the format is correct, only that
+    the host is a str or bytes, and it is all numeric.
+
+    This check is only meant as a heuristic to ensure that
+    a host is not a domain name.
+    """
+    if not host:
         return False
+    # For a host to be an ipv4 address, it must be all numeric.
     if isinstance(host, str):
-        return bool(regex.match(host))
-    elif isinstance(host, (bytes, bytearray, memoryview)):
-        return bool(regexb.match(host))
-    else:
-        raise TypeError(f"{host} [{type(host)}] is not a str or bytes")
+        return host.replace(".", "").isdigit()
+    if isinstance(host, (bytes, bytearray, memoryview)):
+        return bytes(host).replace(b".", b"").isdigit()  # memoryview lacks .decode()
+    raise TypeError(f"{host} [{type(host)}] is not a str or bytes")
 
 
-is_ipv4_address = functools.partial(_is_ip_address, _ipv4_regex, _ipv4_regexb)
-is_ipv6_address = functools.partial(_is_ip_address, _ipv6_regex, _ipv6_regexb)
+def is_ipv6_address(host: Optional[Union[str, bytes]]) -> bool:
+    """Check if host looks like an IPv6 address.
+
+    This function does not validate that the format is correct, only that
+    the host contains a colon and that it is a str or bytes.
+
+    This check is only meant as a heuristic to ensure that
+    a host is not a domain name.
+    """
+    if not host:
+        return False
+    # The host must contain a colon to be an IPv6 address.
+    if isinstance(host, str):
+        return ":" in host
+    if isinstance(host, (bytes, bytearray, memoryview)):
+        return b":" in bytes(host)  # "in" on a memoryview compares ints
+    raise TypeError(f"{host} [{type(host)}] is not a str or bytes")
 
 
 def is_ip_address(host: Optional[Union[str, bytes, bytearray, memoryview]]) -> bool:
+    """Check if host looks like an IP Address.
+
+    This check is only meant as a heuristic to ensure that
+    a host is not a domain name.
+    """
     return is_ipv4_address(host) or is_ipv6_address(host)
 
 
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 48e1fc543b2..e79e168e753 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -286,14 +286,6 @@ def test_is_ip_address() -> None:
     assert not helpers.is_ip_address("localhost")
     assert not helpers.is_ip_address("www.example.com")
 
-    # Out of range
-    assert not helpers.is_ip_address("999.999.999.999")
-    # Contain a port
-    assert not helpers.is_ip_address("127.0.0.1:80")
-    assert not helpers.is_ip_address("[2001:db8:0:1]:80")
-    # Too many "::"
-    assert not helpers.is_ip_address("1200::AB00:1234::2552:7777:1313")
-
 
 def test_is_ip_address_bytes() -> None:
     assert helpers.is_ip_address(b"127.0.0.1")
@@ -304,14 +296,6 @@ def test_is_ip_address_bytes() -> None:
     assert not helpers.is_ip_address(b"localhost")
     assert not helpers.is_ip_address(b"www.example.com")
 
-    # Out of range
-    assert not helpers.is_ip_address(b"999.999.999.999")
-    # Contain a port
-    assert not helpers.is_ip_address(b"127.0.0.1:80")
-    assert not helpers.is_ip_address(b"[2001:db8:0:1]:80")
-    # Too many "::"
-    assert not helpers.is_ip_address(b"1200::AB00:1234::2552:7777:1313")
-
 
 def test_ipv4_addresses() -> None:
     ip_addresses = [
@@ -360,6 +344,18 @@ def test_is_ip_address_invalid_type() -> None:
     with pytest.raises(TypeError):
         helpers.is_ip_address(object())  # type: ignore[arg-type]
 
+    with pytest.raises(TypeError):
+        helpers.is_ipv4_address(123)  # type: ignore[arg-type]
+
+    with pytest.raises(TypeError):
+        helpers.is_ipv4_address(object())  # type: ignore[arg-type]
+
+    with pytest.raises(TypeError):
+        helpers.is_ipv6_address(123)  # type: ignore[arg-type]
+
+    with pytest.raises(TypeError):
+        helpers.is_ipv6_address(object())  # type: ignore[arg-type]
+
 
 # ----------------------------------- TimeoutHandle -------------------
 