From 50aa4ca8425d0bb668d514b8ee5c6aeacb605b27 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Wed, 16 Oct 2024 16:21:47 -0700 Subject: [PATCH] Speed up stubs suggestions (#17965) See https://github.com/python/mypy/issues/17948 This is starting to show up on profiles - 1.01x faster on clean (below noise) - 1.02x faster on long - 1.02x faster on openai - 1.01x faster on openai incremental I had a dumb bug that was preventing the optimisation for a while, I'll see if I can make it even faster. Currently it's a small improvement We could also get rid of the legacy stuff in mypy 2.0 --- mypy/build.py | 21 +++++--------- mypy/modulefinder.py | 5 ++-- mypy/stubinfo.py | 60 ++++++++++++++++++++++++++++++--------- mypy/test/teststubinfo.py | 42 ++++++++++++++++++++++++--- 4 files changed, 93 insertions(+), 35 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index fd1c63868a69..52c11e065b63 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -92,7 +92,7 @@ from mypy.plugins.default import DefaultPlugin from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor from mypy.stats import dump_type_stats -from mypy.stubinfo import legacy_bundled_packages, non_bundled_packages, stub_distribution_name +from mypy.stubinfo import is_module_from_legacy_bundled_package, stub_distribution_name from mypy.types import Type from mypy.typestate import reset_global_state, type_state from mypy.util import json_dumps, json_loads @@ -2658,17 +2658,13 @@ def find_module_and_diagnose( ignore_missing_imports = options.ignore_missing_imports - id_components = id.split(".") # Don't honor a global (not per-module) ignore_missing_imports # setting for modules that used to have bundled stubs, as # otherwise updating mypy can silently result in new false # negatives. (Unless there are stubs but they are incomplete.) global_ignore_missing_imports = manager.options.ignore_missing_imports if ( - any( - ".".join(id_components[:i]) in legacy_bundled_packages - for i in range(len(id_components), 0, -1) - ) + is_module_from_legacy_bundled_package(id) and global_ignore_missing_imports and not options.ignore_missing_imports_per_module and result is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED @@ -2789,18 +2785,15 @@ def module_not_found( code = codes.IMPORT errors.report(line, 0, msg.format(module=target), code=code) - components = target.split(".") - for i in range(len(components), 0, -1): - module = ".".join(components[:i]) - if module in legacy_bundled_packages or module in non_bundled_packages: - break - + dist = stub_distribution_name(target) for note in notes: if "{stub_dist}" in note: - note = note.format(stub_dist=stub_distribution_name(module)) + assert dist is not None + note = note.format(stub_dist=dist) errors.report(line, 0, note, severity="note", only_once=True, code=code) if reason is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED: - manager.missing_stub_packages.add(stub_distribution_name(module)) + assert dist is not None + manager.missing_stub_packages.add(dist) errors.set_import_context(save_import_context) diff --git a/mypy/modulefinder.py b/mypy/modulefinder.py index 59a71025f71e..94d8a5d59e1f 100644 --- a/mypy/modulefinder.py +++ b/mypy/modulefinder.py @@ -331,9 +331,8 @@ def _find_module_non_stub_helper( # If this is not a directory then we can't traverse further into it if not self.fscache.isdir(dir_path): break - for i in range(len(components), 0, -1): - if approved_stub_package_exists(".".join(components[:i])): - return ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED + if approved_stub_package_exists(".".join(components)): + return ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED if plausible_match: return ModuleNotFoundReason.FOUND_WITHOUT_TYPE_HINTS else: diff --git a/mypy/stubinfo.py b/mypy/stubinfo.py index 9d8dfbe43f37..0ec64b037fee 100644 --- a/mypy/stubinfo.py +++ b/mypy/stubinfo.py @@ -1,22 +1,53 @@ from __future__ import annotations -def is_legacy_bundled_package(prefix: str) -> bool: - return prefix in legacy_bundled_packages +def is_module_from_legacy_bundled_package(module: str) -> bool: + top_level = module.split(".", 1)[0] + return top_level in legacy_bundled_packages -def approved_stub_package_exists(prefix: str) -> bool: - return is_legacy_bundled_package(prefix) or prefix in non_bundled_packages +def approved_stub_package_exists(module: str) -> bool: + top_level = module.split(".", 1)[0] + if top_level in legacy_bundled_packages: + return True + if top_level in non_bundled_packages_flat: + return True + if top_level in non_bundled_packages_namespace: + namespace = non_bundled_packages_namespace[top_level] + components = module.split(".") + for i in range(len(components), 0, -1): + module = ".".join(components[:i]) + if module in namespace: + return True + return False -def stub_distribution_name(prefix: str) -> str: - return legacy_bundled_packages.get(prefix) or non_bundled_packages[prefix] +def stub_distribution_name(module: str) -> str | None: + top_level = module.split(".", 1)[0] + + dist = legacy_bundled_packages.get(top_level) + if dist: + return dist + dist = non_bundled_packages_flat.get(top_level) + if dist: + return dist + + if top_level in non_bundled_packages_namespace: + namespace = non_bundled_packages_namespace[top_level] + components = module.split(".") + for i in range(len(components), 0, -1): + module = ".".join(components[:i]) + dist = namespace.get(module) + if dist: + return dist + + return None # Stubs for these third-party packages used to be shipped with mypy. # # Map package name to PyPI stub distribution name. -legacy_bundled_packages = { +legacy_bundled_packages: dict[str, str] = { "aiofiles": "types-aiofiles", "bleach": "types-bleach", "boto": "types-boto", @@ -32,7 +63,6 @@ def stub_distribution_name(prefix: str) -> str: "docutils": "types-docutils", "first": "types-first", "gflags": "types-python-gflags", - "google.protobuf": "types-protobuf", "markdown": "types-Markdown", "mock": "types-mock", "OpenSSL": "types-pyOpenSSL", @@ -66,20 +96,17 @@ def stub_distribution_name(prefix: str) -> str: # include packages that have a release that includes PEP 561 type # information. # -# Package name can have one or two components ('a' or 'a.b'). -# # Note that these packages are omitted for now: # pika: typeshed's stubs are on PyPI as types-pika-ts. # types-pika already exists on PyPI, and is more complete in many ways, # but is a non-typeshed stubs package. -non_bundled_packages = { +non_bundled_packages_flat: dict[str, str] = { "MySQLdb": "types-mysqlclient", "PIL": "types-Pillow", "PyInstaller": "types-pyinstaller", "Xlib": "types-python-xlib", "aws_xray_sdk": "types-aws-xray-sdk", "babel": "types-babel", - "backports.ssl_match_hostname": "types-backports.ssl_match_hostname", "braintree": "types-braintree", "bs4": "types-beautifulsoup4", "bugbear": "types-flake8-bugbear", @@ -107,7 +134,6 @@ def stub_distribution_name(prefix: str) -> str: "flask_migrate": "types-Flask-Migrate", "fpdf": "types-fpdf2", "gdb": "types-gdb", - "google.cloud.ndb": "types-google-cloud-ndb", "hdbcli": "types-hdbcli", "html5lib": "types-html5lib", "httplib2": "types-httplib2", @@ -123,7 +149,6 @@ def stub_distribution_name(prefix: str) -> str: "oauthlib": "types-oauthlib", "openpyxl": "types-openpyxl", "opentracing": "types-opentracing", - "paho.mqtt": "types-paho-mqtt", "parsimonious": "types-parsimonious", "passlib": "types-passlib", "passpy": "types-passpy", @@ -171,3 +196,10 @@ def stub_distribution_name(prefix: str) -> str: "pandas": "pandas-stubs", # https://github.com/pandas-dev/pandas-stubs "lxml": "lxml-stubs", # https://github.com/lxml/lxml-stubs } + + +non_bundled_packages_namespace: dict[str, dict[str, str]] = { + "backports": {"backports.ssl_match_hostname": "types-backports.ssl_match_hostname"}, + "google": {"google.cloud.ndb": "types-google-cloud-ndb", "google.protobuf": "types-protobuf"}, + "paho": {"paho.mqtt": "types-paho-mqtt"}, +} diff --git a/mypy/test/teststubinfo.py b/mypy/test/teststubinfo.py index eccee90244f3..10ce408e7023 100644 --- a/mypy/test/teststubinfo.py +++ b/mypy/test/teststubinfo.py @@ -2,11 +2,45 @@ import unittest -from mypy.stubinfo import is_legacy_bundled_package +from mypy.stubinfo import ( + approved_stub_package_exists, + is_module_from_legacy_bundled_package, + legacy_bundled_packages, + non_bundled_packages_flat, + stub_distribution_name, +) class TestStubInfo(unittest.TestCase): def test_is_legacy_bundled_packages(self) -> None: - assert not is_legacy_bundled_package("foobar_asdf") - assert is_legacy_bundled_package("pycurl") - assert is_legacy_bundled_package("dataclasses") + assert not is_module_from_legacy_bundled_package("foobar_asdf") + assert not is_module_from_legacy_bundled_package("PIL") + assert is_module_from_legacy_bundled_package("pycurl") + assert is_module_from_legacy_bundled_package("dataclasses") + + def test_approved_stub_package_exists(self) -> None: + assert not approved_stub_package_exists("foobar_asdf") + assert approved_stub_package_exists("pycurl") + assert approved_stub_package_exists("babel") + assert approved_stub_package_exists("google.cloud.ndb") + assert approved_stub_package_exists("google.cloud.ndb.submodule") + assert not approved_stub_package_exists("google.cloud.unknown") + assert approved_stub_package_exists("google.protobuf") + assert approved_stub_package_exists("google.protobuf.submodule") + assert not approved_stub_package_exists("google") + + def test_stub_distribution_name(self) -> None: + assert stub_distribution_name("foobar_asdf") is None + assert stub_distribution_name("pycurl") == "types-pycurl" + assert stub_distribution_name("babel") == "types-babel" + assert stub_distribution_name("google.cloud.ndb") == "types-google-cloud-ndb" + assert stub_distribution_name("google.cloud.ndb.submodule") == "types-google-cloud-ndb" + assert stub_distribution_name("google.cloud.unknown") is None + assert stub_distribution_name("google.protobuf") == "types-protobuf" + assert stub_distribution_name("google.protobuf.submodule") == "types-protobuf" + assert stub_distribution_name("google") is None + + def test_period_in_top_level(self) -> None: + for packages in (non_bundled_packages_flat, legacy_bundled_packages): + for top_level_module in packages: + assert "." not in top_level_module