Skip to content

Commit

Permalink
Speed up stubs suggestions (#17965)
Browse files Browse the repository at this point in the history
See #17948
This is starting to show up in profiles.

- 1.01x faster on clean (below noise)
- 1.02x faster on long
- 1.02x faster on openai
- 1.01x faster on openai incremental

I had a dumb bug that was preventing the optimisation for a while; I'll
see if I can make it even faster. Currently it's a small improvement.

We could also get rid of the legacy stuff in mypy 2.0
  • Loading branch information
hauntsaninja committed Oct 20, 2024
1 parent 7c27808 commit 50aa4ca
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 35 deletions.
21 changes: 7 additions & 14 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
from mypy.plugins.default import DefaultPlugin
from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor
from mypy.stats import dump_type_stats
from mypy.stubinfo import legacy_bundled_packages, non_bundled_packages, stub_distribution_name
from mypy.stubinfo import is_module_from_legacy_bundled_package, stub_distribution_name
from mypy.types import Type
from mypy.typestate import reset_global_state, type_state
from mypy.util import json_dumps, json_loads
Expand Down Expand Up @@ -2658,17 +2658,13 @@ def find_module_and_diagnose(

ignore_missing_imports = options.ignore_missing_imports

id_components = id.split(".")
# Don't honor a global (not per-module) ignore_missing_imports
# setting for modules that used to have bundled stubs, as
# otherwise updating mypy can silently result in new false
# negatives. (Unless there are stubs but they are incomplete.)
global_ignore_missing_imports = manager.options.ignore_missing_imports
if (
any(
".".join(id_components[:i]) in legacy_bundled_packages
for i in range(len(id_components), 0, -1)
)
is_module_from_legacy_bundled_package(id)
and global_ignore_missing_imports
and not options.ignore_missing_imports_per_module
and result is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
Expand Down Expand Up @@ -2789,18 +2785,15 @@ def module_not_found(
code = codes.IMPORT
errors.report(line, 0, msg.format(module=target), code=code)

components = target.split(".")
for i in range(len(components), 0, -1):
module = ".".join(components[:i])
if module in legacy_bundled_packages or module in non_bundled_packages:
break

dist = stub_distribution_name(target)
for note in notes:
if "{stub_dist}" in note:
note = note.format(stub_dist=stub_distribution_name(module))
assert dist is not None
note = note.format(stub_dist=dist)
errors.report(line, 0, note, severity="note", only_once=True, code=code)
if reason is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED:
manager.missing_stub_packages.add(stub_distribution_name(module))
assert dist is not None
manager.missing_stub_packages.add(dist)
errors.set_import_context(save_import_context)


Expand Down
5 changes: 2 additions & 3 deletions mypy/modulefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,8 @@ def _find_module_non_stub_helper(
# If this is not a directory then we can't traverse further into it
if not self.fscache.isdir(dir_path):
break
for i in range(len(components), 0, -1):
if approved_stub_package_exists(".".join(components[:i])):
return ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
if approved_stub_package_exists(".".join(components)):
return ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
if plausible_match:
return ModuleNotFoundReason.FOUND_WITHOUT_TYPE_HINTS
else:
Expand Down
60 changes: 46 additions & 14 deletions mypy/stubinfo.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,53 @@
from __future__ import annotations


def is_legacy_bundled_package(prefix: str) -> bool:
return prefix in legacy_bundled_packages
def is_module_from_legacy_bundled_package(module: str) -> bool:
top_level = module.split(".", 1)[0]
return top_level in legacy_bundled_packages


def approved_stub_package_exists(prefix: str) -> bool:
return is_legacy_bundled_package(prefix) or prefix in non_bundled_packages
def approved_stub_package_exists(module: str) -> bool:
top_level = module.split(".", 1)[0]
if top_level in legacy_bundled_packages:
return True
if top_level in non_bundled_packages_flat:
return True
if top_level in non_bundled_packages_namespace:
namespace = non_bundled_packages_namespace[top_level]
components = module.split(".")
for i in range(len(components), 0, -1):
module = ".".join(components[:i])
if module in namespace:
return True
return False


def stub_distribution_name(prefix: str) -> str:
return legacy_bundled_packages.get(prefix) or non_bundled_packages[prefix]
def stub_distribution_name(module: str) -> str | None:
top_level = module.split(".", 1)[0]

dist = legacy_bundled_packages.get(top_level)
if dist:
return dist
dist = non_bundled_packages_flat.get(top_level)
if dist:
return dist

if top_level in non_bundled_packages_namespace:
namespace = non_bundled_packages_namespace[top_level]
components = module.split(".")
for i in range(len(components), 0, -1):
module = ".".join(components[:i])
dist = namespace.get(module)
if dist:
return dist

return None


# Stubs for these third-party packages used to be shipped with mypy.
#
# Map package name to PyPI stub distribution name.
legacy_bundled_packages = {
legacy_bundled_packages: dict[str, str] = {
"aiofiles": "types-aiofiles",
"bleach": "types-bleach",
"boto": "types-boto",
Expand All @@ -32,7 +63,6 @@ def stub_distribution_name(prefix: str) -> str:
"docutils": "types-docutils",
"first": "types-first",
"gflags": "types-python-gflags",
"google.protobuf": "types-protobuf",
"markdown": "types-Markdown",
"mock": "types-mock",
"OpenSSL": "types-pyOpenSSL",
Expand Down Expand Up @@ -66,20 +96,17 @@ def stub_distribution_name(prefix: str) -> str:
# include packages that have a release that includes PEP 561 type
# information.
#
# Package name can have one or two components ('a' or 'a.b').
#
# Note that these packages are omitted for now:
# pika: typeshed's stubs are on PyPI as types-pika-ts.
# types-pika already exists on PyPI, and is more complete in many ways,
# but is a non-typeshed stubs package.
non_bundled_packages = {
non_bundled_packages_flat: dict[str, str] = {
"MySQLdb": "types-mysqlclient",
"PIL": "types-Pillow",
"PyInstaller": "types-pyinstaller",
"Xlib": "types-python-xlib",
"aws_xray_sdk": "types-aws-xray-sdk",
"babel": "types-babel",
"backports.ssl_match_hostname": "types-backports.ssl_match_hostname",
"braintree": "types-braintree",
"bs4": "types-beautifulsoup4",
"bugbear": "types-flake8-bugbear",
Expand Down Expand Up @@ -107,7 +134,6 @@ def stub_distribution_name(prefix: str) -> str:
"flask_migrate": "types-Flask-Migrate",
"fpdf": "types-fpdf2",
"gdb": "types-gdb",
"google.cloud.ndb": "types-google-cloud-ndb",
"hdbcli": "types-hdbcli",
"html5lib": "types-html5lib",
"httplib2": "types-httplib2",
Expand All @@ -123,7 +149,6 @@ def stub_distribution_name(prefix: str) -> str:
"oauthlib": "types-oauthlib",
"openpyxl": "types-openpyxl",
"opentracing": "types-opentracing",
"paho.mqtt": "types-paho-mqtt",
"parsimonious": "types-parsimonious",
"passlib": "types-passlib",
"passpy": "types-passpy",
Expand Down Expand Up @@ -171,3 +196,10 @@ def stub_distribution_name(prefix: str) -> str:
"pandas": "pandas-stubs", # https://github.com/pandas-dev/pandas-stubs
"lxml": "lxml-stubs", # https://github.com/lxml/lxml-stubs
}


non_bundled_packages_namespace: dict[str, dict[str, str]] = {
"backports": {"backports.ssl_match_hostname": "types-backports.ssl_match_hostname"},
"google": {"google.cloud.ndb": "types-google-cloud-ndb", "google.protobuf": "types-protobuf"},
"paho": {"paho.mqtt": "types-paho-mqtt"},
}
42 changes: 38 additions & 4 deletions mypy/test/teststubinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,45 @@

import unittest

from mypy.stubinfo import is_legacy_bundled_package
from mypy.stubinfo import (
approved_stub_package_exists,
is_module_from_legacy_bundled_package,
legacy_bundled_packages,
non_bundled_packages_flat,
stub_distribution_name,
)


class TestStubInfo(unittest.TestCase):
def test_is_legacy_bundled_packages(self) -> None:
assert not is_legacy_bundled_package("foobar_asdf")
assert is_legacy_bundled_package("pycurl")
assert is_legacy_bundled_package("dataclasses")
assert not is_module_from_legacy_bundled_package("foobar_asdf")
assert not is_module_from_legacy_bundled_package("PIL")
assert is_module_from_legacy_bundled_package("pycurl")
assert is_module_from_legacy_bundled_package("dataclasses")

def test_approved_stub_package_exists(self) -> None:
assert not approved_stub_package_exists("foobar_asdf")
assert approved_stub_package_exists("pycurl")
assert approved_stub_package_exists("babel")
assert approved_stub_package_exists("google.cloud.ndb")
assert approved_stub_package_exists("google.cloud.ndb.submodule")
assert not approved_stub_package_exists("google.cloud.unknown")
assert approved_stub_package_exists("google.protobuf")
assert approved_stub_package_exists("google.protobuf.submodule")
assert not approved_stub_package_exists("google")

def test_stub_distribution_name(self) -> None:
assert stub_distribution_name("foobar_asdf") is None
assert stub_distribution_name("pycurl") == "types-pycurl"
assert stub_distribution_name("babel") == "types-babel"
assert stub_distribution_name("google.cloud.ndb") == "types-google-cloud-ndb"
assert stub_distribution_name("google.cloud.ndb.submodule") == "types-google-cloud-ndb"
assert stub_distribution_name("google.cloud.unknown") is None
assert stub_distribution_name("google.protobuf") == "types-protobuf"
assert stub_distribution_name("google.protobuf.submodule") == "types-protobuf"
assert stub_distribution_name("google") is None

def test_period_in_top_level(self) -> None:
for packages in (non_bundled_packages_flat, legacy_bundled_packages):
for top_level_module in packages:
assert "." not in top_level_module

0 comments on commit 50aa4ca

Please sign in to comment.