From 7a303fc78a6dbd1b4c01f8a2b14ddcae29f4271b Mon Sep 17 00:00:00 2001 From: Alyssa Coghlan Date: Wed, 9 Oct 2024 20:40:50 +1000 Subject: [PATCH 001/114] Fix importlib.resources issue reference in 3.13 What's New (#125175) Previous link was to the PR that removed the mentioned importlib.resources APIs, rather than the issue that added back their improved forms. --- Doc/whatsnew/3.13.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 565f74149725d5..a2897097aaba57 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -922,12 +922,12 @@ importlib * :func:`~importlib.resources.read_text` These functions are no longer deprecated and are not scheduled for removal. - (Contributed by Petr Viktorin in :gh:`106532`.) + (Contributed by Petr Viktorin in :gh:`116608`.) * :func:`~importlib.resources.contents` remains deprecated in favor of the fully-featured :class:`~importlib.resources.abc.Traversable` API. However, there is now no plan to remove it. - (Contributed by Petr Viktorin in :gh:`106532`.) + (Contributed by Petr Viktorin in :gh:`116608`.) 
io From 3024b16d51bb7f74177c5a5038cc9a56fd2b26bd Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 9 Oct 2024 11:53:57 +0100 Subject: [PATCH 002/114] gh-101100: Consolidate documentation on `ModuleType` attributes (#124709) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Barry Warsaw Co-authored-by: Jelle Zijlstra --- Doc/c-api/import.rst | 26 +- Doc/c-api/module.rst | 22 +- Doc/deprecations/pending-removal-in-3.14.rst | 7 + Doc/deprecations/pending-removal-in-3.15.rst | 15 +- Doc/glossary.rst | 11 +- Doc/library/ast.rst | 2 +- Doc/library/importlib.rst | 87 +++--- Doc/library/pkgutil.rst | 3 +- Doc/library/sys.rst | 3 +- Doc/library/types.rst | 65 +---- Doc/reference/datamodel.rst | 256 +++++++++++++++--- Doc/reference/import.rst | 169 ++---------- Doc/tutorial/modules.rst | 5 +- Doc/whatsnew/2.6.rst | 8 +- Doc/whatsnew/3.0.rst | 4 +- Doc/whatsnew/3.12.rst | 13 +- Doc/whatsnew/3.2.rst | 4 +- Doc/whatsnew/3.4.rst | 3 +- Doc/whatsnew/3.5.rst | 4 +- Misc/NEWS.d/3.10.0a2.rst | 4 +- Misc/NEWS.d/3.11.0a5.rst | 4 +- Misc/NEWS.d/3.12.0a1.rst | 4 +- ...-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst | 4 +- 23 files changed, 379 insertions(+), 344 deletions(-) diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst index 8108a5015be972..6e48644c8fef8b 100644 --- a/Doc/c-api/import.rst +++ b/Doc/c-api/import.rst @@ -136,14 +136,14 @@ Importing Modules such modules have no way to know that the module object is an unknown (and probably damaged with respect to the module author's intents) state. - The module's :attr:`__spec__` and :attr:`__loader__` will be set, if - not set already, with the appropriate values. The spec's loader will - be set to the module's ``__loader__`` (if set) and to an instance of - :class:`~importlib.machinery.SourceFileLoader` otherwise. + The module's :attr:`~module.__spec__` and :attr:`~module.__loader__` will be + set, if not set already, with the appropriate values. 
The spec's loader + will be set to the module's :attr:`!__loader__` (if set) and to an instance + of :class:`~importlib.machinery.SourceFileLoader` otherwise. - The module's :attr:`__file__` attribute will be set to the code object's - :attr:`~codeobject.co_filename`. If applicable, :attr:`__cached__` will also - be set. + The module's :attr:`~module.__file__` attribute will be set to the code + object's :attr:`~codeobject.co_filename`. If applicable, + :attr:`~module.__cached__` will also be set. This function will reload the module if it was already imported. See :c:func:`PyImport_ReloadModule` for the intended way to reload a module. @@ -155,29 +155,29 @@ Importing Modules :c:func:`PyImport_ExecCodeModuleWithPathnames`. .. versionchanged:: 3.12 - The setting of :attr:`__cached__` and :attr:`__loader__` is - deprecated. See :class:`~importlib.machinery.ModuleSpec` for + The setting of :attr:`~module.__cached__` and :attr:`~module.__loader__` + is deprecated. See :class:`~importlib.machinery.ModuleSpec` for alternatives. .. c:function:: PyObject* PyImport_ExecCodeModuleEx(const char *name, PyObject *co, const char *pathname) - Like :c:func:`PyImport_ExecCodeModule`, but the :attr:`__file__` attribute of - the module object is set to *pathname* if it is non-``NULL``. + Like :c:func:`PyImport_ExecCodeModule`, but the :attr:`~module.__file__` + attribute of the module object is set to *pathname* if it is non-``NULL``. See also :c:func:`PyImport_ExecCodeModuleWithPathnames`. .. c:function:: PyObject* PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname, PyObject *cpathname) - Like :c:func:`PyImport_ExecCodeModuleEx`, but the :attr:`__cached__` + Like :c:func:`PyImport_ExecCodeModuleEx`, but the :attr:`~module.__cached__` attribute of the module object is set to *cpathname* if it is non-``NULL``. Of the three functions, this is the preferred one to use. .. versionadded:: 3.3 .. versionchanged:: 3.12 - Setting :attr:`__cached__` is deprecated. 
See + Setting :attr:`~module.__cached__` is deprecated. See :class:`~importlib.machinery.ModuleSpec` for alternatives. diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst index ec61be284caad9..f82a050ab75de0 100644 --- a/Doc/c-api/module.rst +++ b/Doc/c-api/module.rst @@ -37,18 +37,19 @@ Module Objects single: __package__ (module attribute) single: __loader__ (module attribute) - Return a new module object with the :attr:`__name__` attribute set to *name*. - The module's :attr:`__name__`, :attr:`__doc__`, :attr:`__package__`, and - :attr:`__loader__` attributes are filled in (all but :attr:`__name__` are set - to ``None``); the caller is responsible for providing a :attr:`__file__` - attribute. + Return a new module object with :attr:`module.__name__` set to *name*. + The module's :attr:`!__name__`, :attr:`~module.__doc__`, + :attr:`~module.__package__` and :attr:`~module.__loader__` attributes are + filled in (all but :attr:`!__name__` are set to ``None``). The caller is + responsible for setting a :attr:`~module.__file__` attribute. Return ``NULL`` with an exception set on error. .. versionadded:: 3.3 .. versionchanged:: 3.4 - :attr:`__package__` and :attr:`__loader__` are set to ``None``. + :attr:`~module.__package__` and :attr:`~module.__loader__` are now set to + ``None``. .. c:function:: PyObject* PyModule_New(const char *name) @@ -77,8 +78,9 @@ Module Objects single: __name__ (module attribute) single: SystemError (built-in exception) - Return *module*'s :attr:`__name__` value. If the module does not provide one, - or if it is not a string, :exc:`SystemError` is raised and ``NULL`` is returned. + Return *module*'s :attr:`~module.__name__` value. If the module does not + provide one, or if it is not a string, :exc:`SystemError` is raised and + ``NULL`` is returned. .. 
versionadded:: 3.3 @@ -108,8 +110,8 @@ Module Objects single: SystemError (built-in exception) Return the name of the file from which *module* was loaded using *module*'s - :attr:`__file__` attribute. If this is not defined, or if it is not a - unicode string, raise :exc:`SystemError` and return ``NULL``; otherwise return + :attr:`~module.__file__` attribute. If this is not defined, or if it is not a + string, raise :exc:`SystemError` and return ``NULL``; otherwise return a reference to a Unicode object. .. versionadded:: 3.2 diff --git a/Doc/deprecations/pending-removal-in-3.14.rst b/Doc/deprecations/pending-removal-in-3.14.rst index 452d6643e1d146..de30f4695059ed 100644 --- a/Doc/deprecations/pending-removal-in-3.14.rst +++ b/Doc/deprecations/pending-removal-in-3.14.rst @@ -1,6 +1,13 @@ Pending Removal in Python 3.14 ------------------------------ +* The import system: + + * Setting :attr:`~module.__loader__` on a module while + failing to set :attr:`__spec__.loader <importlib.machinery.ModuleSpec.loader>` + is deprecated. In Python 3.14, :attr:`!__loader__` will cease to be set or + taken into consideration by the import system or the standard library. + * :mod:`argparse`: The *type*, *choices*, and *metavar* parameters of :class:`!argparse.BooleanOptionalAction` are deprecated and will be removed in 3.14. diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index b921b4f97d524e..a55fb6bea3fdaa 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -1,6 +1,18 @@ Pending Removal in Python 3.15 ------------------------------ +* The import system: + + * Setting :attr:`~module.__cached__` on a module while + failing to set :attr:`__spec__.cached <importlib.machinery.ModuleSpec.cached>` + is deprecated. In Python 3.15, :attr:`!__cached__` will cease to be set or + taken into consideration by the import system or standard library. 
(:gh:`97879`) + + * Setting :attr:`~module.__package__` on a module while + failing to set :attr:`__spec__.parent <importlib.machinery.ModuleSpec.parent>` + is deprecated. In Python 3.15, :attr:`!__package__` will cease to be set or + taken into consideration by the import system or standard library. (:gh:`97879`) + * :mod:`ctypes`: * The undocumented :func:`!ctypes.SetPointerType` function @@ -17,9 +29,6 @@ Pending Removal in Python 3.15 * The :option:`!--cgi` flag to the :program:`python -m http.server` command-line interface has been deprecated since Python 3.13. -* :mod:`importlib`: ``__package__`` and ``__cached__`` will cease to be set or - taken into consideration by the import system (:gh:`97879`). - * :class:`locale`: * The :func:`~locale.getdefaultlocale` function diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 933fb0319452a6..cb7e0a2b89d037 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -461,7 +461,7 @@ Glossary <meta path finder>` for use with :data:`sys.meta_path`, and :term:`path entry finders <path entry finder>` for use with :data:`sys.path_hooks`. - See :ref:`importsystem` and :mod:`importlib` for much more detail. + See :ref:`finders-and-loaders` and :mod:`importlib` for much more detail. floor division Mathematical division that rounds down to nearest integer. The floor @@ -791,8 +791,11 @@ Glossary loader An object that loads a module. It must define a method named :meth:`load_module`. A loader is typically returned by a - :term:`finder`. See :pep:`302` for details and - :class:`importlib.abc.Loader` for an :term:`abstract base class`. + :term:`finder`. See also: + + * :ref:`finders-and-loaders` + * :class:`importlib.abc.Loader` + * :pep:`302` locale encoding On Unix, it is the encoding of the LC_CTYPE locale. It can be set with @@ -862,6 +865,8 @@ Glossary A namespace containing the import-related information used to load a module. An instance of :class:`importlib.machinery.ModuleSpec`. + See also :ref:`module-specs`. + MRO See :term:`method resolution order`. 
diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index a9518859b83478..3d2df035a85c21 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -902,7 +902,7 @@ Statements (indicating a "simple" target). A "simple" target consists solely of a :class:`Name` node that does not appear between parentheses; all other targets are considered complex. Only simple targets appear in - the :attr:`__annotations__` dictionary of modules and classes. + the :attr:`~object.__annotations__` dictionary of modules and classes. .. doctest:: diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst index 27d31f66b12495..9e088a598a6c08 100644 --- a/Doc/library/importlib.rst +++ b/Doc/library/importlib.rst @@ -249,7 +249,7 @@ ABC hierarchy:: An abstract method for finding a :term:`spec ` for the specified module. If this is a top-level import, *path* will be ``None``. Otherwise, this is a search for a subpackage or - module and *path* will be the value of :attr:`__path__` from the + module and *path* will be the value of :attr:`~module.__path__` from the parent package. If a spec cannot be found, ``None`` is returned. When passed in, ``target`` is a module object that the finder may use to make a more educated guess about what spec to return. @@ -355,34 +355,12 @@ ABC hierarchy:: (note that some of these attributes can change when a module is reloaded): - - :attr:`__name__` - The module's fully qualified name. - It is ``'__main__'`` for an executed module. - - - :attr:`__file__` - The location the :term:`loader` used to load the module. - For example, for modules loaded from a .py file this is the filename. - It is not set on all modules (e.g. built-in modules). - - - :attr:`__cached__` - The filename of a compiled version of the module's code. - It is not set on all modules (e.g. built-in modules). - - - :attr:`__path__` - The list of locations where the package's submodules will be found. - Most of the time this is a single directory. 
- The import system passes this attribute to ``__import__()`` and to finders - in the same way as :data:`sys.path` but just for the package. - It is not set on non-package modules so it can be used - as an indicator that the module is a package. - - - :attr:`__package__` - The fully qualified name of the package the module is in (or the - empty string for a top-level module). - If the module is a package then this is the same as :attr:`__name__`. - - - :attr:`__loader__` - The :term:`loader` used to load the module. + - :attr:`module.__name__` + - :attr:`module.__file__` + - :attr:`module.__cached__` *(deprecated)* + - :attr:`module.__path__` + - :attr:`module.__package__` *(deprecated)* + - :attr:`module.__loader__` *(deprecated)* When :meth:`exec_module` is available then backwards-compatible functionality is provided. @@ -418,7 +396,8 @@ ABC hierarchy:: can implement this abstract method to give direct access to the data stored. :exc:`OSError` is to be raised if the *path* cannot be found. The *path* is expected to be constructed using a module's - :attr:`__file__` attribute or an item from a package's :attr:`__path__`. + :attr:`~module.__file__` attribute or an item from a package's + :attr:`~module.__path__`. .. versionchanged:: 3.4 Raises :exc:`OSError` instead of :exc:`NotImplementedError`. @@ -505,9 +484,9 @@ ABC hierarchy:: .. abstractmethod:: get_filename(fullname) - An abstract method that is to return the value of :attr:`__file__` for - the specified module. If no path is available, :exc:`ImportError` is - raised. + An abstract method that is to return the value of + :attr:`~module.__file__` for the specified module. If no path is + available, :exc:`ImportError` is raised. If source code is available, then the method should return the path to the source file, regardless of whether a bytecode was used to load the @@ -1166,43 +1145,45 @@ find and load modules. .. 
class:: ModuleSpec(name, loader, *, origin=None, loader_state=None, is_package=None) A specification for a module's import-system-related state. This is - typically exposed as the module's :attr:`__spec__` attribute. Many + typically exposed as the module's :attr:`~module.__spec__` attribute. Many of these attributes are also available directly on a module: for example, ``module.__spec__.origin == module.__file__``. Note, however, that while the *values* are usually equivalent, they can differ since there is - no synchronization between the two objects. For example, it is possible to update - the module's :attr:`__file__` at runtime and this will not be automatically - reflected in the module's :attr:`__spec__.origin`, and vice versa. + no synchronization between the two objects. For example, it is possible to + update the module's :attr:`~module.__file__` at runtime and this will not be + automatically reflected in the module's + :attr:`__spec__.origin `, and vice versa. .. versionadded:: 3.4 .. attribute:: name - The module's fully qualified name - (see :attr:`__name__` attributes on modules). + The module's fully qualified name (see :attr:`module.__name__`). The :term:`finder` should always set this attribute to a non-empty string. .. attribute:: loader - The :term:`loader` used to load the module - (see :attr:`__loader__` attributes on modules). + The :term:`loader` used to load the module (see :attr:`module.__loader__`). The :term:`finder` should always set this attribute. .. attribute:: origin The location the :term:`loader` should use to load the module - (see :attr:`__file__` attributes on modules). - For example, for modules loaded from a .py file this is the filename. + (see :attr:`module.__file__`). + For example, for modules loaded from a ``.py`` file this is the filename. The :term:`finder` should always set this attribute to a meaningful value for the :term:`loader` to use. 
In the uncommon case that there is not one (like for namespace packages), it should be set to ``None``. .. attribute:: submodule_search_locations - The list of locations where the package's submodules will be found - (see :attr:`__path__` attributes on modules). - Most of the time this is a single directory. - The :term:`finder` should set this attribute to a list, even an empty one, to indicate + A (possibly empty) :term:`sequence` of strings enumerating the locations + in which a package's submodules will be found + (see :attr:`module.__path__`). Most of the time there will only be a + single directory in this list. + + The :term:`finder` should set this attribute to a sequence, even an empty + one, to indicate to the import system that the module is a package. It should be set to ``None`` for non-package modules. It is set automatically later to a special object for namespace packages. @@ -1216,7 +1197,7 @@ find and load modules. .. attribute:: cached The filename of a compiled version of the module's code - (see :attr:`__cached__` attributes on modules). + (see :attr:`module.__cached__`). The :term:`finder` should always set this attribute but it may be ``None`` for modules that do not need compiled code stored. @@ -1224,14 +1205,14 @@ find and load modules. (Read-only) The fully qualified name of the package the module is in (or the empty string for a top-level module). - See :attr:`__package__` attributes on modules. + See :attr:`module.__package__`. If the module is a package then this is the same as :attr:`name`. .. attribute:: has_location ``True`` if the spec's :attr:`origin` refers to a loadable location, - ``False`` otherwise. This value impacts how :attr:`origin` is interpreted - and how the module's :attr:`__file__` is populated. + ``False`` otherwise. This value impacts how :attr:`!origin` is interpreted + and how the module's :attr:`~module.__file__` is populated. .. class:: AppleFrameworkLoader(name, path) @@ -1416,8 +1397,8 @@ an :term:`importer`. 
.. versionchanged:: 3.7 Raises :exc:`ModuleNotFoundError` instead of :exc:`AttributeError` if - **package** is in fact not a package (i.e. lacks a :attr:`__path__` - attribute). + **package** is in fact not a package (i.e. lacks a + :attr:`~module.__path__` attribute). .. function:: module_from_spec(spec) diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst index f095cc84173737..4a39d53a5f1440 100644 --- a/Doc/library/pkgutil.rst +++ b/Doc/library/pkgutil.rst @@ -26,7 +26,8 @@ support. __path__ = extend_path(__path__, __name__) For each directory on :data:`sys.path` that has a subdirectory that matches the - package name, add the subdirectory to the package's :attr:`__path__`. This is useful + package name, add the subdirectory to the package's + :attr:`~module.__path__`. This is useful if one wants to distribute different parts of a single logical package as multiple directories. diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index b0e40a4ea06946..20a06a1ecd1a4c 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1274,7 +1274,8 @@ always available. that implement Python's default import semantics. The :meth:`~importlib.abc.MetaPathFinder.find_spec` method is called with at least the absolute name of the module being imported. If the module to be - imported is contained in a package, then the parent package's :attr:`__path__` + imported is contained in a package, then the parent package's + :attr:`~module.__path__` attribute is passed in as a second argument. The method returns a :term:`module spec`, or ``None`` if the module cannot be found. diff --git a/Doc/library/types.rst b/Doc/library/types.rst index 84b80ec6efd59f..439e119461f798 100644 --- a/Doc/library/types.rst +++ b/Doc/library/types.rst @@ -260,63 +260,18 @@ Standard names are defined for the following types: The type of :term:`modules `. The constructor takes the name of the module to be created and optionally its :term:`docstring`. - .. 
note:: - Use :func:`importlib.util.module_from_spec` to create a new module if you - wish to set the various import-controlled attributes. - - .. attribute:: __doc__ - - The :term:`docstring` of the module. Defaults to ``None``. - - .. attribute:: __loader__ - - The :term:`loader` which loaded the module. Defaults to ``None``. - - This attribute is to match :attr:`importlib.machinery.ModuleSpec.loader` - as stored in the :attr:`__spec__` object. - - .. note:: - A future version of Python may stop setting this attribute by default. - To guard against this potential change, preferably read from the - :attr:`__spec__` attribute instead or use - ``getattr(module, "__loader__", None)`` if you explicitly need to use - this attribute. - - .. versionchanged:: 3.4 - Defaults to ``None``. Previously the attribute was optional. - - .. attribute:: __name__ - - The name of the module. Expected to match - :attr:`importlib.machinery.ModuleSpec.name`. - - .. attribute:: __package__ - - Which :term:`package` a module belongs to. If the module is top-level - (i.e. not a part of any specific package) then the attribute should be set - to ``''``, else it should be set to the name of the package (which can be - :attr:`__name__` if the module is a package itself). Defaults to ``None``. - - This attribute is to match :attr:`importlib.machinery.ModuleSpec.parent` - as stored in the :attr:`__spec__` object. - - .. note:: - A future version of Python may stop setting this attribute by default. - To guard against this potential change, preferably read from the - :attr:`__spec__` attribute instead or use - ``getattr(module, "__package__", None)`` if you explicitly need to use - this attribute. - - .. versionchanged:: 3.4 - Defaults to ``None``. Previously the attribute was optional. - - .. attribute:: __spec__ - - A record of the module's import-system-related state. Expected to be an - instance of :class:`importlib.machinery.ModuleSpec`. + .. seealso:: - .. 
versionadded:: 3.4 + :ref:`Documentation on module objects ` + Provides details on the special attributes that can be found on + instances of :class:`!ModuleType`. + :func:`importlib.util.module_from_spec` + Modules created using the :class:`!ModuleType` constructor are + created with many of their special attributes unset or set to default + values. :func:`!module_from_spec` provides a more robust way of + creating :class:`!ModuleType` instances which ensures the various + attributes are set appropriately. .. data:: EllipsisType diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index d059a660548c7e..f56bd5e8a7803a 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -865,6 +865,8 @@ Instances of arbitrary classes can be made callable by defining a :meth:`~object.__call__` method in their class. +.. _module-objects: + Modules ------- @@ -890,57 +892,243 @@ Attribute assignment updates the module's namespace dictionary, e.g., .. index:: single: __name__ (module attribute) - single: __doc__ (module attribute) + single: __spec__ (module attribute) + single: __package__ (module attribute) + single: __loader__ (module attribute) + single: __path__ (module attribute) single: __file__ (module attribute) + single: __cached__ (module attribute) + single: __doc__ (module attribute) single: __annotations__ (module attribute) single: __annotate__ (module attribute) pair: module; namespace -Predefined (writable) attributes: +.. _import-mod-attrs: + +Import-related attributes on module objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Module objects have the following attributes that relate to the +:ref:`import system `. When a module is created using the machinery associated +with the import system, these attributes are filled in based on the module's +:term:`spec `, before the :term:`loader` executes and loads the +module. 
+ +To create a module dynamically rather than using the import system, +it's recommended to use :func:`importlib.util.module_from_spec`, +which will set the various import-controlled attributes to appropriate values. +It's also possible to use the :class:`types.ModuleType` constructor to create +modules directly, but this technique is more error-prone, as most attributes +must be manually set on the module object after it has been created when using +this approach. + +.. caution:: + + With the exception of :attr:`~module.__name__`, it is **strongly** + recommended that you rely on :attr:`~module.__spec__` and its attributes + instead of any of the other individual attributes listed in this subsection. + Note that updating an attribute on :attr:`!__spec__` will not update the + corresponding attribute on the module itself: + + .. doctest:: + + >>> import typing + >>> typing.__name__, typing.__spec__.name + ('typing', 'typing') + >>> typing.__spec__.name = 'spelling' + >>> typing.__name__, typing.__spec__.name + ('typing', 'spelling') + >>> typing.__name__ = 'keyboard_smashing' + >>> typing.__name__, typing.__spec__.name + ('keyboard_smashing', 'spelling') + +.. attribute:: module.__name__ - :attr:`__name__` - The module's name. + The name used to uniquely identify the module in the import system. + For a directly executed module, this will be set to ``"__main__"``. - :attr:`__doc__` - The module's documentation string, or ``None`` if - unavailable. + This attribute must be set to the fully qualified name of the module. + It is expected to match the value of + :attr:`module.__spec__.name <importlib.machinery.ModuleSpec.name>`. - :attr:`__file__` - The pathname of the file from which the - module was loaded, if it was loaded from a file. - The :attr:`__file__` - attribute may be missing for certain types of modules, such as C modules - that are statically linked into the interpreter. For extension modules - loaded dynamically from a shared library, it's the pathname of the shared - library file. +.. 
attribute:: module.__spec__ - :attr:`~object.__annotations__` - A dictionary containing - :term:`variable annotations ` collected during - module body execution. For best practices on working - with :attr:`!__annotations__`, see :mod:`annotationlib`. + A record of the module's import-system-related state. + + Set to the :class:`module spec ` that was + used when importing the module. See :ref:`module-specs` for more details. + + .. versionadded:: 3.4 + +.. attribute:: module.__package__ + + The :term:`package` a module belongs to. + + If the module is top-level (that is, not a part of any specific package) + then the attribute should be set to ``''`` (the empty string). Otherwise, + it should be set to the name of the module's package (which can be equal to + :attr:`module.__name__` if the module itself is a package). See :pep:`366` + for further details. + + This attribute is used instead of :attr:`~module.__name__` to calculate + explicit relative imports for main modules. It defaults to ``None`` for + modules created dynamically using the :class:`types.ModuleType` constructor; + use :func:`importlib.util.module_from_spec` instead to ensure the attribute + is set to a :class:`str`. + + It is **strongly** recommended that you use + :attr:`module.__spec__.parent ` + instead of :attr:`!module.__package__`. :attr:`__package__` is now only used + as a fallback if :attr:`!__spec__.parent` is not set, and this fallback + path is deprecated. + + .. versionchanged:: 3.4 + This attribute now defaults to ``None`` for modules created dynamically + using the :class:`types.ModuleType` constructor. + Previously the attribute was optional. + + .. versionchanged:: 3.6 + The value of :attr:`!__package__` is expected to be the same as + :attr:`__spec__.parent `. + :attr:`__package__` is now only used as a fallback during import + resolution if :attr:`!__spec__.parent` is not defined. + + .. 
versionchanged:: 3.10 + :exc:`ImportWarning` is raised if an import resolution falls back to + :attr:`!__package__` instead of + :attr:`__spec__.parent `. + + .. versionchanged:: 3.12 + Raise :exc:`DeprecationWarning` instead of :exc:`ImportWarning` when + falling back to :attr:`!__package__` during import resolution. - .. versionchanged:: 3.14 - Annotations are now :ref:`lazily evaluated `. - See :pep:`649`. + .. deprecated-removed:: 3.13 3.15 + :attr:`!__package__` will cease to be set or taken into consideration + by the import system or standard library. - :attr:`~object.__annotate__` - The :term:`annotate function` for this module, or ``None`` - if the module has no annotations. See :attr:`object.__annotate__`. +.. attribute:: module.__loader__ - .. versionadded:: 3.14 + The :term:`loader` object that the import machinery used to load the module. + + This attribute is mostly useful for introspection, but can be used for + additional loader-specific functionality, for example getting data + associated with a loader. + + :attr:`!__loader__` defaults to ``None`` for modules created dynamically + using the :class:`types.ModuleType` constructor; + use :func:`importlib.util.module_from_spec` instead to ensure the attribute + is set to a :term:`loader` object. + + It is **strongly** recommended that you use + :attr:`module.__spec__.loader ` + instead of :attr:`!module.__loader__`. + + .. versionchanged:: 3.4 + This attribute now defaults to ``None`` for modules created dynamically + using the :class:`types.ModuleType` constructor. + Previously the attribute was optional. + + .. deprecated-removed:: 3.12 3.14 + Setting :attr:`!__loader__` on a module while failing to set + :attr:`!__spec__.loader` is deprecated. In Python 3.14, + :attr:`!__loader__` will cease to be set or taken into consideration by + the import system or the standard library. + +.. 
attribute:: module.__path__ + + A (possibly empty) :term:`sequence` of strings enumerating the locations + where the package's submodules will be found. Non-package modules should + not have a :attr:`!__path__` attribute. See :ref:`package-path-rules` for + more details. + + It is **strongly** recommended that you use + :attr:`module.__spec__.submodule_search_locations ` + instead of :attr:`!module.__path__`. + +.. attribute:: module.__file__ +.. attribute:: module.__cached__ + + :attr:`!__file__` and :attr:`!__cached__` are both optional attributes that + may or may not be set. Both attributes should be a :class:`str` when they + are available. + + :attr:`!__file__` indicates the pathname of the file from which the module + was loaded (if loaded from a file), or the pathname of the shared library + file for extension modules loaded dynamically from a shared library. + It might be missing for certain types of modules, such as C modules that are + statically linked into the interpreter, and the + :ref:`import system ` may opt to leave it unset if it + has no semantic meaning (for example, a module loaded from a database). + + If :attr:`!__file__` is set then the :attr:`!__cached__` attribute might + also be set, which is the path to any compiled version of + the code (for example, a byte-compiled file). The file does not need to exist + to set this attribute; the path can simply point to where the + compiled file *would* exist (see :pep:`3147`). + + Note that :attr:`!__cached__` may be set even if :attr:`!__file__` is not + set. However, that scenario is quite atypical. Ultimately, the + :term:`loader` is what makes use of the module spec provided by the + :term:`finder` (from which :attr:`!__file__` and :attr:`!__cached__` are + derived). So if a loader can load from a cached module but otherwise does + not load from a file, that atypical scenario may be appropriate. 
+ + It is **strongly** recommended that you use + :attr:`module.__spec__.cached <importlib.machinery.ModuleSpec.cached>` + instead of :attr:`!module.__cached__`. + + .. deprecated-removed:: 3.13 3.15 + Setting :attr:`!__cached__` on a module while failing to set + :attr:`!__spec__.cached` is deprecated. In Python 3.15, + :attr:`!__cached__` will cease to be set or taken into consideration by + the import system or standard library. + +Other writable attributes on module objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As well as the import-related attributes listed above, module objects also have +the following writable attributes: + +.. attribute:: module.__doc__ + + The module's documentation string, or ``None`` if unavailable. + See also: :attr:`__doc__ attributes <definition.__doc__>`. + +.. attribute:: module.__annotations__ + + A dictionary containing :term:`variable annotations <variable annotation>` + collected during module body execution. For best practices on working with + :attr:`!__annotations__`, see :mod:`annotationlib`. + + .. versionchanged:: 3.14 + Annotations are now :ref:`lazily evaluated <lazy-evaluation>`. + See :pep:`649`. + +.. attribute:: module.__annotate__ + + The :term:`annotate function` for this module, or ``None`` if the module has + no annotations. See also: :attr:`~object.__annotate__` attributes. + + .. versionadded:: 3.14 + +Module dictionaries +^^^^^^^^^^^^^^^^^^^ + +Module objects also have the following special read-only attribute: .. index:: single: __dict__ (module attribute) +.. attribute:: module.__dict__ -Special read-only attribute: :attr:`~object.__dict__` is the module's -namespace as a dictionary object. + The module's namespace as a dictionary object. Uniquely among the attributes + listed here, :attr:`!__dict__` cannot be accessed as a global variable from + within a module; it can only be accessed as an attribute on module objects. -.. impl-detail:: + ..
impl-detail:: - Because of the way CPython clears module dictionaries, the module - dictionary will be cleared when the module falls out of scope even if the - dictionary still has live references. To avoid this, copy the dictionary - or keep the module around while using its dictionary directly. + Because of the way CPython clears module dictionaries, the module + dictionary will be cleared when the module falls out of scope even if the + dictionary still has live references. To avoid this, copy the dictionary + or keep the module around while using its dictionary directly. .. _class-attrs-and-methods: diff --git a/Doc/reference/import.rst b/Doc/reference/import.rst index 0b9d1c233d182a..ac363e8cfa00dc 100644 --- a/Doc/reference/import.rst +++ b/Doc/reference/import.rst @@ -513,8 +513,10 @@ holding is that if you have ``sys.modules['spam']`` and ``sys.modules['spam.foo']`` (as you would after the above import), the latter must appear as the ``foo`` attribute of the former. -Module spec ------------ +.. _module-specs: + +Module specs +------------ The import machinery uses a variety of information about each module during import, especially before loading. Most of the information is @@ -527,163 +529,44 @@ and the loader that executes it. Most importantly, it allows the import machinery to perform the boilerplate operations of loading, whereas without a module spec the loader had that responsibility. -The module's spec is exposed as the ``__spec__`` attribute on a module object. +The module's spec is exposed as :attr:`module.__spec__`. Setting +:attr:`!__spec__` appropriately applies equally to +:ref:`modules initialized during interpreter startup <programs>`. +The one exception is ``__main__``, where :attr:`!__spec__` is +:ref:`set to None in some cases <main_spec>`. + See :class:`~importlib.machinery.ModuleSpec` for details on the contents of the module spec. .. versionadded:: 3.4 -..
_import-mod-attrs: - -Import-related module attributes --------------------------------- - -The import machinery fills in these attributes on each module object -during loading, based on the module's spec, before the loader executes -the module. - -It is **strongly** recommended that you rely on :attr:`__spec__` and -its attributes instead of any of the other individual attributes -listed below, except :attr:`__name__`. - -.. attribute:: __name__ - - The ``__name__`` attribute must be set to the fully qualified name of - the module. This name is used to uniquely identify the module in - the import system. - -.. attribute:: __loader__ - - The ``__loader__`` attribute must be set to the loader object that - the import machinery used when loading the module. This is mostly - for introspection, but can be used for additional loader-specific - functionality, for example getting data associated with a loader. - - It is **strongly** recommended that you rely on :attr:`__spec__` - instead of this attribute. - - .. versionchanged:: 3.12 - The value of ``__loader__`` is expected to be the same as - ``__spec__.loader``. The use of ``__loader__`` is deprecated and slated - for removal in Python 3.14. - -.. attribute:: __package__ - - The module's ``__package__`` attribute may be set. Its value must - be a string, but it can be the same value as its ``__name__``. When - the module is a package, its ``__package__`` value should be set to - its ``__name__``. When the module is not a package, ``__package__`` - should be set to the empty string for top-level modules, or for - submodules, to the parent package's name. See :pep:`366` for further - details. - - This attribute is used instead of ``__name__`` to calculate explicit - relative imports for main modules, as defined in :pep:`366`. - - It is **strongly** recommended that you rely on :attr:`__spec__` - instead of this attribute. - - .. 
versionchanged:: 3.6 - The value of ``__package__`` is expected to be the same as - ``__spec__.parent``. - - .. versionchanged:: 3.10 - :exc:`ImportWarning` is raised if import falls back to - ``__package__`` instead of - :attr:`~importlib.machinery.ModuleSpec.parent`. - - .. versionchanged:: 3.12 - Raise :exc:`DeprecationWarning` instead of :exc:`ImportWarning` - when falling back to ``__package__``. - - .. deprecated-removed:: 3.13 3.15 - ``__package__`` will cease to be set or taken into consideration - by the import system or standard library. - - -.. attribute:: __spec__ - - The ``__spec__`` attribute must be set to the module spec that was - used when importing the module. Setting ``__spec__`` - appropriately applies equally to :ref:`modules initialized during - interpreter startup <programs>`. The one exception is ``__main__``, - where ``__spec__`` is :ref:`set to None in some cases <main_spec>`. - - When ``__spec__.parent`` is not set, ``__package__`` is used as - a fallback. - - .. versionadded:: 3.4 - - .. versionchanged:: 3.6 - ``__spec__.parent`` is used as a fallback when ``__package__`` is - not defined. - -.. attribute:: __path__ - - If the module is a package (either regular or namespace), the module - object's ``__path__`` attribute must be set. The value must be - iterable, but may be empty if ``__path__`` has no further significance. - If ``__path__`` is not empty, it must produce strings when iterated - over. More details on the semantics of ``__path__`` are given - :ref:`below <package-path-rules>`. - - Non-package modules should not have a ``__path__`` attribute. - -.. attribute:: __file__ -.. attribute:: __cached__ - - ``__file__`` is optional (if set, value must be a string). It indicates - the pathname of the file from which the module was loaded (if - loaded from a file), or the pathname of the shared library file - for extension modules loaded dynamically from a shared library.
- It might be missing for certain types of modules, such as C - modules that are statically linked into the interpreter, and the - import system may opt to leave it unset if it has no semantic - meaning (e.g. a module loaded from a database). - - If ``__file__`` is set then the ``__cached__`` attribute might also - be set, which is the path to any compiled version of - the code (e.g. byte-compiled file). The file does not need to exist - to set this attribute; the path can simply point to where the - compiled file would exist (see :pep:`3147`). - - Note that ``__cached__`` may be set even if ``__file__`` is not - set. However, that scenario is quite atypical. Ultimately, the - loader is what makes use of the module spec provided by the finder - (from which ``__file__`` and ``__cached__`` are derived). So - if a loader can load from a cached module but otherwise does not load - from a file, that atypical scenario may be appropriate. - - It is **strongly** recommended that you rely on :attr:`__spec__` - instead of ``__cached__``. - - .. deprecated-removed:: 3.13 3.15 - ``__cached__`` will cease to be set or taken into consideration - by the import system or standard library. - .. _package-path-rules: -module.__path__ ---------------- +__path__ attributes on modules +------------------------------ -By definition, if a module has a ``__path__`` attribute, it is a package. +The :attr:`~module.__path__` attribute should be a (possibly empty) +:term:`sequence` of strings enumerating the locations where the package's +submodules will be found. By definition, if a module has a :attr:`!__path__` +attribute, it is a :term:`package`. -A package's ``__path__`` attribute is used during imports of its subpackages. +A package's :attr:`~module.__path__` attribute is used during imports of its +subpackages. Within the import machinery, it functions much the same as :data:`sys.path`, i.e. providing a list of locations to search for modules during import. 
-However, ``__path__`` is typically much more constrained than -:data:`sys.path`. +However, :attr:`!__path__` is typically much more constrained than +:data:`!sys.path`. -``__path__`` must be an iterable of strings, but it may be empty. The same rules used for :data:`sys.path` also apply to a package's -``__path__``, and :data:`sys.path_hooks` (described below) are -consulted when traversing a package's ``__path__``. +:attr:`!__path__`. :data:`sys.path_hooks` (described below) are +consulted when traversing a package's :attr:`!__path__`. -A package's ``__init__.py`` file may set or alter the package's ``__path__`` +A package's ``__init__.py`` file may set or alter the package's +:attr:`~module.__path__` attribute, and this was typically the way namespace packages were implemented prior to :pep:`420`. With the adoption of :pep:`420`, namespace packages no -longer need to supply ``__init__.py`` files containing only ``__path__`` -manipulation code; the import machinery automatically sets ``__path__`` +longer need to supply ``__init__.py`` files containing only :attr:`!__path__` +manipulation code; the import machinery automatically sets :attr:`!__path__` correctly for the namespace package. Module reprs diff --git a/Doc/tutorial/modules.rst b/Doc/tutorial/modules.rst index 0316239e776a95..de7aa0e2342946 100644 --- a/Doc/tutorial/modules.rst +++ b/Doc/tutorial/modules.rst @@ -585,8 +585,9 @@ as the main module of a Python application must always use absolute imports. Packages in Multiple Directories -------------------------------- -Packages support one more special attribute, :attr:`__path__`. This is -initialized to be a list containing the name of the directory holding the +Packages support one more special attribute, :attr:`~module.__path__`. This is +initialized to be a :term:`sequence` of strings containing the name of the +directory holding the package's :file:`__init__.py` before the code in that file is executed. 
This variable can be modified; doing so affects future searches for modules and subpackages contained in the package. diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index 3c9c2049b89ea0..fdccfb7deb1ed7 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -502,12 +502,12 @@ Python's :option:`-m` switch allows running a module as a script. When you ran a module that was located inside a package, relative imports didn't work correctly. -The fix for Python 2.6 adds a :attr:`__package__` attribute to -modules. When this attribute is present, relative imports will be +The fix for Python 2.6 adds a :attr:`module.__package__` attribute. +When this attribute is present, relative imports will be relative to the value of this attribute instead of the -:attr:`__name__` attribute. +:attr:`~module.__name__` attribute. -PEP 302-style importers can then set :attr:`__package__` as necessary. +PEP 302-style importers can then set :attr:`~module.__package__` as necessary. The :mod:`runpy` module that implements the :option:`-m` switch now does this, so relative imports will now work correctly in scripts running from inside a package. diff --git a/Doc/whatsnew/3.0.rst b/Doc/whatsnew/3.0.rst index 888e6279754fc2..d97f5fdd9eaa4a 100644 --- a/Doc/whatsnew/3.0.rst +++ b/Doc/whatsnew/3.0.rst @@ -357,8 +357,8 @@ New Syntax provides a standardized way of annotating a function's parameters and return value. There are no semantics attached to such annotations except that they can be introspected at runtime using - the :attr:`__annotations__` attribute. The intent is to encourage - experimentation through metaclasses, decorators or frameworks. + the :attr:`~object.__annotations__` attribute. The intent is to + encourage experimentation through metaclasses, decorators or frameworks. * :pep:`3102`: Keyword-only arguments. 
Named parameters occurring after ``*args`` in the parameter list *must* be specified using diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index fb8809feda4ec7..463fc269ee8fcc 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -1309,14 +1309,15 @@ Deprecated may be removed in a future version of Python. Use the single-arg versions of these functions instead. (Contributed by Ofey Chan in :gh:`89874`.) -* :exc:`DeprecationWarning` is now raised when ``__package__`` on a - module differs from ``__spec__.parent`` (previously it was - :exc:`ImportWarning`). +* :exc:`DeprecationWarning` is now raised when :attr:`~module.__package__` on a + module differs from + :attr:`__spec__.parent <importlib.machinery.ModuleSpec.parent>` (previously + it was :exc:`ImportWarning`). (Contributed by Brett Cannon in :gh:`65961`.) -* Setting ``__package__`` or ``__cached__`` on a module is deprecated, - and will cease to be set or taken into consideration by the import system in Python 3.14. - (Contributed by Brett Cannon in :gh:`65961`.) +* Setting :attr:`~module.__package__` or :attr:`~module.__cached__` on a + module is deprecated, and will cease to be set or taken into consideration by + the import system in Python 3.14. (Contributed by Brett Cannon in :gh:`65961`.) * The bitwise inversion operator (``~``) on bool is deprecated. It will throw an error in Python 3.16. Use ``not`` for logical negation of bools instead.
diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst index c09fa839886305..7104904c956a7a 100644 --- a/Doc/whatsnew/3.2.rst +++ b/Doc/whatsnew/3.2.rst @@ -312,8 +312,8 @@ cluttering source directories, the *pyc* files are now collected in a Aside from the filenames and target directories, the new scheme has a few aspects that are visible to the programmer: -* Imported modules now have a :attr:`__cached__` attribute which stores the name - of the actual file that was imported: +* Imported modules now have a :attr:`~module.__cached__` attribute which stores + the name of the actual file that was imported: >>> import collections >>> collections.__cached__ # doctest: +SKIP diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 71425120c37185..9d746b378995c3 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -2271,7 +2271,8 @@ Changes in the Python API :func:`super` and falling through all the way to the ABCs. For compatibility, catch both :exc:`NotImplementedError` or the appropriate exception as needed. -* The module type now initializes the :attr:`__package__` and :attr:`__loader__` +* The module type now initializes the :attr:`~module.__package__` and + :attr:`~module.__loader__` attributes to ``None`` by default. To determine if these attributes were set in a backwards-compatible fashion, use e.g. ``getattr(module, '__loader__', None) is not None``. (:issue:`17115`.) diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index d4ae6f1f45d346..3f3f634171dab6 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -423,8 +423,8 @@ are declared in the annotations:: return 'Hello ' + name While these annotations are available at runtime through the usual -:attr:`__annotations__` attribute, *no automatic type checking happens at -runtime*. Instead, it is assumed that a separate off-line type checker +:attr:`~object.__annotations__` attribute, *no automatic type checking happens +at runtime*. 
Instead, it is assumed that a separate off-line type checker (e.g. `mypy `_) will be used for on-demand source code analysis. diff --git a/Misc/NEWS.d/3.10.0a2.rst b/Misc/NEWS.d/3.10.0a2.rst index bd002b6ad3db9b..3e82de9ef266d6 100644 --- a/Misc/NEWS.d/3.10.0a2.rst +++ b/Misc/NEWS.d/3.10.0a2.rst @@ -226,8 +226,8 @@ thread at the time the function is called. .. section: Core and Builtins Enable ``from __future__ import annotations`` (:pep:`563`) by default. The -values found in :attr:`__annotations__` dicts are now strings, e.g. ``{"x": -"int"}`` instead of ``{"x": int}``. +values found in :attr:`~object.__annotations__` dicts are now strings, for +example ``{"x": "int"}`` instead of ``{"x": int}``. .. diff --git a/Misc/NEWS.d/3.11.0a5.rst b/Misc/NEWS.d/3.11.0a5.rst index 954f5c18b48000..5418d5d59dd583 100644 --- a/Misc/NEWS.d/3.11.0a5.rst +++ b/Misc/NEWS.d/3.11.0a5.rst @@ -486,8 +486,8 @@ Use ``dis.Positions`` in ``dis.Instruction`` instead of a regular ``tuple``. .. nonce: geS-aP .. section: Library -:mod:`pdb` now gracefully handles ``help`` when :attr:`__doc__` is missing, -for example when run with pregenerated optimized ``.pyc`` files. +:mod:`pdb` now gracefully handles ``help`` when :attr:`~module.__doc__` is +missing, for example when run with pregenerated optimized ``.pyc`` files. .. diff --git a/Misc/NEWS.d/3.12.0a1.rst b/Misc/NEWS.d/3.12.0a1.rst index 7e0f86179bce50..f2668e99a6299b 100644 --- a/Misc/NEWS.d/3.12.0a1.rst +++ b/Misc/NEWS.d/3.12.0a1.rst @@ -4237,8 +4237,8 @@ by :mod:`asyncio` to AIX platform only. .. nonce: 4dzB80 .. section: Library -Set :attr:`doctest.DocTest.lineno` to ``None`` when object does not have -:attr:`__doc__`. +Set :attr:`doctest.DocTest.lineno` to ``None`` when an object does not have +:attr:`~definition.__doc__`. .. 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst index 1e5ad7d08eed7c..567c096472878f 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst @@ -1,3 +1,3 @@ :func:`classmethod` and :func:`staticmethod` now wrap the -:attr:`__annotations__` and :attr:`!__annotate__` attributes of their -underlying callable lazily. See :pep:`649`. Patch by Jelle Zijlstra. +:attr:`~object.__annotations__` and :attr:`~object.__annotate__` attributes of +their underlying callable lazily. See :pep:`649`. Patch by Jelle Zijlstra. From 6b533a659bc8df04daa194d827604dcae14d5801 Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Wed, 9 Oct 2024 14:54:39 +0200 Subject: [PATCH 003/114] gh-125039: Make `this_instr`/`prev_instr` const in cases generator (GH-125071) --- Lib/test/test_generated_cases.py | 6 +- ...-10-07-23-33-18.gh-issue-125039.MKTyNI.rst | 1 + Python/generated_cases.c.h | 144 +++++++++--------- Tools/cases_generator/tier1_generator.py | 6 +- 4 files changed, 79 insertions(+), 78 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-07-23-33-18.gh-issue-125039.MKTyNI.rst diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 5b15838f077f3b..cd3718b80612bd 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -450,7 +450,7 @@ def test_cache_effect(self): """ output = """ TARGET(OP) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 4; INSTRUCTION_STATS(OP); @@ -503,7 +503,7 @@ def test_macro_instruction(self): next_instr += 6; INSTRUCTION_STATS(OP); PREDICTED(OP); - _Py_CODEUNIT *this_instr = next_instr - 6; + _Py_CODEUNIT* const this_instr 
= next_instr - 6; (void)this_instr; _PyStackRef left; _PyStackRef right; @@ -536,7 +536,7 @@ def test_macro_instruction(self): } TARGET(OP1) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(OP1); diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-07-23-33-18.gh-issue-125039.MKTyNI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-07-23-33-18.gh-issue-125039.MKTyNI.rst new file mode 100644 index 00000000000000..93716c0b1c0df1 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-07-23-33-18.gh-issue-125039.MKTyNI.rst @@ -0,0 +1 @@ +Make ``this_instr`` and ``prev_instr`` const in cases generator. diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e1107caa197d7a..c4de7bdeb4ce80 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -14,7 +14,7 @@ next_instr += 2; INSTRUCTION_STATS(BINARY_OP); PREDICTED(BINARY_OP); - _Py_CODEUNIT *this_instr = next_instr - 2; + _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef lhs; _PyStackRef rhs; @@ -422,7 +422,7 @@ next_instr += 2; INSTRUCTION_STATS(BINARY_SUBSCR); PREDICTED(BINARY_SUBSCR); - _Py_CODEUNIT *this_instr = next_instr - 2; + _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef container; _PyStackRef sub; @@ -858,7 +858,7 @@ next_instr += 4; INSTRUCTION_STATS(CALL); PREDICTED(CALL); - _Py_CODEUNIT *this_instr = next_instr - 4; + _Py_CODEUNIT* const this_instr = next_instr - 4; (void)this_instr; _PyStackRef *callable; _PyStackRef *self_or_null; @@ -1012,7 +1012,7 @@ } TARGET(CALL_ALLOC_AND_ENTER_INIT) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(CALL_ALLOC_AND_ENTER_INIT); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -1107,7 +1107,7 @@ } 
TARGET(CALL_BOUND_METHOD_EXACT_ARGS) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(CALL_BOUND_METHOD_EXACT_ARGS); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -1210,7 +1210,7 @@ } TARGET(CALL_BOUND_METHOD_GENERAL) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(CALL_BOUND_METHOD_GENERAL); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -1625,7 +1625,7 @@ next_instr += 1; INSTRUCTION_STATS(CALL_FUNCTION_EX); PREDICTED(CALL_FUNCTION_EX); - _Py_CODEUNIT *this_instr = next_instr - 1; + _Py_CODEUNIT* const this_instr = next_instr - 1; (void)this_instr; _PyStackRef func; _PyStackRef callargs; @@ -1871,7 +1871,7 @@ next_instr += 4; INSTRUCTION_STATS(CALL_KW); PREDICTED(CALL_KW); - _Py_CODEUNIT *this_instr = next_instr - 4; + _Py_CODEUNIT* const this_instr = next_instr - 4; (void)this_instr; _PyStackRef *callable; _PyStackRef *self_or_null; @@ -2024,7 +2024,7 @@ } TARGET(CALL_KW_BOUND_METHOD) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(CALL_KW_BOUND_METHOD); static_assert(INLINE_CACHE_ENTRIES_CALL_KW == 3, "incorrect cache size"); @@ -2223,7 +2223,7 @@ } TARGET(CALL_KW_PY) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(CALL_KW_PY); static_assert(INLINE_CACHE_ENTRIES_CALL_KW == 3, "incorrect cache size"); @@ -2795,7 +2795,7 @@ } TARGET(CALL_PY_EXACT_ARGS) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(CALL_PY_EXACT_ARGS); 
static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -2876,7 +2876,7 @@ } TARGET(CALL_PY_GENERAL) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(CALL_PY_GENERAL); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -3157,7 +3157,7 @@ } TARGET(CLEANUP_THROW) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(CLEANUP_THROW); @@ -3201,7 +3201,7 @@ next_instr += 2; INSTRUCTION_STATS(COMPARE_OP); PREDICTED(COMPARE_OP); - _Py_CODEUNIT *this_instr = next_instr - 2; + _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef left; _PyStackRef right; @@ -3381,7 +3381,7 @@ next_instr += 2; INSTRUCTION_STATS(CONTAINS_OP); PREDICTED(CONTAINS_OP); - _Py_CODEUNIT *this_instr = next_instr - 2; + _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef left; _PyStackRef right; @@ -3720,7 +3720,7 @@ } TARGET(END_ASYNC_FOR) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(END_ASYNC_FOR); @@ -3781,7 +3781,7 @@ } TARGET(ENTER_EXECUTOR) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(ENTER_EXECUTOR); @@ -3896,7 +3896,7 @@ next_instr += 2; INSTRUCTION_STATS(FOR_ITER); PREDICTED(FOR_ITER); - _Py_CODEUNIT *this_instr = next_instr - 2; + _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef iter; _PyStackRef next; @@ -4357,7 +4357,7 @@ } TARGET(INSTRUMENTED_CALL) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = 
next_instr; (void)this_instr; next_instr += 4; INSTRUCTION_STATS(INSTRUMENTED_CALL); @@ -4530,7 +4530,7 @@ } TARGET(INSTRUMENTED_CALL_KW) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 4; INSTRUCTION_STATS(INSTRUMENTED_CALL_KW); @@ -4554,7 +4554,7 @@ } TARGET(INSTRUMENTED_END_FOR) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_END_FOR); @@ -4579,7 +4579,7 @@ } TARGET(INSTRUMENTED_END_SEND) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_END_SEND); @@ -4606,7 +4606,7 @@ } TARGET(INSTRUMENTED_FOR_ITER) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(INSTRUMENTED_FOR_ITER); @@ -4647,7 +4647,7 @@ } TARGET(INSTRUMENTED_INSTRUCTION) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_INSTRUCTION); @@ -4666,7 +4666,7 @@ } TARGET(INSTRUMENTED_JUMP_BACKWARD) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(INSTRUMENTED_JUMP_BACKWARD); @@ -4690,7 +4690,7 @@ } TARGET(INSTRUMENTED_JUMP_FORWARD) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_JUMP_FORWARD); @@ -4699,8 +4699,8 @@ } TARGET(INSTRUMENTED_LINE) { - _Py_CODEUNIT *prev_instr = 
frame->instr_ptr; - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const prev_instr = frame->instr_ptr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_LINE); @@ -4734,7 +4734,7 @@ } TARGET(INSTRUMENTED_LOAD_SUPER_ATTR) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(INSTRUMENTED_LOAD_SUPER_ATTR); @@ -4748,7 +4748,7 @@ } TARGET(INSTRUMENTED_POP_JUMP_IF_FALSE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_FALSE); @@ -4765,7 +4765,7 @@ } TARGET(INSTRUMENTED_POP_JUMP_IF_NONE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_NONE); @@ -4788,7 +4788,7 @@ } TARGET(INSTRUMENTED_POP_JUMP_IF_NOT_NONE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_NOT_NONE); @@ -4811,7 +4811,7 @@ } TARGET(INSTRUMENTED_POP_JUMP_IF_TRUE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_TRUE); @@ -4828,7 +4828,7 @@ } TARGET(INSTRUMENTED_RESUME) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_RESUME); @@ -4878,7 +4878,7 @@ } TARGET(INSTRUMENTED_RETURN_CONST) { - _Py_CODEUNIT *this_instr = 
frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_RETURN_CONST); @@ -4931,7 +4931,7 @@ } TARGET(INSTRUMENTED_RETURN_VALUE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_RETURN_VALUE); @@ -4976,7 +4976,7 @@ } TARGET(INSTRUMENTED_YIELD_VALUE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_YIELD_VALUE); @@ -5087,7 +5087,7 @@ } TARGET(JUMP_BACKWARD) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(JUMP_BACKWARD); @@ -5220,7 +5220,7 @@ next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR); PREDICTED(LOAD_ATTR); - _Py_CODEUNIT *this_instr = next_instr - 10; + _Py_CODEUNIT* const this_instr = next_instr - 10; (void)this_instr; _PyStackRef owner; _PyStackRef attr; @@ -5294,7 +5294,7 @@ } TARGET(LOAD_ATTR_CLASS) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_CLASS); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5329,7 +5329,7 @@ } TARGET(LOAD_ATTR_CLASS_WITH_METACLASS_CHECK) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_CLASS_WITH_METACLASS_CHECK); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5370,7 +5370,7 @@ } TARGET(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* 
const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5406,7 +5406,7 @@ } TARGET(LOAD_ATTR_INSTANCE_VALUE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_INSTANCE_VALUE); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5451,7 +5451,7 @@ } TARGET(LOAD_ATTR_METHOD_LAZY_DICT) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_METHOD_LAZY_DICT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5494,7 +5494,7 @@ } TARGET(LOAD_ATTR_METHOD_NO_DICT) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_METHOD_NO_DICT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5530,7 +5530,7 @@ } TARGET(LOAD_ATTR_METHOD_WITH_VALUES) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_METHOD_WITH_VALUES); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5578,7 +5578,7 @@ } TARGET(LOAD_ATTR_MODULE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_MODULE); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5621,7 +5621,7 @@ } TARGET(LOAD_ATTR_NONDESCRIPTOR_NO_DICT) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = 
next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_NONDESCRIPTOR_NO_DICT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5652,7 +5652,7 @@ } TARGET(LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5694,7 +5694,7 @@ } TARGET(LOAD_ATTR_PROPERTY) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_PROPERTY); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5759,7 +5759,7 @@ } TARGET(LOAD_ATTR_SLOT) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_SLOT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5796,7 +5796,7 @@ } TARGET(LOAD_ATTR_WITH_HINT) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_WITH_HINT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -6101,7 +6101,7 @@ next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL); PREDICTED(LOAD_GLOBAL); - _Py_CODEUNIT *this_instr = next_instr - 5; + _Py_CODEUNIT* const this_instr = next_instr - 5; (void)this_instr; _PyStackRef *res; _PyStackRef null = PyStackRef_NULL; @@ -6142,7 +6142,7 @@ } TARGET(LOAD_GLOBAL_BUILTIN) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL_BUILTIN); static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect 
cache size"); @@ -6185,7 +6185,7 @@ } TARGET(LOAD_GLOBAL_MODULE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL_MODULE); static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); @@ -6299,7 +6299,7 @@ next_instr += 2; INSTRUCTION_STATS(LOAD_SUPER_ATTR); PREDICTED(LOAD_SUPER_ATTR); - _Py_CODEUNIT *this_instr = next_instr - 2; + _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef global_super_st; _PyStackRef class_st; @@ -6654,7 +6654,7 @@ } TARGET(POP_JUMP_IF_FALSE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(POP_JUMP_IF_FALSE); @@ -6673,7 +6673,7 @@ } TARGET(POP_JUMP_IF_NONE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(POP_JUMP_IF_NONE); @@ -6708,7 +6708,7 @@ } TARGET(POP_JUMP_IF_NOT_NONE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(POP_JUMP_IF_NOT_NONE); @@ -6743,7 +6743,7 @@ } TARGET(POP_JUMP_IF_TRUE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; INSTRUCTION_STATS(POP_JUMP_IF_TRUE); @@ -6811,7 +6811,7 @@ } TARGET(RAISE_VARARGS) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(RAISE_VARARGS); @@ -6836,7 +6836,7 @@ } TARGET(RERAISE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = 
next_instr; (void)this_instr; next_instr += 1; INSTRUCTION_STATS(RERAISE); @@ -6886,7 +6886,7 @@ next_instr += 1; INSTRUCTION_STATS(RESUME); PREDICTED(RESUME); - _Py_CODEUNIT *this_instr = next_instr - 1; + _Py_CODEUNIT* const this_instr = next_instr - 1; (void)this_instr; // _MAYBE_INSTRUMENT { @@ -7049,7 +7049,7 @@ next_instr += 2; INSTRUCTION_STATS(SEND); PREDICTED(SEND); - _Py_CODEUNIT *this_instr = next_instr - 2; + _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef receiver; _PyStackRef v; @@ -7288,7 +7288,7 @@ next_instr += 5; INSTRUCTION_STATS(STORE_ATTR); PREDICTED(STORE_ATTR); - _Py_CODEUNIT *this_instr = next_instr - 5; + _Py_CODEUNIT* const this_instr = next_instr - 5; (void)this_instr; _PyStackRef owner; _PyStackRef v; @@ -7329,7 +7329,7 @@ } TARGET(STORE_ATTR_INSTANCE_VALUE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 5; INSTRUCTION_STATS(STORE_ATTR_INSTANCE_VALUE); static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size"); @@ -7378,7 +7378,7 @@ } TARGET(STORE_ATTR_SLOT) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 5; INSTRUCTION_STATS(STORE_ATTR_SLOT); static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size"); @@ -7411,7 +7411,7 @@ } TARGET(STORE_ATTR_WITH_HINT) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 5; INSTRUCTION_STATS(STORE_ATTR_WITH_HINT); static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size"); @@ -7625,7 +7625,7 @@ next_instr += 2; INSTRUCTION_STATS(STORE_SUBSCR); PREDICTED(STORE_SUBSCR); - _Py_CODEUNIT *this_instr = next_instr - 2; + _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef container; _PyStackRef sub; @@ -7748,7 +7748,7 @@ 
next_instr += 4; INSTRUCTION_STATS(TO_BOOL); PREDICTED(TO_BOOL); - _Py_CODEUNIT *this_instr = next_instr - 4; + _Py_CODEUNIT* const this_instr = next_instr - 4; (void)this_instr; _PyStackRef value; _PyStackRef res; @@ -7784,7 +7784,7 @@ } TARGET(TO_BOOL_ALWAYS_TRUE) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; next_instr += 4; INSTRUCTION_STATS(TO_BOOL_ALWAYS_TRUE); static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size"); @@ -7984,7 +7984,7 @@ next_instr += 2; INSTRUCTION_STATS(UNPACK_SEQUENCE); PREDICTED(UNPACK_SEQUENCE); - _Py_CODEUNIT *this_instr = next_instr - 2; + _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef seq; _PyStackRef *output; diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py index 1b116a578c5a86..8dadc5736c8889 100644 --- a/Tools/cases_generator/tier1_generator.py +++ b/Tools/cases_generator/tier1_generator.py @@ -150,9 +150,9 @@ def generate_tier1( out.emit(f"TARGET({name}) {{\n") unused_guard = "(void)this_instr;\n" if inst.family is None else "" if inst.properties.needs_prev: - out.emit(f"_Py_CODEUNIT *prev_instr = frame->instr_ptr;\n") + out.emit(f"_Py_CODEUNIT* const prev_instr = frame->instr_ptr;\n") if needs_this and not inst.is_target: - out.emit(f"_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;\n") + out.emit(f"_Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr;\n") out.emit(unused_guard) else: out.emit(f"frame->instr_ptr = next_instr;\n") @@ -161,7 +161,7 @@ def generate_tier1( if inst.is_target: out.emit(f"PREDICTED({name});\n") if needs_this: - out.emit(f"_Py_CODEUNIT *this_instr = next_instr - {inst.size};\n") + out.emit(f"_Py_CODEUNIT* const this_instr = next_instr - {inst.size};\n") out.emit(unused_guard) if inst.family is not None: out.emit( From e0835aff2e45629ee85af642190e79e4061312b5 Mon Sep 17 00:00:00 2001 From: JamesMcCarthy21 
<168590015+JamesMcCarthy21@users.noreply.github.com> Date: Wed, 9 Oct 2024 09:17:18 -0400 Subject: [PATCH 004/114] gh-125168: Fix typo in `__future__.rst` (#125183) * Update `__future__.rst` Fixed typo in the sentence :pep:`649`: *Deferred evaluation of annotations using descriptors* - James McCarthy * Update `__future__.rst` Fixed sphinx formatting --- Doc/library/__future__.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/__future__.rst b/Doc/library/__future__.rst index 6a1179434acd5a..4f3b663006fb28 100644 --- a/Doc/library/__future__.rst +++ b/Doc/library/__future__.rst @@ -66,7 +66,7 @@ language using this mechanism: +------------------+-------------+--------------+---------------------------------------------+ | annotations | 3.7.0b1 | Never [1]_ | :pep:`563`: | | | | | *Postponed evaluation of annotations*, | -| | | | :pep:`649`: *Deferred evalutation of | +| | | | :pep:`649`: *Deferred evaluation of | | | | | annotations using descriptors* | +------------------+-------------+--------------+---------------------------------------------+ From a5716a30914cc11c3a4bec374cdaece3c79b541f Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 9 Oct 2024 17:02:18 +0300 Subject: [PATCH 005/114] gh-101100: Fix Sphinx warnings in `library/unittest.mock.rst` (#124106) --- Doc/library/unittest.mock.rst | 60 +++++++++++++++++------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/Doc/library/unittest.mock.rst b/Doc/library/unittest.mock.rst index cc2b1b4299553c..eae3ef2888eae0 100644 --- a/Doc/library/unittest.mock.rst +++ b/Doc/library/unittest.mock.rst @@ -68,7 +68,7 @@ available, and then make assertions about how they have been used: 3 >>> thing.method.assert_called_with(3, 4, 5, key='value') -:attr:`side_effect` allows you to perform side effects, including raising an +:attr:`~Mock.side_effect` allows you to perform side effects, including raising an exception 
when a mock is called: >>> from unittest.mock import Mock @@ -760,8 +760,8 @@ the *new_callable* argument to :func:`patch`. .. attribute:: __class__ - Normally the :attr:`__class__` attribute of an object will return its type. - For a mock object with a :attr:`spec`, ``__class__`` returns the spec class + Normally the :attr:`!__class__` attribute of an object will return its type. + For a mock object with a :attr:`!spec`, :attr:`!__class__` returns the spec class instead. This allows mock objects to pass :func:`isinstance` tests for the object they are replacing / masquerading as: @@ -769,7 +769,7 @@ the *new_callable* argument to :func:`patch`. >>> isinstance(mock, int) True - :attr:`__class__` is assignable to, this allows a mock to pass an + :attr:`!__class__` is assignable to, this allows a mock to pass an :func:`isinstance` check without forcing you to use a spec: >>> mock = Mock() @@ -783,8 +783,8 @@ the *new_callable* argument to :func:`patch`. meaning of :class:`Mock`, with the exception of *return_value* and *side_effect* which have no meaning on a non-callable mock. -Mock objects that use a class or an instance as a :attr:`spec` or -:attr:`spec_set` are able to pass :func:`isinstance` tests: +Mock objects that use a class or an instance as a :attr:`!spec` or +:attr:`!spec_set` are able to pass :func:`isinstance` tests: >>> mock = Mock(spec=SomeClass) >>> isinstance(mock, SomeClass) @@ -1198,7 +1198,7 @@ Calls made to the object will be recorded in the attributes like :attr:`~Mock.call_args` and :attr:`~Mock.call_args_list`. If :attr:`~Mock.side_effect` is set then it will be called after the call has -been recorded, so if :attr:`side_effect` raises an exception the call is still +been recorded, so if :attr:`!side_effect` raises an exception the call is still recorded. 
The simplest way to make a mock raise an exception when called is to make @@ -1219,8 +1219,8 @@ The simplest way to make a mock raise an exception when called is to make >>> m.mock_calls [call(1, 2, 3), call('two', 'three', 'four')] -If :attr:`side_effect` is a function then whatever that function returns is what -calls to the mock return. The :attr:`side_effect` function is called with the +If :attr:`~Mock.side_effect` is a function then whatever that function returns is what +calls to the mock return. The :attr:`!side_effect` function is called with the same arguments as the mock. This allows you to vary the return value of the call dynamically, based on the input: @@ -1237,7 +1237,7 @@ call dynamically, based on the input: If you want the mock to still return the default return value (a new mock), or any set return value, then there are two ways of doing this. Either return -:attr:`mock.return_value` from inside :attr:`side_effect`, or return :data:`DEFAULT`: +:attr:`~Mock.return_value` from inside :attr:`~Mock.side_effect`, or return :data:`DEFAULT`: >>> m = MagicMock() >>> def side_effect(*args, **kwargs): @@ -1254,8 +1254,8 @@ any set return value, then there are two ways of doing this. Either return >>> m() 3 -To remove a :attr:`side_effect`, and return to the default behaviour, set the -:attr:`side_effect` to ``None``: +To remove a :attr:`~Mock.side_effect`, and return to the default behaviour, set the +:attr:`!side_effect` to ``None``: >>> m = MagicMock(return_value=6) >>> def side_effect(*args, **kwargs): @@ -1268,7 +1268,7 @@ To remove a :attr:`side_effect`, and return to the default behaviour, set the >>> m() 6 -The :attr:`side_effect` can also be any iterable object. Repeated calls to the mock +The :attr:`~Mock.side_effect` can also be any iterable object. Repeated calls to the mock will return values from the iterable (until the iterable is exhausted and a :exc:`StopIteration` is raised): @@ -1309,7 +1309,7 @@ objects of any type. 
You may want a mock object to return ``False`` to a :func:`hasattr` call, or raise an :exc:`AttributeError` when an attribute is fetched. You can do this by providing -an object as a :attr:`spec` for a mock, but that isn't always convenient. +an object as a :attr:`!spec` for a mock, but that isn't always convenient. You "block" attributes by deleting them. Once deleted, accessing an attribute will raise an :exc:`AttributeError`. @@ -1478,7 +1478,7 @@ patch If you are patching builtins in a module then you don't need to pass ``create=True``, it will be added by default. - Patch can be used as a :class:`TestCase` class decorator. It works by + Patch can be used as a :class:`~unittest.TestCase` class decorator. It works by decorating each test method in the class. This reduces the boilerplate code when your test methods share a common patchings set. :func:`patch` finds tests by looking for method names that start with ``patch.TEST_PREFIX``. @@ -1516,7 +1516,7 @@ If the class is instantiated multiple times you could use can set the *return_value* to be anything you want. To configure return values on methods of *instances* on the patched class -you must do this on the :attr:`return_value`. For example:: +you must do this on the :attr:`~Mock.return_value`. For example:: >>> class Class: ... def method(self): @@ -1838,13 +1838,13 @@ context manager is a dictionary where created mocks are keyed by name:: patch methods: start and stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -All the patchers have :meth:`start` and :meth:`stop` methods. These make it simpler to do +All the patchers have :meth:`!start` and :meth:`!stop` methods. These make it simpler to do patching in ``setUp`` methods or where you want to do multiple patches without nesting decorators or with statements. To use them call :func:`patch`, :func:`patch.object` or :func:`patch.dict` as normal and keep a reference to the returned ``patcher`` object. 
You can then -call :meth:`start` to put the patch in place and :meth:`stop` to undo it. +call :meth:`!start` to put the patch in place and :meth:`!stop` to undo it. If you are using :func:`patch` to create a mock for you then it will be returned by the call to ``patcher.start``. :: @@ -1861,7 +1861,7 @@ the call to ``patcher.start``. :: A typical use case for this might be for doing multiple patches in the ``setUp`` -method of a :class:`TestCase`:: +method of a :class:`~unittest.TestCase`:: >>> class MyTest(unittest.TestCase): ... def setUp(self): @@ -2534,7 +2534,7 @@ behaviour you can switch it off by setting the module level switch Alternatively you can just use ``vars(my_mock)`` (instance members) and ``dir(type(my_mock))`` (type members) to bypass the filtering irrespective of -:const:`mock.FILTER_DIR`. +:const:`FILTER_DIR`. mock_open @@ -2549,7 +2549,7 @@ mock_open default) then a :class:`MagicMock` will be created for you, with the API limited to methods or attributes available on standard file handles. - *read_data* is a string for the :meth:`~io.IOBase.read`, + *read_data* is a string for the :meth:`~io.RawIOBase.read`, :meth:`~io.IOBase.readline`, and :meth:`~io.IOBase.readlines` methods of the file handle to return. Calls to those methods will take data from *read_data* until it is depleted. The mock of these methods is pretty @@ -2561,7 +2561,7 @@ mock_open .. versionchanged:: 3.4 Added :meth:`~io.IOBase.readline` and :meth:`~io.IOBase.readlines` support. - The mock of :meth:`~io.IOBase.read` changed to consume *read_data* rather + The mock of :meth:`~io.RawIOBase.read` changed to consume *read_data* rather than returning it on each call. .. versionchanged:: 3.5 @@ -2613,7 +2613,7 @@ And for reading files:: Autospeccing ~~~~~~~~~~~~ -Autospeccing is based on the existing :attr:`spec` feature of mock. It limits the +Autospeccing is based on the existing :attr:`!spec` feature of mock. 
It limits the api of mocks to the api of an original object (the spec), but it is recursive (implemented lazily) so that attributes of mocks only have the same api as the attributes of the spec. In addition mocked functions / methods have the @@ -2638,8 +2638,8 @@ unit tests. Testing everything in isolation is all fine and dandy, but if you don't test how your units are "wired together" there is still lots of room for bugs that tests might have caught. -:mod:`mock` already provides a feature to help with this, called speccing. If you -use a class or instance as the :attr:`spec` for a mock then you can only access +:mod:`unittest.mock` already provides a feature to help with this, called speccing. If you +use a class or instance as the :attr:`!spec` for a mock then you can only access attributes on the mock that exist on the real class: >>> from urllib import request @@ -2677,7 +2677,7 @@ Here's an example of it in use:: >>> mock_request.Request -You can see that :class:`request.Request` has a spec. :class:`request.Request` takes two +You can see that :class:`!request.Request` has a spec. :class:`!request.Request` takes two arguments in the constructor (one of which is *self*). Here's what happens if we try to call it incorrectly:: @@ -2693,8 +2693,8 @@ specced mocks):: >>> req -:class:`Request` objects are not callable, so the return value of instantiating our -mocked out :class:`request.Request` is a non-callable mock. With the spec in place +:class:`!Request` objects are not callable, so the return value of instantiating our +mocked out :class:`!request.Request` is a non-callable mock. With the spec in place any typos in our asserts will raise the correct error:: >>> req.add_header('spam', 'eggs') @@ -2846,8 +2846,8 @@ Sealing mocks .. 
versionadded:: 3.7 -Order of precedence of :attr:`side_effect`, :attr:`return_value` and *wraps* ----------------------------------------------------------------------------- +Order of precedence of :attr:`!side_effect`, :attr:`!return_value` and *wraps* +------------------------------------------------------------------------------ The order of their precedence is: From 440632adb2934277599bc246b405393ac54b684c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Oct 2024 16:13:55 +0200 Subject: [PATCH 006/114] gh-111178: Fix function signatures in cellobject.c (#125182) --- Objects/cellobject.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/Objects/cellobject.c b/Objects/cellobject.c index b1154e4ca4ace6..590c8a80857699 100644 --- a/Objects/cellobject.c +++ b/Objects/cellobject.c @@ -5,6 +5,8 @@ #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_object.h" +#define _PyCell_CAST(op) _Py_CAST(PyCellObject*, (op)) + PyObject * PyCell_New(PyObject *obj) { @@ -72,8 +74,9 @@ PyCell_Set(PyObject *op, PyObject *value) } static void -cell_dealloc(PyCellObject *op) +cell_dealloc(PyObject *self) { + PyCellObject *op = _PyCell_CAST(self); _PyObject_GC_UNTRACK(op); Py_XDECREF(op->ob_ref); PyObject_GC_Del(op); @@ -100,10 +103,12 @@ cell_richcompare(PyObject *a, PyObject *b, int op) } static PyObject * -cell_repr(PyCellObject *op) +cell_repr(PyObject *self) { - if (op->ob_ref == NULL) + PyCellObject *op = _PyCell_CAST(self); + if (op->ob_ref == NULL) { return PyUnicode_FromFormat("", op); + } return PyUnicode_FromFormat("", op, Py_TYPE(op->ob_ref)->tp_name, @@ -111,24 +116,26 @@ cell_repr(PyCellObject *op) } static int -cell_traverse(PyCellObject *op, visitproc visit, void *arg) +cell_traverse(PyObject *self, visitproc visit, void *arg) { + PyCellObject *op = _PyCell_CAST(self); Py_VISIT(op->ob_ref); return 0; } static int -cell_clear(PyCellObject *op) +cell_clear(PyObject *self) { + 
PyCellObject *op = _PyCell_CAST(self); Py_CLEAR(op->ob_ref); return 0; } static PyObject * -cell_get_contents(PyCellObject *op, void *closure) +cell_get_contents(PyObject *self, void *closure) { - if (op->ob_ref == NULL) - { + PyCellObject *op = _PyCell_CAST(self); + if (op->ob_ref == NULL) { PyErr_SetString(PyExc_ValueError, "Cell is empty"); return NULL; } @@ -136,15 +143,15 @@ cell_get_contents(PyCellObject *op, void *closure) } static int -cell_set_contents(PyCellObject *op, PyObject *obj, void *Py_UNUSED(ignored)) +cell_set_contents(PyObject *self, PyObject *obj, void *Py_UNUSED(ignored)) { + PyCellObject *op = _PyCell_CAST(self); Py_XSETREF(op->ob_ref, Py_XNewRef(obj)); return 0; } static PyGetSetDef cell_getsetlist[] = { - {"cell_contents", (getter)cell_get_contents, - (setter)cell_set_contents, NULL}, + {"cell_contents", cell_get_contents, cell_set_contents, NULL}, {NULL} /* sentinel */ }; @@ -153,12 +160,12 @@ PyTypeObject PyCell_Type = { "cell", sizeof(PyCellObject), 0, - (destructor)cell_dealloc, /* tp_dealloc */ + cell_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)cell_repr, /* tp_repr */ + cell_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ @@ -170,8 +177,8 @@ PyTypeObject PyCell_Type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ cell_new_doc, /* tp_doc */ - (traverseproc)cell_traverse, /* tp_traverse */ - (inquiry)cell_clear, /* tp_clear */ + cell_traverse, /* tp_traverse */ + cell_clear, /* tp_clear */ cell_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ @@ -186,6 +193,6 @@ PyTypeObject PyCell_Type = { 0, /* tp_dictoffset */ 0, /* tp_init */ 0, /* tp_alloc */ - (newfunc)cell_new, /* tp_new */ + cell_new, /* tp_new */ 0, /* tp_free */ }; From eb18574cc3a0f99fca60b329283d1413abe6eb74 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 9 Oct 2024 15:43:09 +0100 
Subject: [PATCH 007/114] GH-120024: Tidy up pycore_stackref.h, splitting into GIL and free-threading sections (GH-125095) --- Include/internal/pycore_stackref.h | 166 +++++++++++------------------ 1 file changed, 65 insertions(+), 101 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index cf6dd22cfb18d1..7d1eb11aa5ecb8 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -60,54 +60,22 @@ typedef union _PyStackRef { #define Py_TAG_BITS ((uintptr_t)1) #ifdef Py_GIL_DISABLED - static const _PyStackRef PyStackRef_NULL = { .bits = 0 | Py_TAG_DEFERRED}; -#else - static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; -#endif +static const _PyStackRef PyStackRef_NULL = { .bits = Py_TAG_DEFERRED}; #define PyStackRef_IsNull(stackref) ((stackref).bits == PyStackRef_NULL.bits) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_DEFERRED }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_DEFERRED }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_DEFERRED }) - -#ifdef Py_GIL_DISABLED -# define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_DEFERRED }) -#else -# define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) }) -#endif - -#ifdef Py_GIL_DISABLED -# define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_DEFERRED }) -#else -# define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) }) -#endif - -#ifdef Py_GIL_DISABLED -# define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_DEFERRED }) -#else -# define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) }) -#endif - -// Note: the following are all macros because MSVC (Windows) has trouble inlining them. 
- -#define PyStackRef_Is(a, b) ((a).bits == (b).bits) - -#define PyStackRef_IsDeferred(ref) (((ref).bits & Py_TAG_BITS) == Py_TAG_DEFERRED) - - -#ifdef Py_GIL_DISABLED -// Gets a PyObject * from a _PyStackRef static inline PyObject * PyStackRef_AsPyObjectBorrow(_PyStackRef stackref) { PyObject *cleared = ((PyObject *)((stackref).bits & (~Py_TAG_BITS))); return cleared; } -#else -# define PyStackRef_AsPyObjectBorrow(stackref) ((PyObject *)(stackref).bits) -#endif -// Converts a PyStackRef back to a PyObject *, stealing the -// PyStackRef. -#ifdef Py_GIL_DISABLED +#define PyStackRef_IsDeferred(ref) (((ref).bits & Py_TAG_BITS) == Py_TAG_DEFERRED) + static inline PyObject * PyStackRef_AsPyObjectSteal(_PyStackRef stackref) { @@ -117,18 +85,7 @@ PyStackRef_AsPyObjectSteal(_PyStackRef stackref) } return PyStackRef_AsPyObjectBorrow(stackref); } -#else -# define PyStackRef_AsPyObjectSteal(stackref) PyStackRef_AsPyObjectBorrow(stackref) -#endif - -// Converts a PyStackRef back to a PyObject *, converting the -// stackref to a new reference. 
-#define PyStackRef_AsPyObjectNew(stackref) Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref)) - -#define PyStackRef_TYPE(stackref) Py_TYPE(PyStackRef_AsPyObjectBorrow(stackref)) -// Converts a PyObject * to a PyStackRef, stealing the reference -#ifdef Py_GIL_DISABLED static inline _PyStackRef _PyStackRef_FromPyObjectSteal(PyObject *obj) { @@ -139,13 +96,7 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) return ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); } # define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj)) -#else -# define PyStackRef_FromPyObjectSteal(obj) ((_PyStackRef){.bits = ((uintptr_t)(obj))}) -#endif - -// Converts a PyObject * to a PyStackRef, with a new reference -#ifdef Py_GIL_DISABLED static inline _PyStackRef PyStackRef_FromPyObjectNew(PyObject *obj) { @@ -159,13 +110,8 @@ PyStackRef_FromPyObjectNew(PyObject *obj) return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR }; } } -# define PyStackRef_FromPyObjectNew(obj) PyStackRef_FromPyObjectNew(_PyObject_CAST(obj)) -#else -# define PyStackRef_FromPyObjectNew(obj) ((_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) }) -#endif +#define PyStackRef_FromPyObjectNew(obj) PyStackRef_FromPyObjectNew(_PyObject_CAST(obj)) -#ifdef Py_GIL_DISABLED -// Same as PyStackRef_FromPyObjectNew but only for immortal objects. 
static inline _PyStackRef PyStackRef_FromPyObjectImmortal(PyObject *obj) { @@ -175,24 +121,9 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) assert(_Py_IsImmortal(obj)); return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_DEFERRED }; } -# define PyStackRef_FromPyObjectImmortal(obj) PyStackRef_FromPyObjectImmortal(_PyObject_CAST(obj)) -#else -# define PyStackRef_FromPyObjectImmortal(obj) ((_PyStackRef){ .bits = (uintptr_t)(obj) }) -#endif - - -#define PyStackRef_CLEAR(op) \ - do { \ - _PyStackRef *_tmp_op_ptr = &(op); \ - _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ - if (!PyStackRef_IsNull(_tmp_old_op)) { \ - *_tmp_op_ptr = PyStackRef_NULL; \ - PyStackRef_CLOSE(_tmp_old_op); \ - } \ - } while (0) +#define PyStackRef_FromPyObjectImmortal(obj) PyStackRef_FromPyObjectImmortal(_PyObject_CAST(obj)) -#ifdef Py_GIL_DISABLED -# define PyStackRef_CLOSE(REF) \ +#define PyStackRef_CLOSE(REF) \ do { \ _PyStackRef _close_tmp = (REF); \ assert(!PyStackRef_IsNull(_close_tmp)); \ @@ -200,20 +131,7 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) Py_DECREF(PyStackRef_AsPyObjectBorrow(_close_tmp)); \ } \ } while (0) -#else -# define PyStackRef_CLOSE(stackref) Py_DECREF(PyStackRef_AsPyObjectBorrow(stackref)) -#endif -#define PyStackRef_XCLOSE(stackref) \ - do { \ - _PyStackRef _tmp = (stackref); \ - if (!PyStackRef_IsNull(_tmp)) { \ - PyStackRef_CLOSE(_tmp); \ - } \ - } while (0); - - -#ifdef Py_GIL_DISABLED static inline _PyStackRef PyStackRef_DUP(_PyStackRef stackref) { @@ -227,9 +145,6 @@ PyStackRef_DUP(_PyStackRef stackref) Py_INCREF(PyStackRef_AsPyObjectBorrow(stackref)); return stackref; } -#else -# define PyStackRef_DUP(stackref) PyStackRef_FromPyObjectSteal(Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref))) -#endif // Convert a possibly deferred reference to a strong reference. 
static inline _PyStackRef @@ -238,13 +153,62 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) return PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(stackref)); } -static inline void -_PyObjectStack_FromStackRefStack(PyObject **dst, const _PyStackRef *src, size_t length) -{ - for (size_t i = 0; i < length; i++) { - dst[i] = PyStackRef_AsPyObjectBorrow(src[i]); - } -} + +#else // Py_GIL_DISABLED + +// With GIL +static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; +#define PyStackRef_IsNull(stackref) ((stackref).bits == 0) +#define PyStackRef_True ((_PyStackRef){.bits = (uintptr_t)&_Py_TrueStruct }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) }) + +#define PyStackRef_AsPyObjectBorrow(stackref) ((PyObject *)(stackref).bits) + +#define PyStackRef_AsPyObjectSteal(stackref) PyStackRef_AsPyObjectBorrow(stackref) + +#define PyStackRef_FromPyObjectSteal(obj) ((_PyStackRef){.bits = ((uintptr_t)(obj))}) + +#define PyStackRef_FromPyObjectNew(obj) ((_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) }) + +#define PyStackRef_FromPyObjectImmortal(obj) ((_PyStackRef){ .bits = (uintptr_t)(obj) }) + +#define PyStackRef_CLOSE(stackref) Py_DECREF(PyStackRef_AsPyObjectBorrow(stackref)) + +#define PyStackRef_DUP(stackref) PyStackRef_FromPyObjectSteal(Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref))) + + +#endif // Py_GIL_DISABLED + +// Note: this is a macro because MSVC (Windows) has trouble inlining it. + +#define PyStackRef_Is(a, b) ((a).bits == (b).bits) + +// Converts a PyStackRef back to a PyObject *, converting the +// stackref to a new reference. 
+#define PyStackRef_AsPyObjectNew(stackref) Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref)) + +#define PyStackRef_TYPE(stackref) Py_TYPE(PyStackRef_AsPyObjectBorrow(stackref)) + +#define PyStackRef_CLEAR(op) \ + do { \ + _PyStackRef *_tmp_op_ptr = &(op); \ + _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ + if (!PyStackRef_IsNull(_tmp_old_op)) { \ + *_tmp_op_ptr = PyStackRef_NULL; \ + PyStackRef_CLOSE(_tmp_old_op); \ + } \ + } while (0) + +#define PyStackRef_XCLOSE(stackref) \ + do { \ + _PyStackRef _tmp = (stackref); \ + if (!PyStackRef_IsNull(_tmp)) { \ + PyStackRef_CLOSE(_tmp); \ + } \ + } while (0); + + // StackRef type checks From 3ee474f5683110e153fdd0cbd2024f99d6c124e5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Oct 2024 17:02:24 +0200 Subject: [PATCH 008/114] gh-111178: Fix function signatures in codeobject.c (#125180) --- Objects/codeobject.c | 80 ++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 32 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 8a2f4d32b911d9..de80f6cca2904f 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1278,8 +1278,9 @@ typedef struct { static void -lineiter_dealloc(lineiterator *li) +lineiter_dealloc(PyObject *self) { + lineiterator *li = (lineiterator*)self; Py_DECREF(li->li_code); Py_TYPE(li)->tp_free(li); } @@ -1293,8 +1294,9 @@ _source_offset_converter(int *value) { } static PyObject * -lineiter_next(lineiterator *li) +lineiter_next(PyObject *self) { + lineiterator *li = (lineiterator*)self; PyCodeAddressRange *bounds = &li->li_line; if (!_PyLineTable_NextAddressRange(bounds)) { return NULL; @@ -1318,7 +1320,7 @@ PyTypeObject _PyLineIterator = { sizeof(lineiterator), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)lineiter_dealloc, /* tp_dealloc */ + lineiter_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -1340,7 +1342,7 @@ PyTypeObject _PyLineIterator = { 0, /* 
tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ - (iternextfunc)lineiter_next, /* tp_iternext */ + lineiter_next, /* tp_iternext */ 0, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ @@ -1379,15 +1381,17 @@ typedef struct { } positionsiterator; static void -positionsiter_dealloc(positionsiterator* pi) +positionsiter_dealloc(PyObject *self) { + positionsiterator *pi = (positionsiterator*)self; Py_DECREF(pi->pi_code); Py_TYPE(pi)->tp_free(pi); } static PyObject* -positionsiter_next(positionsiterator* pi) +positionsiter_next(PyObject *self) { + positionsiterator *pi = (positionsiterator*)self; if (pi->pi_offset >= pi->pi_range.ar_end) { assert(pi->pi_offset == pi->pi_range.ar_end); if (at_end(&pi->pi_range)) { @@ -1409,7 +1413,7 @@ PyTypeObject _PyPositionsIterator = { sizeof(positionsiterator), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)positionsiter_dealloc, /* tp_dealloc */ + positionsiter_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -1431,7 +1435,7 @@ PyTypeObject _PyPositionsIterator = { 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ - (iternextfunc)positionsiter_next, /* tp_iternext */ + positionsiter_next, /* tp_iternext */ 0, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ @@ -1447,8 +1451,9 @@ PyTypeObject _PyPositionsIterator = { }; static PyObject* -code_positionsiterator(PyCodeObject* code, PyObject* Py_UNUSED(args)) +code_positionsiterator(PyObject *self, PyObject* Py_UNUSED(args)) { + PyCodeObject *code = (PyCodeObject*)self; positionsiterator* pi = (positionsiterator*)PyType_GenericAlloc(&_PyPositionsIterator, 0); if (pi == NULL) { return NULL; @@ -1875,16 +1880,18 @@ code_dealloc(PyCodeObject *co) #ifdef Py_GIL_DISABLED static int -code_traverse(PyCodeObject *co, visitproc visit, void *arg) +code_traverse(PyObject *self, visitproc visit, void *arg) { + PyCodeObject *co = (PyCodeObject*)self; 
Py_VISIT(co->co_consts); return 0; } #endif static PyObject * -code_repr(PyCodeObject *co) +code_repr(PyObject *self) { + PyCodeObject *co = (PyCodeObject*)self; int lineno; if (co->co_firstlineno != 0) lineno = co->co_firstlineno; @@ -1991,8 +1998,9 @@ code_richcompare(PyObject *self, PyObject *other, int op) } static Py_hash_t -code_hash(PyCodeObject *co) +code_hash(PyObject *self) { + PyCodeObject *co = (PyCodeObject*)self; Py_uhash_t uhash = 20221211; #define SCRAMBLE_IN(H) do { \ uhash ^= (Py_uhash_t)(H); \ @@ -2053,8 +2061,9 @@ static PyMemberDef code_memberlist[] = { static PyObject * -code_getlnotab(PyCodeObject *code, void *closure) +code_getlnotab(PyObject *self, void *closure) { + PyCodeObject *code = (PyCodeObject*)self; if (PyErr_WarnEx(PyExc_DeprecationWarning, "co_lnotab is deprecated, use co_lines instead.", 1) < 0) { @@ -2064,51 +2073,57 @@ code_getlnotab(PyCodeObject *code, void *closure) } static PyObject * -code_getvarnames(PyCodeObject *code, void *closure) +code_getvarnames(PyObject *self, void *closure) { + PyCodeObject *code = (PyCodeObject*)self; return _PyCode_GetVarnames(code); } static PyObject * -code_getcellvars(PyCodeObject *code, void *closure) +code_getcellvars(PyObject *self, void *closure) { + PyCodeObject *code = (PyCodeObject*)self; return _PyCode_GetCellvars(code); } static PyObject * -code_getfreevars(PyCodeObject *code, void *closure) +code_getfreevars(PyObject *self, void *closure) { + PyCodeObject *code = (PyCodeObject*)self; return _PyCode_GetFreevars(code); } static PyObject * -code_getcodeadaptive(PyCodeObject *code, void *closure) +code_getcodeadaptive(PyObject *self, void *closure) { + PyCodeObject *code = (PyCodeObject*)self; return PyBytes_FromStringAndSize(code->co_code_adaptive, _PyCode_NBYTES(code)); } static PyObject * -code_getcode(PyCodeObject *code, void *closure) +code_getcode(PyObject *self, void *closure) { + PyCodeObject *code = (PyCodeObject*)self; return _PyCode_GetCode(code); } static PyGetSetDef 
code_getsetlist[] = { - {"co_lnotab", (getter)code_getlnotab, NULL, NULL}, - {"_co_code_adaptive", (getter)code_getcodeadaptive, NULL, NULL}, + {"co_lnotab", code_getlnotab, NULL, NULL}, + {"_co_code_adaptive", code_getcodeadaptive, NULL, NULL}, // The following old names are kept for backward compatibility. - {"co_varnames", (getter)code_getvarnames, NULL, NULL}, - {"co_cellvars", (getter)code_getcellvars, NULL, NULL}, - {"co_freevars", (getter)code_getfreevars, NULL, NULL}, - {"co_code", (getter)code_getcode, NULL, NULL}, + {"co_varnames", code_getvarnames, NULL, NULL}, + {"co_cellvars", code_getcellvars, NULL, NULL}, + {"co_freevars", code_getfreevars, NULL, NULL}, + {"co_code", code_getcode, NULL, NULL}, {0} }; static PyObject * -code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args)) +code_sizeof(PyObject *self, PyObject *Py_UNUSED(args)) { + PyCodeObject *co = (PyCodeObject*)self; size_t res = _PyObject_VAR_SIZE(Py_TYPE(co), Py_SIZE(co)); _PyCodeObjectExtra *co_extra = (_PyCodeObjectExtra*) co->co_extra; if (co_extra != NULL) { @@ -2119,8 +2134,9 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args)) } static PyObject * -code_linesiterator(PyCodeObject *code, PyObject *Py_UNUSED(args)) +code_linesiterator(PyObject *self, PyObject *Py_UNUSED(args)) { + PyCodeObject *code = (PyCodeObject*)self; return (PyObject *)new_linesiterator(code); } @@ -2262,9 +2278,9 @@ code__varname_from_oparg_impl(PyCodeObject *self, int oparg) /* XXX code objects need to participate in GC? 
*/ static struct PyMethodDef code_methods[] = { - {"__sizeof__", (PyCFunction)code_sizeof, METH_NOARGS}, - {"co_lines", (PyCFunction)code_linesiterator, METH_NOARGS}, - {"co_positions", (PyCFunction)code_positionsiterator, METH_NOARGS}, + {"__sizeof__", code_sizeof, METH_NOARGS}, + {"co_lines", code_linesiterator, METH_NOARGS}, + {"co_positions", code_positionsiterator, METH_NOARGS}, CODE_REPLACE_METHODDEF CODE__VARNAME_FROM_OPARG_METHODDEF {"__replace__", _PyCFunction_CAST(code_replace), METH_FASTCALL|METH_KEYWORDS, @@ -2283,11 +2299,11 @@ PyTypeObject PyCode_Type = { 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)code_repr, /* tp_repr */ + code_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ - (hashfunc)code_hash, /* tp_hash */ + code_hash, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ @@ -2300,7 +2316,7 @@ PyTypeObject PyCode_Type = { #endif code_new__doc__, /* tp_doc */ #ifdef Py_GIL_DISABLED - (traverseproc)code_traverse, /* tp_traverse */ + code_traverse, /* tp_traverse */ #else 0, /* tp_traverse */ #endif From 6a39e96ab8ebc1144f713988ac6fe439e4476488 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Oct 2024 17:12:11 +0200 Subject: [PATCH 009/114] gh-115754: Use Py_GetConstant(Py_CONSTANT_EMPTY_BYTES) (#125195) Replace PyBytes_FromString("") and PyBytes_FromStringAndSize("", 0) with Py_GetConstant(Py_CONSTANT_EMPTY_BYTES). 
--- Modules/_ctypes/_ctypes.c | 4 ++-- Modules/_io/textio.c | 2 +- Modules/mmapmodule.c | 4 ++-- Modules/zlibmodule.c | 4 ++-- Objects/bytesobject.c | 2 +- Parser/action_helpers.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 951e6914ba67a4..9453ed3250bc3b 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -4732,7 +4732,7 @@ Array_subscript(PyObject *myself, PyObject *item) char *dest; if (slicelen <= 0) - return PyBytes_FromStringAndSize("", 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); if (step == 1) { return PyBytes_FromStringAndSize(ptr + start, slicelen); @@ -5418,7 +5418,7 @@ Pointer_subscript(PyObject *myself, PyObject *item) char *dest; if (len <= 0) - return PyBytes_FromStringAndSize("", 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); if (step == 1) { return PyBytes_FromStringAndSize(ptr + start, len); diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 439e26c5271939..68d16361962412 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -559,7 +559,7 @@ _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self) Py_DECREF(state); } else { - buffer = PyBytes_FromString(""); + buffer = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); flag = 0; } flag <<= 1; diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 99a85e9e49ad47..e1c26e19932664 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -486,7 +486,7 @@ mmap_read_line_method(mmap_object *self, remaining = (self->pos < self->size) ? 
self->size - self->pos : 0; if (!remaining) - return PyBytes_FromString(""); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); start = self->data + self->pos; if (safe_memchr(&eol, start, '\n', remaining) < 0) { @@ -1274,7 +1274,7 @@ mmap_subscript(mmap_object *self, PyObject *item) CHECK_VALID(NULL); if (slicelen <= 0) - return PyBytes_FromStringAndSize("", 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); else if (step == 1) return _safe_PyBytes_FromStringAndSize(self->data + start, slicelen); else { diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index c5aaf22eeb2948..78dcce73cdaade 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -267,12 +267,12 @@ newcompobject(PyTypeObject *type) self->eof = 0; self->is_initialised = 0; self->zdict = NULL; - self->unused_data = PyBytes_FromStringAndSize("", 0); + self->unused_data = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); if (self->unused_data == NULL) { Py_DECREF(self); return NULL; } - self->unconsumed_tail = PyBytes_FromStringAndSize("", 0); + self->unconsumed_tail = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); if (self->unconsumed_tail == NULL) { Py_DECREF(self); return NULL; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 58a4feed351707..bf58e55e100b3a 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1638,7 +1638,7 @@ bytes_subscript(PyObject *op, PyObject* item) &stop, step); if (slicelength <= 0) { - return PyBytes_FromStringAndSize("", 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else if (start == 0 && step == 1 && slicelength == PyBytes_GET_SIZE(self) && diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 1972c606827cdb..24b817c6f8ff27 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1541,7 +1541,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, } if (bytes_found) { - PyObject* res = PyBytes_FromString(""); + PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); /* Bytes literals never 
get a kind, but just for consistency since they are represented as Constant nodes, we'll mirror From b9a8ca0a6aa9251cb798f34f0c9d2cc95107eec6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Oct 2024 17:15:23 +0200 Subject: [PATCH 010/114] gh-115754: Use Py_GetConstant(Py_CONSTANT_EMPTY_STR) (#125194) Replace PyUnicode_New(0, 0), PyUnicode_FromString("") and PyUnicode_FromStringAndSize("", 0) with Py_GetConstant(Py_CONSTANT_EMPTY_STR). --- Doc/includes/newtypes/custom2.c | 4 ++-- Doc/includes/newtypes/custom3.c | 4 ++-- Doc/includes/newtypes/custom4.c | 4 ++-- Modules/_ctypes/_ctypes.c | 4 ++-- Modules/_datetimemodule.c | 2 +- Modules/_elementtree.c | 4 ++-- Modules/_functoolsmodule.c | 2 +- Modules/_io/stringio.c | 2 +- Modules/_testcapi/datetime.c | 2 +- Modules/cjkcodecs/multibytecodec.c | 4 ++-- Modules/socketmodule.c | 2 +- Modules/unicodedata.c | 2 +- Objects/abstract.c | 2 +- Objects/exceptions.c | 8 ++++---- Objects/stringlib/unicode_format.h | 4 ++-- Parser/pegen_errors.c | 4 ++-- Python/Python-tokenize.c | 6 +++--- Python/ceval.c | 2 +- Python/codecs.c | 2 +- Python/formatter_unicode.c | 2 +- Python/marshal.c | 2 +- Python/symtable.c | 2 +- 22 files changed, 35 insertions(+), 35 deletions(-) diff --git a/Doc/includes/newtypes/custom2.c b/Doc/includes/newtypes/custom2.c index a0222b1795209b..768ce29fab9ff0 100644 --- a/Doc/includes/newtypes/custom2.c +++ b/Doc/includes/newtypes/custom2.c @@ -23,12 +23,12 @@ Custom_new(PyTypeObject *type, PyObject *args, PyObject *kwds) CustomObject *self; self = (CustomObject *) type->tp_alloc(type, 0); if (self != NULL) { - self->first = PyUnicode_FromString(""); + self->first = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (self->first == NULL) { Py_DECREF(self); return NULL; } - self->last = PyUnicode_FromString(""); + self->last = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (self->last == NULL) { Py_DECREF(self); return NULL; diff --git a/Doc/includes/newtypes/custom3.c b/Doc/includes/newtypes/custom3.c index 
4aeebe0a7507d1..7d969adfa7c9cc 100644 --- a/Doc/includes/newtypes/custom3.c +++ b/Doc/includes/newtypes/custom3.c @@ -23,12 +23,12 @@ Custom_new(PyTypeObject *type, PyObject *args, PyObject *kwds) CustomObject *self; self = (CustomObject *) type->tp_alloc(type, 0); if (self != NULL) { - self->first = PyUnicode_FromString(""); + self->first = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (self->first == NULL) { Py_DECREF(self); return NULL; } - self->last = PyUnicode_FromString(""); + self->last = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (self->last == NULL) { Py_DECREF(self); return NULL; diff --git a/Doc/includes/newtypes/custom4.c b/Doc/includes/newtypes/custom4.c index 3998918f68301e..a7b8de44a57c90 100644 --- a/Doc/includes/newtypes/custom4.c +++ b/Doc/includes/newtypes/custom4.c @@ -39,12 +39,12 @@ Custom_new(PyTypeObject *type, PyObject *args, PyObject *kwds) CustomObject *self; self = (CustomObject *) type->tp_alloc(type, 0); if (self != NULL) { - self->first = PyUnicode_FromString(""); + self->first = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (self->first == NULL) { Py_DECREF(self); return NULL; } - self->last = PyUnicode_FromString(""); + self->last = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (self->last == NULL) { Py_DECREF(self); return NULL; diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 9453ed3250bc3b..8435ee4090b9e5 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -4756,7 +4756,7 @@ Array_subscript(PyObject *myself, PyObject *item) wchar_t *dest; if (slicelen <= 0) - return PyUnicode_New(0, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (step == 1) { return PyUnicode_FromWideChar(ptr + start, slicelen); @@ -5438,7 +5438,7 @@ Pointer_subscript(PyObject *myself, PyObject *item) wchar_t *dest; if (len <= 0) - return PyUnicode_New(0, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (step == 1) { return PyUnicode_FromWideChar(ptr + start, len); diff --git a/Modules/_datetimemodule.c 
b/Modules/_datetimemodule.c index 90527d2a3e0350..2ba46cddb4f558 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -2921,7 +2921,7 @@ delta_bool(PyDateTime_Delta *self) static PyObject * delta_repr(PyDateTime_Delta *self) { - PyObject *args = PyUnicode_FromString(""); + PyObject *args = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (args == NULL) { return NULL; diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index ec999582d2fb9d..e134e096e044b7 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -196,7 +196,7 @@ list_join(PyObject* list) PyObject* joiner; PyObject* result; - joiner = PyUnicode_FromStringAndSize("", 0); + joiner = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (!joiner) return NULL; result = PyUnicode_Join(joiner, list); @@ -1317,7 +1317,7 @@ _elementtree_Element_findtext_impl(ElementObject *self, PyTypeObject *cls, PyObject* text = element_get_text((ElementObject*)item); if (text == Py_None) { Py_DECREF(item); - return PyUnicode_New(0, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); } Py_XINCREF(text); Py_DECREF(item); diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index 31cf7bcc09782c..4ab3adc0fe44cc 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -604,7 +604,7 @@ partial_repr(partialobject *pto) return PyUnicode_FromString("..."); } - arglist = PyUnicode_FromString(""); + arglist = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (arglist == NULL) goto done; /* Pack positional arguments */ diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index 6d48bcb552b4bf..f558613dc6233c 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -353,7 +353,7 @@ _stringio_readline(stringio *self, Py_ssize_t limit) /* In case of overseek, return the empty string */ if (self->pos >= self->string_size) - return PyUnicode_New(0, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); start = self->buf + self->pos; if (limit < 0 || limit > self->string_size - 
self->pos) diff --git a/Modules/_testcapi/datetime.c b/Modules/_testcapi/datetime.c index f3d54215e04232..b800f9b8eb3473 100644 --- a/Modules/_testcapi/datetime.c +++ b/Modules/_testcapi/datetime.c @@ -129,7 +129,7 @@ static PyObject * get_timezones_offset_zero(PyObject *self, PyObject *args) { PyObject *offset = PyDelta_FromDSU(0, 0, 0); - PyObject *name = PyUnicode_FromString(""); + PyObject *name = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (offset == NULL || name == NULL) { Py_XDECREF(offset); Py_XDECREF(name); diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 373518673dd352..53135ae4aa7968 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -669,7 +669,7 @@ _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self, if (datalen == 0) { ERROR_DECREF(errorcb); - return make_tuple(PyUnicode_New(0, 0), 0); + return make_tuple(Py_GetConstant(Py_CONSTANT_EMPTY_STR), 0); } _PyUnicodeWriter_Init(&buf.writer); @@ -1434,7 +1434,7 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, Py_ssize_t rsize; if (sizehint == 0) - return PyUnicode_New(0, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); _PyUnicodeWriter_Init(&buf.writer); buf.excobj = NULL; diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index ded6f255aaddea..0829d2358129d2 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -5636,7 +5636,7 @@ socket_gethostname(PyObject *self, PyObject *unused) return PyErr_SetFromWindowsErr(0); if (size == 0) - return PyUnicode_New(0, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); /* MSDN says ERROR_MORE_DATA may occur because DNS allows longer names */ diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 333ffe68a454e4..60bde755d24574 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -413,7 +413,7 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr) if (UCD_Check(self)) { const change_record *old = 
get_old_record(self, c); if (old->category_changed == 0) - return PyUnicode_FromString(""); /* unassigned */ + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); /* unassigned */ } if (code < 0 || code >= 0x110000) diff --git a/Objects/abstract.c b/Objects/abstract.c index 7cca81464cd112..f6647874d732f6 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -862,7 +862,7 @@ PyObject_Format(PyObject *obj, PyObject *format_spec) /* If no format_spec is provided, use an empty string */ if (format_spec == NULL) { - empty = PyUnicode_New(0, 0); + empty = Py_GetConstant(Py_CONSTANT_EMPTY_STR); format_spec = empty; } diff --git a/Objects/exceptions.c b/Objects/exceptions.c index c685481b13a93a..6fbe0f197eaebf 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -154,7 +154,7 @@ BaseException_str(PyBaseExceptionObject *self) { switch (PyTuple_GET_SIZE(self->args)) { case 0: - return PyUnicode_FromString(""); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); case 1: return PyObject_Str(PyTuple_GET_ITEM(self->args, 0)); default: @@ -3001,7 +3001,7 @@ UnicodeEncodeError_str(PyObject *self) if (exc->object == NULL) { /* Not properly initialized. */ - return PyUnicode_FromString(""); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); } /* Get reason and encoding as strings, which they might not be if @@ -3123,7 +3123,7 @@ UnicodeDecodeError_str(PyObject *self) if (exc->object == NULL) { /* Not properly initialized. */ - return PyUnicode_FromString(""); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); } /* Get reason and encoding as strings, which they might not be if @@ -3224,7 +3224,7 @@ UnicodeTranslateError_str(PyObject *self) if (exc->object == NULL) { /* Not properly initialized. 
*/ - return PyUnicode_FromString(""); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); } /* Get reason as a string, which it might not be if it's been diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index 91c71ab5736c25..44b269ba8ceb55 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -73,7 +73,7 @@ Py_LOCAL_INLINE(PyObject *) SubString_new_object_or_empty(SubString *str) { if (str->str == NULL) { - return PyUnicode_New(0, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); } return SubString_new_object(str); } @@ -531,7 +531,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write format_spec->start, format_spec->end); else - format_spec_object = PyUnicode_New(0, 0); + format_spec_object = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (format_spec_object == NULL) goto done; diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index e94a4923228d0f..6146f69912bfa3 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -276,7 +276,7 @@ get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno) assert(p->tok->fp_interactive); // We can reach this point if the tokenizer buffers for interactive source have not been // initialized because we failed to decode the original source with the given locale. - return PyUnicode_FromStringAndSize("", 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); } Py_ssize_t relative_lineno = p->starting_lineno ? 
lineno - p->starting_lineno + 1 : lineno; @@ -359,7 +359,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, error_line = get_error_line_from_tokenizer_buffers(p, lineno); } else { - error_line = PyUnicode_FromStringAndSize("", 0); + error_line = Py_GetConstant(Py_CONSTANT_EMPTY_STR); } if (!error_line) { goto error; diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 34b4445be27f62..50ce83d18f6e73 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -263,7 +263,7 @@ tokenizeriter_next(tokenizeriterobject *it) } PyObject *str = NULL; if (token.start == NULL || token.end == NULL) { - str = PyUnicode_FromString(""); + str = Py_GetConstant(Py_CONSTANT_EMPTY_STR); } else { str = PyUnicode_FromStringAndSize(token.start, token.end - token.start); @@ -281,7 +281,7 @@ tokenizeriter_next(tokenizeriterobject *it) PyObject* line = NULL; int line_changed = 1; if (it->tok->tok_extra_tokens && is_trailing_token) { - line = PyUnicode_FromString(""); + line = Py_GetConstant(Py_CONSTANT_EMPTY_STR); } else { Py_ssize_t size = it->tok->inp - line_start; if (size >= 1 && it->tok->implicit_newline) { @@ -326,7 +326,7 @@ tokenizeriter_next(tokenizeriterobject *it) else if (type == NL) { if (it->tok->implicit_newline) { Py_DECREF(str); - str = PyUnicode_FromString(""); + str = Py_GetConstant(Py_CONSTANT_EMPTY_STR); } } diff --git a/Python/ceval.c b/Python/ceval.c index ba5c70b25f0a89..f4e0add3034707 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1287,7 +1287,7 @@ too_many_positional(PyThreadState *tstate, PyCodeObject *co, } else { /* This will not fail. 
*/ - kwonly_sig = PyUnicode_FromString(""); + kwonly_sig = Py_GetConstant(Py_CONSTANT_EMPTY_STR); assert(kwonly_sig != NULL); } _PyErr_Format(tstate, PyExc_TypeError, diff --git a/Python/codecs.c b/Python/codecs.c index 68dc232bb86163..2cb3875db35058 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -696,7 +696,7 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc) wrong_exception_type(exc); return NULL; } - return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end); + return Py_BuildValue("(Nn)", Py_GetConstant(Py_CONSTANT_EMPTY_STR), end); } diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index ebd67214f43042..16f711184990ac 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -740,7 +740,7 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info) break; case LT_NO_LOCALE: locale_info->decimal_point = PyUnicode_FromOrdinal('.'); - locale_info->thousands_sep = PyUnicode_New(0, 0); + locale_info->thousands_sep = Py_GetConstant(Py_CONSTANT_EMPTY_STR); if (!locale_info->decimal_point || !locale_info->thousands_sep) return -1; locale_info->grouping = no_grouping; diff --git a/Python/marshal.c b/Python/marshal.c index 3d127b4e331d0d..a280fbfd078f41 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -1226,7 +1226,7 @@ r_object(RFILE *p) v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); } else { - v = PyUnicode_New(0, 0); + v = Py_GetConstant(Py_CONSTANT_EMPTY_STR); } if (v == NULL) break; diff --git a/Python/symtable.c b/Python/symtable.c index 8bc9db6d7d6811..52be910c0b6a9b 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -354,7 +354,7 @@ static void _dump_symtable(PySTEntryObject* ste, PyObject* prefix) static void dump_symtable(PySTEntryObject* ste) { - PyObject *empty = PyUnicode_FromString(""); + PyObject *empty = Py_GetConstant(Py_CONSTANT_EMPTY_STR); assert(empty != NULL); _dump_symtable(ste, empty); Py_DECREF(empty); From f978fb4f8d6eac0585057e463bb1701dc04a9900 Mon Sep 17 00:00:00 2001 From: 
mpage Date: Wed, 9 Oct 2024 08:18:25 -0700 Subject: [PATCH 011/114] gh-115999: Refactor `LOAD_GLOBAL` specializations to avoid reloading {globals, builtins} keys (gh-124953) Each of the `LOAD_GLOBAL` specializations is implemented roughly as: 1. Load keys version. 2. Load cached keys version. 3. Deopt if (1) and (2) don't match. 4. Load keys. 5. Load cached index into keys. 6. Load object from (4) at offset from (5). This is not thread-safe in free-threaded builds; the keys object may be replaced in between steps (3) and (4). This change refactors the specializations to avoid reloading the keys object and instead pass the keys object from guards to be consumed by downstream uops. --- Include/internal/pycore_opcode_metadata.h | 4 +- Include/internal/pycore_uop_ids.h | 213 +++++++++++----------- Include/internal/pycore_uop_metadata.h | 28 ++- Python/bytecodes.c | 57 ++++-- Python/executor_cases.c.h | 91 ++++++++- Python/generated_cases.c.h | 22 ++- Python/optimizer_analysis.c | 42 ++++- Python/optimizer_bytecodes.c | 10 + Python/optimizer_cases.c.h | 60 +++++- 9 files changed, 372 insertions(+), 155 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index a0d3072d2cd5f6..8fec45b1e8d5c3 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1362,8 +1362,8 @@ _PyOpcode_macro_expansion[256] = { [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, 5, 0 }, { _LOAD_FAST, 6, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, [LOAD_GLOBAL] = { .nuops = 1, .uops = { { _LOAD_GLOBAL, 0, 0 } } }, - [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION, 1, 2 }, { _LOAD_GLOBAL_BUILTINS, 1, 3 } } }, - [LOAD_GLOBAL_MODULE] = { .nuops = 2, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _LOAD_GLOBAL_MODULE, 1, 3 } } }, + [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops 
= { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION_PUSH_KEYS, 1, 2 }, { _LOAD_GLOBAL_BUILTINS_FROM_KEYS, 1, 3 } } }, + [LOAD_GLOBAL_MODULE] = { .nuops = 2, .uops = { { _GUARD_GLOBALS_VERSION_PUSH_KEYS, 1, 1 }, { _LOAD_GLOBAL_MODULE_FROM_KEYS, 1, 3 } } }, [LOAD_LOCALS] = { .nuops = 1, .uops = { { _LOAD_LOCALS, 0, 0 } } }, [LOAD_NAME] = { .nuops = 1, .uops = { { _LOAD_NAME, 0, 0 } } }, [LOAD_SPECIAL] = { .nuops = 1, .uops = { { _LOAD_SPECIAL, 0, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 48bafacabcfd1d..1951c65a2871cf 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -120,32 +120,33 @@ extern "C" { #define _GUARD_BOTH_FLOAT 367 #define _GUARD_BOTH_INT 368 #define _GUARD_BOTH_UNICODE 369 -#define _GUARD_BUILTINS_VERSION 370 +#define _GUARD_BUILTINS_VERSION_PUSH_KEYS 370 #define _GUARD_DORV_NO_DICT 371 #define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 372 #define _GUARD_GLOBALS_VERSION 373 -#define _GUARD_IS_FALSE_POP 374 -#define _GUARD_IS_NONE_POP 375 -#define _GUARD_IS_NOT_NONE_POP 376 -#define _GUARD_IS_TRUE_POP 377 -#define _GUARD_KEYS_VERSION 378 -#define _GUARD_NOS_FLOAT 379 -#define _GUARD_NOS_INT 380 -#define _GUARD_NOT_EXHAUSTED_LIST 381 -#define _GUARD_NOT_EXHAUSTED_RANGE 382 -#define _GUARD_NOT_EXHAUSTED_TUPLE 383 -#define _GUARD_TOS_FLOAT 384 -#define _GUARD_TOS_INT 385 -#define _GUARD_TYPE_VERSION 386 +#define _GUARD_GLOBALS_VERSION_PUSH_KEYS 374 +#define _GUARD_IS_FALSE_POP 375 +#define _GUARD_IS_NONE_POP 376 +#define _GUARD_IS_NOT_NONE_POP 377 +#define _GUARD_IS_TRUE_POP 378 +#define _GUARD_KEYS_VERSION 379 +#define _GUARD_NOS_FLOAT 380 +#define _GUARD_NOS_INT 381 +#define _GUARD_NOT_EXHAUSTED_LIST 382 +#define _GUARD_NOT_EXHAUSTED_RANGE 383 +#define _GUARD_NOT_EXHAUSTED_TUPLE 384 +#define _GUARD_TOS_FLOAT 385 +#define _GUARD_TOS_INT 386 +#define _GUARD_TYPE_VERSION 387 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define 
_INIT_CALL_BOUND_METHOD_EXACT_ARGS 387 -#define _INIT_CALL_PY_EXACT_ARGS 388 -#define _INIT_CALL_PY_EXACT_ARGS_0 389 -#define _INIT_CALL_PY_EXACT_ARGS_1 390 -#define _INIT_CALL_PY_EXACT_ARGS_2 391 -#define _INIT_CALL_PY_EXACT_ARGS_3 392 -#define _INIT_CALL_PY_EXACT_ARGS_4 393 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 388 +#define _INIT_CALL_PY_EXACT_ARGS 389 +#define _INIT_CALL_PY_EXACT_ARGS_0 390 +#define _INIT_CALL_PY_EXACT_ARGS_1 391 +#define _INIT_CALL_PY_EXACT_ARGS_2 392 +#define _INIT_CALL_PY_EXACT_ARGS_3 393 +#define _INIT_CALL_PY_EXACT_ARGS_4 394 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER @@ -157,133 +158,135 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 394 -#define _IS_NONE 395 +#define _INTERNAL_INCREMENT_OPT_COUNTER 395 +#define _IS_NONE 396 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 396 -#define _ITER_CHECK_RANGE 397 -#define _ITER_CHECK_TUPLE 398 -#define _ITER_JUMP_LIST 399 -#define _ITER_JUMP_RANGE 400 -#define _ITER_JUMP_TUPLE 401 -#define _ITER_NEXT_LIST 402 -#define _ITER_NEXT_RANGE 403 -#define _ITER_NEXT_TUPLE 404 -#define _JUMP_TO_TOP 405 +#define _ITER_CHECK_LIST 397 +#define _ITER_CHECK_RANGE 398 +#define _ITER_CHECK_TUPLE 399 +#define _ITER_JUMP_LIST 400 +#define _ITER_JUMP_RANGE 401 +#define _ITER_JUMP_TUPLE 402 +#define _ITER_NEXT_LIST 403 +#define _ITER_NEXT_RANGE 404 +#define _ITER_NEXT_TUPLE 405 +#define _JUMP_TO_TOP 406 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 406 -#define _LOAD_ATTR_CLASS 407 -#define _LOAD_ATTR_CLASS_0 408 -#define _LOAD_ATTR_CLASS_1 409 +#define _LOAD_ATTR 407 +#define _LOAD_ATTR_CLASS 408 +#define 
_LOAD_ATTR_CLASS_0 409 +#define _LOAD_ATTR_CLASS_1 410 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 410 -#define _LOAD_ATTR_INSTANCE_VALUE_0 411 -#define _LOAD_ATTR_INSTANCE_VALUE_1 412 -#define _LOAD_ATTR_METHOD_LAZY_DICT 413 -#define _LOAD_ATTR_METHOD_NO_DICT 414 -#define _LOAD_ATTR_METHOD_WITH_VALUES 415 -#define _LOAD_ATTR_MODULE 416 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 417 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 418 -#define _LOAD_ATTR_PROPERTY_FRAME 419 -#define _LOAD_ATTR_SLOT 420 -#define _LOAD_ATTR_SLOT_0 421 -#define _LOAD_ATTR_SLOT_1 422 -#define _LOAD_ATTR_WITH_HINT 423 +#define _LOAD_ATTR_INSTANCE_VALUE 411 +#define _LOAD_ATTR_INSTANCE_VALUE_0 412 +#define _LOAD_ATTR_INSTANCE_VALUE_1 413 +#define _LOAD_ATTR_METHOD_LAZY_DICT 414 +#define _LOAD_ATTR_METHOD_NO_DICT 415 +#define _LOAD_ATTR_METHOD_WITH_VALUES 416 +#define _LOAD_ATTR_MODULE 417 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 418 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 419 +#define _LOAD_ATTR_PROPERTY_FRAME 420 +#define _LOAD_ATTR_SLOT 421 +#define _LOAD_ATTR_SLOT_0 422 +#define _LOAD_ATTR_SLOT_1 423 +#define _LOAD_ATTR_WITH_HINT 424 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 424 -#define _LOAD_CONST_INLINE_BORROW 425 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 426 -#define _LOAD_CONST_INLINE_WITH_NULL 427 +#define _LOAD_CONST_INLINE 425 +#define _LOAD_CONST_INLINE_BORROW 426 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 427 +#define _LOAD_CONST_INLINE_WITH_NULL 428 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 428 -#define _LOAD_FAST_0 429 -#define _LOAD_FAST_1 430 -#define _LOAD_FAST_2 431 -#define _LOAD_FAST_3 432 -#define _LOAD_FAST_4 433 -#define _LOAD_FAST_5 434 -#define _LOAD_FAST_6 435 -#define _LOAD_FAST_7 436 +#define _LOAD_FAST 429 +#define _LOAD_FAST_0 430 +#define _LOAD_FAST_1 431 +#define 
_LOAD_FAST_2 432 +#define _LOAD_FAST_3 433 +#define _LOAD_FAST_4 434 +#define _LOAD_FAST_5 435 +#define _LOAD_FAST_6 436 +#define _LOAD_FAST_7 437 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 437 -#define _LOAD_GLOBAL_BUILTINS 438 -#define _LOAD_GLOBAL_MODULE 439 +#define _LOAD_GLOBAL 438 +#define _LOAD_GLOBAL_BUILTINS 439 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 440 +#define _LOAD_GLOBAL_MODULE 441 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 442 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 440 +#define _MAKE_CALLARGS_A_TUPLE 443 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 441 +#define _MAKE_WARM 444 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 442 -#define _MAYBE_EXPAND_METHOD_KW 443 -#define _MONITOR_CALL 444 -#define _MONITOR_JUMP_BACKWARD 445 -#define _MONITOR_RESUME 446 +#define _MAYBE_EXPAND_METHOD 445 +#define _MAYBE_EXPAND_METHOD_KW 446 +#define _MONITOR_CALL 447 +#define _MONITOR_JUMP_BACKWARD 448 +#define _MONITOR_RESUME 449 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 447 -#define _POP_JUMP_IF_TRUE 448 +#define _POP_JUMP_IF_FALSE 450 +#define _POP_JUMP_IF_TRUE 451 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 449 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 452 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 450 +#define _PUSH_FRAME 453 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 451 
-#define _PY_FRAME_KW 452 -#define _QUICKEN_RESUME 453 -#define _REPLACE_WITH_TRUE 454 +#define _PY_FRAME_GENERAL 454 +#define _PY_FRAME_KW 455 +#define _QUICKEN_RESUME 456 +#define _REPLACE_WITH_TRUE 457 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 455 -#define _SEND 456 -#define _SEND_GEN_FRAME 457 +#define _SAVE_RETURN_OFFSET 458 +#define _SEND 459 +#define _SEND_GEN_FRAME 460 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 458 -#define _STORE_ATTR 459 -#define _STORE_ATTR_INSTANCE_VALUE 460 -#define _STORE_ATTR_SLOT 461 -#define _STORE_ATTR_WITH_HINT 462 +#define _START_EXECUTOR 461 +#define _STORE_ATTR 462 +#define _STORE_ATTR_INSTANCE_VALUE 463 +#define _STORE_ATTR_SLOT 464 +#define _STORE_ATTR_WITH_HINT 465 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 463 -#define _STORE_FAST_0 464 -#define _STORE_FAST_1 465 -#define _STORE_FAST_2 466 -#define _STORE_FAST_3 467 -#define _STORE_FAST_4 468 -#define _STORE_FAST_5 469 -#define _STORE_FAST_6 470 -#define _STORE_FAST_7 471 +#define _STORE_FAST 466 +#define _STORE_FAST_0 467 +#define _STORE_FAST_1 468 +#define _STORE_FAST_2 469 +#define _STORE_FAST_3 470 +#define _STORE_FAST_4 471 +#define _STORE_FAST_5 472 +#define _STORE_FAST_6 473 +#define _STORE_FAST_7 474 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 472 -#define _STORE_SUBSCR 473 +#define _STORE_SLICE 475 +#define _STORE_SUBSCR 476 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 474 -#define _TO_BOOL 475 +#define _TIER2_RESUME_CHECK 477 +#define _TO_BOOL 478 #define _TO_BOOL_BOOL 
TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -293,13 +296,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 476 +#define _UNPACK_SEQUENCE 479 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 476 +#define MAX_UOP_ID 479 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index c68ee594947283..fd41e9a5fe862b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -116,9 +116,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_GLOBALS_VERSION] = HAS_DEOPT_FLAG, - [_GUARD_BUILTINS_VERSION] = HAS_DEOPT_FLAG, - [_LOAD_GLOBAL_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_LOAD_GLOBAL_BUILTINS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_GUARD_GLOBALS_VERSION_PUSH_KEYS] = HAS_DEOPT_FLAG, + [_GUARD_BUILTINS_VERSION_PUSH_KEYS] = HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_MODULE_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_BUILTINS_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_DELETE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MAKE_CELL] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, [_DELETE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, @@ -273,6 +274,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, + 
[_LOAD_GLOBAL_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_BUILTINS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, @@ -397,10 +400,11 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_BOTH_FLOAT] = "_GUARD_BOTH_FLOAT", [_GUARD_BOTH_INT] = "_GUARD_BOTH_INT", [_GUARD_BOTH_UNICODE] = "_GUARD_BOTH_UNICODE", - [_GUARD_BUILTINS_VERSION] = "_GUARD_BUILTINS_VERSION", + [_GUARD_BUILTINS_VERSION_PUSH_KEYS] = "_GUARD_BUILTINS_VERSION_PUSH_KEYS", [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", + [_GUARD_GLOBALS_VERSION_PUSH_KEYS] = "_GUARD_GLOBALS_VERSION_PUSH_KEYS", [_GUARD_IS_FALSE_POP] = "_GUARD_IS_FALSE_POP", [_GUARD_IS_NONE_POP] = "_GUARD_IS_NONE_POP", [_GUARD_IS_NOT_NONE_POP] = "_GUARD_IS_NOT_NONE_POP", @@ -476,7 +480,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", [_LOAD_GLOBAL] = "_LOAD_GLOBAL", [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", + [_LOAD_GLOBAL_BUILTINS_FROM_KEYS] = "_LOAD_GLOBAL_BUILTINS_FROM_KEYS", [_LOAD_GLOBAL_MODULE] = "_LOAD_GLOBAL_MODULE", + [_LOAD_GLOBAL_MODULE_FROM_KEYS] = "_LOAD_GLOBAL_MODULE_FROM_KEYS", [_LOAD_LOCALS] = "_LOAD_LOCALS", [_LOAD_NAME] = "_LOAD_NAME", [_LOAD_SPECIAL] = "_LOAD_SPECIAL", @@ -752,12 +758,14 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _GUARD_GLOBALS_VERSION: return 0; - case _GUARD_BUILTINS_VERSION: + case _GUARD_GLOBALS_VERSION_PUSH_KEYS: return 0; - case _LOAD_GLOBAL_MODULE: - return 0; - case _LOAD_GLOBAL_BUILTINS: + case _GUARD_BUILTINS_VERSION_PUSH_KEYS: return 0; + case _LOAD_GLOBAL_MODULE_FROM_KEYS: + return 1; + case _LOAD_GLOBAL_BUILTINS_FROM_KEYS: + return 1; case _DELETE_FAST: return 0; case _MAKE_CELL: @@ -1066,6 +1074,10 @@ int _PyUop_num_popped(int opcode, int oparg) 
return 0; case _CHECK_FUNCTION: return 0; + case _LOAD_GLOBAL_MODULE: + return 0; + case _LOAD_GLOBAL_BUILTINS: + return 0; case _INTERNAL_INCREMENT_OPT_COUNTER: return 1; case _DYNAMIC_EXIT: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 228d82173e6126..87cca3fc1d373c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1569,17 +1569,29 @@ dummy_func( assert(DK_IS_UNICODE(dict->ma_keys)); } - op(_GUARD_BUILTINS_VERSION, (version/1 --)) { + op(_GUARD_GLOBALS_VERSION_PUSH_KEYS, (version / 1 -- globals_keys: PyDictKeysObject *)) + { + PyDictObject *dict = (PyDictObject *)GLOBALS(); + DEOPT_IF(!PyDict_CheckExact(dict)); + DEOPT_IF(dict->ma_keys->dk_version != version); + globals_keys = dict->ma_keys; + assert(DK_IS_UNICODE(globals_keys)); + } + + op(_GUARD_BUILTINS_VERSION_PUSH_KEYS, (version / 1 -- builtins_keys: PyDictKeysObject *)) + { PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict)); DEOPT_IF(dict->ma_keys->dk_version != version); - assert(DK_IS_UNICODE(dict->ma_keys)); + builtins_keys = dict->ma_keys; + assert(DK_IS_UNICODE(builtins_keys)); } - op(_LOAD_GLOBAL_MODULE, (index/1 -- res, null if (oparg & 1))) { - PyDictObject *dict = (PyDictObject *)GLOBALS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + op(_LOAD_GLOBAL_MODULE_FROM_KEYS, (index/1, globals_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); PyObject *res_o = entries[index].me_value; + DEAD(globals_keys); + SYNC_SP(); DEOPT_IF(res_o == NULL); Py_INCREF(res_o); STAT_INC(LOAD_GLOBAL, hit); @@ -1587,10 +1599,11 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_LOAD_GLOBAL_BUILTINS, (index/1 -- res, null if (oparg & 1))) { - PyDictObject *bdict = (PyDictObject *)BUILTINS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys); + op(_LOAD_GLOBAL_BUILTINS_FROM_KEYS, (index/1, builtins_keys: PyDictKeysObject* -- res, null if (oparg 
& 1))) { + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); PyObject *res_o = entries[index].me_value; + DEAD(builtins_keys); + SYNC_SP(); DEOPT_IF(res_o == NULL); Py_INCREF(res_o); STAT_INC(LOAD_GLOBAL, hit); @@ -1600,15 +1613,15 @@ dummy_func( macro(LOAD_GLOBAL_MODULE) = unused/1 + // Skip over the counter - _GUARD_GLOBALS_VERSION + + _GUARD_GLOBALS_VERSION_PUSH_KEYS + unused/1 + // Skip over the builtins version - _LOAD_GLOBAL_MODULE; + _LOAD_GLOBAL_MODULE_FROM_KEYS; macro(LOAD_GLOBAL_BUILTIN) = unused/1 + // Skip over the counter _GUARD_GLOBALS_VERSION + - _GUARD_BUILTINS_VERSION + - _LOAD_GLOBAL_BUILTINS; + _GUARD_BUILTINS_VERSION_PUSH_KEYS + + _LOAD_GLOBAL_BUILTINS_FROM_KEYS; inst(DELETE_FAST, (--)) { _PyStackRef v = GETLOCAL(oparg); @@ -4871,6 +4884,26 @@ dummy_func( DEOPT_IF(func->func_version != func_version); } + tier2 op(_LOAD_GLOBAL_MODULE, (index/1 -- res, null if (oparg & 1))) { + PyDictObject *dict = (PyDictObject *)GLOBALS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyObject *res_o = entries[index].me_value; + DEOPT_IF(res_o == NULL); + Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + null = PyStackRef_NULL; + } + + tier2 op(_LOAD_GLOBAL_BUILTINS, (index/1 -- res, null if (oparg & 1))) { + PyDictObject *dict = (PyDictObject *)BUILTINS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyObject *res_o = entries[index].me_value; + DEOPT_IF(res_o == NULL); + Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + null = PyStackRef_NULL; + } + /* Internal -- for testing executors */ op(_INTERNAL_INCREMENT_OPT_COUNTER, (opt --)) { _PyCounterOptimizerObject *exe = (_PyCounterOptimizerObject *)PyStackRef_AsPyObjectBorrow(opt); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4574e183921006..57e15f33ca7703 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1791,7 +1791,28 @@ break; } - case _GUARD_BUILTINS_VERSION: { + 
case _GUARD_GLOBALS_VERSION_PUSH_KEYS: { + PyDictKeysObject *globals_keys; + uint16_t version = (uint16_t)CURRENT_OPERAND(); + PyDictObject *dict = (PyDictObject *)GLOBALS(); + if (!PyDict_CheckExact(dict)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + if (dict->ma_keys->dk_version != version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + globals_keys = dict->ma_keys; + assert(DK_IS_UNICODE(globals_keys)); + stack_pointer[0].bits = (uintptr_t)globals_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_BUILTINS_VERSION_PUSH_KEYS: { + PyDictKeysObject *builtins_keys; uint16_t version = (uint16_t)CURRENT_OPERAND(); PyDictObject *dict = (PyDictObject *)BUILTINS(); if (!PyDict_CheckExact(dict)) { @@ -1802,18 +1823,25 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - assert(DK_IS_UNICODE(dict->ma_keys)); + builtins_keys = dict->ma_keys; + assert(DK_IS_UNICODE(builtins_keys)); + stack_pointer[0].bits = (uintptr_t)builtins_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_GLOBAL_MODULE: { + case _LOAD_GLOBAL_MODULE_FROM_KEYS: { + PyDictKeysObject *globals_keys; _PyStackRef res; _PyStackRef null = PyStackRef_NULL; oparg = CURRENT_OPARG(); + globals_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND(); - PyDictObject *dict = (PyDictObject *)GLOBALS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); PyObject *res_o = entries[index].me_value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -1829,14 +1857,17 @@ break; } - case _LOAD_GLOBAL_BUILTINS: { + case _LOAD_GLOBAL_BUILTINS_FROM_KEYS: { + PyDictKeysObject *builtins_keys; _PyStackRef res; _PyStackRef null = PyStackRef_NULL; oparg = CURRENT_OPARG(); + builtins_keys = (PyDictKeysObject 
*)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND(); - PyDictObject *bdict = (PyDictObject *)BUILTINS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); PyObject *res_o = entries[index].me_value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -5698,6 +5729,50 @@ break; } + case _LOAD_GLOBAL_MODULE: { + _PyStackRef res; + _PyStackRef null = PyStackRef_NULL; + oparg = CURRENT_OPARG(); + uint16_t index = (uint16_t)CURRENT_OPERAND(); + PyDictObject *dict = (PyDictObject *)GLOBALS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyObject *res_o = entries[index].me_value; + if (res_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + null = PyStackRef_NULL; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + _PyStackRef res; + _PyStackRef null = PyStackRef_NULL; + oparg = CURRENT_OPARG(); + uint16_t index = (uint16_t)CURRENT_OPERAND(); + PyDictObject *dict = (PyDictObject *)BUILTINS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyObject *res_o = entries[index].me_value; + if (res_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + null = PyStackRef_NULL; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _INTERNAL_INCREMENT_OPT_COUNTER: { _PyStackRef opt; opt = stack_pointer[-1]; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c4de7bdeb4ce80..7656ce6bb7e313 100644 --- a/Python/generated_cases.c.h +++ 
b/Python/generated_cases.c.h @@ -6146,6 +6146,7 @@ next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL_BUILTIN); static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); + PyDictKeysObject *builtins_keys; _PyStackRef res; _PyStackRef null = PyStackRef_NULL; /* Skip 1 cache entry */ @@ -6157,19 +6158,19 @@ DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); assert(DK_IS_UNICODE(dict->ma_keys)); } - // _GUARD_BUILTINS_VERSION + // _GUARD_BUILTINS_VERSION_PUSH_KEYS { uint16_t version = read_u16(&this_instr[3].cache); PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - assert(DK_IS_UNICODE(dict->ma_keys)); + builtins_keys = dict->ma_keys; + assert(DK_IS_UNICODE(builtins_keys)); } - // _LOAD_GLOBAL_BUILTINS + // _LOAD_GLOBAL_BUILTINS_FROM_KEYS { uint16_t index = read_u16(&this_instr[4].cache); - PyDictObject *bdict = (PyDictObject *)BUILTINS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); PyObject *res_o = entries[index].me_value; DEOPT_IF(res_o == NULL, LOAD_GLOBAL); Py_INCREF(res_o); @@ -6189,23 +6190,24 @@ next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL_MODULE); static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); + PyDictKeysObject *globals_keys; _PyStackRef res; _PyStackRef null = PyStackRef_NULL; /* Skip 1 cache entry */ - // _GUARD_GLOBALS_VERSION + // _GUARD_GLOBALS_VERSION_PUSH_KEYS { uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - assert(DK_IS_UNICODE(dict->ma_keys)); + globals_keys = dict->ma_keys; + assert(DK_IS_UNICODE(globals_keys)); } /* Skip 1 cache entry */ - // _LOAD_GLOBAL_MODULE + // _LOAD_GLOBAL_MODULE_FROM_KEYS { uint16_t index = 
read_u16(&this_instr[4].cache); - PyDictObject *dict = (PyDictObject *)GLOBALS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); PyObject *res_o = entries[index].me_value; DEOPT_IF(res_o == NULL, LOAD_GLOBAL); Py_INCREF(res_o); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 06826ff942a761..25166bc2dc5c02 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -131,6 +131,26 @@ incorrect_keys(_PyUOpInstruction *inst, PyObject *obj) return 0; } +static int +check_next_uop(_PyUOpInstruction *buffer, int size, int pc, uint16_t expected) +{ + if (pc + 1 >= size) { + DPRINTF(1, "Cannot rewrite %s at pc %d: buffer too small\n", + _PyOpcode_uop_name[buffer[pc].opcode], pc); + return 0; + } + uint16_t next_opcode = buffer[pc + 1].opcode; + if (next_opcode != expected) { + DPRINTF(1, + "Cannot rewrite %s at pc %d: unexpected next opcode %s, " + "expected %s\n", + _PyOpcode_uop_name[buffer[pc].opcode], pc, + _PyOpcode_uop_name[next_opcode], _PyOpcode_uop_name[expected]); + return 0; + } + return 1; +} + /* Returns 1 if successfully optimized * 0 if the trace is not suitable for optimization (yet) * -1 if there was an error. 
*/ @@ -174,7 +194,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, _PyUOpInstruction *inst = &buffer[pc]; int opcode = inst->opcode; switch(opcode) { - case _GUARD_BUILTINS_VERSION: + case _GUARD_BUILTINS_VERSION_PUSH_KEYS: if (incorrect_keys(inst, builtins)) { OPT_STAT_INC(remove_globals_incorrect_keys); return 0; @@ -182,6 +202,10 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) { continue; } + if (!check_next_uop(buffer, buffer_size, pc, + _LOAD_GLOBAL_BUILTINS_FROM_KEYS)) { + continue; + } if ((builtins_watched & 1) == 0) { PyDict_Watch(BUILTINS_WATCHER_ID, builtins); builtins_watched |= 1; @@ -194,8 +218,13 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, buffer[pc].operand = function_version; function_checked |= 1; } + // We're no longer pushing the builtins keys; rewrite the + // instruction that consumed the keys to load them from the + // frame. 
+ buffer[pc + 1].opcode = _LOAD_GLOBAL_BUILTINS; break; case _GUARD_GLOBALS_VERSION: + case _GUARD_GLOBALS_VERSION_PUSH_KEYS: if (incorrect_keys(inst, globals)) { OPT_STAT_INC(remove_globals_incorrect_keys); return 0; @@ -204,6 +233,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, if (watched_mutations >= _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) { continue; } + if (opcode == _GUARD_GLOBALS_VERSION_PUSH_KEYS && + !check_next_uop(buffer, buffer_size, pc, + _LOAD_GLOBAL_MODULE_FROM_KEYS)) { + continue; + } if ((globals_watched & 1) == 0) { PyDict_Watch(GLOBALS_WATCHER_ID, globals); _Py_BloomFilter_Add(dependencies, globals); @@ -217,6 +251,12 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, buffer[pc].operand = function_version; function_checked |= 1; } + if (opcode == _GUARD_GLOBALS_VERSION_PUSH_KEYS) { + // We're no longer pushing the globals keys; rewrite the + // instruction that consumed the keys to load them from the + // frame. + buffer[pc + 1].opcode = _LOAD_GLOBAL_MODULE; + } break; case _LOAD_GLOBAL_BUILTINS: if (function_checked & globals_watched & builtins_watched & 1) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index c73b632d1afdd7..d71b55cbe1c68d 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -836,6 +836,16 @@ dummy_func(void) { ctx->done = true; } + op(_GUARD_GLOBALS_VERSION_PUSH_KEYS, (version/1 -- globals_keys)) { + globals_keys = sym_new_unknown(ctx); + (void)version; + } + + op(_GUARD_BUILTINS_VERSION_PUSH_KEYS, (version/1 -- builtins_keys)) { + builtins_keys = sym_new_unknown(ctx); + (void)version; + } + // END BYTECODES // } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index ae532fd27769ab..6ec9e69d1dbc44 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -844,30 +844,48 @@ break; } - case _GUARD_BUILTINS_VERSION: { + case _GUARD_GLOBALS_VERSION_PUSH_KEYS: { + _Py_UopsSymbol 
*globals_keys; + uint16_t version = (uint16_t)this_instr->operand; + globals_keys = sym_new_unknown(ctx); + (void)version; + stack_pointer[0] = globals_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_GLOBAL_MODULE: { + case _GUARD_BUILTINS_VERSION_PUSH_KEYS: { + _Py_UopsSymbol *builtins_keys; + uint16_t version = (uint16_t)this_instr->operand; + builtins_keys = sym_new_unknown(ctx); + (void)version; + stack_pointer[0] = builtins_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_GLOBAL_MODULE_FROM_KEYS: { _Py_UopsSymbol *res; _Py_UopsSymbol *null = NULL; res = sym_new_not_null(ctx); null = sym_new_null(ctx); - stack_pointer[0] = res; - if (oparg & 1) stack_pointer[1] = null; - stack_pointer += 1 + (oparg & 1); + stack_pointer[-1] = res; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_GLOBAL_BUILTINS: { + case _LOAD_GLOBAL_BUILTINS_FROM_KEYS: { _Py_UopsSymbol *res; _Py_UopsSymbol *null = NULL; res = sym_new_not_null(ctx); null = sym_new_null(ctx); - stack_pointer[0] = res; - if (oparg & 1) stack_pointer[1] = null; - stack_pointer += 1 + (oparg & 1); + stack_pointer[-1] = res; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); assert(WITHIN_STACK_BOUNDS()); break; } @@ -2419,6 +2437,30 @@ break; } + case _LOAD_GLOBAL_MODULE: { + _Py_UopsSymbol *res; + _Py_UopsSymbol *null = NULL; + res = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + _Py_UopsSymbol *res; + _Py_UopsSymbol *null = NULL; + res = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + case 
_INTERNAL_INCREMENT_OPT_COUNTER: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); From 9bda7750c2af779d3431f5ea120db91c6c83ec49 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Oct 2024 17:54:54 +0200 Subject: [PATCH 012/114] gh-125196: Use PyUnicodeWriter in symtable.c (#125199) --- Python/symtable.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/Python/symtable.c b/Python/symtable.c index 52be910c0b6a9b..709918b27afcc8 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -3120,33 +3120,30 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident) if (ipriv == plen) { return Py_NewRef(ident); /* Don't mangle if class is just underscores */ } - plen -= ipriv; - if (plen + nlen >= PY_SSIZE_T_MAX - 1) { + if (nlen + (plen - ipriv) >= PY_SSIZE_T_MAX - 1) { PyErr_SetString(PyExc_OverflowError, "private identifier too large to be mangled"); return NULL; } - Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(ident); - if (PyUnicode_MAX_CHAR_VALUE(privateobj) > maxchar) { - maxchar = PyUnicode_MAX_CHAR_VALUE(privateobj); - } - - PyObject *result = PyUnicode_New(1 + nlen + plen, maxchar); - if (!result) { + PyUnicodeWriter *writer = PyUnicodeWriter_Create(1 + nlen + (plen - ipriv)); + if (!writer) { return NULL; } - /* ident = "_" + priv[ipriv:] + ident # i.e. 
1+plen+nlen bytes */ - PyUnicode_WRITE(PyUnicode_KIND(result), PyUnicode_DATA(result), 0, '_'); - if (PyUnicode_CopyCharacters(result, 1, privateobj, ipriv, plen) < 0) { - Py_DECREF(result); - return NULL; + // ident = "_" + priv[ipriv:] + ident + if (PyUnicodeWriter_WriteChar(writer, '_') < 0) { + goto error; } - if (PyUnicode_CopyCharacters(result, plen+1, ident, 0, nlen) < 0) { - Py_DECREF(result); - return NULL; + if (PyUnicodeWriter_WriteSubstring(writer, privateobj, ipriv, plen) < 0) { + goto error; } - assert(_PyUnicode_CheckConsistency(result, 1)); - return result; + if (PyUnicodeWriter_WriteStr(writer, ident) < 0) { + goto error; + } + return PyUnicodeWriter_Finish(writer); + +error: + PyUnicodeWriter_Discard(writer); + return NULL; } From d501153aed6cd9c03b77836821ed8d47f0655a0b Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 9 Oct 2024 18:21:35 +0100 Subject: [PATCH 013/114] gh-119786: Move parser doc from devguide to InternalDocs (#125119) Co-authored-by: Jacob Coffee Co-authored-by: Carol Willing Co-Authored-By: Adam Turner <9087854+aa-turner@users.noreply.github.com> Co-Authored-By: Carl Friedrich Bolz-Tereick Co-Authored-By: Carol Willing Co-Authored-By: Erlend E. 
Aasland Co-Authored-By: Ezio Melotti Co-Authored-By: Hugo van Kemenade Co-Authored-By: Irit Katriel Co-Authored-By: Itamar Ostricher Co-Authored-By: Julien Palard Co-Authored-By: Mana Co-Authored-By: Muhammad Mahad Co-Authored-By: Ned Batchelder Co-Authored-By: Pablo Galindo Salgado Co-Authored-By: slateny <46876382+slateny@users.noreply.github.com> Co-Authored-By: wookie184 --- InternalDocs/README.md | 2 + InternalDocs/parser.md | 894 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 896 insertions(+) create mode 100644 InternalDocs/parser.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 95181a420f1dfb..8956ecafed2039 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -12,6 +12,8 @@ it is not, please report that through the [issue tracker](https://github.com/python/cpython/issues). +[Guide to the parser](parser.md) + [Compiler Design](compiler.md) [Frames](frames.md) diff --git a/InternalDocs/parser.md b/InternalDocs/parser.md new file mode 100644 index 00000000000000..11aaf11253646d --- /dev/null +++ b/InternalDocs/parser.md @@ -0,0 +1,894 @@ + +Guide to the parser +=================== + +Abstract +-------- + +Python's Parser is currently a +[`PEG` (Parser Expression Grammar)](https://en.wikipedia.org/wiki/Parsing_expression_grammar) +parser. It was introduced in +[PEP 617: New PEG parser for CPython](https://peps.python.org/pep-0617/) to replace +the original [``LL(1)``](https://en.wikipedia.org/wiki/LL_parser) parser. + +The code implementing the parser is generated from a grammar definition by a +[parser generator](https://en.wikipedia.org/wiki/Compiler-compiler). +Therefore, changes to the Python language are made by modifying the +[grammar file](https://github.com/python/cpython/blob/main/Grammar/python.gram). +Developers rarely need to modify the generator itself. 
+ +See the devguide's [Changing CPython's grammar](https://devguide.python.org/developer-workflow/grammar/#grammar) +for a detailed description of the grammar and the process for changing it. + +How PEG parsers work +==================== + +A PEG (Parsing Expression Grammar) grammar differs from a +[context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) +in that the way it is written more closely reflects how the parser will operate +when parsing. The fundamental technical difference is that the choice operator +is ordered. This means that when writing: + +``` + rule: A | B | C +``` + +a parser that implements a context-free-grammar (such as an ``LL(1)`` parser) will +generate constructions that, given an input string, *deduce* which alternative +(``A``, ``B`` or ``C``) must be expanded. On the other hand, a PEG parser will +check each alternative, in the order in which they are specified, and select +the first one that succeeds. + +This means that in a PEG grammar, the choice operator is not commutative. +Furthermore, unlike context-free grammars, the derivation according to a +PEG grammar cannot be ambiguous: if a string parses, it has exactly one +valid parse tree. + +PEG parsers are usually constructed as a recursive descent parser in which every +rule in the grammar corresponds to a function in the program implementing the +parser, and the parsing expression (the "expansion" or "definition" of the rule) +represents the "code" in said function. Each parsing function conceptually takes +an input string as its argument, and yields one of the following results: + +* A "success" result. This result indicates that the expression can be parsed by + that rule and the function may optionally move forward or consume one or more + characters of the input string supplied to it. +* A "failure" result, in which case no input is consumed. 
+ +Note that "failure" results do not imply that the program is incorrect, nor do +they necessarily mean that the parsing has failed. Since the choice operator is +ordered, a failure very often merely indicates "try the following option". A +direct implementation of a PEG parser as a recursive descent parser will present +exponential time performance in the worst case, because PEG parsers have +infinite lookahead (this means that they can consider an arbitrary number of +tokens before deciding for a rule). Usually, PEG parsers avoid this exponential +time complexity with a technique called +["packrat parsing"](https://pdos.csail.mit.edu/~baford/packrat/thesis/) +which not only loads the entire program in memory before parsing it but also +allows the parser to backtrack arbitrarily. This is made efficient by memoizing +the rules already matched for each position. The cost of the memoization cache +is that the parser will naturally use more memory than a simple ``LL(1)`` parser, +which is normally table-based. + + +Key ideas +--------- + +- Alternatives are ordered ( ``A | B`` is not the same as ``B | A`` ). +- If a rule returns a failure, it doesn't mean that the parsing has failed, + it just means "try something else". +- By default PEG parsers run in exponential time, which can be optimized to linear by + using memoization. +- If parsing fails completely (no rule succeeds in parsing all the input text), the + PEG parser doesn't have a concept of "where the + [``SyntaxError``](https://docs.python.org/3/library/exceptions.html#SyntaxError) is". + + +> [!IMPORTANT] +> Don't try to reason about a PEG grammar in the same way you would with an +> [EBNF](https://en.wikipedia.org/wiki/Extended_Backus–Naur_form) +> or context free grammar. 
PEG is optimized to describe **how** input strings will +> be parsed, while context-free grammars are optimized to generate strings of the +> language they describe (in EBNF, to know whether a given string is in the +> language, you need to do work to find out as it is not immediately obvious from +> the grammar). + + +Consequences of the ordered choice operator +------------------------------------------- + +Although PEG may look like EBNF, its meaning is quite different. The fact +that the alternatives are ordered in a PEG grammar (which is at the core of +how PEG parsers work) has deep consequences, other than removing ambiguity. + +If a rule has two alternatives and the first of them succeeds, the second one is +**not** attempted even if the caller rule fails to parse the rest of the input. +Thus the parser is said to be "eager". To illustrate this, consider +the following two rules (in these examples, a token is an individual character): + +``` + first_rule: ( 'a' | 'aa' ) 'a' + second_rule: ('aa' | 'a' ) 'a' +``` + +In a regular EBNF grammar, both rules specify the language ``{aa, aaa}`` but +in PEG, one of these two rules accepts the string ``aaa`` but not the string +``aa``. The other does the opposite -- it accepts the string ``aa`` +but not the string ``aaa``. The rule ``('a'|'aa')'a'`` does +not accept ``aaa`` because ``'a'|'aa'`` consumes the first ``a``, letting the +final ``a`` in the rule consume the second, and leaving out the third ``a``. +As the rule has succeeded, no attempt is ever made to go back and let +``'a'|'aa'`` try the second alternative. The expression ``('aa'|'a')'a'`` does +not accept ``aa`` because ``'aa'|'a'`` accepts all of ``aa``, leaving nothing +for the final ``a``. Again, the second alternative of ``'aa'|'a'`` is not +tried. + +> [!CAUTION] +> The effects of ordered choice, such as the ones illustrated above, may be +> hidden by many levels of rules. 
+ +For this reason, writing rules where an alternative is contained in the next +one is in almost all cases a mistake, for example: + +``` + my_rule: + | 'if' expression 'then' block + | 'if' expression 'then' block 'else' block +``` + +In this example, the second alternative will never be tried because the first one will +succeed first (even if the input string has an ``'else' block`` that follows). To correctly +write this rule you can simply alter the order: + +``` + my_rule: + | 'if' expression 'then' block 'else' block + | 'if' expression 'then' block +``` + +In this case, if the input string doesn't have an ``'else' block``, the first alternative +will fail and the second will be attempted. + +Grammar Syntax +============== + +The grammar consists of a sequence of rules of the form: + +``` + rule_name: expression +``` + +Optionally, a type can be included right after the rule name, which +specifies the return type of the C or Python function corresponding to +the rule: + +``` + rule_name[return_type]: expression +``` + +If the return type is omitted, then a ``void *`` is returned in C and an +``Any`` in Python. + +Grammar expressions +------------------- + +| Expression | Description and Example | +|-----------------|-----------------------------------------------------------------------------------------------------------------------| +| `# comment` | Python-style comments. | +| `e1 e2` | Match `e1`, then match `e2`.
`rule_name: first_rule second_rule` | +| `e1 \| e2` | Match `e1` or `e2`.
`rule_name[return_type]:`
` \| first_alt`
` \| second_alt` | +| `( e )` | Grouping operator: Match `e`.
`rule_name: (e)`
`rule_name: (e1 e2)*` | +| `[ e ]` or `e?` | Optionally match `e`.
`rule_name: [e]`
`rule_name: e (',' e)* [',']` | +| `e*` | Match zero or more occurrences of `e`.
`rule_name: (e1 e2)*` | +| `e+` | Match one or more occurrences of `e`.
`rule_name: (e1 e2)+` | +| `s.e+` | Match one or more occurrences of `e`, separated by `s`.
`rule_name: ','.e+` | +| `&e` | Positive lookahead: Succeed if `e` can be parsed, without consuming input. | +| `!e` | Negative lookahead: Fail if `e` can be parsed, without consuming input.
`primary: atom !'.' !'(' !'['` | +| `~` | Commit to the current alternative, even if it fails to parse (cut).
`rule_name: '(' ~ some_rule ')' \| some_alt` | + + +Left recursion +-------------- + +PEG parsers normally do not support left recursion, but CPython's parser +generator implements a technique similar to the one described in +[Medeiros et al.](https://arxiv.org/pdf/1207.0443) but using the memoization +cache instead of static variables. This approach is closer to the one described +in [Warth et al.](http://web.cs.ucla.edu/~todd/research/pepm08.pdf). This +allows us to write not only simple left-recursive rules but also more +complicated rules that involve indirect left-recursion like: + +``` + rule1: rule2 | 'a' + rule2: rule3 | 'b' + rule3: rule1 | 'c' +``` + +and "hidden left-recursion" like: + +``` + rule: 'optional'? rule '@' some_other_rule +``` + +Variables in the grammar +------------------------ + +A sub-expression can be named by preceding it with an identifier and an +``=`` sign. The name can then be used in the action (see below), like this: + +``` + rule_name[return_type]: '(' a=some_other_rule ')' { a } +``` + +Grammar actions +--------------- + +To avoid the intermediate steps that obscure the relationship between the +grammar and the AST generation, the PEG parser allows directly generating AST +nodes for a rule via grammar actions. Grammar actions are language-specific +expressions that are evaluated when a grammar rule is successfully parsed. These +expressions can be written in Python or C depending on the desired output of the +parser generator. This means that if one would want to generate a parser in +Python and another in C, two grammar files should be written, each one with a +different set of actions, keeping everything else apart from said actions +identical in both files. As an example of a grammar with Python actions, the +piece of the parser generator that parses grammar files is bootstrapped from a +meta-grammar file with Python actions that generate the grammar tree as a result +of the parsing. 
+ +In the specific case of the PEG grammar for Python, having actions allows +directly describing how the AST is composed in the grammar itself, making it +more clear and maintainable. This AST generation process is supported by the use +of some helper functions that factor out common AST object manipulations and +some other required operations that are not directly related to the grammar. + +To indicate these actions each alternative can be followed by the action code +inside curly-braces, which specifies the return value of the alternative: + +``` + rule_name[return_type]: + | first_alt1 first_alt2 { first_alt1 } + | second_alt1 second_alt2 { second_alt1 } +``` + +If the action is omitted, a default action is generated: + +- If there is a single name in the rule, it gets returned. +- If there are multiple names in the rule, a collection with all parsed + expressions gets returned (the type of the collection will be different + in C and Python). + +This default behaviour is primarily made for very simple situations and for +debugging purposes. + +> [!WARNING] +> It's important that the actions don't mutate any AST nodes that are passed +> into them via variables referring to other rules. The reason for mutation +> being not allowed is that the AST nodes are cached by memoization and could +> potentially be reused in a different context, where the mutation would be +> invalid. If an action needs to change an AST node, it should instead make a +> new copy of the node and change that. 
+ +The full meta-grammar for the grammars supported by the PEG generator is: + +``` + start[Grammar]: grammar ENDMARKER { grammar } + + grammar[Grammar]: + | metas rules { Grammar(rules, metas) } + | rules { Grammar(rules, []) } + + metas[MetaList]: + | meta metas { [meta] + metas } + | meta { [meta] } + + meta[MetaTuple]: + | "@" NAME NEWLINE { (name.string, None) } + | "@" a=NAME b=NAME NEWLINE { (a.string, b.string) } + | "@" NAME STRING NEWLINE { (name.string, literal_eval(string.string)) } + + rules[RuleList]: + | rule rules { [rule] + rules } + | rule { [rule] } + + rule[Rule]: + | rulename ":" alts NEWLINE INDENT more_alts DEDENT { + Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts)) } + | rulename ":" NEWLINE INDENT more_alts DEDENT { Rule(rulename[0], rulename[1], more_alts) } + | rulename ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts) } + + rulename[RuleName]: + | NAME '[' type=NAME '*' ']' {(name.string, type.string+"*")} + | NAME '[' type=NAME ']' {(name.string, type.string)} + | NAME {(name.string, None)} + + alts[Rhs]: + | alt "|" alts { Rhs([alt] + alts.alts)} + | alt { Rhs([alt]) } + + more_alts[Rhs]: + | "|" alts NEWLINE more_alts { Rhs(alts.alts + more_alts.alts) } + | "|" alts NEWLINE { Rhs(alts.alts) } + + alt[Alt]: + | items '$' action { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=action) } + | items '$' { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=None) } + | items action { Alt(items, action=action) } + | items { Alt(items, action=None) } + + items[NamedItemList]: + | named_item items { [named_item] + items } + | named_item { [named_item] } + + named_item[NamedItem]: + | NAME '=' ~ item {NamedItem(name.string, item)} + | item {NamedItem(None, item)} + | it=lookahead {NamedItem(None, it)} + + lookahead[LookaheadOrCut]: + | '&' ~ atom {PositiveLookahead(atom)} + | '!' ~ atom {NegativeLookahead(atom)} + | '~' {Cut()} + + item[Item]: + | '[' ~ alts ']' {Opt(alts)} + | atom '?' 
{Opt(atom)} + | atom '*' {Repeat0(atom)} + | atom '+' {Repeat1(atom)} + | sep=atom '.' node=atom '+' {Gather(sep, node)} + | atom {atom} + + atom[Plain]: + | '(' ~ alts ')' {Group(alts)} + | NAME {NameLeaf(name.string) } + | STRING {StringLeaf(string.string)} + + # Mini-grammar for the actions + + action[str]: "{" ~ target_atoms "}" { target_atoms } + + target_atoms[str]: + | target_atom target_atoms { target_atom + " " + target_atoms } + | target_atom { target_atom } + + target_atom[str]: + | "{" ~ target_atoms "}" { "{" + target_atoms + "}" } + | NAME { name.string } + | NUMBER { number.string } + | STRING { string.string } + | "?" { "?" } + | ":" { ":" } +``` + +As an illustrative example this simple grammar file allows directly +generating a full parser that can parse simple arithmetic expressions and that +returns a valid C-based Python AST: + +``` + start[mod_ty]: a=expr_stmt* ENDMARKER { _PyAST_Module(a, NULL, p->arena) } + expr_stmt[stmt_ty]: a=expr NEWLINE { _PyAST_Expr(a, EXTRA) } + + expr[expr_ty]: + | l=expr '+' r=term { _PyAST_BinOp(l, Add, r, EXTRA) } + | l=expr '-' r=term { _PyAST_BinOp(l, Sub, r, EXTRA) } + | term + + term[expr_ty]: + | l=term '*' r=factor { _PyAST_BinOp(l, Mult, r, EXTRA) } + | l=term '/' r=factor { _PyAST_BinOp(l, Div, r, EXTRA) } + | factor + + factor[expr_ty]: + | '(' e=expr ')' { e } + | atom + + atom[expr_ty]: + | NAME + | NUMBER +``` + +Here ``EXTRA`` is a macro that expands to ``start_lineno, start_col_offset, +end_lineno, end_col_offset, p->arena``, those being variables automatically +injected by the parser; ``p`` points to an object that holds on to all state +for the parser. 
+ +A similar grammar written to target Python AST objects: + +``` + start[ast.Module]: a=expr_stmt* ENDMARKER { ast.Module(body=a or []) } + expr_stmt: a=expr NEWLINE { ast.Expr(value=a, EXTRA) } + + expr: + | l=expr '+' r=term { ast.BinOp(left=l, op=ast.Add(), right=r, EXTRA) } + | l=expr '-' r=term { ast.BinOp(left=l, op=ast.Sub(), right=r, EXTRA) } + | term + + term: + | l=term '*' r=factor { ast.BinOp(left=l, op=ast.Mult(), right=r, EXTRA) } + | l=term '/' r=factor { ast.BinOp(left=l, op=ast.Div(), right=r, EXTRA) } + | factor + + factor: + | '(' e=expr ')' { e } + | atom + + atom: + | NAME + | NUMBER +``` + +Pegen +===== + +Pegen is the parser generator used in CPython to produce the final PEG parser +used by the interpreter. It is the program that can be used to read the python +grammar located in +[`Grammar/python.gram`](https://github.com/python/cpython/blob/main/Grammar/python.gram) +and produce the final C parser. It contains the following pieces: + +- A parser generator that can read a grammar file and produce a PEG parser + written in Python or C that can parse said grammar. The generator is located at + [`Tools/peg_generator/pegen`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen). +- A PEG meta-grammar that automatically generates a Python parser which is used + for the parser generator itself (this means that there are no manually-written + parsers). The meta-grammar is located at + [`Tools/peg_generator/pegen/metagrammar.gram`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/metagrammar.gram). +- A generated parser (using the parser generator) that can directly produce C and Python AST objects. + +The source code for Pegen lives at +[`Tools/peg_generator/pegen`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen) +but normally all typical commands to interact with the parser generator are executed from +the main makefile. 
+ +How to regenerate the parser +---------------------------- + +Once you have made the changes to the grammar files, to regenerate the ``C`` +parser (the one used by the interpreter) just execute: + +``` + make regen-pegen +``` + +using the ``Makefile`` in the main directory. If you are on Windows you can +use the Visual Studio project files to regenerate the parser or to execute: + +``` + ./PCbuild/build.bat --regen +``` + +The generated parser file is located at +[`Parser/parser.c`](https://github.com/python/cpython/blob/main/Parser/parser.c). + +How to regenerate the meta-parser +--------------------------------- + +The meta-grammar (the grammar that describes the grammar for the grammar files +themselves) is located at +[`Tools/peg_generator/pegen/metagrammar.gram`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/metagrammar.gram). +Although it is very unlikely that you will ever need to modify it, if you make +any modifications to this file (in order to implement new Pegen features) you will +need to regenerate the meta-parser (the parser that parses the grammar files). +To do so just execute: + +``` + make regen-pegen-metaparser +``` + +If you are on Windows you can use the Visual Studio project files +to regenerate the parser or to execute: + +``` + ./PCbuild/build.bat --regen +``` + + +Grammatical elements and rules +------------------------------ + +Pegen has some special grammatical elements and rules: + +- Strings with single quotes (') (for example, ``'class'``) denote KEYWORDS. +- Strings with double quotes (") (for example, ``"match"``) denote SOFT KEYWORDS. +- Uppercase names (for example, ``NAME``) denote tokens in the + [`Grammar/Tokens`](https://github.com/python/cpython/blob/main/Grammar/Tokens) file. +- Rule names starting with ``invalid_`` are used for specialized syntax errors. + + - These rules are NOT used in the first pass of the parser. 
+ - Only if the first pass fails to parse, a second pass including the invalid + rules will be executed. + - If the parser fails in the second phase with a generic syntax error, the + location of the generic failure of the first pass will be used (this avoids + reporting incorrect locations due to the invalid rules). + - The order of the alternatives involving invalid rules matters + (like any rule in PEG). + +Tokenization +------------ + +It is common among PEG parser frameworks that the parser does both the parsing +and the tokenization, but this does not happen in Pegen. The reason is that the +Python language needs a custom tokenizer to handle things like indentation +boundaries, some special keywords like ``ASYNC`` and ``AWAIT`` (for +compatibility purposes), backtracking errors (such as unclosed parenthesis), +dealing with encoding, interactive mode and much more. Some of these reasons +are also there for historical purposes, and some others are useful even today. + +The list of tokens (all uppercase names in the grammar) that you can use can +be found in the +[`Grammar/Tokens`](https://github.com/python/cpython/blob/main/Grammar/Tokens) +file. If you change this file to add new tokens, make sure to regenerate the +files by executing: + +``` + make regen-token +``` + +If you are on Windows you can use the Visual Studio project files to regenerate +the tokens or to execute: + +``` + ./PCbuild/build.bat --regen +``` + +How tokens are generated and the rules governing this are completely up to the tokenizer +([`Parser/lexer`](https://github.com/python/cpython/blob/main/Parser/lexer) +and +[`Parser/tokenizer`](https://github.com/python/cpython/blob/main/Parser/tokenizer)); +the parser just receives tokens from it. + +Memoization +----------- + +As described previously, to avoid exponential time complexity in the parser, +memoization is used. + +The C parser used by Python is highly optimized and memoization can be expensive +both in memory and time. 
Although the memory cost is obvious (the parser needs +memory for storing previous results in the cache) the execution time cost comes +for continuously checking if the given rule has a cache hit or not. In many +situations, just parsing it again can be faster. Pegen **disables memoization +by default** except for rules with the special marker ``memo`` after the rule +name (and type, if present): + +``` + rule_name[type] (memo): + ... +``` + +By selectively turning on memoization for a handful of rules, the parser becomes +faster and uses less memory. + +> [!NOTE] +> Left-recursive rules always use memoization, since the implementation of +> left-recursion depends on it. + +To determine whether a new rule needs memoization or not, benchmarking is required +(comparing execution times and memory usage of some considerably large files with +and without memoization). There is a very simple instrumentation API available +in the generated C parser code that allows to measure how much each rule uses +memoization (check the +[`Parser/pegen.c`](https://github.com/python/cpython/blob/main/Parser/pegen.c) +file for more information) but it needs to be manually activated. + +Automatic variables +------------------- + +To make writing actions easier, Pegen injects some automatic variables in the +namespace available when writing actions. In the C parser, some of these +automatic variable names are: + +- ``p``: The parser structure. +- ``EXTRA``: This is a macro that expands to + ``(_start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena)``, + which is normally used to create AST nodes as almost all constructors need these + attributes to be provided. All of the location variables are taken from the + location information of the current token. 
+ +Hard and soft keywords +---------------------- + +> [!NOTE] +> In the grammar files, keywords are defined using **single quotes** (for example, +> ``'class'``) while soft keywords are defined using **double quotes** (for example, +> ``"match"``). + +There are two kinds of keywords allowed in pegen grammars: *hard* and *soft* +keywords. The difference between hard and soft keywords is that hard keywords +are always reserved words, even in positions where they make no sense +(for example, ``x = class + 1``), while soft keywords only get a special +meaning in context. Trying to use a hard keyword as a variable will always +fail: + +``` + >>> class = 3 + File "<stdin>", line 1 + class = 3 + ^ + SyntaxError: invalid syntax + >>> foo(class=3) + File "<stdin>", line 1 + foo(class=3) + ^^^^^ + SyntaxError: invalid syntax +``` + +While soft keywords don't have this limitation if used in a context other than the +one where they are defined as keywords: + +``` + >>> match = 45 + >>> foo(match="Yeah!") +``` + +The ``match`` and ``case`` keywords are soft keywords, so that they are +recognized as keywords at the beginning of a match statement or case block +respectively, but are allowed to be used in other places as variable or +argument names. 
+ +You can get a list of all keywords defined in the grammar from Python: + +``` + >>> import keyword + >>> keyword.kwlist + ['False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await', 'break', + 'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', + 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', + 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield'] +``` + +as well as soft keywords: + +``` + >>> import keyword + >>> keyword.softkwlist + ['_', 'case', 'match'] +``` + +> [!CAUTION] +> Soft keywords can be a bit challenging to manage as they can be accepted in +> places you don't intend, given how the order alternatives behave in PEG +> parsers (see the +> [consequences of ordered choice](#consequences-of-the-ordered-choice-operator) +> section for some background on this). In general, try to define them in places +> where there are not many alternatives. + +Error handling +-------------- + +When a pegen-generated parser detects that an exception is raised, it will +**automatically stop parsing**, no matter what the current state of the parser +is, and it will unwind the stack and report the exception. This means that if a +[rule action](#grammar-actions) raises an exception, all parsing will +stop at that exact point. This is done to allow to correctly propagate any +exception set by calling Python's C API functions. This also includes +[``SyntaxError``](https://docs.python.org/3/library/exceptions.html#SyntaxError) +exceptions and it is the main mechanism the parser uses to report custom syntax +error messages. + +> [!NOTE] +> Tokenizer errors are normally reported by raising exceptions but some special +> tokenizer errors such as unclosed parenthesis will be reported only after the +> parser finishes without returning anything. 
+ +How syntax errors are reported +------------------------------ + +As described previously in the [how PEG parsers work](#how-peg-parsers-work) +section, PEG parsers don't have a defined concept of where errors happened +in the grammar, because a rule failure doesn't imply a parsing failure like +in context free grammars. This means that a heuristic has to be used to report +generic errors unless something is explicitly declared as an error in the +grammar. + +To report generic syntax errors, pegen uses a common heuristic in PEG parsers: +the location of *generic* syntax errors is reported to be the furthest token that +was attempted to be matched but failed. This is only done if parsing has failed +(the parser returns ``NULL`` in C or ``None`` in Python) but no exception has +been raised. + +As the Python grammar was primordially written as an ``LL(1)`` grammar, this heuristic +has an extremely high success rate, but some PEG features, such as lookaheads, +can impact this. + +> [!CAUTION] +> Positive and negative lookaheads will try to match a token so they will affect +> the location of generic syntax errors. Use them carefully at boundaries +> between rules. + +To generate more precise syntax errors, custom rules are used. This is a common +practice also in context free grammars: the parser will try to accept some +construct that is known to be incorrect just to report a specific syntax error +for that construct. In pegen grammars, these rules start with the ``invalid_`` +prefix. This is because trying to match these rules normally has a performance +impact on parsing (and can also affect the 'correct' grammar itself in some +tricky cases, depending on the ordering of the rules) so the generated parser +acts in two phases: + +1. The first phase will try to parse the input stream without taking into + account rules that start with the ``invalid_`` prefix. If the parsing + succeeds it will return the generated AST and the second phase will be + skipped. + +2. 
If the first phase failed, a second parsing attempt is done including the + rules that start with an ``invalid_`` prefix. By design this attempt + **cannot succeed** and is only executed to give to the invalid rules a + chance to detect specific situations where custom, more precise, syntax + errors can be raised. This also allows to trade a bit of performance for + precision reporting errors: given that we know that the input text is + invalid, there is typically no need to be fast because execution is going + to stop anyway. + +> [!IMPORTANT] +> When defining invalid rules: +> +> - Make sure all custom invalid rules raise +> [``SyntaxError``](https://docs.python.org/3/library/exceptions.html#SyntaxError) +> exceptions (or a subclass of it). +> - Make sure **all** invalid rules start with the ``invalid_`` prefix to not +> impact performance of parsing correct Python code. +> - Make sure the parser doesn't behave differently for regular rules when you introduce invalid rules +> (see the [how PEG parsers work](#how-peg-parsers-work) section for more information). + +You can find a collection of macros to raise specialized syntax errors in the +[`Parser/pegen.h`](https://github.com/python/cpython/blob/main/Parser/pegen.h) +header file. These macros allow also to report ranges for +the custom errors, which will be highlighted in the tracebacks that will be +displayed when the error is reported. + + +> [!TIP] +> A good way to test whether an invalid rule will be triggered when you expect +> is to test if introducing a syntax error **after** valid code triggers the +> rule or not. For example: + +``` + $ 42 +``` + +should trigger the syntax error in the ``$`` character. If your rule is not correctly defined this +won't happen. 
As another example, suppose that you try to define a rule to match Python 2 style +``print`` statements in order to create a better error message and you define it as: + +``` + invalid_print: "print" expression +``` + +This will **seem** to work because the parser will correctly parse ``print(something)`` because it is valid +code and the second phase will never execute but if you try to parse ``print(something) $ 3`` the first pass +of the parser will fail (because of the ``$``) and in the second phase, the rule will match the +``print(something)`` as ``print`` followed by the variable ``something`` between parentheses and the error +will be reported there instead of the ``$`` character. + +Generating AST objects +---------------------- + +The output of the C parser used by CPython, which is generated from the +[grammar file](https://github.com/python/cpython/blob/main/Grammar/python.gram), +is a Python AST object (using C structures). This means that the actions in the +grammar file generate AST objects when they succeed. Constructing these objects +can be quite cumbersome (see the [AST compiler section](compiler.md#abstract-syntax-trees-ast) +for more information on how these objects are constructed and how they are used +by the compiler), so special helper functions are used. These functions are +declared in the +[`Parser/pegen.h`](https://github.com/python/cpython/blob/main/Parser/pegen.h) +header file and defined in the +[`Parser/action_helpers.c`](https://github.com/python/cpython/blob/main/Parser/action_helpers.c) +file. The helpers include functions that join AST sequences, get specific elements +from them or to perform extra processing on the generated tree. + + +> [!CAUTION] +> Actions must **never** be used to accept or reject rules. 
It may be tempting +> in some situations to write a very generic rule and then check the generated +> AST to decide whether it is valid or not, but this will render the +> [official grammar](https://docs.python.org/3/reference/grammar.html) partially +> incorrect (because it does not include actions) and will make it more difficult +> for other Python implementations to adapt the grammar to their own needs. + +As a general rule, if an action spans multiple lines or requires something more +complicated than a single expression of C code, it is normally better to create a +custom helper in +[`Parser/action_helpers.c`](https://github.com/python/cpython/blob/main/Parser/action_helpers.c) +and expose it in the +[`Parser/pegen.h`](https://github.com/python/cpython/blob/main/Parser/pegen.h) +header file so that it can be used from the grammar. + +When parsing succeeds, the parser **must** return a **valid** AST object. + +Testing +======= + +There are three files that contain tests for the grammar and the parser: + +- [test_grammar.py](https://github.com/python/cpython/blob/main/Lib/test/test_grammar.py) +- [test_syntax.py](https://github.com/python/cpython/blob/main/Lib/test/test_syntax.py) +- [test_exceptions.py](https://github.com/python/cpython/blob/main/Lib/test/test_exceptions.py) + +Check the contents of these files to know which is the best place for new tests, depending +on the nature of the new feature you are adding. + +Tests for the parser generator itself can be found in the +[test_peg_generator](https://github.com/python/cpython/blob/main/Lib/test/test_peg_generator) +directory. + + +Debugging generated parsers +=========================== + +Making experiments +------------------ + +As the generated C parser is the one used by Python, this means that if +something goes wrong when adding some new rules to the grammar, you cannot +correctly compile and execute Python anymore. 
This makes it a bit challenging +to debug when something goes wrong, especially when experimenting. + +For this reason it is a good idea to experiment first by generating a Python +parser. To do this, you can go to the +[Tools/peg_generator](https://github.com/python/cpython/blob/main/Tools/peg_generator) +directory on the CPython repository and manually call the parser generator by executing: + +``` + $ python -m pegen python <PATH TO YOUR GRAMMAR FILE> +``` + +This will generate a file called ``parse.py`` in the same directory that you +can use to parse some input: + +``` + $ python parse.py file_with_source_code_to_test.py +``` + +As the generated ``parse.py`` file is just Python code, you can modify it +and add breakpoints to debug or better understand some complex situations. + + +Verbose mode +------------ + +When Python is compiled in debug mode (by adding ``--with-pydebug`` when +running the configure step in Linux or by adding ``-d`` when calling the +[PCbuild/build.bat](https://github.com/python/cpython/blob/main/PCbuild/build.bat)), +it is possible to activate a **very** verbose mode in the generated parser. This +is very useful to debug the generated parser and to understand how it works, but it +can be a bit hard to understand at first. + +> [!NOTE] +> When activating verbose mode in the Python parser, it is better to not use +> interactive mode as it can be much harder to understand, because interactive +> mode involves some special steps compared to regular parsing. + +To activate verbose mode you can add the ``-d`` flag when executing Python: + +``` + $ python -d file_to_test.py +``` + +This will print **a lot** of output to ``stderr`` so it is probably better to dump +it to a file for further analysis. The output consists of trace lines with the +following structure: + +``` + <indentation>('>'|'-'|'+'|'!') <rule_name>[<token_location>]: <alternative> ... +``` + +Every line is indented by a different amount (``<indentation>``) depending on how +deep the call stack is.
The next character marks the type of the trace: + +- ``>`` indicates that a rule is going to be attempted to be parsed. +- ``-`` indicates that a rule has failed to be parsed. +- ``+`` indicates that a rule has been parsed correctly. +- ``!`` indicates that an exception or an error has been detected and the parser is unwinding. + +The ``<token_location>`` part indicates the current index in the token array, +the ``<rule_name>`` part indicates what rule is being parsed and +the ``<alternative>`` part indicates what alternative within that rule +is being attempted. + + +> [!NOTE] +> **Document history** +> +> Pablo Galindo Salgado - Original author +> Irit Katriel and Jacob Coffee - Convert to Markdown From f2cb39947093feda3ff85b8dc820922cc5e5f954 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 9 Oct 2024 11:32:16 -0600 Subject: [PATCH 014/114] gh-116510: Fix a Crash Due to Shared Immortal Interned Strings (gh-124865) Fix a crash caused by immortal interned strings being shared between sub-interpreters that use basic single-phase init. In that case, the string can be used by an interpreter that outlives the interpreter that created and interned it. For interpreters that share obmalloc state, also share the interned dict with the main interpreter. This is an un-revert of gh-124646 that then addresses the Py_TRACE_REFS failures identified by gh-124785.
--- ...-09-26-18-21-06.gh-issue-116510.FacUWO.rst | 5 ++ Objects/object.c | 36 ++++++++++++ Objects/unicodeobject.c | 56 +++++++++++++++++-- 3 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst new file mode 100644 index 00000000000000..e3741321006548 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst @@ -0,0 +1,5 @@ +Fix a crash caused by immortal interned strings being shared between +sub-interpreters that use basic single-phase init. In that case, the string +can be used by an interpreter that outlives the interpreter that created and +interned it. For interpreters that share obmalloc state, also share the +interned dict with the main interpreter. diff --git a/Objects/object.c b/Objects/object.c index 8a819dd336e421..8d809158a6c1da 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2491,6 +2491,42 @@ _Py_ResurrectReference(PyObject *op) #ifdef Py_TRACE_REFS +/* Make sure the ref is associated with the right interpreter. + * This only needs special attention for heap-allocated objects + * that have been immortalized, and only when the object might + * outlive the interpreter where it was created. That means the + * object was necessarily created using a global allocator + * (i.e. from the main interpreter). Thus in that specific case + * we move the object over to the main interpreter's refchain. + * + * This was added for the sake of the immortal interned strings, + * where legacy subinterpreters share the main interpreter's + * interned dict (and allocator), and therefore the strings can + * outlive the subinterpreter. + * + * It may make sense to fold this into _Py_SetImmortalUntracked(), + * but that requires further investigation. 
In the meantime, it is + * up to the caller to know if this is needed. There should be + * very few cases. + */ +void +_Py_NormalizeImmortalReference(PyObject *op) +{ + assert(_Py_IsImmortal(op)); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (!_PyRefchain_IsTraced(interp, op)) { + return; + } + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + if (interp != main_interp + && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) + { + assert(!_PyRefchain_IsTraced(main_interp, op)); + _PyRefchain_Remove(interp, op); + _PyRefchain_Trace(main_interp, op); + } +} + void _Py_ForgetReference(PyObject *op) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bd5bb5048fdacc..4ea7d5f380e9a2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -281,13 +281,37 @@ hashtable_unicode_compare(const void *key1, const void *key2) } } +/* Return true if this interpreter should share the main interpreter's + intern_dict. That's important for interpreters which load basic + single-phase init extension modules (m_size == -1). There could be interned + immortal strings that are shared between interpreters, due to the + PyDict_Update(mdict, m_copy) call in import_find_extension(). + + It's not safe to deallocate those strings until all interpreters that + potentially use them are freed. By storing them in the main interpreter, we + ensure they get freed after all other interpreters are freed. 
+*/ +static bool +has_shared_intern_dict(PyInterpreterState *interp) +{ + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC; +} + static int init_interned_dict(PyInterpreterState *interp) { assert(get_interned_dict(interp) == NULL); - PyObject *interned = interned = PyDict_New(); - if (interned == NULL) { - return -1; + PyObject *interned; + if (has_shared_intern_dict(interp)) { + interned = get_interned_dict(_PyInterpreterState_Main()); + Py_INCREF(interned); + } + else { + interned = PyDict_New(); + if (interned == NULL) { + return -1; + } } _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; return 0; @@ -298,7 +322,10 @@ clear_interned_dict(PyInterpreterState *interp) { PyObject *interned = get_interned_dict(interp); if (interned != NULL) { - PyDict_Clear(interned); + if (!has_shared_intern_dict(interp)) { + // only clear if the dict belongs to this interpreter + PyDict_Clear(interned); + } Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } @@ -15401,6 +15428,10 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) assert(*p); } +#ifdef Py_TRACE_REFS +extern void _Py_NormalizeImmortalReference(PyObject *); +#endif + static void immortalize_interned(PyObject *s) { @@ -15416,6 +15447,10 @@ immortalize_interned(PyObject *s) #endif _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; _Py_SetImmortal(s); +#ifdef Py_TRACE_REFS + /* Make sure the ref is associated with the right interpreter. 
*/ + _Py_NormalizeImmortalReference(s); +#endif } static /* non-null */ PyObject* @@ -15609,6 +15644,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); + if (has_shared_intern_dict(interp)) { + // the dict doesn't belong to this interpreter, skip the debug + // checks on it and just clear the pointer to it + clear_interned_dict(interp); + return; + } + #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -16117,8 +16159,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(get_interned_dict(interp) == NULL); + if (!has_shared_intern_dict(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(get_interned_dict(interp) == NULL); + } _PyUnicode_FiniEncodings(&state->fs_codec); From cbfd39247983309a9ef0ae6da6c61cc71665b967 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Wed, 9 Oct 2024 21:50:03 +0100 Subject: [PATCH 015/114] GH-121970: Extract ``availability`` into a new extension (#125082) --- Doc/conf.py | 1 + Doc/tools/extensions/availability.py | 125 +++++++++++++++++++++++++++ Doc/tools/extensions/pyspecific.py | 76 ---------------- 3 files changed, 126 insertions(+), 76 deletions(-) create mode 100644 Doc/tools/extensions/availability.py diff --git a/Doc/conf.py b/Doc/conf.py index 5f22340ac434c9..287e0da46eb11c 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -21,6 +21,7 @@ extensions = [ 'audit_events', + 'availability', 'c_annotations', 'glossary_search', 'lexers', diff --git a/Doc/tools/extensions/availability.py b/Doc/tools/extensions/availability.py new file mode 100644 index 00000000000000..47833fdcb87590 --- /dev/null +++ b/Doc/tools/extensions/availability.py @@ -0,0 +1,125 @@ +"""Support for documenting platform availability""" + +from __future__ 
import annotations + +from typing import TYPE_CHECKING + +from docutils import nodes +from sphinx import addnodes +from sphinx.util import logging +from sphinx.util.docutils import SphinxDirective + +if TYPE_CHECKING: + from sphinx.application import Sphinx + from sphinx.util.typing import ExtensionMetadata + +logger = logging.getLogger("availability") + +# known platform, libc, and threading implementations +_PLATFORMS = frozenset({ + "AIX", + "Android", + "BSD", + "DragonFlyBSD", + "Emscripten", + "FreeBSD", + "GNU/kFreeBSD", + "iOS", + "Linux", + "macOS", + "NetBSD", + "OpenBSD", + "POSIX", + "Solaris", + "Unix", + "VxWorks", + "WASI", + "Windows", +}) +_LIBC = frozenset({ + "BSD libc", + "glibc", + "musl", +}) +_THREADING = frozenset({ + # POSIX platforms with pthreads + "pthreads", +}) +KNOWN_PLATFORMS = _PLATFORMS | _LIBC | _THREADING + + +class Availability(SphinxDirective): + has_content = True + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = True + + def run(self) -> list[nodes.container]: + title = "Availability" + refnode = addnodes.pending_xref( + title, + nodes.inline(title, title, classes=["xref", "std", "std-ref"]), + refdoc=self.env.docname, + refdomain="std", + refexplicit=True, + reftarget="availability", + reftype="ref", + refwarn=True, + ) + sep = nodes.Text(": ") + parsed, msgs = self.state.inline_text(self.arguments[0], self.lineno) + pnode = nodes.paragraph(title, "", refnode, sep, *parsed, *msgs) + self.set_source_info(pnode) + cnode = nodes.container("", pnode, classes=["availability"]) + self.set_source_info(cnode) + if self.content: + self.state.nested_parse(self.content, self.content_offset, cnode) + self.parse_platforms() + + return [cnode] + + def parse_platforms(self) -> dict[str, str | bool]: + """Parse platform information from arguments + + Arguments is a comma-separated string of platforms. A platform may + be prefixed with "not " to indicate that a feature is not available. + + Example:: + + .. 
availability:: Windows, Linux >= 4.2, not WASI + + Arguments like "Linux >= 3.17 with glibc >= 2.27" are currently not + parsed into separate tokens. + """ + platforms = {} + for arg in self.arguments[0].rstrip(".").split(","): + arg = arg.strip() + platform, _, version = arg.partition(" >= ") + if platform.startswith("not "): + version = False + platform = platform.removeprefix("not ") + elif not version: + version = True + platforms[platform] = version + + if unknown := set(platforms).difference(KNOWN_PLATFORMS): + logger.warning( + "Unknown platform%s or syntax '%s' in '.. availability:: %s', " + "see %s:KNOWN_PLATFORMS for a set of known platforms.", + "s" if len(platforms) != 1 else "", + " ".join(sorted(unknown)), + self.arguments[0], + __file__, + ) + + return platforms + + +def setup(app: Sphinx) -> ExtensionMetadata: + app.add_directive("availability", Availability) + + return { + "version": "1.0", + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py index b6623a2b8e01f1..bcb8a421e32d09 100644 --- a/Doc/tools/extensions/pyspecific.py +++ b/Doc/tools/extensions/pyspecific.py @@ -24,7 +24,6 @@ from sphinx.domains.changeset import VersionChange, versionlabels, versionlabel_classes from sphinx.domains.python import PyFunction, PyMethod, PyModule from sphinx.locale import _ as sphinx_gettext -from sphinx.util import logging from sphinx.util.docutils import SphinxDirective from sphinx.writers.text import TextWriter, TextTranslator from sphinx.util.display import status_iterator @@ -108,80 +107,6 @@ def run(self): return [pnode] -# Support for documenting platform availability - -class Availability(SphinxDirective): - - has_content = True - required_arguments = 1 - optional_arguments = 0 - final_argument_whitespace = True - - # known platform, libc, and threading implementations - known_platforms = frozenset({ - "AIX", "Android", "BSD", "DragonFlyBSD", "Emscripten", 
"FreeBSD", - "GNU/kFreeBSD", "Linux", "NetBSD", "OpenBSD", "POSIX", "Solaris", - "Unix", "VxWorks", "WASI", "Windows", "macOS", "iOS", - # libc - "BSD libc", "glibc", "musl", - # POSIX platforms with pthreads - "pthreads", - }) - - def run(self): - availability_ref = ':ref:`Availability `: ' - avail_nodes, avail_msgs = self.state.inline_text( - availability_ref + self.arguments[0], - self.lineno) - pnode = nodes.paragraph(availability_ref + self.arguments[0], - '', *avail_nodes, *avail_msgs) - self.set_source_info(pnode) - cnode = nodes.container("", pnode, classes=["availability"]) - self.set_source_info(cnode) - if self.content: - self.state.nested_parse(self.content, self.content_offset, cnode) - self.parse_platforms() - - return [cnode] - - def parse_platforms(self): - """Parse platform information from arguments - - Arguments is a comma-separated string of platforms. A platform may - be prefixed with "not " to indicate that a feature is not available. - - Example:: - - .. availability:: Windows, Linux >= 4.2, not WASI - - Arguments like "Linux >= 3.17 with glibc >= 2.27" are currently not - parsed into separate tokens. - """ - platforms = {} - for arg in self.arguments[0].rstrip(".").split(","): - arg = arg.strip() - platform, _, version = arg.partition(" >= ") - if platform.startswith("not "): - version = False - platform = platform[4:] - elif not version: - version = True - platforms[platform] = version - - unknown = set(platforms).difference(self.known_platforms) - if unknown: - cls = type(self) - logger = logging.getLogger(cls.__qualname__) - logger.warning( - f"Unknown platform(s) or syntax '{' '.join(sorted(unknown))}' " - f"in '.. availability:: {self.arguments[0]}', see " - f"{__file__}:{cls.__qualname__}.known_platforms for a set " - "known platforms." 
- ) - - return platforms - - # Support for documenting decorators class PyDecoratorMixin(object): @@ -492,7 +417,6 @@ def setup(app): app.add_role('issue', issue_role) app.add_role('gh', gh_issue_role) app.add_directive('impl-detail', ImplementationDetail) - app.add_directive('availability', Availability) app.add_directive('versionadded', PyVersionChange, override=True) app.add_directive('versionchanged', PyVersionChange, override=True) app.add_directive('versionremoved', PyVersionChange, override=True) From 7d2c39752fa6f685f15ad9c585d83a62553477c2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 10 Oct 2024 00:20:53 +0300 Subject: [PATCH 016/114] gh-91818: Use default program name in the CLI of many modules (GH-124867) As argparse now detects by default when the code was run as a module. This leads to using the actual executable name instead of simply "python" to display in the usage message ("usage: python -m ..."). --- Lib/ast.py | 2 +- Lib/ensurepip/__init__.py | 2 +- Lib/ensurepip/_uninstall.py | 2 +- Lib/json/tool.py | 3 +-- Lib/pdb.py | 3 +-- Lib/sqlite3/__main__.py | 1 - Lib/test/test_sqlite3/test_cli.py | 4 +++- Lib/tokenize.py | 2 +- Lib/venv/__init__.py | 3 +-- .../Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst | 4 ++++ 10 files changed, 14 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst diff --git a/Lib/ast.py b/Lib/ast.py index a954d4a97d3c22..154d2c8c1f9ebb 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1743,7 +1743,7 @@ def unparse(ast_obj): def main(): import argparse - parser = argparse.ArgumentParser(prog='python -m ast') + parser = argparse.ArgumentParser() parser.add_argument('infile', nargs='?', default='-', help='the file to parse; defaults to stdin') parser.add_argument('-m', '--mode', default='exec', diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py index c5350df270487a..585afc85836c06 100644 --- a/Lib/ensurepip/__init__.py +++ 
b/Lib/ensurepip/__init__.py @@ -205,7 +205,7 @@ def _uninstall_helper(*, verbosity=0): def _main(argv=None): import argparse - parser = argparse.ArgumentParser(prog="python -m ensurepip") + parser = argparse.ArgumentParser() parser.add_argument( "--version", action="version", diff --git a/Lib/ensurepip/_uninstall.py b/Lib/ensurepip/_uninstall.py index b257904328d2f5..4183c28a809008 100644 --- a/Lib/ensurepip/_uninstall.py +++ b/Lib/ensurepip/_uninstall.py @@ -6,7 +6,7 @@ def _main(argv=None): - parser = argparse.ArgumentParser(prog="python -m ensurepip._uninstall") + parser = argparse.ArgumentParser() parser.add_argument( "--version", action="version", diff --git a/Lib/json/tool.py b/Lib/json/tool.py index 9028e517fb9f7d..1ba91384c81f27 100644 --- a/Lib/json/tool.py +++ b/Lib/json/tool.py @@ -9,10 +9,9 @@ def main(): - prog = 'python -m json' description = ('A simple command line interface for json module ' 'to validate and pretty-print JSON objects.') - parser = argparse.ArgumentParser(prog=prog, description=description) + parser = argparse.ArgumentParser(description=description) parser.add_argument('infile', nargs='?', help='a JSON file to be validated or pretty-printed', default='-') diff --git a/Lib/pdb.py b/Lib/pdb.py index aea6fb70ae3106..d9aed24bfcd8e7 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -2423,8 +2423,7 @@ def help(): def main(): import argparse - parser = argparse.ArgumentParser(prog="pdb", - usage="%(prog)s [-h] [-c command] (-m module | pyfile) [args ...]", + parser = argparse.ArgumentParser(usage="%(prog)s [-h] [-c command] (-m module | pyfile) [args ...]", description=_usage, formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False) diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py index d9423c25e34135..cfdee61403d1fa 100644 --- a/Lib/sqlite3/__main__.py +++ b/Lib/sqlite3/__main__.py @@ -65,7 +65,6 @@ def runsource(self, source, filename="", symbol="single"): def main(*args): parser = ArgumentParser( 
description="Python sqlite3 CLI", - prog="python -m sqlite3", ) parser.add_argument( "filename", type=str, default=":memory:", nargs="?", diff --git a/Lib/test/test_sqlite3/test_cli.py b/Lib/test/test_sqlite3/test_cli.py index 303f9e03b5383f..d014a9ce841607 100644 --- a/Lib/test/test_sqlite3/test_cli.py +++ b/Lib/test/test_sqlite3/test_cli.py @@ -34,7 +34,9 @@ def expect_failure(self, *args): def test_cli_help(self): out = self.expect_success("-h") - self.assertIn("usage: python -m sqlite3", out) + self.assertIn("usage: ", out) + self.assertIn(" [-h] [-v] [filename] [sql]", out) + self.assertIn("Python sqlite3 CLI", out) def test_cli_version(self): out = self.expect_success("-v") diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 4b4c3cfe16999b..7ece4e9b70d31b 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -510,7 +510,7 @@ def error(message, filename=None, location=None): sys.exit(1) # Parse the arguments and options - parser = argparse.ArgumentParser(prog='python -m tokenize') + parser = argparse.ArgumentParser() parser.add_argument(dest='filename', nargs='?', metavar='filename.py', help='the file to tokenize; defaults to stdin') diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index a5cb2bdb064692..a00fa690fa0b88 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -575,8 +575,7 @@ def create(env_dir, system_site_packages=False, clear=False, def main(args=None): import argparse - parser = argparse.ArgumentParser(prog=__name__, - description='Creates virtual Python ' + parser = argparse.ArgumentParser(description='Creates virtual Python ' 'environments in one or ' 'more target ' 'directories.', diff --git a/Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst b/Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst new file mode 100644 index 00000000000000..f45f00e48a3830 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst @@ -0,0 +1,4 @@ +The CLI of many modules 
(:mod:`ast`, :mod:`ensurepip`, :mod:`json`, +:mod:`pdb`, :mod:`sqlite3`, :mod:`tokenize`, :mod:`venv`) now uses the +actual executable name instead of simply "python" to display in the usage +message. From 52f70da19cf3c7198be37faeac233ef803080f6f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Oct 2024 23:56:30 +0200 Subject: [PATCH 017/114] gh-125196: Use PyUnicodeWriter for repr(list) (#125202) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. Replace PyObject_Repr() + _PyUnicodeWriter_WriteStr() with PyUnicodeWriter_WriteRepr(). --- Objects/listobject.c | 45 ++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 8abe15d6674140..e7090f20001a39 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -522,49 +522,50 @@ list_dealloc(PyObject *self) static PyObject * list_repr_impl(PyListObject *v) { - PyObject *s; - _PyUnicodeWriter writer; - Py_ssize_t i = Py_ReprEnter((PyObject*)v); - if (i != 0) { - return i > 0 ? PyUnicode_FromString("[...]") : NULL; + int res = Py_ReprEnter((PyObject*)v); + if (res != 0) { + return (res > 0 ? PyUnicode_FromString("[...]") : NULL); } - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; /* "[" + "1" + ", 2" * (len - 1) + "]" */ - writer.min_length = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; + Py_ssize_t prealloc = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { + goto error; + } - if (_PyUnicodeWriter_WriteChar(&writer, '[') < 0) + if (PyUnicodeWriter_WriteChar(writer, '[') < 0) { goto error; + } /* Do repr() on each element. Note that this may mutate the list, so must refetch the list size on each iteration. 
*/ - for (i = 0; i < Py_SIZE(v); ++i) { + for (Py_ssize_t i = 0; i < Py_SIZE(v); ++i) { if (i > 0) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { + goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; + } } - s = PyObject_Repr(v->ob_item[i]); - if (s == NULL) - goto error; - - if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) { - Py_DECREF(s); + if (PyUnicodeWriter_WriteRepr(writer, v->ob_item[i]) < 0) { goto error; } - Py_DECREF(s); } - writer.overallocate = 0; - if (_PyUnicodeWriter_WriteChar(&writer, ']') < 0) + if (PyUnicodeWriter_WriteChar(writer, ']') < 0) { goto error; + } Py_ReprLeave((PyObject *)v); - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + if (writer != NULL) { + PyUnicodeWriter_Discard(writer); + } Py_ReprLeave((PyObject *)v); return NULL; } From 0c5a48c1c9039eb1ce25a96c43505c4de0a0b9d7 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Wed, 9 Oct 2024 15:56:50 -0600 Subject: [PATCH 018/114] GH-124693: Support parsing negative scientific and complex numbers argparse (GH-124823) Co-authored-by: Serhiy Storchaka --- Lib/argparse.py | 2 +- Lib/test/test_argparse.py | 27 ++++++++++++++----- ...-10-01-02-31-13.gh-issue-124693.qzbXKB.rst | 1 + 3 files changed, 22 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 21299b69ecd74c..d1f8fa2ace8611 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1367,7 +1367,7 @@ def __init__(self, self._defaults = {} # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-(?:\d+(?:_\d+)*(?:\.\d+(?:_\d+)*)?|\.\d+(?:_\d+)*)$') + self._negative_number_matcher = _re.compile(r'-\.?\d') # whether or not there are any optionals that look like negative # numbers -- uses a 
list so it can be shared and edited diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 1bf812b36fc2c6..c9e79eb18a08fb 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2195,20 +2195,33 @@ class TestNegativeNumber(ParserTestCase): argument_signatures = [ Sig('--int', type=int), Sig('--float', type=float), + Sig('--complex', type=complex), ] failures = [ '--float -_.45', '--float -1__000.0', + '--float -1.0.0', '--int -1__000', + '--int -1.0', + '--complex -1__000.0j', + '--complex -1.0jj', + '--complex -_.45j', ] successes = [ - ('--int -1000 --float -1000.0', NS(int=-1000, float=-1000.0)), - ('--int -1_000 --float -1_000.0', NS(int=-1000, float=-1000.0)), - ('--int -1_000_000 --float -1_000_000.0', NS(int=-1000000, float=-1000000.0)), - ('--float -1_000.0', NS(int=None, float=-1000.0)), - ('--float -1_000_000.0_0', NS(int=None, float=-1000000.0)), - ('--float -.5', NS(int=None, float=-0.5)), - ('--float -.5_000', NS(int=None, float=-0.5)), + ('--int -1000 --float -1000.0', NS(int=-1000, float=-1000.0, complex=None)), + ('--int -1_000 --float -1_000.0', NS(int=-1000, float=-1000.0, complex=None)), + ('--int -1_000_000 --float -1_000_000.0', NS(int=-1000000, float=-1000000.0, complex=None)), + ('--float -1_000.0', NS(int=None, float=-1000.0, complex=None)), + ('--float -1_000_000.0_0', NS(int=None, float=-1000000.0, complex=None)), + ('--float -.5', NS(int=None, float=-0.5, complex=None)), + ('--float -.5_000', NS(int=None, float=-0.5, complex=None)), + ('--float -1e3', NS(int=None, float=-1000, complex=None)), + ('--float -1e-3', NS(int=None, float=-0.001, complex=None)), + ('--complex -1j', NS(int=None, float=None, complex=-1j)), + ('--complex -1_000j', NS(int=None, float=None, complex=-1000j)), + ('--complex -1_000.0j', NS(int=None, float=None, complex=-1000.0j)), + ('--complex -1e3j', NS(int=None, float=None, complex=-1000j)), + ('--complex -1e-3j', NS(int=None, float=None, complex=-0.001j)), ] class 
TestInvalidAction(TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst b/Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst new file mode 100644 index 00000000000000..3e87eb457d9911 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst @@ -0,0 +1 @@ +Fix a bug where :mod:`argparse` doesn't recognize negative complex numbers or negative numbers using scientific notation. From ee3167b9787bf9424d5637a224233de775450231 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 00:01:02 +0200 Subject: [PATCH 019/114] gh-125196: Add fast-path for int in PyUnicodeWriter_WriteStr() (#125214) PyUnicodeWriter_WriteStr() and PyUnicodeWriter_WriteRepr() now call directly _PyLong_FormatWriter() if the argument is an int. --- Objects/unicodeobject.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4ea7d5f380e9a2..a9b33248163880 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13632,6 +13632,10 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) { + if (Py_TYPE(obj) == &PyLong_Type) { + return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); + } + PyObject *str = PyObject_Str(obj); if (str == NULL) { return -1; @@ -13646,6 +13650,10 @@ PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) int PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj) { + if (Py_TYPE(obj) == &PyLong_Type) { + return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); + } + PyObject *repr = PyObject_Repr(obj); if (repr == NULL) { return -1; From 1877543d03d323d581b5fc0f19eff501926ba151 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 00:04:50 +0200 Subject: [PATCH 020/114] gh-125196: Use PyUnicodeWriter for repr(structseq) (#125219) Replace the private 
_PyUnicodeWriter with the public PyUnicodeWriter. * Avoid temporary PyUnicode_DecodeUTF8(): call PyUnicodeWriter_WriteUTF8() instead. * Avoid temporary PyObject_Repr(): call PyUnicodeWriter_WriteRepr() instead. --- Objects/structseq.c | 65 +++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/Objects/structseq.c b/Objects/structseq.c index ee3dbf9d4c047a..6092742835400b 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -266,83 +266,66 @@ static PyObject * structseq_repr(PyStructSequence *obj) { PyTypeObject *typ = Py_TYPE(obj); - _PyUnicodeWriter writer; - /* Write "typename(" */ - PyObject *type_name = PyUnicode_DecodeUTF8(typ->tp_name, - strlen(typ->tp_name), - NULL); - if (type_name == NULL) { + // count 5 characters per item: "x=1, " + Py_ssize_t type_name_len = strlen(typ->tp_name); + Py_ssize_t prealloc = (type_name_len + 1 + + VISIBLE_SIZE(obj) * 5 + 1); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { return NULL; } - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; - /* count 5 characters per item: "x=1, " */ - writer.min_length = (PyUnicode_GET_LENGTH(type_name) + 1 - + VISIBLE_SIZE(obj) * 5 + 1); - - if (_PyUnicodeWriter_WriteStr(&writer, type_name) < 0) { - Py_DECREF(type_name); + // Write "typename(" + if (PyUnicodeWriter_WriteUTF8(writer, typ->tp_name, type_name_len) < 0) { goto error; } - Py_DECREF(type_name); - - if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) { + if (PyUnicodeWriter_WriteChar(writer, '(') < 0) { goto error; } for (Py_ssize_t i=0; i < VISIBLE_SIZE(obj); i++) { if (i > 0) { - /* Write ", " */ - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) { + // Write ", " + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { + goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; } } - /* Write "name=repr" */ + // Write name const char *name_utf8 = typ->tp_members[i].name; if (name_utf8 == NULL) 
{ - PyErr_Format(PyExc_SystemError, "In structseq_repr(), member %zd name is NULL" + PyErr_Format(PyExc_SystemError, + "In structseq_repr(), member %zd name is NULL" " for type %.500s", i, typ->tp_name); goto error; } - - PyObject *name = PyUnicode_DecodeUTF8(name_utf8, strlen(name_utf8), NULL); - if (name == NULL) { - goto error; - } - if (_PyUnicodeWriter_WriteStr(&writer, name) < 0) { - Py_DECREF(name); + if (PyUnicodeWriter_WriteUTF8(writer, name_utf8, -1) < 0) { goto error; } - Py_DECREF(name); - if (_PyUnicodeWriter_WriteChar(&writer, '=') < 0) { + // Write "=" + repr(value) + if (PyUnicodeWriter_WriteChar(writer, '=') < 0) { goto error; } - PyObject *value = PyStructSequence_GetItem((PyObject*)obj, i); assert(value != NULL); - PyObject *repr = PyObject_Repr(value); - if (repr == NULL) { - goto error; - } - if (_PyUnicodeWriter_WriteStr(&writer, repr) < 0) { - Py_DECREF(repr); + if (PyUnicodeWriter_WriteRepr(writer, value) < 0) { goto error; } - Py_DECREF(repr); } - if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) { + if (PyUnicodeWriter_WriteChar(writer, ')') < 0) { goto error; } - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } From c7d5d1d93b630e352abd9a0c93ea6d34c443f444 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 9 Oct 2024 23:30:56 +0100 Subject: [PATCH 021/114] gh-125140: Remove the current directory from sys.path when using pyrepl (GH-125212) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pablo Galindo Co-authored-by: Łukasz Langa Co-authored-by: Peter Bierma --- Lib/site.py | 11 ++++++++--- .../2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst diff --git a/Lib/site.py b/Lib/site.py index 
b3194d79fb5ab8..07a6361fad44e5 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -503,9 +503,14 @@ def register_readline(): if PYTHON_BASIC_REPL: CAN_USE_PYREPL = False else: - import _pyrepl.readline - import _pyrepl.unix_console - from _pyrepl.main import CAN_USE_PYREPL + original_path = sys.path + sys.path = [p for p in original_path if p != ''] + try: + import _pyrepl.readline + import _pyrepl.unix_console + from _pyrepl.main import CAN_USE_PYREPL + finally: + sys.path = original_path except ImportError: return diff --git a/Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst b/Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst new file mode 100644 index 00000000000000..f4a49302372647 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst @@ -0,0 +1 @@ +Remove the current directory from ``sys.path`` when using PyREPL. From 9ad55e85d78c5338b1ad170e614345652bd1b651 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 10 Oct 2024 02:30:14 +0300 Subject: [PATCH 022/114] gh-124969: Skip the test for ALT_DIGITS also on iOS (#125177) Skip the locale.ALT_DIGITS test on all Apple platforms, not just macOS. 
--- Lib/test/test__locale.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 02d2acc6d1c417..e403c2a822788d 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -209,8 +209,8 @@ def test_alt_digits_nl_langinfo(self): with self.subTest(locale=loc): alt_digits = nl_langinfo(locale.ALT_DIGITS) self.assertIsInstance(alt_digits, tuple) - if count and not alt_digits and sys.platform == 'darwin': - self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on macOS') + if count and not alt_digits and support.is_apple: + self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on Apple platforms') self.assertEqual(len(alt_digits), count) for i in samples: self.assertEqual(alt_digits[i], samples[i]) From 1b2a5485f94ccbe43a45eb9990a5649ae3d2499e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 01:32:02 +0200 Subject: [PATCH 023/114] gh-125196: PyUnicodeWriter_Discard(NULL) does nothing (#125222) --- Doc/c-api/unicode.rst | 2 ++ Objects/listobject.c | 4 +--- Objects/unicodeobject.c | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index f5704cffa199a5..4daf9e9fdbf2f1 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1600,6 +1600,8 @@ object. Discard the internal Unicode buffer and destroy the writer instance. + If *writer* is ``NULL``, no operation is performed. + .. c:function:: int PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch) Write the single Unicode character *ch* into *writer*. 
diff --git a/Objects/listobject.c b/Objects/listobject.c index e7090f20001a39..930aefde325a7c 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -563,9 +563,7 @@ list_repr_impl(PyListObject *v) return PyUnicodeWriter_Finish(writer); error: - if (writer != NULL) { - PyUnicodeWriter_Discard(writer); - } + PyUnicodeWriter_Discard(writer); Py_ReprLeave((PyObject *)v); return NULL; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a9b33248163880..93c1025b6a3cae 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13455,6 +13455,9 @@ PyUnicodeWriter_Create(Py_ssize_t length) void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) { + if (writer == NULL) { + return; + } _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer); PyMem_Free(writer); } From 942916378aa6a0946b1385c2c7ca6935620d710a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 01:37:14 +0200 Subject: [PATCH 024/114] gh-125196: Use PyUnicodeWriter for repr(contextvars.Token) (#125220) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. 
--- Lib/test/test_context.py | 8 ++++++++ Python/context.c | 39 +++++++++++---------------------------- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/Lib/test/test_context.py b/Lib/test/test_context.py index 255be306156c0b..b06b9df9f5b0b8 100644 --- a/Lib/test/test_context.py +++ b/Lib/test/test_context.py @@ -60,6 +60,14 @@ def test_context_var_repr_1(self): c.reset(t) self.assertIn(' used ', repr(t)) + @isolated_context + def test_token_repr_1(self): + c = contextvars.ContextVar('a') + tok = c.set(1) + self.assertRegex(repr(tok), + r"^ at 0x[0-9a-fA-F]+>$") + def test_context_subclassing_1(self): with self.assertRaisesRegex(TypeError, 'not an acceptable base type'): class MyContextVar(contextvars.ContextVar): diff --git a/Python/context.c b/Python/context.c index ddb03555f9e402..36e2677c398f59 100644 --- a/Python/context.c +++ b/Python/context.c @@ -1154,48 +1154,31 @@ token_tp_dealloc(PyContextToken *self) static PyObject * token_tp_repr(PyContextToken *self) { - _PyUnicodeWriter writer; - - _PyUnicodeWriter_Init(&writer); - - if (_PyUnicodeWriter_WriteASCIIString(&writer, "tok_used) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, " used", 5) < 0) { + if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) { goto error; } } - - if (_PyUnicodeWriter_WriteASCIIString(&writer, " var=", 5) < 0) { + if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) { goto error; } - - PyObject *var = PyObject_Repr((PyObject *)self->tok_var); - if (var == NULL) { + if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) { goto error; } - if (_PyUnicodeWriter_WriteStr(&writer, var) < 0) { - Py_DECREF(var); - goto error; - } - Py_DECREF(var); - - PyObject *addr = PyUnicode_FromFormat(" at %p>", self); - if (addr == NULL) { - goto error; - } - if (_PyUnicodeWriter_WriteStr(&writer, addr) < 0) { - Py_DECREF(addr); + if (PyUnicodeWriter_Format(writer, " at %p>", self) < 0) { goto error; } - Py_DECREF(addr); - - return _PyUnicodeWriter_Finish(&writer); 
+ return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } From 99400930ac1d4e5e10a5ae30f8202d8bc2661e39 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Wed, 9 Oct 2024 19:44:03 -0400 Subject: [PATCH 025/114] gh-124872: Refine contextvars documentation (#124773) * Add definitions for "context", "current context", and "context management protocol". * Update related definitions to be consistent with the new definitions. * Restructure the documentation for the `contextvars.Context` class to prepare for adding context manager support, and for consistency with the definitions. * Use `testcode` and `testoutput` to test the `Context.run` example. * Expand the documentation for the `Py_CONTEXT_EVENT_ENTER` and `Py_CONTEXT_EVENT_EXIT` events to clarify and to prepare for planned changes. --- Doc/c-api/contextvars.rst | 20 ++-- Doc/glossary.rst | 40 ++++++-- Doc/library/contextvars.rst | 98 +++++++++++++------ Include/cpython/context.h | 23 +++-- ...-09-29-18-14-52.gh-issue-119333.7tinr0.rst | 3 + 5 files changed, 132 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst diff --git a/Doc/c-api/contextvars.rst b/Doc/c-api/contextvars.rst index 0de135b232aaaf..59e74ba1ac7022 100644 --- a/Doc/c-api/contextvars.rst +++ b/Doc/c-api/contextvars.rst @@ -122,18 +122,24 @@ Context object management functions: .. c:type:: PyContextEvent Enumeration of possible context object watcher events: - - ``Py_CONTEXT_EVENT_ENTER`` - - ``Py_CONTEXT_EVENT_EXIT`` + + - ``Py_CONTEXT_EVENT_ENTER``: A context has been entered, causing the + :term:`current context` to switch to it. The object passed to the watch + callback is the now-current :class:`contextvars.Context` object. Each + enter event will eventually have a corresponding exit event for the same + context object after any subsequently entered contexts have themselves been + exited. 
+ - ``Py_CONTEXT_EVENT_EXIT``: A context is about to be exited, which will + cause the :term:`current context` to switch back to what it was before the + context was entered. The object passed to the watch callback is the + still-current :class:`contextvars.Context` object. .. versionadded:: 3.14 .. c:type:: int (*PyContext_WatchCallback)(PyContextEvent event, PyContext* ctx) - Type of a context object watcher callback function. - If *event* is ``Py_CONTEXT_EVENT_ENTER``, then the callback is invoked - after *ctx* has been set as the current context for the current thread. - Otherwise, the callback is invoked before the deactivation of *ctx* as the current context - and the restoration of the previous contex object for the current thread. + Context object watcher callback function. The object passed to the callback + is event-specific; see :c:type:`PyContextEvent` for details. If the callback returns with an exception set, it must return ``-1``; this exception will be printed as an unraisable exception using diff --git a/Doc/glossary.rst b/Doc/glossary.rst index cb7e0a2b89d037..1d407732eef576 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -265,19 +265,33 @@ Glossary advanced mathematical feature. If you're not aware of a need for them, it's almost certain you can safely ignore them. + context + This term has different meanings depending on where and how it is used. + Some common meanings: + + * The temporary state or environment established by a :term:`context + manager` via a :keyword:`with` statement. + * The collection of key­value bindings associated with a particular + :class:`contextvars.Context` object and accessed via + :class:`~contextvars.ContextVar` objects. Also see :term:`context + variable`. + * A :class:`contextvars.Context` object. Also see :term:`current + context`. + + context management protocol + The :meth:`~object.__enter__` and :meth:`~object.__exit__` methods called + by the :keyword:`with` statement. See :pep:`343`. 
+ context manager - An object which controls the environment seen in a :keyword:`with` - statement by defining :meth:`~object.__enter__` and :meth:`~object.__exit__` methods. - See :pep:`343`. + An object which implements the :term:`context management protocol` and + controls the environment seen in a :keyword:`with` statement. See + :pep:`343`. context variable - A variable which can have different values depending on its context. - This is similar to Thread-Local Storage in which each execution - thread may have a different value for a variable. However, with context - variables, there may be several contexts in one execution thread and the - main usage for context variables is to keep track of variables in + A variable whose value depends on which context is the :term:`current + context`. Values are accessed via :class:`contextvars.ContextVar` + objects. Context variables are primarily used to isolate state between concurrent asynchronous tasks. - See :mod:`contextvars`. contiguous .. index:: C-contiguous, Fortran contiguous @@ -311,6 +325,14 @@ Glossary is used when necessary to distinguish this implementation from others such as Jython or IronPython. + current context + The :term:`context` (:class:`contextvars.Context` object) that is + currently used by :class:`~contextvars.ContextVar` objects to access (get + or set) the values of :term:`context variables `. Each + thread has its own current context. Frameworks for executing asynchronous + tasks (see :mod:`asyncio`) associate each task with a context which + becomes the current context whenever the task starts or resumes execution. + decorator A function returning another function, usually applied as a function transformation using the ``@wrapper`` syntax. 
Common examples for diff --git a/Doc/library/contextvars.rst b/Doc/library/contextvars.rst index 2a79dfe8f81e26..2b1fb9fdd29cd8 100644 --- a/Doc/library/contextvars.rst +++ b/Doc/library/contextvars.rst @@ -144,51 +144,89 @@ Manual Context Management To get a copy of the current context use the :func:`~contextvars.copy_context` function. - Every thread will have a different top-level :class:`~contextvars.Context` - object. This means that a :class:`ContextVar` object behaves in a similar - fashion to :func:`threading.local` when values are assigned in different - threads. + Each thread has its own effective stack of :class:`!Context` objects. The + :term:`current context` is the :class:`!Context` object at the top of the + current thread's stack. All :class:`!Context` objects in the stacks are + considered to be *entered*. + + *Entering* a context, which can be done by calling its :meth:`~Context.run` + method, makes the context the current context by pushing it onto the top of + the current thread's context stack. + + *Exiting* from the current context, which can be done by returning from the + callback passed to the :meth:`~Context.run` method, restores the current + context to what it was before the context was entered by popping the context + off the top of the context stack. + + Since each thread has its own context stack, :class:`ContextVar` objects + behave in a similar fashion to :func:`threading.local` when values are + assigned in different threads. + + Attempting to enter an already entered context, including contexts entered in + other threads, raises a :exc:`RuntimeError`. + + After exiting a context, it can later be re-entered (from any thread). + + Any changes to :class:`ContextVar` values via the :meth:`ContextVar.set` + method are recorded in the current context. The :meth:`ContextVar.get` + method returns the value associated with the current context. 
Exiting a + context effectively reverts any changes made to context variables while the + context was entered (if needed, the values can be restored by re-entering the + context). Context implements the :class:`collections.abc.Mapping` interface. .. method:: run(callable, *args, **kwargs) - Execute ``callable(*args, **kwargs)`` code in the context object - the *run* method is called on. Return the result of the execution - or propagate an exception if one occurred. + Enters the Context, executes ``callable(*args, **kwargs)``, then exits the + Context. Returns *callable*'s return value, or propagates an exception if + one occurred. + + Example: + + .. testcode:: + + import contextvars - Any changes to any context variables that *callable* makes will - be contained in the context object:: + var = contextvars.ContextVar('var') + var.set('spam') + print(var.get()) # 'spam' - var = ContextVar('var') - var.set('spam') + ctx = contextvars.copy_context() - def main(): - # 'var' was set to 'spam' before - # calling 'copy_context()' and 'ctx.run(main)', so: - # var.get() == ctx[var] == 'spam' + def main(): + # 'var' was set to 'spam' before + # calling 'copy_context()' and 'ctx.run(main)', so: + print(var.get()) # 'spam' + print(ctx[var]) # 'spam' - var.set('ham') + var.set('ham') - # Now, after setting 'var' to 'ham': - # var.get() == ctx[var] == 'ham' + # Now, after setting 'var' to 'ham': + print(var.get()) # 'ham' + print(ctx[var]) # 'ham' - ctx = copy_context() + # Any changes that the 'main' function makes to 'var' + # will be contained in 'ctx'. + ctx.run(main) - # Any changes that the 'main' function makes to 'var' - # will be contained in 'ctx'. 
- ctx.run(main) + # The 'main()' function was run in the 'ctx' context, + # so changes to 'var' are contained in it: + print(ctx[var]) # 'ham' - # The 'main()' function was run in the 'ctx' context, - # so changes to 'var' are contained in it: - # ctx[var] == 'ham' + # However, outside of 'ctx', 'var' is still set to 'spam': + print(var.get()) # 'spam' - # However, outside of 'ctx', 'var' is still set to 'spam': - # var.get() == 'spam' + .. testoutput:: + :hide: - The method raises a :exc:`RuntimeError` when called on the same - context object from more than one OS thread, or when called - recursively. + spam + spam + spam + ham + ham + ham + spam .. method:: copy() diff --git a/Include/cpython/context.h b/Include/cpython/context.h index ec72966e82c6f9..d722b4d93134f7 100644 --- a/Include/cpython/context.h +++ b/Include/cpython/context.h @@ -28,15 +28,26 @@ PyAPI_FUNC(int) PyContext_Enter(PyObject *); PyAPI_FUNC(int) PyContext_Exit(PyObject *); typedef enum { - Py_CONTEXT_EVENT_ENTER, - Py_CONTEXT_EVENT_EXIT, + /* + * A context has been entered, causing the "current context" to switch to + * it. The object passed to the watch callback is the now-current + * contextvars.Context object. Each enter event will eventually have a + * corresponding exit event for the same context object after any + * subsequently entered contexts have themselves been exited. + */ + Py_CONTEXT_EVENT_ENTER, + /* + * A context is about to be exited, which will cause the "current context" + * to switch back to what it was before the context was entered. The + * object passed to the watch callback is the still-current + * contextvars.Context object. + */ + Py_CONTEXT_EVENT_EXIT, } PyContextEvent; /* - * Callback to be invoked when a context object is entered or exited. - * - * The callback is invoked with the event and a reference to - * the context after its entered and before its exited. + * Context object watcher callback function. 
The object passed to the callback + * is event-specific; see PyContextEvent for details. * * if the callback returns with an exception set, it must return -1. Otherwise * it should return 0 diff --git a/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst b/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst new file mode 100644 index 00000000000000..69a5c764d05a2e --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst @@ -0,0 +1,3 @@ +Added definitions for :term:`context`, :term:`current context`, and +:term:`context management protocol`, updated related definitions to be +consistent, and expanded the documentation for :class:`contextvars.Context`. From 7a10cdec359750b5154490fa9e24475c90d05aab Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:34:55 +0100 Subject: [PATCH 026/114] Pin the doctest workflow to Ubuntu 22.04 (#125236) --- .github/workflows/reusable-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 4b021b3dc32f15..7755cb431bd301 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -95,7 +95,7 @@ jobs: # Run "doctest" on HEAD as new syntax doesn't exist in the latest stable release doctest: name: 'Doctest' - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 60 steps: - uses: actions/checkout@v4 From 1639d934b9180c278ac9c02be43cbb1beada494a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 12:11:06 +0200 Subject: [PATCH 027/114] gh-125196: Add a free list to PyUnicodeWriter (#125227) --- Include/internal/pycore_freelist_state.h | 2 ++ Objects/object.c | 1 + Objects/unicodeobject.c | 13 +++++++++---- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_freelist_state.h 
b/Include/internal/pycore_freelist_state.h index 762c583ce94e9a..4e04cf431e0b31 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -20,6 +20,7 @@ extern "C" { # define Py_async_gen_asends_MAXFREELIST 80 # define Py_futureiters_MAXFREELIST 255 # define Py_object_stack_chunks_MAXFREELIST 4 +# define Py_unicode_writers_MAXFREELIST 1 // A generic freelist of either PyObjects or other data structures. struct _Py_freelist { @@ -44,6 +45,7 @@ struct _Py_freelists { struct _Py_freelist async_gen_asends; struct _Py_freelist futureiters; struct _Py_freelist object_stack_chunks; + struct _Py_freelist unicode_writers; }; #ifdef __cplusplus diff --git a/Objects/object.c b/Objects/object.c index 8d809158a6c1da..a97a900890320d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -862,6 +862,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) // stacks during GC, so emptying the free-list is counterproductive. clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } + clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); } /* diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 93c1025b6a3cae..b94a74c2c688a9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -46,6 +46,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
#include "pycore_codecs.h" // _PyCodec_Lookup() #include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST #include "pycore_format.h" // F_LJUST +#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_long.h" // _PyLong_FormatWriter() @@ -13436,9 +13437,13 @@ PyUnicodeWriter_Create(Py_ssize_t length) } const size_t size = sizeof(_PyUnicodeWriter); - PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size); + PyUnicodeWriter *pub_writer; + pub_writer = _Py_FREELIST_POP_MEM(unicode_writers); if (pub_writer == NULL) { - return (PyUnicodeWriter *)PyErr_NoMemory(); + pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size); + if (pub_writer == NULL) { + return (PyUnicodeWriter *)PyErr_NoMemory(); + } } _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer; @@ -13459,7 +13464,7 @@ void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) return; } _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer); - PyMem_Free(writer); + _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free); } @@ -13881,7 +13886,7 @@ PyUnicodeWriter_Finish(PyUnicodeWriter *writer) { PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer); assert(((_PyUnicodeWriter*)writer)->buffer == NULL); - PyMem_Free(writer); + _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free); return str; } From 82dfdc328779778295075d791ee30a0308fb9af4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 12:20:53 +0200 Subject: [PATCH 028/114] gh-125196: Use PyUnicodeWriter for repr(tuple) (#125242) --- Objects/tupleobject.c | 50 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 47134697918052..f3132e0933ac30 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -232,56 +232,54 @@ tuple_repr(PyObject *self) return res > 
0 ? PyUnicode_FromString("(...)") : NULL; } - _PyUnicodeWriter writer; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; + Py_ssize_t prealloc; if (n > 1) { - /* "(" + "1" + ", 2" * (len - 1) + ")" */ - writer.min_length = 1 + 1 + (2 + 1) * (n - 1) + 1; + // "(" + "1" + ", 2" * (len - 1) + ")" + prealloc = 1 + 1 + (2 + 1) * (n - 1) + 1; } else { - /* "(1,)" */ - writer.min_length = 4; + // "(1,)" + prealloc = 4; + } + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { + goto error; } - if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) + if (PyUnicodeWriter_WriteChar(writer, '(') < 0) { goto error; + } /* Do repr() on each element. */ for (Py_ssize_t i = 0; i < n; ++i) { - PyObject *s; - if (i > 0) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { + goto error; + } } - s = PyObject_Repr(v->ob_item[i]); - if (s == NULL) - goto error; - - if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) { - Py_DECREF(s); + if (PyUnicodeWriter_WriteRepr(writer, v->ob_item[i]) < 0) { goto error; } - Py_DECREF(s); } - writer.overallocate = 0; - if (n > 1) { - if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) + if (n == 1) { + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { goto error; + } } - else { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ",)", 2) < 0) - goto error; + if (PyUnicodeWriter_WriteChar(writer, ')') < 0) { + goto error; } Py_ReprLeave((PyObject *)v); - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); Py_ReprLeave((PyObject *)v); return NULL; } From f9ae5d1cee2f8927a71cd4f1f66f10050a4f658a Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:21:12 +0100 Subject: [PATCH 029/114] gh-71784: [doc] add usage examples for 
traceback.TracebackException (#125189) Co-authored-by: Alex Waygood --- Doc/library/traceback.rst | 133 +++++++++++++++++++++++++++++++++----- 1 file changed, 118 insertions(+), 15 deletions(-) diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 401e12be45f418..100a92b73d5497 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -8,11 +8,15 @@ -------------- -This module provides a standard interface to extract, format and print stack -traces of Python programs. It exactly mimics the behavior of the Python -interpreter when it prints a stack trace. This is useful when you want to print -stack traces under program control, such as in a "wrapper" around the -interpreter. +This module provides a standard interface to extract, format and print +stack traces of Python programs. It is more flexible than the +interpreter's default traceback display, and therefore makes it +possible to configure certain aspects of the output. Finally, +it contains a utility for capturing enough information about an +exception to print it later, without the need to save a reference +to the actual exception. Since exceptions can be the roots of large +objects graph, this utility can significantly improve +memory management. .. index:: pair: object; traceback @@ -29,7 +33,20 @@ which are assigned to the :attr:`~BaseException.__traceback__` field of Module :mod:`pdb` Interactive source code debugger for Python programs. -The module defines the following functions: +The module's API can be divided into two parts: + +* Module-level functions offering basic functionality, which are useful for interactive + inspection of exceptions and tracebacks. + +* :class:`TracebackException` class and its helper classes + :class:`StackSummary` and :class:`FrameSummary`. 
These offer both more + flexibility in the output generated and the ability to store the information + necessary for later formatting without holding references to actual exception + and traceback objects. + + +Module-Level Functions +---------------------- .. function:: print_tb(tb, limit=None, file=None) @@ -237,7 +254,6 @@ The module defines the following functions: .. versionadded:: 3.5 -The module also defines the following classes: :class:`!TracebackException` Objects ------------------------------------ @@ -245,12 +261,17 @@ The module also defines the following classes: .. versionadded:: 3.5 :class:`!TracebackException` objects are created from actual exceptions to -capture data for later printing in a lightweight fashion. +capture data for later printing. They offer a more lightweight method of +storing this information by avoiding holding references to +:ref:`traceback` and :ref:`frame` objects +In addition, they expose more options to configure the output compared to +the module-level functions described above. .. class:: TracebackException(exc_type, exc_value, exc_traceback, *, limit=None, lookup_lines=True, capture_locals=False, compact=False, max_group_width=15, max_group_depth=10) - Capture an exception for later rendering. *limit*, *lookup_lines* and - *capture_locals* are as for the :class:`StackSummary` class. + Capture an exception for later rendering. The meaning of *limit*, + *lookup_lines* and *capture_locals* are as for the :class:`StackSummary` + class. If *compact* is true, only data that is required by :class:`!TracebackException`'s :meth:`format` method @@ -509,8 +530,8 @@ in a :ref:`traceback `. .. _traceback-example: -Traceback Examples ------------------- +Examples of Using the Module-Level Functions +-------------------------------------------- This simple example implements a basic read-eval-print loop, similar to (but less useful than) the standard Python interactive interpreter loop. 
For a more @@ -549,8 +570,7 @@ exception and traceback: try: lumberjack() - except IndexError: - exc = sys.exception() + except IndexError as exc: print("*** print_tb:") traceback.print_tb(exc.__traceback__, limit=1, file=sys.stdout) print("*** print_exception:") @@ -653,5 +673,88 @@ This last example demonstrates the final few formatting functions: [' File "spam.py", line 3, in \n spam.eggs()\n', ' File "eggs.py", line 42, in eggs\n return "bacon"\n'] >>> an_error = IndexError('tuple index out of range') - >>> traceback.format_exception_only(type(an_error), an_error) + >>> traceback.format_exception_only(an_error) ['IndexError: tuple index out of range\n'] + + +Examples of Using :class:`TracebackException` +--------------------------------------------- + +With the helper class, we have more options:: + + >>> import sys + >>> from traceback import TracebackException + >>> + >>> def lumberjack(): + ... bright_side_of_life() + ... + >>> def bright_side_of_life(): + ... t = "bright", "side", "of", "life" + ... return t[5] + ... + >>> try: + ... lumberjack() + ... except IndexError as e: + ... exc = e + ... + >>> try: + ... try: + ... lumberjack() + ... except: + ... 1/0 + ... except Exception as e: + ... chained_exc = e + ... 
+ >>> # limit works as with the module-level functions + >>> TracebackException.from_exception(exc, limit=-2).print() + Traceback (most recent call last): + File "", line 6, in lumberjack + bright_side_of_life() + ~~~~~~~~~~~~~~~~~~~^^ + File "", line 10, in bright_side_of_life + return t[5] + ~^^^ + IndexError: tuple index out of range + + >>> # capture_locals adds local variables in frames + >>> TracebackException.from_exception(exc, limit=-2, capture_locals=True).print() + Traceback (most recent call last): + File "", line 6, in lumberjack + bright_side_of_life() + ~~~~~~~~~~~~~~~~~~~^^ + File "", line 10, in bright_side_of_life + return t[5] + ~^^^ + t = ("bright", "side", "of", "life") + IndexError: tuple index out of range + + >>> # The *chain* kwarg to print() controls whether chained + >>> # exceptions are displayed + >>> TracebackException.from_exception(chained_exc).print() + Traceback (most recent call last): + File "", line 4, in + lumberjack() + ~~~~~~~~~~^^ + File "", line 7, in lumberjack + bright_side_of_life() + ~~~~~~~~~~~~~~~~~~~^^ + File "", line 11, in bright_side_of_life + return t[5] + ~^^^ + IndexError: tuple index out of range + + During handling of the above exception, another exception occurred: + + Traceback (most recent call last): + File "", line 6, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + + >>> TracebackException.from_exception(chained_exc).print(chain=False) + Traceback (most recent call last): + File "", line 6, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + From e4cab488d4445e8444932f3bed1c329c0d9e5038 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Thu, 10 Oct 2024 21:39:53 +0900 Subject: [PATCH 030/114] gh-124471: Set name for unnamed reusable workflow (#124475) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко) --- 
.github/workflows/reusable-change-detection.yml | 4 +--- .github/workflows/reusable-docs.yml | 2 +- .github/workflows/reusable-macos.yml | 2 ++ .github/workflows/reusable-tsan.yml | 2 ++ .github/workflows/reusable-ubuntu.yml | 2 ++ .github/workflows/reusable-wasi.yml | 2 ++ .github/workflows/reusable-windows-msi.yml | 2 +- .github/workflows/reusable-windows.yml | 2 ++ 8 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/reusable-change-detection.yml b/.github/workflows/reusable-change-detection.yml index 6f599f75547ceb..5cd6fb39f1e12f 100644 --- a/.github/workflows/reusable-change-detection.yml +++ b/.github/workflows/reusable-change-detection.yml @@ -1,6 +1,4 @@ ---- - -name: Change detection +name: Reusable change detection on: # yamllint disable-line rule:truthy workflow_call: diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 7755cb431bd301..3809f24dcc977e 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -1,4 +1,4 @@ -name: Docs +name: Reusable Docs on: workflow_call: diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index b4227545887ad1..b3a160fbbf8053 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -1,3 +1,5 @@ +name: Reusable macOS + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-tsan.yml b/.github/workflows/reusable-tsan.yml index 27f4eacd86fd95..f4c976ca996410 100644 --- a/.github/workflows/reusable-tsan.yml +++ b/.github/workflows/reusable-tsan.yml @@ -1,3 +1,5 @@ +name: Reusable Thread Sanitizer + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 769f1210de4d3c..0cf40ba8a9b03b 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -1,3 +1,5 @@ +name: Reusable Ubuntu + on: workflow_call: inputs: diff --git 
a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index 1b1a68c0badc76..4c8137c958a312 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -1,3 +1,5 @@ +name: Reusable WASI + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index fc34ab7c3eb1f2..abdb1a1982fef8 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -1,4 +1,4 @@ -name: TestsMSI +name: Reusable Windows MSI on: workflow_call: diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index e9c3c8e05a801c..dcfc62d7f5d145 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -1,3 +1,5 @@ +name: Reusable Windows + on: workflow_call: inputs: From 87d7315ac57250046372b0d9ae4619ba619c8c87 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Thu, 10 Oct 2024 15:42:03 +0300 Subject: [PATCH 031/114] gh-125118: don't copy arbitrary values to _Bool in the struct module (GH-125169) memcopy'ing arbitrary values to _Bool variable triggers undefined behaviour. Avoid this. We assume that `false` is represented by all zero bytes. Credits to Alex Gaynor. 
Co-authored-by: Sam Gross Co-authored-by: Victor Stinner Co-authored-by: Petr Viktorin --- Lib/test/test_struct.py | 3 +++ .../Library/2024-10-09-07-09-00.gh-issue-125118.J9rQ1S.rst | 1 + Modules/_struct.c | 5 ++--- 3 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-09-07-09-00.gh-issue-125118.J9rQ1S.rst diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index e3193c7863fbae..04ec3ed0837c82 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -540,6 +540,9 @@ def __bool__(self): for c in [b'\x01', b'\x7f', b'\xff', b'\x0f', b'\xf0']: self.assertTrue(struct.unpack('>?', c)[0]) + self.assertTrue(struct.unpack(' Date: Thu, 10 Oct 2024 21:57:13 +0900 Subject: [PATCH 032/114] gh-124153: Simplify PyType_GetBaseByToken (GH-124488) --- Objects/typeobject.c | 123 +++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 80 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 6484e8921f8122..5380633fa1149e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5269,11 +5269,10 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) static PyTypeObject * -get_base_by_token_recursive(PyTypeObject *type, void *token) +get_base_by_token_recursive(PyObject *bases, void *token) { - assert(PyType_GetSlot(type, Py_tp_token) != token); - PyObject *bases = lookup_tp_bases(type); assert(bases != NULL); + PyTypeObject *res = NULL; Py_ssize_t n = PyTuple_GET_SIZE(bases); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(bases, i)); @@ -5281,112 +5280,76 @@ get_base_by_token_recursive(PyTypeObject *type, void *token) continue; } if (((PyHeapTypeObject*)base)->ht_token == token) { - return base; + res = base; + break; } - base = get_base_by_token_recursive(base, token); + base = get_base_by_token_recursive(lookup_tp_bases(base), token); if (base != NULL) { - return base; + res = base; + break; } } - return NULL; + 
return res; // Prefer to return recursively from one place } -static inline PyTypeObject * -get_base_by_token_from_mro(PyTypeObject *type, void *token) +int +PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) { - // Bypass lookup_tp_mro() as PyType_IsSubtype() does - PyObject *mro = type->tp_mro; - assert(mro != NULL); - assert(PyTuple_Check(mro)); - // mro_invoke() ensures that the type MRO cannot be empty. - assert(PyTuple_GET_SIZE(mro) >= 1); - // Also, the first item in the MRO is the type itself, which is supposed - // to be already checked by the caller. We skip it in the loop. - assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type); - assert(PyType_GetSlot(type, Py_tp_token) != token); - - Py_ssize_t n = PyTuple_GET_SIZE(mro); - for (Py_ssize_t i = 1; i < n; i++) { - PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(mro, i)); - if (!_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { - continue; - } - if (((PyHeapTypeObject*)base)->ht_token == token) { - return base; - } + if (result != NULL) { + *result = NULL; } - return NULL; -} -static int -check_base_by_token(PyTypeObject *type, void *token) { - // Chain the branches, which will be optimized exclusive here if (token == NULL) { PyErr_Format(PyExc_SystemError, "PyType_GetBaseByToken called with token=NULL"); return -1; } - else if (!PyType_Check(type)) { + if (!PyType_Check(type)) { PyErr_Format(PyExc_TypeError, "expected a type, got a '%T' object", type); return -1; } - else if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - return 0; - } - else if (((PyHeapTypeObject*)type)->ht_token == token) { - return 1; - } - else if (type->tp_mro != NULL) { - // This will not be inlined - return get_base_by_token_from_mro(type, token) ? 1 : 0; - } - else { - return get_base_by_token_recursive(type, token) ? 
1 : 0; - } -} -int -PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) -{ - if (result == NULL) { - // If the `result` is checked only once here, the subsequent - // branches will become trivial to optimize. - return check_base_by_token(type, token); - } - if (token == NULL || !PyType_Check(type)) { - *result = NULL; - return check_base_by_token(type, token); - } - - // Chain the branches, which will be optimized exclusive here - PyTypeObject *base; if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // No static type has a heaptype superclass, // which is ensured by type_ready_mro(). - *result = NULL; return 0; } - else if (((PyHeapTypeObject*)type)->ht_token == token) { - *result = (PyTypeObject *)Py_NewRef(type); + if (((PyHeapTypeObject*)type)->ht_token == token) { +found: + if (result != NULL) { + *result = (PyTypeObject *)Py_NewRef(type); + } return 1; } - else if (type->tp_mro != NULL) { - // Expect this to be inlined - base = get_base_by_token_from_mro(type, token); - } - else { - base = get_base_by_token_recursive(type, token); - } - if (base != NULL) { - *result = (PyTypeObject *)Py_NewRef(base); - return 1; - } - else { - *result = NULL; + PyObject *mro = type->tp_mro; // No lookup, following PyType_IsSubtype() + if (mro == NULL) { + PyTypeObject *base; + base = get_base_by_token_recursive(lookup_tp_bases(type), token); + if (base != NULL) { + // Copying the given type can cause a slowdown, + // unlike the overwrite below. + type = base; + goto found; + } return 0; } + // mro_invoke() ensures that the type MRO cannot be empty. + assert(PyTuple_GET_SIZE(mro) >= 1); + // Also, the first item in the MRO is the type itself, which + // we already checked above. We skip it in the loop. 
+ assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type); + Py_ssize_t n = PyTuple_GET_SIZE(mro); + for (Py_ssize_t i = 1; i < n; i++) { + PyTypeObject *base = (PyTypeObject *)PyTuple_GET_ITEM(mro, i); + if (_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE) + && ((PyHeapTypeObject*)base)->ht_token == token) { + type = base; + goto found; + } + } + return 0; } From c914212474792312bb125211bae5719650fe2f58 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 15:33:00 +0200 Subject: [PATCH 033/114] gh-125196: Use PyUnicodeWriter for JSON encoder (#125249) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. --- Modules/_json.c | 87 ++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 9e29de0f22465f..ce0093ab431d05 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self); static int encoder_clear(PyEncoderObject *self); static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); static PyObject * _encoded_const(PyObject *obj); static void @@ -1268,38 +1268,39 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) { /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; - PyObject *obj, 
*result; + PyObject *obj; Py_ssize_t indent_level; - _PyUnicodeWriter writer; if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist, - &obj, &indent_level)) + &obj, &indent_level)) return NULL; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + return NULL; + } PyObject *newline_indent = NULL; if (self->indent != Py_None) { newline_indent = _create_newline_indent(self->indent, indent_level); if (newline_indent == NULL) { - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } } - if (encoder_listencode_obj(self, &writer, obj, newline_indent)) { - _PyUnicodeWriter_Dealloc(&writer); + if (encoder_listencode_obj(self, writer, obj, newline_indent)) { + PyUnicodeWriter_Discard(writer); Py_XDECREF(newline_indent); return NULL; } Py_XDECREF(newline_indent); - result = PyTuple_New(1); - if (result == NULL || - PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) { - Py_XDECREF(result); + PyObject *str = PyUnicodeWriter_Finish(writer); + if (str == NULL) { return NULL; } + PyObject *result = PyTuple_Pack(1, str); + Py_DECREF(str); return result; } @@ -1370,16 +1371,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen) +_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = _PyUnicodeWriter_WriteStr(writer, stolen); + int rval = PyUnicodeWriter_WriteStr(writer, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent) { /* Encode Python object obj to a JSON term */ @@ -1387,13 +1388,13 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - 
return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4); + return PyUnicodeWriter_WriteUTF8(writer, "null", 4); } else if (obj == Py_True) { - return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4); + return PyUnicodeWriter_WriteUTF8(writer, "true", 4); } else if (obj == Py_False) { - return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5); + return PyUnicodeWriter_WriteUTF8(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1402,6 +1403,10 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, return _steal_accumulate(writer, encoded); } else if (PyLong_Check(obj)) { + if (PyLong_CheckExact(obj)) { + // Fast-path for exact integers + return PyUnicodeWriter_WriteRepr(writer, obj); + } PyObject *encoded = PyLong_Type.tp_repr(obj); if (encoded == NULL) return -1; @@ -1478,7 +1483,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, +encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first, PyObject *dct, PyObject *key, PyObject *value, PyObject *newline_indent, PyObject *item_separator) @@ -1518,7 +1523,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir *first = false; } else { - if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { Py_DECREF(keystr); return -1; } @@ -1533,7 +1538,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir if (_steal_accumulate(writer, encoded) < 0) { return -1; } - if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; } if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { @@ -1544,7 +1549,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir } 
static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent) { /* Encode Python dict dct a JSON term */ @@ -1555,8 +1560,10 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *new_newline_indent = NULL; PyObject *separator_indent = NULL; - if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ - return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + if (PyDict_GET_SIZE(dct) == 0) { + /* Fast path */ + return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + } if (s->markers != Py_None) { int has_key; @@ -1574,8 +1581,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '{')) + if (PyUnicodeWriter_WriteChar(writer, '{')) { goto bail; + } PyObject *current_item_separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1589,7 +1597,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } // update item separator with a borrowed reference current_item_separator = separator_indent; - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } } @@ -1635,13 +1643,14 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, '}')) + if (PyUnicodeWriter_WriteChar(writer, '}')) { goto bail; + } return 0; bail: @@ -1653,7 +1662,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject 
*newline_indent) { PyObject *ident = NULL; @@ -1668,7 +1677,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2); + return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); } if (s->markers != Py_None) { @@ -1687,8 +1696,9 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '[')) + if (PyUnicodeWriter_WriteChar(writer, '[')) { goto bail; + } PyObject *separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1697,7 +1707,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, goto bail; } - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } @@ -1710,7 +1720,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); if (i) { - if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) goto bail; } if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) { @@ -1727,13 +1737,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, if (s->indent != Py_None) { Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, ']')) + if (PyUnicodeWriter_WriteChar(writer, ']')) { goto bail; + } Py_DECREF(s_fast); return 0; From 01fc3b34cc6994bc83b6540da3a8573e79dfbb56 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 10 Oct 2024 16:27:52 +0200 Subject: [PATCH 034/114] gh-124570: ctypes: Run some Structure tests on Union as well (GH-124976) - Move 
some Structure tests to test_structunion; use a common base test class + two subclasses to run them on Union too - Remove test_union for now as it's redundant Note: `test_simple_structs` & `test_simple_unions` are in the common file because they share `formats`. --- Lib/test/test_ctypes/test_structunion.py | 353 +++++++++++++++++++++++ Lib/test/test_ctypes/test_structures.py | 337 +--------------------- Lib/test/test_ctypes/test_unions.py | 35 --- 3 files changed, 368 insertions(+), 357 deletions(-) create mode 100644 Lib/test/test_ctypes/test_structunion.py delete mode 100644 Lib/test/test_ctypes/test_unions.py diff --git a/Lib/test/test_ctypes/test_structunion.py b/Lib/test/test_ctypes/test_structunion.py new file mode 100644 index 00000000000000..973ac3b2f1919d --- /dev/null +++ b/Lib/test/test_ctypes/test_structunion.py @@ -0,0 +1,353 @@ +"""Common tests for ctypes.Structure and ctypes.Union""" + +import unittest +from ctypes import (Structure, Union, POINTER, sizeof, alignment, + c_char, c_byte, c_ubyte, + c_short, c_ushort, c_int, c_uint, + c_long, c_ulong, c_longlong, c_ulonglong, c_float, c_double) +from ._support import (_CData, PyCStructType, UnionType, + Py_TPFLAGS_DISALLOW_INSTANTIATION, + Py_TPFLAGS_IMMUTABLETYPE) +from struct import calcsize + + +class StructUnionTestBase: + formats = {"c": c_char, + "b": c_byte, + "B": c_ubyte, + "h": c_short, + "H": c_ushort, + "i": c_int, + "I": c_uint, + "l": c_long, + "L": c_ulong, + "q": c_longlong, + "Q": c_ulonglong, + "f": c_float, + "d": c_double, + } + + def test_subclass(self): + class X(self.cls): + _fields_ = [("a", c_int)] + + class Y(X): + _fields_ = [("b", c_int)] + + class Z(X): + pass + + self.assertEqual(sizeof(X), sizeof(c_int)) + self.check_sizeof(Y, + struct_size=sizeof(c_int)*2, + union_size=sizeof(c_int)) + self.assertEqual(sizeof(Z), sizeof(c_int)) + self.assertEqual(X._fields_, [("a", c_int)]) + self.assertEqual(Y._fields_, [("b", c_int)]) + self.assertEqual(Z._fields_, [("a", c_int)]) + + 
def test_subclass_delayed(self): + class X(self.cls): + pass + self.assertEqual(sizeof(X), 0) + X._fields_ = [("a", c_int)] + + class Y(X): + pass + self.assertEqual(sizeof(Y), sizeof(X)) + Y._fields_ = [("b", c_int)] + + class Z(X): + pass + + self.assertEqual(sizeof(X), sizeof(c_int)) + self.check_sizeof(Y, + struct_size=sizeof(c_int)*2, + union_size=sizeof(c_int)) + self.assertEqual(sizeof(Z), sizeof(c_int)) + self.assertEqual(X._fields_, [("a", c_int)]) + self.assertEqual(Y._fields_, [("b", c_int)]) + self.assertEqual(Z._fields_, [("a", c_int)]) + + def test_inheritance_hierarchy(self): + self.assertEqual(self.cls.mro(), [self.cls, _CData, object]) + self.assertEqual(type(self.metacls), type) + + def test_type_flags(self): + for cls in self.cls, self.metacls: + with self.subTest(cls=cls): + self.assertTrue(cls.__flags__ & Py_TPFLAGS_IMMUTABLETYPE) + self.assertFalse(cls.__flags__ & Py_TPFLAGS_DISALLOW_INSTANTIATION) + + def test_metaclass_details(self): + # Abstract classes (whose metaclass __init__ was not called) can't be + # instantiated directly + NewClass = self.metacls.__new__(self.metacls, 'NewClass', + (self.cls,), {}) + for cls in self.cls, NewClass: + with self.subTest(cls=cls): + with self.assertRaisesRegex(TypeError, "abstract class"): + obj = cls() + + # Cannot call the metaclass __init__ more than once + class T(self.cls): + _fields_ = [("x", c_char), + ("y", c_char)] + with self.assertRaisesRegex(SystemError, "already initialized"): + self.metacls.__init__(T, 'ptr', (), {}) + + def test_alignment(self): + class X(self.cls): + _fields_ = [("x", c_char * 3)] + self.assertEqual(alignment(X), calcsize("s")) + self.assertEqual(sizeof(X), calcsize("3s")) + + class Y(self.cls): + _fields_ = [("x", c_char * 3), + ("y", c_int)] + self.assertEqual(alignment(Y), alignment(c_int)) + self.check_sizeof(Y, + struct_size=calcsize("3s i"), + union_size=max(calcsize("3s"), calcsize("i"))) + + class SI(self.cls): + _fields_ = [("a", X), + ("b", Y)] + 
self.assertEqual(alignment(SI), max(alignment(Y), alignment(X))) + self.check_sizeof(SI, + struct_size=calcsize("3s0i 3si 0i"), + union_size=max(calcsize("3s"), calcsize("i"))) + + class IS(self.cls): + _fields_ = [("b", Y), + ("a", X)] + + self.assertEqual(alignment(SI), max(alignment(X), alignment(Y))) + self.check_sizeof(IS, + struct_size=calcsize("3si 3s 0i"), + union_size=max(calcsize("3s"), calcsize("i"))) + + class XX(self.cls): + _fields_ = [("a", X), + ("b", X)] + self.assertEqual(alignment(XX), alignment(X)) + self.check_sizeof(XX, + struct_size=calcsize("3s 3s 0s"), + union_size=calcsize("3s")) + + def test_empty(self): + # I had problems with these + # + # Although these are pathological cases: Empty Structures! + class X(self.cls): + _fields_ = [] + + # Is this really the correct alignment, or should it be 0? + self.assertTrue(alignment(X) == 1) + self.assertTrue(sizeof(X) == 0) + + class XX(self.cls): + _fields_ = [("a", X), + ("b", X)] + + self.assertEqual(alignment(XX), 1) + self.assertEqual(sizeof(XX), 0) + + def test_fields(self): + # test the offset and size attributes of Structure/Union fields. + class X(self.cls): + _fields_ = [("x", c_int), + ("y", c_char)] + + self.assertEqual(X.x.offset, 0) + self.assertEqual(X.x.size, sizeof(c_int)) + + if self.cls == Structure: + self.assertEqual(X.y.offset, sizeof(c_int)) + else: + self.assertEqual(X.y.offset, 0) + self.assertEqual(X.y.size, sizeof(c_char)) + + # readonly + self.assertRaises((TypeError, AttributeError), setattr, X.x, "offset", 92) + self.assertRaises((TypeError, AttributeError), setattr, X.x, "size", 92) + + # XXX Should we check nested data types also? + # offset is always relative to the class... 
+ + def test_invalid_field_types(self): + class POINT(self.cls): + pass + self.assertRaises(TypeError, setattr, POINT, "_fields_", [("x", 1), ("y", 2)]) + + def test_invalid_name(self): + # field name must be string + def declare_with_name(name): + class S(self.cls): + _fields_ = [(name, c_int)] + + self.assertRaises(TypeError, declare_with_name, b"x") + + def test_intarray_fields(self): + class SomeInts(self.cls): + _fields_ = [("a", c_int * 4)] + + # can use tuple to initialize array (but not list!) + self.assertEqual(SomeInts((1, 2)).a[:], [1, 2, 0, 0]) + self.assertEqual(SomeInts((1, 2)).a[::], [1, 2, 0, 0]) + self.assertEqual(SomeInts((1, 2)).a[::-1], [0, 0, 2, 1]) + self.assertEqual(SomeInts((1, 2)).a[::2], [1, 0]) + self.assertEqual(SomeInts((1, 2)).a[1:5:6], [2]) + self.assertEqual(SomeInts((1, 2)).a[6:4:-1], []) + self.assertEqual(SomeInts((1, 2, 3, 4)).a[:], [1, 2, 3, 4]) + self.assertEqual(SomeInts((1, 2, 3, 4)).a[::], [1, 2, 3, 4]) + # too long + # XXX Should raise ValueError?, not RuntimeError + self.assertRaises(RuntimeError, SomeInts, (1, 2, 3, 4, 5)) + + def test_huge_field_name(self): + # issue12881: segfault with large structure field names + def create_class(length): + class S(self.cls): + _fields_ = [('x' * length, c_int)] + + for length in [10 ** i for i in range(0, 8)]: + try: + create_class(length) + except MemoryError: + # MemoryErrors are OK, we just don't want to segfault + pass + + def test_abstract_class(self): + class X(self.cls): + _abstract_ = "something" + with self.assertRaisesRegex(TypeError, r"^abstract class$"): + X() + + def test_methods(self): + self.assertIn("in_dll", dir(type(self.cls))) + self.assertIn("from_address", dir(type(self.cls))) + self.assertIn("in_dll", dir(type(self.cls))) + + +class StructureTestCase(unittest.TestCase, StructUnionTestBase): + cls = Structure + metacls = PyCStructType + + def test_metaclass_name(self): + self.assertEqual(self.metacls.__name__, "PyCStructType") + + def check_sizeof(self, cls, *, 
struct_size, union_size): + self.assertEqual(sizeof(cls), struct_size) + + def test_simple_structs(self): + for code, tp in self.formats.items(): + class X(Structure): + _fields_ = [("x", c_char), + ("y", tp)] + self.assertEqual((sizeof(X), code), + (calcsize("c%c0%c" % (code, code)), code)) + + +class UnionTestCase(unittest.TestCase, StructUnionTestBase): + cls = Union + metacls = UnionType + + def test_metaclass_name(self): + self.assertEqual(self.metacls.__name__, "UnionType") + + def check_sizeof(self, cls, *, struct_size, union_size): + self.assertEqual(sizeof(cls), union_size) + + def test_simple_unions(self): + for code, tp in self.formats.items(): + class X(Union): + _fields_ = [("x", c_char), + ("y", tp)] + self.assertEqual((sizeof(X), code), + (calcsize("%c" % (code)), code)) + + +class PointerMemberTestBase: + def test(self): + # a Structure/Union with a POINTER field + class S(self.cls): + _fields_ = [("array", POINTER(c_int))] + + s = S() + # We can assign arrays of the correct type + s.array = (c_int * 3)(1, 2, 3) + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [1, 2, 3]) + + s.array[0] = 42 + + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [42, 2, 3]) + + s.array[0] = 1 + + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [1, 2, 3]) + +class PointerMemberTestCase_Struct(unittest.TestCase, PointerMemberTestBase): + cls = Structure + + def test_none_to_pointer_fields(self): + class S(self.cls): + _fields_ = [("x", c_int), + ("p", POINTER(c_int))] + + s = S() + s.x = 12345678 + s.p = None + self.assertEqual(s.x, 12345678) + +class PointerMemberTestCase_Union(unittest.TestCase, PointerMemberTestBase): + cls = Union + + def test_none_to_pointer_fields(self): + class S(self.cls): + _fields_ = [("x", c_int), + ("p", POINTER(c_int))] + + s = S() + s.x = 12345678 + s.p = None + self.assertFalse(s.p) # NULL pointers are falsy + + +class TestRecursiveBase: + def test_contains_itself(self): + class 
Recursive(self.cls): + pass + + try: + Recursive._fields_ = [("next", Recursive)] + except AttributeError as details: + self.assertIn("Structure or union cannot contain itself", + str(details)) + else: + self.fail("Structure or union cannot contain itself") + + + def test_vice_versa(self): + class First(self.cls): + pass + class Second(self.cls): + pass + + First._fields_ = [("second", Second)] + + try: + Second._fields_ = [("first", First)] + except AttributeError as details: + self.assertIn("_fields_ is final", str(details)) + else: + self.fail("AttributeError not raised") + +class TestRecursiveStructure(unittest.TestCase, TestRecursiveBase): + cls = Structure + +class TestRecursiveUnion(unittest.TestCase, TestRecursiveBase): + cls = Union diff --git a/Lib/test/test_ctypes/test_structures.py b/Lib/test/test_ctypes/test_structures.py index 6cc09c8f2b5b59..0ec238e04b74cd 100644 --- a/Lib/test/test_ctypes/test_structures.py +++ b/Lib/test/test_ctypes/test_structures.py @@ -1,209 +1,24 @@ +"""Tests for ctypes.Structure + +Features common with Union should go in test_structunion.py instead. 
+""" + from platform import architecture as _architecture import struct import sys import unittest -from ctypes import (CDLL, Structure, Union, POINTER, sizeof, byref, alignment, +from ctypes import (CDLL, Structure, Union, POINTER, sizeof, byref, c_void_p, c_char, c_wchar, c_byte, c_ubyte, - c_uint8, c_uint16, c_uint32, - c_short, c_ushort, c_int, c_uint, - c_long, c_ulong, c_longlong, c_ulonglong, c_float, c_double) + c_uint8, c_uint16, c_uint32, c_int, c_uint, + c_long, c_ulong, c_longlong, c_float, c_double) from ctypes.util import find_library -from struct import calcsize from collections import namedtuple from test import support from test.support import import_helper _ctypes_test = import_helper.import_module("_ctypes_test") -from ._support import (_CData, PyCStructType, Py_TPFLAGS_DISALLOW_INSTANTIATION, - Py_TPFLAGS_IMMUTABLETYPE) - - -class SubclassesTest(unittest.TestCase): - def test_subclass(self): - class X(Structure): - _fields_ = [("a", c_int)] - - class Y(X): - _fields_ = [("b", c_int)] - - class Z(X): - pass - - self.assertEqual(sizeof(X), sizeof(c_int)) - self.assertEqual(sizeof(Y), sizeof(c_int)*2) - self.assertEqual(sizeof(Z), sizeof(c_int)) - self.assertEqual(X._fields_, [("a", c_int)]) - self.assertEqual(Y._fields_, [("b", c_int)]) - self.assertEqual(Z._fields_, [("a", c_int)]) - - def test_subclass_delayed(self): - class X(Structure): - pass - self.assertEqual(sizeof(X), 0) - X._fields_ = [("a", c_int)] - - class Y(X): - pass - self.assertEqual(sizeof(Y), sizeof(X)) - Y._fields_ = [("b", c_int)] - - class Z(X): - pass - - self.assertEqual(sizeof(X), sizeof(c_int)) - self.assertEqual(sizeof(Y), sizeof(c_int)*2) - self.assertEqual(sizeof(Z), sizeof(c_int)) - self.assertEqual(X._fields_, [("a", c_int)]) - self.assertEqual(Y._fields_, [("b", c_int)]) - self.assertEqual(Z._fields_, [("a", c_int)]) class StructureTestCase(unittest.TestCase): - formats = {"c": c_char, - "b": c_byte, - "B": c_ubyte, - "h": c_short, - "H": c_ushort, - "i": c_int, - 
"I": c_uint, - "l": c_long, - "L": c_ulong, - "q": c_longlong, - "Q": c_ulonglong, - "f": c_float, - "d": c_double, - } - - def test_inheritance_hierarchy(self): - self.assertEqual(Structure.mro(), [Structure, _CData, object]) - - self.assertEqual(PyCStructType.__name__, "PyCStructType") - self.assertEqual(type(PyCStructType), type) - - - def test_type_flags(self): - for cls in Structure, PyCStructType: - with self.subTest(cls=cls): - self.assertTrue(Structure.__flags__ & Py_TPFLAGS_IMMUTABLETYPE) - self.assertFalse(Structure.__flags__ & Py_TPFLAGS_DISALLOW_INSTANTIATION) - - def test_metaclass_details(self): - # Abstract classes (whose metaclass __init__ was not called) can't be - # instantiated directly - NewStructure = PyCStructType.__new__(PyCStructType, 'NewStructure', - (Structure,), {}) - for cls in Structure, NewStructure: - with self.subTest(cls=cls): - with self.assertRaisesRegex(TypeError, "abstract class"): - obj = cls() - - # Cannot call the metaclass __init__ more than once - class T(Structure): - _fields_ = [("x", c_char), - ("y", c_char)] - with self.assertRaisesRegex(SystemError, "already initialized"): - PyCStructType.__init__(T, 'ptr', (), {}) - - def test_simple_structs(self): - for code, tp in self.formats.items(): - class X(Structure): - _fields_ = [("x", c_char), - ("y", tp)] - self.assertEqual((sizeof(X), code), - (calcsize("c%c0%c" % (code, code)), code)) - - def test_unions(self): - for code, tp in self.formats.items(): - class X(Union): - _fields_ = [("x", c_char), - ("y", tp)] - self.assertEqual((sizeof(X), code), - (calcsize("%c" % (code)), code)) - - def test_struct_alignment(self): - class X(Structure): - _fields_ = [("x", c_char * 3)] - self.assertEqual(alignment(X), calcsize("s")) - self.assertEqual(sizeof(X), calcsize("3s")) - - class Y(Structure): - _fields_ = [("x", c_char * 3), - ("y", c_int)] - self.assertEqual(alignment(Y), alignment(c_int)) - self.assertEqual(sizeof(Y), calcsize("3si")) - - class SI(Structure): - _fields_ = 
[("a", X), - ("b", Y)] - self.assertEqual(alignment(SI), max(alignment(Y), alignment(X))) - self.assertEqual(sizeof(SI), calcsize("3s0i 3si 0i")) - - class IS(Structure): - _fields_ = [("b", Y), - ("a", X)] - - self.assertEqual(alignment(SI), max(alignment(X), alignment(Y))) - self.assertEqual(sizeof(IS), calcsize("3si 3s 0i")) - - class XX(Structure): - _fields_ = [("a", X), - ("b", X)] - self.assertEqual(alignment(XX), alignment(X)) - self.assertEqual(sizeof(XX), calcsize("3s 3s 0s")) - - def test_empty(self): - # I had problems with these - # - # Although these are pathological cases: Empty Structures! - class X(Structure): - _fields_ = [] - - class Y(Union): - _fields_ = [] - - # Is this really the correct alignment, or should it be 0? - self.assertTrue(alignment(X) == alignment(Y) == 1) - self.assertTrue(sizeof(X) == sizeof(Y) == 0) - - class XX(Structure): - _fields_ = [("a", X), - ("b", X)] - - self.assertEqual(alignment(XX), 1) - self.assertEqual(sizeof(XX), 0) - - def test_fields(self): - # test the offset and size attributes of Structure/Union fields. - class X(Structure): - _fields_ = [("x", c_int), - ("y", c_char)] - - self.assertEqual(X.x.offset, 0) - self.assertEqual(X.x.size, sizeof(c_int)) - - self.assertEqual(X.y.offset, sizeof(c_int)) - self.assertEqual(X.y.size, sizeof(c_char)) - - # readonly - self.assertRaises((TypeError, AttributeError), setattr, X.x, "offset", 92) - self.assertRaises((TypeError, AttributeError), setattr, X.x, "size", 92) - - class X(Union): - _fields_ = [("x", c_int), - ("y", c_char)] - - self.assertEqual(X.x.offset, 0) - self.assertEqual(X.x.size, sizeof(c_int)) - - self.assertEqual(X.y.offset, 0) - self.assertEqual(X.y.size, sizeof(c_char)) - - # readonly - self.assertRaises((TypeError, AttributeError), setattr, X.x, "offset", 92) - self.assertRaises((TypeError, AttributeError), setattr, X.x, "size", 92) - - # XXX Should we check nested data types also? - # offset is always relative to the class... 
- def test_packed(self): class X(Structure): _fields_ = [("a", c_byte), @@ -290,36 +105,6 @@ class POINT(Structure): pt = POINT(y=2, x=1) self.assertEqual((pt.x, pt.y), (1, 2)) - def test_invalid_field_types(self): - class POINT(Structure): - pass - self.assertRaises(TypeError, setattr, POINT, "_fields_", [("x", 1), ("y", 2)]) - - def test_invalid_name(self): - # field name must be string - def declare_with_name(name): - class S(Structure): - _fields_ = [(name, c_int)] - - self.assertRaises(TypeError, declare_with_name, b"x") - - def test_intarray_fields(self): - class SomeInts(Structure): - _fields_ = [("a", c_int * 4)] - - # can use tuple to initialize array (but not list!) - self.assertEqual(SomeInts((1, 2)).a[:], [1, 2, 0, 0]) - self.assertEqual(SomeInts((1, 2)).a[::], [1, 2, 0, 0]) - self.assertEqual(SomeInts((1, 2)).a[::-1], [0, 0, 2, 1]) - self.assertEqual(SomeInts((1, 2)).a[::2], [1, 0]) - self.assertEqual(SomeInts((1, 2)).a[1:5:6], [2]) - self.assertEqual(SomeInts((1, 2)).a[6:4:-1], []) - self.assertEqual(SomeInts((1, 2, 3, 4)).a[:], [1, 2, 3, 4]) - self.assertEqual(SomeInts((1, 2, 3, 4)).a[::], [1, 2, 3, 4]) - # too long - # XXX Should raise ValueError?, not RuntimeError - self.assertRaises(RuntimeError, SomeInts, (1, 2, 3, 4, 5)) - def test_nested_initializers(self): # test initializing nested structures class Phone(Structure): @@ -374,37 +159,12 @@ class Person(Structure): self.assertEqual(msg, "(Phone) TypeError: too many initializers") - def test_huge_field_name(self): - # issue12881: segfault with large structure field names - def create_class(length): - class S(Structure): - _fields_ = [('x' * length, c_int)] - - for length in [10 ** i for i in range(0, 8)]: - try: - create_class(length) - except MemoryError: - # MemoryErrors are OK, we just don't want to segfault - pass - def get_except(self, func, *args): try: func(*args) except Exception as detail: return detail.__class__, str(detail) - def test_abstract_class(self): - class X(Structure): - 
_abstract_ = "something" - # try 'X()' - cls, msg = self.get_except(eval, "X()", locals()) - self.assertEqual((cls, msg), (TypeError, "abstract class")) - - def test_methods(self): - self.assertIn("in_dll", dir(type(Structure))) - self.assertIn("from_address", dir(type(Structure))) - self.assertIn("in_dll", dir(type(Structure))) - def test_positional_args(self): # see also http://bugs.python.org/issue5042 class W(Structure): @@ -507,6 +267,8 @@ class X(Structure): self.assertEqual(s.second, got.second) def _test_issue18060(self, Vector): + # Regression tests for gh-62260 + # The call to atan2() should succeed if the # class fields were correctly cloned in the # subclasses. Otherwise, it will segfault. @@ -698,6 +460,7 @@ class Test3E(Structure): self.assertEqual(result.data[i], float(i+1)) def test_38368(self): + # Regression test for gh-82549 class U(Union): _fields_ = [ ('f1', c_uint8 * 16), @@ -719,9 +482,9 @@ class U(Union): self.assertEqual(f2, [0x4567, 0x0123, 0xcdef, 0x89ab, 0x3210, 0x7654, 0xba98, 0xfedc]) - @unittest.skipIf(True, 'Test disabled for now - see bpo-16575/bpo-16576') + @unittest.skipIf(True, 'Test disabled for now - see gh-60779/gh-60780') def test_union_by_value(self): - # See bpo-16575 + # See gh-60779 # These should mirror the structures in Modules/_ctypes/_ctypes_test.c @@ -800,9 +563,9 @@ class Test5(Structure): self.assertEqual(test5.nested.an_int, 0) self.assertEqual(test5.another_int, 0) - @unittest.skipIf(True, 'Test disabled for now - see bpo-16575/bpo-16576') + @unittest.skipIf(True, 'Test disabled for now - see gh-60779/gh-60780') def test_bitfield_by_value(self): - # See bpo-16576 + # See gh-60780 # These should mirror the structures in Modules/_ctypes/_ctypes_test.c @@ -882,75 +645,5 @@ class Test8(Union): 'a union by value, which is unsupported.') -class PointerMemberTestCase(unittest.TestCase): - - def test(self): - # a Structure with a POINTER field - class S(Structure): - _fields_ = [("array", POINTER(c_int))] - - s = S() - # 
We can assign arrays of the correct type - s.array = (c_int * 3)(1, 2, 3) - items = [s.array[i] for i in range(3)] - self.assertEqual(items, [1, 2, 3]) - - # The following are bugs, but are included here because the unittests - # also describe the current behaviour. - # - # This fails with SystemError: bad arg to internal function - # or with IndexError (with a patch I have) - - s.array[0] = 42 - - items = [s.array[i] for i in range(3)] - self.assertEqual(items, [42, 2, 3]) - - s.array[0] = 1 - - items = [s.array[i] for i in range(3)] - self.assertEqual(items, [1, 2, 3]) - - def test_none_to_pointer_fields(self): - class S(Structure): - _fields_ = [("x", c_int), - ("p", POINTER(c_int))] - - s = S() - s.x = 12345678 - s.p = None - self.assertEqual(s.x, 12345678) - - -class TestRecursiveStructure(unittest.TestCase): - def test_contains_itself(self): - class Recursive(Structure): - pass - - try: - Recursive._fields_ = [("next", Recursive)] - except AttributeError as details: - self.assertIn("Structure or union cannot contain itself", - str(details)) - else: - self.fail("Structure or union cannot contain itself") - - - def test_vice_versa(self): - class First(Structure): - pass - class Second(Structure): - pass - - First._fields_ = [("second", Second)] - - try: - Second._fields_ = [("first", First)] - except AttributeError as details: - self.assertIn("_fields_ is final", str(details)) - else: - self.fail("AttributeError not raised") - - if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_ctypes/test_unions.py b/Lib/test/test_ctypes/test_unions.py deleted file mode 100644 index e2dff0f22a9213..00000000000000 --- a/Lib/test/test_ctypes/test_unions.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest -from ctypes import Union, c_char -from ._support import (_CData, UnionType, Py_TPFLAGS_DISALLOW_INSTANTIATION, - Py_TPFLAGS_IMMUTABLETYPE) - - -class ArrayTestCase(unittest.TestCase): - def test_inheritance_hierarchy(self): - self.assertEqual(Union.mro(), 
[Union, _CData, object]) - - self.assertEqual(UnionType.__name__, "UnionType") - self.assertEqual(type(UnionType), type) - - def test_type_flags(self): - for cls in Union, UnionType: - with self.subTest(cls=Union): - self.assertTrue(Union.__flags__ & Py_TPFLAGS_IMMUTABLETYPE) - self.assertFalse(Union.__flags__ & Py_TPFLAGS_DISALLOW_INSTANTIATION) - - def test_metaclass_details(self): - # Abstract classes (whose metaclass __init__ was not called) can't be - # instantiated directly - NewUnion = UnionType.__new__(UnionType, 'NewUnion', - (Union,), {}) - for cls in Union, NewUnion: - with self.subTest(cls=cls): - with self.assertRaisesRegex(TypeError, "abstract class"): - obj = cls() - - # Cannot call the metaclass __init__ more than once - class T(Union): - _fields_ = [("x", c_char), - ("y", c_char)] - with self.assertRaisesRegex(SystemError, "already initialized"): - UnionType.__init__(T, 'ptr', (), {}) From c9014374c50d6ef64786d3e7d9c7e99053d5c9e2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 10 Oct 2024 18:19:08 +0100 Subject: [PATCH 035/114] GH-125174: Make immortal objects more robust, following design from PEP 683 (GH-125251) --- .../pycore_global_objects_fini_generated.h | 2 +- Include/internal/pycore_object.h | 34 ++++----------- Include/object.h | 2 +- Include/refcount.h | 42 ++++++++++--------- Lib/test/test_builtin.py | 4 +- ...-10-10-12-04-56.gh-issue-125174._8h6T7.rst | 4 ++ Modules/_asynciomodule.c | 2 +- Objects/bytesobject.c | 6 +-- Objects/dictobject.c | 10 +++-- Objects/object.c | 2 +- Objects/structseq.c | 2 +- Objects/typeobject.c | 8 ++-- Python/bytecodes.c | 4 +- Python/executor_cases.c.h | 4 +- Python/generated_cases.c.h | 4 +- Python/import.c | 2 +- Tools/cases_generator/analyzer.py | 1 - 17 files changed, 61 insertions(+), 72 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h 
b/Include/internal/pycore_global_objects_fini_generated.h index 3140a75a47c5ee..de68ef93257234 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -11,7 +11,7 @@ extern "C" { #ifdef Py_DEBUG static inline void _PyStaticObject_CheckRefcnt(PyObject *obj) { - if (Py_REFCNT(obj) < _Py_IMMORTAL_REFCNT) { + if (!_Py_IsImmortal(obj)) { fprintf(stderr, "Immortal Object has less refcnt than expected.\n"); _PyObject_Dump(obj); } diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 0af13b1bcda20b..8832692d03c29e 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -16,9 +16,6 @@ extern "C" { #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uniqueid.h" // _PyType_IncrefSlow - -#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1) - // This value is added to `ob_ref_shared` for objects that use deferred // reference counting so that they are not immediately deallocated when the // non-deferred reference count drops to zero. @@ -27,25 +24,8 @@ extern "C" { // `ob_ref_shared` are used for flags. #define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8) -// gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when -// comparing the reference count to stay compatible with C extensions built -// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF -// as refcnt++ and refcnt-- without taking in account immortal objects. For -// example, the reference count of an immortal object can change from -// _Py_IMMORTAL_REFCNT to _Py_IMMORTAL_REFCNT+1 (INCREF) or -// _Py_IMMORTAL_REFCNT-1 (DECREF). -// -// This function should only be used in assertions. Otherwise, _Py_IsImmortal() -// must be used instead. 
-static inline int _Py_IsImmortalLoose(PyObject *op) -{ -#if defined(Py_GIL_DISABLED) - return _Py_IsImmortal(op); -#else - return (op->ob_refcnt >= _Py_IMMORTAL_REFCNT_LOOSE); -#endif -} -#define _Py_IsImmortalLoose(op) _Py_IsImmortalLoose(_PyObject_CAST(op)) +/* For backwards compatibility -- Do not use this */ +#define _Py_IsImmortalLoose(op) _Py_IsImmortal /* Check if an object is consistent. For example, ensure that the reference @@ -97,7 +77,7 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *); #else #define _PyObject_HEAD_INIT(type) \ { \ - .ob_refcnt = _Py_IMMORTAL_REFCNT, \ + .ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT, \ .ob_type = (type) \ } #endif @@ -184,7 +164,7 @@ PyAPI_FUNC(void) _Py_SetImmortalUntracked(PyObject *op); static inline void _Py_SetMortal(PyObject *op, Py_ssize_t refcnt) { if (op) { - assert(_Py_IsImmortalLoose(op)); + assert(_Py_IsImmortal(op)); #ifdef Py_GIL_DISABLED op->ob_tid = _Py_UNOWNED_TID; op->ob_ref_local = 0; @@ -316,7 +296,7 @@ static inline void _Py_INCREF_TYPE(PyTypeObject *type) { if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - assert(_Py_IsImmortalLoose(type)); + assert(_Py_IsImmortal(type)); _Py_INCREF_IMMORTAL_STAT_INC(); return; } @@ -357,7 +337,7 @@ static inline void _Py_DECREF_TYPE(PyTypeObject *type) { if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - assert(_Py_IsImmortalLoose(type)); + assert(_Py_IsImmortal(type)); _Py_DECREF_IMMORTAL_STAT_INC(); return; } @@ -393,7 +373,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj) { assert(op != NULL); Py_SET_TYPE(op, typeobj); - assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj)); + assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortal(typeobj)); _Py_INCREF_TYPE(typeobj); _Py_NewReference(op); } diff --git a/Include/object.h b/Include/object.h index 418f2196062df7..5be4dedadc20eb 100644 --- a/Include/object.h +++ b/Include/object.h @@ -81,7 +81,7 @@ whose size is determined when the object is 
allocated. #else #define PyObject_HEAD_INIT(type) \ { \ - { _Py_IMMORTAL_REFCNT }, \ + { _Py_IMMORTAL_INITIAL_REFCNT }, \ (type) \ }, #endif diff --git a/Include/refcount.h b/Include/refcount.h index 9a4e15065ecab8..141cbd34dd72e6 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -21,25 +21,30 @@ cleanup during runtime finalization. #if SIZEOF_VOID_P > 4 /* -In 64+ bit systems, an object will be marked as immortal by setting all of the -lower 32 bits of the reference count field, which is equal to: 0xFFFFFFFF +In 64+ bit systems, any object whose 32 bit reference count is >= 2**31 +will be treated as immortal. Using the lower 32 bits makes the value backwards compatible by allowing C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely -increase and decrease the objects reference count. The object would lose its -immortality, but the execution would still be correct. +increase and decrease the objects reference count. + +In order to offer sufficient resilience to C extensions using the stable ABI +compiled against 3.11 or earlier, we set the initial value near the +middle of the range (2**31, 2**32). That way the the refcount can be +off by ~1 billion without affecting immortality. Reference count increases will use saturated arithmetic, taking advantage of having all the lower 32 bits set, which will avoid the reference count to go beyond the refcount limit. Immortality checks for reference count decreases will be done by checking the bit sign flag in the lower 32 bits. + */ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX) +#define _Py_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(3UL << 30)) #else /* -In 32 bit systems, an object will be marked as immortal by setting all of the -lower 30 bits of the reference count field, which is equal to: 0x3FFFFFFF +In 32 bit systems, an object will be treated as immortal if its reference +count equals or exceeds _Py_IMMORTAL_MINIMUM_REFCNT (2**30). 
Using the lower 30 bits makes the value backwards compatible by allowing C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely @@ -47,9 +52,10 @@ increase and decrease the objects reference count. The object would lose its immortality, but the execution would still be correct. Reference count increases and decreases will first go through an immortality -check by comparing the reference count field to the immortality reference count. +check by comparing the reference count field to the minimum immortality refcount. */ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX >> 2) +#define _Py_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(3L << 29)) +#define _Py_IMMORTAL_MINIMUM_REFCNT ((Py_ssize_t)(1L << 30)) #endif // Py_GIL_DISABLED builds indicate immortal objects using `ob_ref_local`, which is @@ -90,7 +96,7 @@ PyAPI_FUNC(Py_ssize_t) Py_REFCNT(PyObject *ob); #else uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local); if (local == _Py_IMMORTAL_REFCNT_LOCAL) { - return _Py_IMMORTAL_REFCNT; + return _Py_IMMORTAL_INITIAL_REFCNT; } Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared); return _Py_STATIC_CAST(Py_ssize_t, local) + @@ -109,9 +115,9 @@ static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) return (_Py_atomic_load_uint32_relaxed(&op->ob_ref_local) == _Py_IMMORTAL_REFCNT_LOCAL); #elif SIZEOF_VOID_P > 4 - return (_Py_CAST(PY_INT32_T, op->ob_refcnt) < 0); + return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0; #else - return (op->ob_refcnt == _Py_IMMORTAL_REFCNT); + return op->ob_refcnt >= _Py_IMMORTAL_MINIMUM_REFCNT; #endif } #define _Py_IsImmortal(op) _Py_IsImmortal(_PyObject_CAST(op)) @@ -236,7 +242,7 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) uint32_t new_local = local + 1; if (new_local == 0) { _Py_INCREF_IMMORTAL_STAT_INC(); - // local is equal to _Py_IMMORTAL_REFCNT: do nothing + // local is equal to _Py_IMMORTAL_REFCNT_LOCAL: do nothing return; } if (_Py_IsOwnedByCurrentThread(op)) 
{ @@ -246,18 +252,14 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT)); } #elif SIZEOF_VOID_P > 4 - // Portable saturated add, branching on the carry flag and set low bits PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN]; - PY_UINT32_T new_refcnt = cur_refcnt + 1; - if (new_refcnt == 0) { + if (((int32_t)cur_refcnt) < 0) { + // the object is immortal _Py_INCREF_IMMORTAL_STAT_INC(); - // cur_refcnt is equal to _Py_IMMORTAL_REFCNT: the object is immortal, - // do nothing return; } - op->ob_refcnt_split[PY_BIG_ENDIAN] = new_refcnt; + op->ob_refcnt_split[PY_BIG_ENDIAN] = cur_refcnt + 1; #else - // Explicitly check immortality against the immortal value if (_Py_IsImmortal(op)) { _Py_INCREF_IMMORTAL_STAT_INC(); return; diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d884f54940b471..eb5906f8944c8e 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2574,9 +2574,9 @@ def __del__(self): class ImmortalTests(unittest.TestCase): if sys.maxsize < (1 << 32): - IMMORTAL_REFCOUNT = (1 << 30) - 1 + IMMORTAL_REFCOUNT = 3 << 29 else: - IMMORTAL_REFCOUNT = (1 << 32) - 1 + IMMORTAL_REFCOUNT = 3 << 30 IMMORTALS = (None, True, False, Ellipsis, NotImplemented, *range(-5, 257)) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst new file mode 100644 index 00000000000000..c7eaac32601bb3 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst @@ -0,0 +1,4 @@ +Make the handling of reference counts of immortal objects more robust. +Immortal objects with reference counts that deviate from their original +reference count by up to a billion (half a billion on 32 bit builds) are +still counted as immortal. 
diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 870084100a1b85..0a769c46b87ac8 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -1387,7 +1387,7 @@ FutureObj_get_state(FutureObj *fut, void *Py_UNUSED(ignored)) default: assert (0); } - assert(_Py_IsImmortalLoose(ret)); + assert(_Py_IsImmortal(ret)); return ret; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index bf58e55e100b3a..dcc1aba76abbed 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -46,7 +46,7 @@ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, static inline PyObject* bytes_get_empty(void) { PyObject *empty = &EMPTY->ob_base.ob_base; - assert(_Py_IsImmortalLoose(empty)); + assert(_Py_IsImmortal(empty)); return empty; } @@ -119,7 +119,7 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) } if (size == 1 && str != NULL) { op = CHARACTER(*str & 255); - assert(_Py_IsImmortalLoose(op)); + assert(_Py_IsImmortal(op)); return (PyObject *)op; } if (size == 0) { @@ -155,7 +155,7 @@ PyBytes_FromString(const char *str) } else if (size == 1) { op = CHARACTER(*str & 255); - assert(_Py_IsImmortalLoose(op)); + assert(_Py_IsImmortal(op)); return (PyObject *)op; } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index adfd91d1e4d63b..12722eca6be5e5 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -416,6 +416,8 @@ _PyDict_DebugMallocStats(FILE *out) #define DK_MASK(dk) (DK_SIZE(dk)-1) +#define _Py_DICT_IMMORTAL_INITIAL_REFCNT PY_SSIZE_T_MIN + static void free_keys_object(PyDictKeysObject *keys, bool use_qsbr); /* PyDictKeysObject has refcounts like PyObject does, so we have the @@ -428,7 +430,8 @@ static void free_keys_object(PyDictKeysObject *keys, bool use_qsbr); static inline void dictkeys_incref(PyDictKeysObject *dk) { - if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_IMMORTAL_REFCNT) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { + 
assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); return; } #ifdef Py_REF_DEBUG @@ -440,7 +443,8 @@ dictkeys_incref(PyDictKeysObject *dk) static inline void dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk, bool use_qsbr) { - if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_IMMORTAL_REFCNT) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { + assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); return; } assert(FT_ATOMIC_LOAD_SSIZE(dk->dk_refcnt) > 0); @@ -586,7 +590,7 @@ estimate_log2_keysize(Py_ssize_t n) * (which cannot fail and thus can do no allocation). */ static PyDictKeysObject empty_keys_struct = { - _Py_IMMORTAL_REFCNT, /* dk_refcnt */ + _Py_DICT_IMMORTAL_INITIAL_REFCNT, /* dk_refcnt */ 0, /* dk_log2_size */ 0, /* dk_log2_index_bytes */ DICT_KEYS_UNICODE, /* dk_kind */ diff --git a/Objects/object.c b/Objects/object.c index a97a900890320d..27d06cc081259d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2453,7 +2453,7 @@ _Py_SetImmortalUntracked(PyObject *op) op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; op->ob_ref_shared = 0; #else - op->ob_refcnt = _Py_IMMORTAL_REFCNT; + op->ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT; #endif } diff --git a/Objects/structseq.c b/Objects/structseq.c index 6092742835400b..56a7851b98788d 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -708,7 +708,7 @@ _PyStructSequence_FiniBuiltin(PyInterpreterState *interp, PyTypeObject *type) assert(type->tp_name != NULL); assert(type->tp_base == &PyTuple_Type); assert((type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); - assert(_Py_IsImmortalLoose(type)); + assert(_Py_IsImmortal(type)); // Cannot delete a type if it still has subclasses if (_PyType_HasSubclasses(type)) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5380633fa1149e..d90bb5825fd437 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -476,7 +476,7 @@ set_tp_bases(PyTypeObject 
*self, PyObject *bases, int initial) assert(PyTuple_GET_SIZE(bases) == 1); assert(PyTuple_GET_ITEM(bases, 0) == (PyObject *)self->tp_base); assert(self->tp_base->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); - assert(_Py_IsImmortalLoose(self->tp_base)); + assert(_Py_IsImmortal(self->tp_base)); } _Py_SetImmortal(bases); } @@ -493,7 +493,7 @@ clear_tp_bases(PyTypeObject *self, int final) Py_CLEAR(self->tp_bases); } else { - assert(_Py_IsImmortalLoose(self->tp_bases)); + assert(_Py_IsImmortal(self->tp_bases)); _Py_ClearImmortal(self->tp_bases); } } @@ -558,7 +558,7 @@ clear_tp_mro(PyTypeObject *self, int final) Py_CLEAR(self->tp_mro); } else { - assert(_Py_IsImmortalLoose(self->tp_mro)); + assert(_Py_IsImmortal(self->tp_mro)); _Py_ClearImmortal(self->tp_mro); } } @@ -5966,7 +5966,7 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, int isbuiltin, int final) { assert(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); - assert(_Py_IsImmortalLoose((PyObject *)type)); + assert(_Py_IsImmortal((PyObject *)type)); type_dealloc_common(type); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 87cca3fc1d373c..34fdfcb05e3c18 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -381,7 +381,7 @@ dummy_func( EXIT_IF(!PyLong_CheckExact(value_o)); STAT_INC(TO_BOOL, hit); if (_PyLong_IsZero((PyLongObject *)value_o)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); DEAD(value); res = PyStackRef_False; } @@ -412,7 +412,7 @@ dummy_func( EXIT_IF(!PyUnicode_CheckExact(value_o)); STAT_INC(TO_BOOL, hit); if (value_o == &_Py_STR(empty)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); DEAD(value); res = PyStackRef_False; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 57e15f33ca7703..ef110e2e2a794a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -402,7 +402,7 @@ } STAT_INC(TO_BOOL, hit); if (_PyLong_IsZero((PyLongObject *)value_o)) { - 
assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { @@ -455,7 +455,7 @@ } STAT_INC(TO_BOOL, hit); if (value_o == &_Py_STR(empty)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7656ce6bb7e313..7023aea369db49 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7840,7 +7840,7 @@ DEOPT_IF(!PyLong_CheckExact(value_o), TO_BOOL); STAT_INC(TO_BOOL, hit); if (_PyLong_IsZero((PyLongObject *)value_o)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { @@ -7902,7 +7902,7 @@ DEOPT_IF(!PyUnicode_CheckExact(value_o), TO_BOOL); STAT_INC(TO_BOOL, hit); if (value_o == &_Py_STR(empty)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { diff --git a/Python/import.c b/Python/import.c index 460b1fe225c72e..acf849f14562b9 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1051,7 +1051,7 @@ del_cached_def(struct extensions_cache_value *value) However, this decref would be problematic if the module def were dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. 
*/ - assert(value->def == NULL || _Py_IsImmortalLoose(value->def)); + assert(value->def == NULL || _Py_IsImmortal(value->def)); Py_XDECREF(value->def->m_base.m_copy); value->def->m_base.m_copy = NULL; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 9c2981a68ac909..60f5d010a7a083 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -614,7 +614,6 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_EnterRecursiveCallTstateUnchecked", "_Py_ID", "_Py_IsImmortal", - "_Py_IsImmortalLoose", "_Py_LeaveRecursiveCallPy", "_Py_LeaveRecursiveCallTstate", "_Py_NewRef", From 3b87fb74c907510402678bf1b7c4a94df0e5e65a Mon Sep 17 00:00:00 2001 From: Justin Kunimune Date: Thu, 10 Oct 2024 13:56:05 -0400 Subject: [PATCH 036/114] Note argparse exit code in documentation (GH-119568) Co-authored-by: Savannah Ostrowski --- Doc/library/argparse.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index e9a08984f77c3a..4eb6fad41f11ef 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -541,7 +541,8 @@ exit_on_error ^^^^^^^^^^^^^ Normally, when you pass an invalid argument list to the :meth:`~ArgumentParser.parse_args` -method of an :class:`ArgumentParser`, it will exit with error info. +method of an :class:`ArgumentParser`, it will print a *message* to :data:`sys.stderr` and exit with a status +code of 2. 
If the user would like to catch errors manually, the feature can be enabled by setting ``exit_on_error`` to ``False``:: From bb594e801b6a84823badbb85b88f0fc8b221d7bf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 20:41:14 +0200 Subject: [PATCH 037/114] gh-125196: Use PyUnicodeWriter for repr(dict) (#125270) --- Objects/dictobject.c | 72 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 12722eca6be5e5..b27599d2815c82 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3200,16 +3200,12 @@ static PyObject * dict_repr_lock_held(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; - Py_ssize_t i; PyObject *key = NULL, *value = NULL; - _PyUnicodeWriter writer; - int first; - ASSERT_DICT_LOCKED(mp); - i = Py_ReprEnter((PyObject *)mp); - if (i != 0) { - return i > 0 ? PyUnicode_FromString("{...}") : NULL; + int res = Py_ReprEnter((PyObject *)mp); + if (res != 0) { + return (res > 0 ? PyUnicode_FromString("{...}") : NULL); } if (mp->ma_used == 0) { @@ -3217,66 +3213,70 @@ dict_repr_lock_held(PyObject *self) return PyUnicode_FromString("{}"); } - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; - /* "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" */ - writer.min_length = 1 + 4 + (2 + 4) * (mp->ma_used - 1) + 1; + // "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" + Py_ssize_t prealloc = 1 + 4 + 6 * (mp->ma_used - 1) + 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { + goto error; + } - if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0) + if (PyUnicodeWriter_WriteChar(writer, '{') < 0) { goto error; + } /* Do repr() on each key+value pair, and insert ": " between them. Note that repr may mutate the dict. 
*/ - i = 0; - first = 1; + Py_ssize_t i = 0; + int first = 1; while (_PyDict_Next((PyObject *)mp, &i, &key, &value, NULL)) { - PyObject *s; - int res; - - /* Prevent repr from deleting key or value during key format. */ + // Prevent repr from deleting key or value during key format. Py_INCREF(key); Py_INCREF(value); if (!first) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + // Write ", " + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { + goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; + } } first = 0; - s = PyObject_Repr(key); - if (s == NULL) - goto error; - res = _PyUnicodeWriter_WriteStr(&writer, s); - Py_DECREF(s); - if (res < 0) + // Write repr(key) + if (PyUnicodeWriter_WriteRepr(writer, key) < 0) { goto error; + } - if (_PyUnicodeWriter_WriteASCIIString(&writer, ": ", 2) < 0) + // Write ": " + if (PyUnicodeWriter_WriteChar(writer, ':') < 0) { goto error; - - s = PyObject_Repr(value); - if (s == NULL) + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; - res = _PyUnicodeWriter_WriteStr(&writer, s); - Py_DECREF(s); - if (res < 0) + } + + // Write repr(value) + if (PyUnicodeWriter_WriteRepr(writer, value) < 0) { goto error; + } Py_CLEAR(key); Py_CLEAR(value); } - writer.overallocate = 0; - if (_PyUnicodeWriter_WriteChar(&writer, '}') < 0) + if (PyUnicodeWriter_WriteChar(writer, '}') < 0) { goto error; + } Py_ReprLeave((PyObject *)mp); - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: Py_ReprLeave((PyObject *)mp); - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); Py_XDECREF(key); Py_XDECREF(value); return NULL; From 427dcf24de4e06d239745d74d08c4b2e541dca5a Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 10 Oct 2024 16:21:29 -0400 Subject: [PATCH 038/114] gh-125268: Use static string for "1e309" in AST (#125272) When formatting the AST as a string, infinite values are replaced by 1e309, which evaluates to infinity. 
The initialization of this string replacement was not thread-safe in the free threading build. --- Include/internal/pycore_global_objects.h | 3 -- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 +++ Parser/asdl_c.py | 2 -- Python/Python-ast.c | 2 -- Python/ast_unparse.c | 29 +++---------------- 8 files changed, 11 insertions(+), 32 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 913dce6f1ec0fe..e3f7ac707f0c37 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -66,9 +66,6 @@ struct _Py_static_objects { struct _Py_interp_cached_objects { PyObject *interned_strings; - /* AST */ - PyObject *str_replace_inf; - /* object.__reduce__ */ PyObject *objreduce; PyObject *type_slots_pname; diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index de68ef93257234..2fd7d5d13a98b2 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -562,6 +562,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(kwdefaults)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(list_err)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(str_replace_inf)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(type_params)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(utf_8)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(CANCELLED)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 1591cb0a3f114f..fc3871570cc49d 100644 --- a/Include/internal/pycore_global_strings.h +++ 
b/Include/internal/pycore_global_strings.h @@ -48,6 +48,7 @@ struct _Py_global_strings { STRUCT_FOR_STR(json_decoder, "json.decoder") STRUCT_FOR_STR(kwdefaults, ".kwdefaults") STRUCT_FOR_STR(list_err, "list index out of range") + STRUCT_FOR_STR(str_replace_inf, "1e309") STRUCT_FOR_STR(type_params, ".type_params") STRUCT_FOR_STR(utf_8, "utf-8") } literals; diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index c9d20d0b5aacdb..3b80e265b0ca50 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -557,6 +557,7 @@ extern "C" { INIT_STR(json_decoder, "json.decoder"), \ INIT_STR(kwdefaults, ".kwdefaults"), \ INIT_STR(list_err, "list index out of range"), \ + INIT_STR(str_replace_inf, "1e309"), \ INIT_STR(type_params, ".type_params"), \ INIT_STR(utf_8, "utf-8"), \ } diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index d335373e88ee74..eb2eca06ec4d4f 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2936,6 +2936,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(str_replace_inf); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_STR(anon_null); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index f50c28afcfe205..32eac3afafa5d5 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -2242,8 +2242,6 @@ def generate_ast_fini(module_state, f): for s in module_state: f.write(" Py_CLEAR(state->" + s + ');\n') f.write(textwrap.dedent(""" - 
Py_CLEAR(_Py_INTERP_CACHED_OBJECT(interp, str_replace_inf)); - state->finalized = 1; state->once = (_PyOnceFlag){0}; } diff --git a/Python/Python-ast.c b/Python/Python-ast.c index 89c52b9dc73cac..38d74b48d232f8 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -281,8 +281,6 @@ void _PyAST_Fini(PyInterpreterState *interp) Py_CLEAR(state->vararg); Py_CLEAR(state->withitem_type); - Py_CLEAR(_Py_INTERP_CACHED_OBJECT(interp, str_replace_inf)); - state->finalized = 1; state->once = (_PyOnceFlag){0}; } diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c index 86f7a582b981a3..8017cfc7fcf268 100644 --- a/Python/ast_unparse.c +++ b/Python/ast_unparse.c @@ -2,7 +2,6 @@ #include "pycore_ast.h" // expr_ty #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_runtime.h" // _Py_ID() -#include // DBL_MAX_10_EXP #include /* This limited unparser is used to convert annotations back to strings @@ -13,10 +12,6 @@ _Py_DECLARE_STR(dbl_open_br, "{{"); _Py_DECLARE_STR(dbl_close_br, "}}"); -/* We would statically initialize this if doing so were simple enough. */ -#define _str_replace_inf(interp) \ - _Py_INTERP_CACHED_OBJECT(interp, str_replace_inf) - /* Forward declarations for recursion via helper functions. 
*/ static PyObject * expr_as_unicode(expr_ty e, int level); @@ -78,13 +73,13 @@ append_repr(_PyUnicodeWriter *writer, PyObject *obj) } if ((PyFloat_CheckExact(obj) && isinf(PyFloat_AS_DOUBLE(obj))) || - PyComplex_CheckExact(obj)) + PyComplex_CheckExact(obj)) { - PyInterpreterState *interp = _PyInterpreterState_GET(); + _Py_DECLARE_STR(str_replace_inf, "1e309"); // evaluates to inf PyObject *new_repr = PyUnicode_Replace( repr, &_Py_ID(inf), - _str_replace_inf(interp), + &_Py_STR(str_replace_inf), -1 ); Py_DECREF(repr); @@ -918,20 +913,6 @@ append_ast_expr(_PyUnicodeWriter *writer, expr_ty e, int level) return -1; } -static int -maybe_init_static_strings(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_str_replace_inf(interp) == NULL) { - PyObject *tmp = PyUnicode_FromFormat("1e%d", 1 + DBL_MAX_10_EXP); - if (tmp == NULL) { - return -1; - } - _str_replace_inf(interp) = tmp; - } - return 0; -} - static PyObject * expr_as_unicode(expr_ty e, int level) { @@ -939,9 +920,7 @@ expr_as_unicode(expr_ty e, int level) _PyUnicodeWriter_Init(&writer); writer.min_length = 256; writer.overallocate = 1; - if (-1 == maybe_init_static_strings() || - -1 == append_ast_expr(&writer, e, level)) - { + if (-1 == append_ast_expr(&writer, e, level)) { _PyUnicodeWriter_Dealloc(&writer); return NULL; } From dd0ee201da34d1d4a631d77b420728f9233f53f9 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 10 Oct 2024 21:26:01 +0100 Subject: [PATCH 039/114] Doc: Upgrade Sphinx to 8.1 (#125276) --- Doc/conf.py | 31 ++++++++++++++++++++++++------- Doc/requirements.txt | 2 +- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index 287e0da46eb11c..d7197b17865854 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -11,6 +11,8 @@ import sys import time +import sphinx + sys.path.append(os.path.abspath('tools/extensions')) sys.path.append(os.path.abspath('includes')) @@ -62,7 +64,10 @@ # General 
substitutions. project = 'Python' -copyright = f"2001-{time.strftime('%Y')}, Python Software Foundation" +if sphinx.version_info[:2] >= (8, 1): + copyright = "2001-%Y, Python Software Foundation" +else: + copyright = f"2001-{time.strftime('%Y')}, Python Software Foundation" # We look for the Include/patchlevel.h file in the current Python source tree # and replace the values accordingly. @@ -361,10 +366,14 @@ } # This 'Last updated on:' timestamp is inserted at the bottom of every page. -html_time = int(os.environ.get('SOURCE_DATE_EPOCH', time.time())) -html_last_updated_fmt = time.strftime( - '%b %d, %Y (%H:%M UTC)', time.gmtime(html_time) -) +html_last_updated_fmt = '%b %d, %Y (%H:%M UTC)' +if sphinx.version_info[:2] >= (8, 1): + html_last_updated_use_utc = True +else: + html_time = int(os.environ.get('SOURCE_DATE_EPOCH', time.time())) + html_last_updated_fmt = time.strftime( + html_last_updated_fmt, time.gmtime(html_time) + ) # Path to find HTML templates. templates_path = ['tools/templates'] @@ -596,13 +605,21 @@ # mapping unique short aliases to a base URL and a prefix. # https://www.sphinx-doc.org/en/master/usage/extensions/extlinks.html extlinks = { - "cve": ("https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-%s", "CVE-%s"), - "cwe": ("https://cwe.mitre.org/data/definitions/%s.html", "CWE-%s"), "pypi": ("https://pypi.org/project/%s/", "%s"), "source": (SOURCE_URI, "%s"), } extlinks_detect_hardcoded_links = True +if sphinx.version_info[:2] < (8, 1): + # Sphinx 8.1 has in-built CVE and CWE roles. 
+ extlinks |= { + "cve": ( + "https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-%s", + "CVE-%s", + ), + "cwe": ("https://cwe.mitre.org/data/definitions/%s.html", "CWE-%s"), + } + # Options for c_annotations # ------------------------- diff --git a/Doc/requirements.txt b/Doc/requirements.txt index bf1028020b7af7..5105786ccf283c 100644 --- a/Doc/requirements.txt +++ b/Doc/requirements.txt @@ -6,7 +6,7 @@ # Sphinx version is pinned so that new versions that introduce new warnings # won't suddenly cause build failures. Updating the version is fine as long # as no warnings are raised by doing so. -sphinx~=8.0.0 +sphinx~=8.1.0 blurb From a726ce73ca69b3a5ccc2cbe23061070e686b1150 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 11 Oct 2024 00:53:45 +0100 Subject: [PATCH 040/114] Add some doctest cleanups for `turtle` and `configparser` (#125288) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/configparser.rst | 1 + Doc/library/turtle.rst | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst index b5c18bbccffb78..3aad6f7b5d2d20 100644 --- a/Doc/library/configparser.rst +++ b/Doc/library/configparser.rst @@ -54,6 +54,7 @@ can be customized by end users easily. import os os.remove("example.ini") + os.remove("override.ini") Quick Start diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst index da801d4dc1f5b3..efa4b6f8f1d3f9 100644 --- a/Doc/library/turtle.rst +++ b/Doc/library/turtle.rst @@ -14,6 +14,11 @@ from turtle import * turtle = Turtle() +.. 
testcleanup:: + + import os + os.remove("my_drawing.ps") + -------------- Introduction From 2f8301cbfbdd2976d254a4a772b4879069dd4298 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 10 Oct 2024 22:39:17 -0400 Subject: [PATCH 041/114] gh-124872: Rename blurb file to reference the correct issue (#125285) --- ....7tinr0.rst => 2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Documentation/{2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst => 2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst} (100%) diff --git a/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst b/Misc/NEWS.d/next/Documentation/2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst similarity index 100% rename from Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst rename to Misc/NEWS.d/next/Documentation/2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst From c1913effeed4e4da4d5310a40ab518945001ffba Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 10 Oct 2024 23:30:27 -0700 Subject: [PATCH 042/114] gh-125296: Fix strange fragment identifier for `name or flags` in argparse docs (#125297) --- Doc/library/argparse.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 4eb6fad41f11ef..d337de87ca8f39 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -602,7 +602,7 @@ The add_argument() method The following sections describe how each of these are used. -.. _name_or_flags: +.. 
_`name or flags`: name or flags ^^^^^^^^^^^^^ From b12e99261e656585ffbaa395af7c5dbaee5ad1ad Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 11 Oct 2024 03:56:01 -0400 Subject: [PATCH 043/114] gh-125221: Fix free-threading data race in `object.__reduce_ex__` (#125267) --- ...-10-10-14-47-13.gh-issue-125221.nfSQzT.rst | 2 ++ Objects/object.c | 8 ++++++++ Objects/typeobject.c | 20 +++++-------------- 3 files changed, 15 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst new file mode 100644 index 00000000000000..c79650c3a64feb --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst @@ -0,0 +1,2 @@ +Fix possible race condition when calling :meth:`~object.__reduce_ex__` for the +first time in the free threading build. 
diff --git a/Objects/object.c b/Objects/object.c index 27d06cc081259d..4a4c5bf7d7f08a 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2372,6 +2372,14 @@ _PyTypes_InitTypes(PyInterpreterState *interp) } } + // Cache __reduce__ from PyBaseObject_Type object + PyObject *baseobj_dict = _PyType_GetDict(&PyBaseObject_Type); + PyObject *baseobj_reduce = PyDict_GetItemWithError(baseobj_dict, &_Py_ID(__reduce__)); + if (baseobj_reduce == NULL && PyErr_Occurred()) { + return _PyStatus_ERR("Can't get __reduce__ from base object"); + } + _Py_INTERP_CACHED_OBJECT(interp, objreduce) = baseobj_reduce; + // Must be after static types are initialized if (_Py_initialize_generic(interp) < 0) { return _PyStatus_ERR("Can't initialize generic types"); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index d90bb5825fd437..6ca4406ec0ea2d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -7359,18 +7359,7 @@ static PyObject * object___reduce_ex___impl(PyObject *self, int protocol) /*[clinic end generated code: output=2e157766f6b50094 input=f326b43fb8a4c5ff]*/ { -#define objreduce \ - (_Py_INTERP_CACHED_OBJECT(_PyInterpreterState_GET(), objreduce)) - PyObject *reduce, *res; - - if (objreduce == NULL) { - PyObject *dict = lookup_tp_dict(&PyBaseObject_Type); - objreduce = PyDict_GetItemWithError(dict, &_Py_ID(__reduce__)); - if (objreduce == NULL && PyErr_Occurred()) { - return NULL; - } - } - + PyObject *reduce; if (PyObject_GetOptionalAttr(self, &_Py_ID(__reduce__), &reduce) < 0) { return NULL; } @@ -7384,10 +7373,12 @@ object___reduce_ex___impl(PyObject *self, int protocol) Py_DECREF(reduce); return NULL; } - override = (clsreduce != objreduce); + + PyInterpreterState *interp = _PyInterpreterState_GET(); + override = (clsreduce != _Py_INTERP_CACHED_OBJECT(interp, objreduce)); Py_DECREF(clsreduce); if (override) { - res = _PyObject_CallNoArgs(reduce); + PyObject *res = _PyObject_CallNoArgs(reduce); Py_DECREF(reduce); return res; } @@ -7396,7 +7387,6 @@ 
object___reduce_ex___impl(PyObject *self, int protocol) } return _common_reduce(self, protocol); -#undef objreduce } static PyObject * From 0135848059162ad81478a7776fec622d68a36524 Mon Sep 17 00:00:00 2001 From: Jan Kaliszewski Date: Fri, 11 Oct 2024 10:15:46 +0200 Subject: [PATCH 044/114] gh-125058: update `_thread` docs regarding interruptibility of `lock.acquire()` (#125141) --- Doc/library/_thread.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/library/_thread.rst b/Doc/library/_thread.rst index 5fd604c05380ac..6a66fc4c64bc45 100644 --- a/Doc/library/_thread.rst +++ b/Doc/library/_thread.rst @@ -219,9 +219,11 @@ In addition to these methods, lock objects can also be used via the * Calling :func:`sys.exit` or raising the :exc:`SystemExit` exception is equivalent to calling :func:`_thread.exit`. -* It is not possible to interrupt the :meth:`~threading.Lock.acquire` method on - a lock --- the :exc:`KeyboardInterrupt` exception will happen after the lock - has been acquired. +* It is platform-dependent whether the :meth:`~threading.Lock.acquire` method + on a lock can be interrupted (so that the :exc:`KeyboardInterrupt` exception + will happen immediately, rather than only after the lock has been acquired or + the operation has timed out). It can be interrupted on POSIX, but not on + Windows. * When the main thread exits, it is system defined whether the other threads survive. On most systems, they are killed without executing From 18c74497681e0107d7cde53e63ea42feb38f2176 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 11 Oct 2024 11:43:29 +0300 Subject: [PATCH 045/114] gh-61011: Fix inheritance of nested mutually exclusive groups in argparse (GH-125210) Previously, all nested mutually exclusive groups lost their connection to the group containing them and were displayed as belonging directly to the parser. Co-authored-by: Danica J. 
Sutherland --- Lib/argparse.py | 6 +++- Lib/test/test_argparse.py | 29 +++++++++++++++++++ Misc/ACKS | 1 + ...4-10-09-21-42-43.gh-issue-61011.pQXZb1.rst | 4 +++ 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index d1f8fa2ace8611..2d8a7ef343a4ef 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1521,7 +1521,11 @@ def _add_container_actions(self, container): # NOTE: if add_mutually_exclusive_group ever gains title= and # description= then this code will need to be expanded as above for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( + if group._container is container: + cont = self + else: + cont = title_group_map[group._container.title] + mutex_group = cont.add_mutually_exclusive_group( required=group.required) # map the actions to their new mutex group diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index c9e79eb18a08fb..1ebbc21bc1755b 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2942,6 +2942,35 @@ def test_groups_parents(self): def test_wrong_type_parents(self): self.assertRaises(TypeError, ErrorRaisingArgumentParser, parents=[1]) + def test_mutex_groups_parents(self): + parent = ErrorRaisingArgumentParser(add_help=False) + g = parent.add_argument_group(title='g', description='gd') + g.add_argument('-w') + g.add_argument('-x') + m = g.add_mutually_exclusive_group() + m.add_argument('-y') + m.add_argument('-z') + parser = ErrorRaisingArgumentParser(prog='PROG', parents=[parent]) + + self.assertRaises(ArgumentParserError, parser.parse_args, + ['-y', 'Y', '-z', 'Z']) + + parser_help = parser.format_help() + self.assertEqual(parser_help, textwrap.dedent('''\ + usage: PROG [-h] [-w W] [-x X] [-y Y | -z Z] + + options: + -h, --help show this help message and exit + + g: + gd + + -w W + -x X + -y Y + -z Z + ''')) + # 
============================== # Mutually exclusive group tests # ============================== diff --git a/Misc/ACKS b/Misc/ACKS index d94cbacf888468..a1769d9601a2ea 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1814,6 +1814,7 @@ Reuben Sumner Eryk Sun Sanjay Sundaresan Marek Šuppa +Danica J. Sutherland Hisao Suzuki Kalle Svensson Andrew Svetlov diff --git a/Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst b/Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst new file mode 100644 index 00000000000000..20f9c0b9c78b12 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst @@ -0,0 +1,4 @@ +Fix inheritance of nested mutually exclusive groups from parent parser in +:class:`argparse.ArgumentParser`. Previously, all nested mutually exclusive +groups lost their connection to the group containing them and were displayed +as belonging directly to the parser. From b3aa1b5fe260382788a2df416599325ad680a5ee Mon Sep 17 00:00:00 2001 From: Y5 <124019959+y5c4l3@users.noreply.github.com> Date: Fri, 11 Oct 2024 09:08:03 +0000 Subject: [PATCH 046/114] gh-125235: Keep `_tkinter` TCL paths pointing to base installation on Windows (#125250) Signed-off-by: y5c4l3 --- .../next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst | 2 ++ Modules/_tkinter.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst b/Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst new file mode 100644 index 00000000000000..f64d15917da1fc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst @@ -0,0 +1,2 @@ +Keep :mod:`tkinter` TCL paths in venv pointing to base installation on +Windows. 
diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 4f05cab375ed6b..b0b70ccb8cc3d3 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -143,7 +143,7 @@ _get_tcl_lib_path(void) struct stat stat_buf; int stat_return_value; - PyObject *prefix = PySys_GetObject("prefix"); // borrowed reference + PyObject *prefix = PySys_GetObject("base_prefix"); // borrowed reference if (prefix == NULL) { return NULL; } From 2115d76acc14effb3dbb9fedcf21048b2ad62c5e Mon Sep 17 00:00:00 2001 From: sobolevn Date: Fri, 11 Oct 2024 17:39:18 +0300 Subject: [PATCH 047/114] gh-124787: Fix `TypeAliasType` and incorrect `type_params` (#124795) Co-authored-by: Jelle Zijlstra --- Lib/test/test_type_aliases.py | 44 ++++++++- ...-09-30-20-46-32.gh-issue-124787.3FnJnP.rst | 4 + Objects/typevarobject.c | 97 ++++++++++++++++--- 3 files changed, 133 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-30-20-46-32.gh-issue-124787.3FnJnP.rst diff --git a/Lib/test/test_type_aliases.py b/Lib/test/test_type_aliases.py index ebb65d8c6cf81b..230bbe646baf28 100644 --- a/Lib/test/test_type_aliases.py +++ b/Lib/test/test_type_aliases.py @@ -4,7 +4,9 @@ from test.support import check_syntax_error, run_code from test.typinganndata import mod_generics_cache -from typing import Callable, TypeAliasType, TypeVar, get_args +from typing import ( + Callable, TypeAliasType, TypeVar, TypeVarTuple, ParamSpec, get_args, +) class TypeParamsInvalidTest(unittest.TestCase): @@ -225,6 +227,46 @@ def test_not_generic(self): ): TA[int] + def test_type_params_order_with_defaults(self): + HasNoDefaultT = TypeVar("HasNoDefaultT") + WithDefaultT = TypeVar("WithDefaultT", default=int) + + HasNoDefaultP = ParamSpec("HasNoDefaultP") + WithDefaultP = ParamSpec("WithDefaultP", default=HasNoDefaultP) + + HasNoDefaultTT = TypeVarTuple("HasNoDefaultTT") + WithDefaultTT = TypeVarTuple("WithDefaultTT", default=HasNoDefaultTT) + + for type_params in [ + (HasNoDefaultT, WithDefaultT), + (HasNoDefaultP, 
WithDefaultP), + (HasNoDefaultTT, WithDefaultTT), + ]: + with self.subTest(type_params=type_params): + TypeAliasType("A", int, type_params=type_params) # ok + + msg = "follows default type parameter" + for type_params in [ + (WithDefaultT, HasNoDefaultT), + (WithDefaultP, HasNoDefaultP), + (WithDefaultTT, HasNoDefaultTT), + (WithDefaultT, HasNoDefaultP), # different types + ]: + with self.subTest(type_params=type_params): + with self.assertRaisesRegex(TypeError, msg): + TypeAliasType("A", int, type_params=type_params) + + def test_expects_type_like(self): + T = TypeVar("T") + + msg = "Expected a type param" + with self.assertRaisesRegex(TypeError, msg): + TypeAliasType("A", int, type_params=(1,)) + with self.assertRaisesRegex(TypeError, msg): + TypeAliasType("A", int, type_params=(1, 2)) + with self.assertRaisesRegex(TypeError, msg): + TypeAliasType("A", int, type_params=(T, 2)) + def test_keywords(self): TA = TypeAliasType(name="TA", value=int) self.assertEqual(TA.__name__, "TA") diff --git a/Misc/NEWS.d/next/Library/2024-09-30-20-46-32.gh-issue-124787.3FnJnP.rst b/Misc/NEWS.d/next/Library/2024-09-30-20-46-32.gh-issue-124787.3FnJnP.rst new file mode 100644 index 00000000000000..d9d1bbcf5a2fe4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-30-20-46-32.gh-issue-124787.3FnJnP.rst @@ -0,0 +1,4 @@ +Fix :class:`typing.TypeAliasType` with incorrect ``type_params`` argument. +Now it raises a :exc:`TypeError` when a type parameter without a default +follows one with a default, and when an entry in the ``type_params`` tuple +is not a type parameter object. 
diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c index 51d93ed8b5ba8c..91cc37c9a72636 100644 --- a/Objects/typevarobject.c +++ b/Objects/typevarobject.c @@ -1799,6 +1799,24 @@ _Py_make_typevartuple(PyThreadState *Py_UNUSED(ignored), PyObject *v) return (PyObject *)typevartuple_alloc(v, NULL, NULL); } +static PyObject * +get_type_param_default(PyThreadState *ts, PyObject *typeparam) { + // Does not modify refcount of existing objects. + if (Py_IS_TYPE(typeparam, ts->interp->cached_objects.typevar_type)) { + return typevar_default((typevarobject *)typeparam, NULL); + } + else if (Py_IS_TYPE(typeparam, ts->interp->cached_objects.paramspec_type)) { + return paramspec_default((paramspecobject *)typeparam, NULL); + } + else if (Py_IS_TYPE(typeparam, ts->interp->cached_objects.typevartuple_type)) { + return typevartuple_default((typevartupleobject *)typeparam, NULL); + } + else { + PyErr_Format(PyExc_TypeError, "Expected a type param, got %R", typeparam); + return NULL; + } +} + static void typealias_dealloc(PyObject *self) { @@ -1906,25 +1924,75 @@ static PyGetSetDef typealias_getset[] = { {0} }; -static typealiasobject * -typealias_alloc(PyObject *name, PyObject *type_params, PyObject *compute_value, - PyObject *value, PyObject *module) -{ - typealiasobject *ta = PyObject_GC_New(typealiasobject, &_PyTypeAlias_Type); - if (ta == NULL) { +static PyObject * +typealias_check_type_params(PyObject *type_params, int *err) { + // Can return type_params or NULL without exception set. + // Does not change the reference count of type_params, + // sets `*err` to 1 when error happens and sets an exception, + // otherwise `*err` is set to 0. + *err = 0; + if (type_params == NULL) { return NULL; } - ta->name = Py_NewRef(name); + + assert(PyTuple_Check(type_params)); + Py_ssize_t length = PyTuple_GET_SIZE(type_params); + if (!length) { // 0-length tuples are the same as `NULL`. 
+ return NULL; + } + + PyThreadState *ts = _PyThreadState_GET(); + int default_seen = 0; + for (Py_ssize_t index = 0; index < length; index++) { + PyObject *type_param = PyTuple_GET_ITEM(type_params, index); + PyObject *dflt = get_type_param_default(ts, type_param); + if (dflt == NULL) { + *err = 1; + return NULL; + } + if (dflt == &_Py_NoDefaultStruct) { + if (default_seen) { + *err = 1; + PyErr_Format(PyExc_TypeError, + "non-default type parameter '%R' " + "follows default type parameter", + type_param); + return NULL; + } + } else { + default_seen = 1; + Py_DECREF(dflt); + } + } + + return type_params; +} + +static PyObject * +typelias_convert_type_params(PyObject *type_params) +{ if ( type_params == NULL || Py_IsNone(type_params) || (PyTuple_Check(type_params) && PyTuple_GET_SIZE(type_params) == 0) ) { - ta->type_params = NULL; + return NULL; } else { - ta->type_params = Py_NewRef(type_params); + return type_params; } +} + +static typealiasobject * +typealias_alloc(PyObject *name, PyObject *type_params, PyObject *compute_value, + PyObject *value, PyObject *module) +{ + typealiasobject *ta = PyObject_GC_New(typealiasobject, &_PyTypeAlias_Type); + if (ta == NULL) { + return NULL; + } + ta->name = Py_NewRef(name); + ta->type_params = Py_XNewRef(type_params); ta->compute_value = Py_XNewRef(compute_value); ta->value = Py_XNewRef(value); ta->module = Py_XNewRef(module); @@ -2002,11 +2070,18 @@ typealias_new_impl(PyTypeObject *type, PyObject *name, PyObject *value, PyErr_SetString(PyExc_TypeError, "type_params must be a tuple"); return NULL; } + + int err = 0; + PyObject *checked_params = typealias_check_type_params(type_params, &err); + if (err) { + return NULL; + } + PyObject *module = caller(); if (module == NULL) { return NULL; } - PyObject *ta = (PyObject *)typealias_alloc(name, type_params, NULL, value, + PyObject *ta = (PyObject *)typealias_alloc(name, checked_params, NULL, value, module); Py_DECREF(module); return ta; @@ -2072,7 +2147,7 @@ 
_Py_make_typealias(PyThreadState* unused, PyObject *args) assert(PyTuple_GET_SIZE(args) == 3); PyObject *name = PyTuple_GET_ITEM(args, 0); assert(PyUnicode_Check(name)); - PyObject *type_params = PyTuple_GET_ITEM(args, 1); + PyObject *type_params = typelias_convert_type_params(PyTuple_GET_ITEM(args, 1)); PyObject *compute_value = PyTuple_GET_ITEM(args, 2); assert(PyFunction_Check(compute_value)); return (PyObject *)typealias_alloc(name, type_params, compute_value, NULL, NULL); From 08f6bf717118963815d9a3e60578104470fdf3e1 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 12 Oct 2024 00:27:26 +0900 Subject: [PATCH 048/114] gh-124612: Update autoconf container image (#125320) --- Tools/build/regen-configure.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/build/regen-configure.sh b/Tools/build/regen-configure.sh index efc80c8527885c..1a24b07c3ff707 100755 --- a/Tools/build/regen-configure.sh +++ b/Tools/build/regen-configure.sh @@ -5,7 +5,7 @@ set -e -x # The check_generated_files job of .github/workflows/build.yml must kept in # sync with this script. Use the same container image than the job so the job # doesn't need to run autoreconf in a container. -IMAGE="ghcr.io/python/autoconf:2024.10.06.11200919239" +IMAGE="ghcr.io/python/autoconf:2024.10.11.11293396815" AUTORECONF="autoreconf -ivf -Werror" WORK_DIR="/src" From a00221e5a70e54a281ba0e2cff8d85cd37ae305f Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 12 Oct 2024 01:55:36 +0800 Subject: [PATCH 049/114] gh-116738: Make `_csv` module thread-safe (#118344) --- Modules/_csv.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 913560ce4a0ee3..1a4dc3f1f55ace 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -14,6 +14,7 @@ module instead. 
#endif #include "Python.h" +#include "pycore_pyatomic_ft_wrappers.h" #include // offsetof() #include @@ -34,7 +35,7 @@ typedef struct { PyTypeObject *dialect_type; PyTypeObject *reader_type; PyTypeObject *writer_type; - long field_limit; /* max parsed field size */ + Py_ssize_t field_limit; /* max parsed field size */ PyObject *str_write; } _csvstate; @@ -706,10 +707,11 @@ parse_grow_buff(ReaderObj *self) static int parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) { - if (self->field_len >= module_state->field_limit) { + Py_ssize_t field_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit); + if (self->field_len >= field_limit) { PyErr_Format(module_state->error_obj, - "field larger than field limit (%ld)", - module_state->field_limit); + "field larger than field limit (%zd)", + field_limit); return -1; } if (self->field_len == self->field_size && !parse_grow_buff(self)) @@ -1659,20 +1661,20 @@ _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit) /*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/ { _csvstate *module_state = get_csv_state(module); - long old_limit = module_state->field_limit; + Py_ssize_t old_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit); if (new_limit != NULL) { if (!PyLong_CheckExact(new_limit)) { PyErr_Format(PyExc_TypeError, "limit must be an integer"); return NULL; } - module_state->field_limit = PyLong_AsLong(new_limit); - if (module_state->field_limit == -1 && PyErr_Occurred()) { - module_state->field_limit = old_limit; + Py_ssize_t new_limit_value = PyLong_AsSsize_t(new_limit); + if (new_limit_value == -1 && PyErr_Occurred()) { return NULL; } + FT_ATOMIC_STORE_SSIZE_RELAXED(module_state->field_limit, new_limit_value); } - return PyLong_FromLong(old_limit); + return PyLong_FromSsize_t(old_limit); } static PyType_Slot error_slots[] = { From cc2938a18967c9d462ebb18bc09f73e4364aa7d2 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 11 Oct 2024 
12:41:59 -0700 Subject: [PATCH 050/114] gh-124917: Allow keyword args to os.path.exists/lexists on Windows (#124918) --- Lib/test/test_genericpath.py | 4 + ...-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst | 2 + Modules/clinic/posixmodule.c.h | 78 ++++++++++++++++--- Modules/posixmodule.c | 6 +- 4 files changed, 77 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py index bf04b3fecf7057..6d2593cb4cf228 100644 --- a/Lib/test/test_genericpath.py +++ b/Lib/test/test_genericpath.py @@ -156,6 +156,10 @@ def test_exists(self): self.assertIs(self.pathmodule.lexists(filename + '\x00'), False) self.assertIs(self.pathmodule.lexists(bfilename + b'\x00'), False) + # Keyword arguments are accepted + self.assertIs(self.pathmodule.exists(path=filename), True) + self.assertIs(self.pathmodule.lexists(path=filename), True) + @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") @unittest.skipIf(is_emscripten, "Emscripten pipe fds have no stat") def test_exists_fd(self): diff --git a/Misc/NEWS.d/next/Library/2024-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst b/Misc/NEWS.d/next/Library/2024-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst new file mode 100644 index 00000000000000..f208793859bbf8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst @@ -0,0 +1,2 @@ +Allow calling :func:`os.path.exists` and :func:`os.path.lexists` with +keyword arguments on Windows. Fixes a regression in 3.13.0. 
diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 749fe54598cc39..d9d919ea75d853 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2015,25 +2015,55 @@ os__path_splitroot(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py #if defined(MS_WINDOWS) PyDoc_STRVAR(os__path_exists__doc__, -"_path_exists($module, path, /)\n" +"_path_exists($module, /, path)\n" "--\n" "\n" "Test whether a path exists. Returns False for broken symbolic links."); #define OS__PATH_EXISTS_METHODDEF \ - {"_path_exists", (PyCFunction)os__path_exists, METH_O, os__path_exists__doc__}, + {"_path_exists", _PyCFunction_CAST(os__path_exists), METH_FASTCALL|METH_KEYWORDS, os__path_exists__doc__}, static int os__path_exists_impl(PyObject *module, path_t *path); static PyObject * -os__path_exists(PyObject *module, PyObject *arg) +os__path_exists(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(path), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"path", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_path_exists", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; path_t path = PATH_T_INITIALIZE_P("_path_exists", "path", 0, 0, 1, 1); int _return_value; - if (!path_converter(arg, &path)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &path)) { goto exit; } _return_value = 
os__path_exists_impl(module, &path); @@ -2054,25 +2084,55 @@ os__path_exists(PyObject *module, PyObject *arg) #if defined(MS_WINDOWS) PyDoc_STRVAR(os__path_lexists__doc__, -"_path_lexists($module, path, /)\n" +"_path_lexists($module, /, path)\n" "--\n" "\n" "Test whether a path exists. Returns True for broken symbolic links."); #define OS__PATH_LEXISTS_METHODDEF \ - {"_path_lexists", (PyCFunction)os__path_lexists, METH_O, os__path_lexists__doc__}, + {"_path_lexists", _PyCFunction_CAST(os__path_lexists), METH_FASTCALL|METH_KEYWORDS, os__path_lexists__doc__}, static int os__path_lexists_impl(PyObject *module, path_t *path); static PyObject * -os__path_lexists(PyObject *module, PyObject *arg) +os__path_lexists(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(path), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"path", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_path_lexists", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; path_t path = PATH_T_INITIALIZE_P("_path_lexists", "path", 0, 0, 1, 1); int _return_value; - if (!path_converter(arg, &path)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &path)) { goto exit; } _return_value = os__path_lexists_impl(module, &path); @@ -12837,4 +12897,4 @@ os__create_environ(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF 
#define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=b93bbaaa8eb5b0ce input=a9049054013a1b77]*/ +/*[clinic end generated code: output=18d75b737513dae6 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 334350285f3b6f..c0af78ba075e85 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5391,7 +5391,6 @@ _testFileType(path_t *path, int testedType) os._path_exists -> bool path: path_t(allow_fd=True, suppress_value_error=True) - / Test whether a path exists. Returns False for broken symbolic links. @@ -5399,7 +5398,7 @@ Test whether a path exists. Returns False for broken symbolic links. static int os__path_exists_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=8da13acf666e16ba input=29198507a6082a57]*/ +/*[clinic end generated code: output=8da13acf666e16ba input=142beabfc66783eb]*/ { return _testFileExists(path, TRUE); } @@ -5409,7 +5408,6 @@ os__path_exists_impl(PyObject *module, path_t *path) os._path_lexists -> bool path: path_t(allow_fd=True, suppress_value_error=True) - / Test whether a path exists. Returns True for broken symbolic links. @@ -5417,7 +5415,7 @@ Test whether a path exists. Returns True for broken symbolic links. 
static int os__path_lexists_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=e7240ed5fc45bff3 input=03d9fed8bc6ce96f]*/ +/*[clinic end generated code: output=e7240ed5fc45bff3 input=208205112a3cc1ed]*/ { return _testFileExists(path, FALSE); } From 76b29d271b3132bf8e13bc724f10be8c630057ba Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Fri, 11 Oct 2024 17:00:31 -0300 Subject: [PATCH 051/114] Fix typo in ``Doclibrary/functions.rst`` (#125327) --- Doc/library/functions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7f8df704a33327..0638df04c6ff40 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -686,7 +686,7 @@ are always available. They are listed here in alphabetical order. The *closure* argument specifies a closure--a tuple of cellvars. It's only valid when the *object* is a code object containing :term:`free (closure) variables `. - The length of the tuple must exactly match the length of the code object'S + The length of the tuple must exactly match the length of the code object's :attr:`~codeobject.co_freevars` attribute. .. 
audit-event:: exec code_object exec From 89515be596a0ca05fd9ab4ddf76c8013dd093545 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:18:37 +0100 Subject: [PATCH 052/114] gh-119786: Move garbage collection doc from devguide to InternalDocs (#125282) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Carol Willing carolcode@willingconsulting.com Co-Authored-By: Ezio Melotti ezio.melotti@gmail.com Co-Authored-By: Hugo van Kemenade hugovk@users.noreply.github.com Co-Authored-By: Itamar Ostricher itamarost@gmail.com Co-Authored-By: Jesús Cea jcea@jcea.es Co-Authored-By: Joannah Nanjekye 33177550+nanjekyejoannah@users.noreply.github.com Co-Authored-By: Ned Batchelder ned@nedbatchelder.com Co-Authored-By: Pablo Galindo Salgado Pablogsal@gmail.com Co-Authored-By: Pamela Fox pamela.fox@gmail.com Co-Authored-By: Sam Gross colesbury@gmail.com Co-Authored-By: Stefan Pochmann 609905+pochmann@users.noreply.github.com Co-Authored-By: T. 
Wouters thomas@python.org Co-Authored-By: q-ata 24601033+q-ata@users.noreply.github.com Co-Authored-By: slateny 46876382+slateny@users.noreply.github.com Co-Authored-By: Борис Верховский boris.verk@gmail.com Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Jacob Coffee --- InternalDocs/README.md | 2 + InternalDocs/garbage_collector.md | 596 ++++++++++++++++++ .../images/python-cyclic-gc-1-new-page.png | Bin 0 -> 4415 bytes .../images/python-cyclic-gc-2-new-page.png | Bin 0 -> 4337 bytes .../images/python-cyclic-gc-3-new-page.png | Bin 0 -> 4876 bytes .../images/python-cyclic-gc-4-new-page.png | Bin 0 -> 4863 bytes .../images/python-cyclic-gc-5-new-page.png | Bin 0 -> 5712 bytes 7 files changed, 598 insertions(+) create mode 100644 InternalDocs/garbage_collector.md create mode 100644 InternalDocs/images/python-cyclic-gc-1-new-page.png create mode 100644 InternalDocs/images/python-cyclic-gc-2-new-page.png create mode 100644 InternalDocs/images/python-cyclic-gc-3-new-page.png create mode 100644 InternalDocs/images/python-cyclic-gc-4-new-page.png create mode 100644 InternalDocs/images/python-cyclic-gc-5-new-page.png diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 8956ecafed2039..805e2f97937e1e 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -22,4 +22,6 @@ it is not, please report that through the [The Source Code Locations Table](locations.md) +[Garbage collector design](garbage_collector.md) + [Exception Handling](exception_handling.md) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md new file mode 100644 index 00000000000000..fd0246fa1a60e2 --- /dev/null +++ b/InternalDocs/garbage_collector.md @@ -0,0 +1,596 @@ + +Garbage collector design +======================== + +Abstract +======== + +The main garbage collection algorithm used by CPython is reference counting. 
The basic idea is +that CPython counts how many different places there are that have a reference to an +object. Such a place could be another object, or a global (or static) C variable, or +a local variable in some C function. When an object’s reference count becomes zero, +the object is deallocated. If it contains references to other objects, their +reference counts are decremented. Those other objects may be deallocated in turn, if +this decrement makes their reference count become zero, and so on. The reference +count field can be examined using the ``sys.getrefcount()`` function (notice that the +value returned by this function is always 1 more as the function also has a reference +to the object when called): + +```pycon + >>> x = object() + >>> sys.getrefcount(x) + 2 + >>> y = x + >>> sys.getrefcount(x) + 3 + >>> del y + >>> sys.getrefcount(x) + 2 +``` + +The main problem with the reference counting scheme is that it does not handle reference +cycles. For instance, consider this code: + +```pycon + >>> container = [] + >>> container.append(container) + >>> sys.getrefcount(container) + 3 + >>> del container +``` + +In this example, ``container`` holds a reference to itself, so even when we remove +our reference to it (the variable "container") the reference count never falls to 0 +because it still has its own internal reference. Therefore it would never be +cleaned just by simple reference counting. For this reason some additional machinery +is needed to clean these reference cycles between objects once they become +unreachable. This is the cyclic garbage collector, usually called just Garbage +Collector (GC), even though reference counting is also a form of garbage collection. + +Starting in version 3.13, CPython contains two GC implementations: + +- The default build implementation relies on the + [global interpreter lock](https://docs.python.org/3/glossary.html#term-global-interpreter-lock) + for thread safety. 
+- The free-threaded build implementation pauses other executing threads when + performing a collection for thread safety. + +Both implementations use the same basic algorithms, but operate on different +data structures. See the section on +[Differences between GC implementations](#Differences-between-GC-implementations) +for the details. + + +Memory layout and object structure +================================== + +The garbage collector requires additional fields in Python objects to support +garbage collection. These extra fields are different in the default and the +free-threaded builds. + + +GC for the default build +------------------------ + +Normally the C structure supporting a regular Python object looks as follows: + +``` + object -----> +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ \ + | ob_refcnt | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | PyObject_HEAD + | *ob_type | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ / + | ... | +``` + +In order to support the garbage collector, the memory layout of objects is altered +to accommodate extra information **before** the normal layout: + +``` + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ \ + | *_gc_next | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | PyGC_Head + | *_gc_prev | | + object -----> +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ / + | ob_refcnt | \ + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | PyObject_HEAD + | *ob_type | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ / + | ... | +``` + + +In this way the object can be treated as a normal python object and when the extra +information associated with the GC is needed the previous fields can be accessed by a +simple type cast from the original object: `((PyGC_Head *)(the_object)-1)`. 
+ +As is explained later in the +[Optimization: reusing fields to save memory](#optimization-reusing-fields-to-save-memory) +section, these two extra fields are normally used to keep doubly linked lists of all the +objects tracked by the garbage collector (these lists are the GC generations, more on +that in the [Optimization: generations](#Optimization-generations) section), but +they are also reused to fulfill other purposes when the full doubly linked list +structure is not needed as a memory optimization. + +Doubly linked lists are used because they efficiently support the most frequently required operations. In +general, the collection of all objects tracked by GC is partitioned into disjoint sets, each in its own +doubly linked list. Between collections, objects are partitioned into "generations", reflecting how +often they've survived collection attempts. During collections, the generation(s) being collected +are further partitioned into, for example, sets of reachable and unreachable objects. Doubly linked lists +support moving an object from one partition to another, adding a new object, removing an object +entirely (objects tracked by GC are most often reclaimed by the refcounting system when GC +isn't running at all!), and merging partitions, all with a small constant number of pointer updates. +With care, they also support iterating over a partition while objects are being added to - and +removed from - it, which is frequently required while GC is running. + +GC for the free-threaded build +------------------------------ + +In the free-threaded build, Python objects contain a 1-byte field +``ob_gc_bits`` that is used to track garbage collection related state. The +field exists in all objects, including ones that do not support cyclic +garbage collection. The field is used to identify objects that are tracked +by the collector, ensure that finalizers are called only once per object, +and, during garbage collection, differentiate reachable vs. 
unreachable objects. + +``` + object -----> +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ \ + | ob_tid | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | + | pad | ob_mutex | ob_gc_bits | ob_ref_local | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | PyObject_HEAD + | ob_ref_shared | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | + | *ob_type | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ / + | ... | +``` + +Note that not all fields are to scale. ``pad`` is two bytes, ``ob_mutex`` and +``ob_gc_bits`` are each one byte, and ``ob_ref_local`` is four bytes. The +other fields, ``ob_tid``, ``ob_ref_shared``, and ``ob_type``, are all +pointer-sized (that is, eight bytes on a 64-bit platform). + + +The garbage collector also temporarily repurposes the ``ob_tid`` (thread ID) +and ``ob_ref_local`` (local reference count) fields for other purposes during +collections. + + +C APIs +------ + +Specific APIs are offered to allocate, deallocate, initialize, track, and untrack +objects with GC support. These APIs can be found in the +[Garbage Collector C API documentation](https://docs.python.org/3/c-api/gcsupport.html). + +Apart from this object structure, the type object for objects supporting garbage +collection must include the ``Py_TPFLAGS_HAVE_GC`` in its ``tp_flags`` slot and +provide an implementation of the ``tp_traverse`` handler. Unless it can be proven +that the objects cannot form reference cycles with only objects of its type or unless +the type is immutable, a ``tp_clear`` implementation must also be provided. + + +Identifying reference cycles +============================ + +The algorithm that CPython uses to detect those reference cycles is +implemented in the ``gc`` module. The garbage collector **only focuses** +on cleaning container objects (that is, objects that can contain a reference +to one or more objects). These can be arrays, dictionaries, lists, custom +class instances, classes in extension modules, etc. 
One could think that +cycles are uncommon but the truth is that many internal references needed by +the interpreter create cycles everywhere. Some notable examples: + +- Exceptions contain traceback objects that contain a list of frames that + contain the exception itself. +- Module-level functions reference the module's dict (which is needed to resolve globals), + which in turn contains entries for the module-level functions. +- Instances have references to their class which itself references its module, and the module + contains references to everything that is inside (and maybe other modules) + and this can lead back to the original instance. +- When representing data structures like graphs, it is very typical for them to + have internal links to themselves. + +To correctly dispose of these objects once they become unreachable, they need +to be identified first. To understand how the algorithm works, let’s take +the case of a circular linked list which has one link referenced by a +variable ``A``, and one self-referencing object which is completely +unreachable: + +```pycon + >>> import gc + + >>> class Link: + ... def __init__(self, next_link=None): + ... self.next_link = next_link + + >>> link_3 = Link() + >>> link_2 = Link(link_3) + >>> link_1 = Link(link_2) + >>> link_3.next_link = link_1 + >>> A = link_1 + >>> del link_1, link_2, link_3 + + >>> link_4 = Link() + >>> link_4.next_link = link_4 + >>> del link_4 + + # Collect the unreachable Link object (and its .__dict__ dict). + >>> gc.collect() + 2 +``` + +The GC starts with a set of candidate objects it wants to scan. In the +default build, these "objects to scan" might be all container objects or a +smaller subset (or "generation"). In the free-threaded build, the collector +always scans all container objects. + +The objective is to identify all the unreachable objects. The collector does +this by identifying reachable objects; the remaining objects must be +unreachable. 
The first step is to identify all of the "to scan" objects that +are **directly** reachable from outside the set of candidate objects. These +objects have a refcount larger than the number of incoming references from +within the candidate set. + +Every object that supports garbage collection will have an extra reference +count field initialized to the reference count (``gc_ref`` in the figures) +of that object when the algorithm starts. This is because the algorithm needs +to modify the reference count to do the computations and in this way the +interpreter will not modify the real reference count field. + +![gc-image1](images/python-cyclic-gc-1-new-page.png) + +The GC then iterates over all containers in the first list and decrements by one the +`gc_ref` field of any other object that container is referencing. Doing +this makes use of the ``tp_traverse`` slot in the container class (implemented +using the C API or inherited from a superclass) to know what objects are referenced by +each container. After all the objects have been scanned, only the objects that have +references from outside the “objects to scan” list will have ``gc_ref > 0``. + +![gc-image2](images/python-cyclic-gc-2-new-page.png) + +Notice that having ``gc_ref == 0`` does not imply that the object is unreachable. +This is because another object that is reachable from the outside (``gc_ref > 0``) +can still have references to it. For instance, the ``link_2`` object in our example +ended up having ``gc_ref == 0`` but is still referenced by the ``link_1`` object that +is reachable from the outside. To obtain the set of objects that are really +unreachable, the garbage collector re-scans the container objects using the +``tp_traverse`` slot; this time with a different traverse function that marks objects with +``gc_ref == 0`` as "tentatively unreachable" and then moves them to the +tentatively unreachable list. 
The following image depicts the state of the lists in a +moment when the GC processed the ``link_3`` and ``link_4`` objects but has not +processed ``link_1`` and ``link_2`` yet. + +![gc-image3](images/python-cyclic-gc-3-new-page.png) + +Then the GC scans the next ``link_1`` object. Because it has ``gc_ref == 1``, +the gc does not do anything special because it knows it has to be reachable (and is +already in what will become the reachable list): + +![gc-image4](images/python-cyclic-gc-4-new-page.png) + +When the GC encounters an object which is reachable (``gc_ref > 0``), it traverses +its references using the ``tp_traverse`` slot to find all the objects that are +reachable from it, moving them to the end of the list of reachable objects (where +they started originally) and setting its ``gc_ref`` field to 1. This is what happens +to ``link_2`` and ``link_3`` below as they are reachable from ``link_1``. From the +state in the previous image and after examining the objects referred to by ``link_1`` +the GC knows that ``link_3`` is reachable after all, so it is moved back to the +original list and its ``gc_ref`` field is set to 1 so that if the GC visits it again, +it will know that it's reachable. To avoid visiting an object twice, the GC marks all +objects that have already been visited once (by unsetting the ``PREV_MASK_COLLECTING`` +flag) so that if an object that has already been processed is referenced by some other +object, the GC does not process it twice. + +![gc-image5](images/python-cyclic-gc-5-new-page.png) + +Notice that an object that was marked as "tentatively unreachable" and was later +moved back to the reachable list will be visited again by the garbage collector +as now all the references that that object has need to be processed as well. This +process is really a breadth first search over the object graph. 
Once all the objects +are scanned, the GC knows that all container objects in the tentatively unreachable +list are really unreachable and can thus be garbage collected. + +Pragmatically, it's important to note that no recursion is required by any of this, +and neither does it in any other way require additional memory proportional to the +number of objects, number of pointers, or the lengths of pointer chains. Apart from +``O(1)`` storage for internal C needs, the objects themselves contain all the storage +the GC algorithms require. + +Why moving unreachable objects is better +---------------------------------------- + +It sounds logical to move the unreachable objects under the premise that most objects +are usually reachable, until you think about it: the reason it pays isn't actually +obvious. + +Suppose we create objects A, B, C in that order. They appear in the young generation +in the same order. If B points to A, and C to B, and C is reachable from outside, +then the adjusted refcounts after the first step of the algorithm runs will be 0, 0, +and 1 respectively because the only reachable object from the outside is C. + +When the next step of the algorithm finds A, A is moved to the unreachable list. The +same for B when it's first encountered. Then C is traversed, B is moved *back* to +the reachable list. B is eventually traversed, and then A is moved back to the reachable +list. + +So instead of not moving at all, the reachable objects B and A are each moved twice. +Why is this a win? A straightforward algorithm to move the reachable objects instead +would move A, B, and C once each. The key is that this dance leaves the objects in +order C, B, A - it's reversed from the original order. On all *subsequent* scans, +none of them will move. Since most objects aren't in cycles, this can save an +unbounded number of moves across an unbounded number of later collections. The only +time the cost can be higher is the first time the chain is scanned. 
+ +Destroying unreachable objects +============================== + +Once the GC knows the list of unreachable objects, a very delicate process starts +with the objective of completely destroying these objects. Roughly, the process +follows these steps in order: + +1. Handle and clear weak references (if any). Weak references to unreachable objects + are set to ``None``. If the weak reference has an associated callback, the callback + is enqueued to be called once the clearing of weak references is finished. We only + invoke callbacks for weak references that are themselves reachable. If both the weak + reference and the pointed-to object are unreachable we do not execute the callback. + This is partly for historical reasons: the callback could resurrect an unreachable + object and support for weak references predates support for object resurrection. + Ignoring the weak reference's callback is fine because both the object and the weakref + are going away, so it's legitimate to say the weak reference is going away first. +2. If an object has legacy finalizers (``tp_del`` slot) move it to the + ``gc.garbage`` list. +3. Call the finalizers (``tp_finalize`` slot) and mark the objects as already + finalized to avoid calling finalizers twice if the objects are resurrected or + if other finalizers have removed the object first. +4. Deal with resurrected objects. If some objects have been resurrected, the GC + finds the new subset of objects that are still unreachable by running the cycle + detection algorithm again and continues with them. +5. Call the ``tp_clear`` slot of every object so all internal links are broken and + the reference counts fall to 0, triggering the destruction of all unreachable + objects. + +Optimization: generations +========================= + +In order to limit the time each garbage collection takes, the GC +implementation for the default build uses a popular optimization: +generations. 
The main idea behind this concept is the assumption that most +objects have a very short lifespan and can thus be collected soon after their +creation. This has proven to be very close to the reality of many Python +programs as many temporary objects are created and destroyed very quickly. + +To take advantage of this fact, all container objects are segregated into +three spaces/generations. Every new +object starts in the first generation (generation 0). The previous algorithm is +executed only over the objects of a particular generation and if an object +survives a collection of its generation it will be moved to the next one +(generation 1), where it will be surveyed for collection less often. If +the same object survives another GC round in this new generation (generation 1) +it will be moved to the last generation (generation 2) where it will be +surveyed the least often. + +The GC implementation for the free-threaded build does not use multiple +generations. Every collection operates on the entire heap. + +In order to decide when to run, the collector keeps track of the number of object +allocations and deallocations since the last collection. When the number of +allocations minus the number of deallocations exceeds ``threshold_0``, +collection starts. Initially only generation 0 is examined. If generation 0 has +been examined more than ``threshold_1`` times since generation 1 has been +examined, then generation 1 is examined as well. With generation 2, +things are a bit more complicated; see +[Collecting the oldest generation](#Collecting-the-oldest-generation) for +more information. 
These thresholds can be examined using the +[`gc.get_threshold()`](https://docs.python.org/3/library/gc.html#gc.get_threshold) +function: + +```pycon + >>> import gc + >>> gc.get_threshold() + (700, 10, 10) +``` + +The content of these generations can be examined using the +``gc.get_objects(generation=NUM)`` function and collections can be triggered +specifically in a generation by calling ``gc.collect(generation=NUM)``. + +```pycon + >>> import gc + >>> class MyObj: + ... pass + ... + + # Move everything to the last generation so it's easier to inspect + # the younger generations. + + >>> gc.collect() + 0 + + # Create a reference cycle. + + >>> x = MyObj() + >>> x.self = x + + # Initially the object is in the youngest generation. + + >>> gc.get_objects(generation=0) + [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] + + # After a collection of the youngest generation the object + # moves to the next generation. + + >>> gc.collect(generation=0) + 0 + >>> gc.get_objects(generation=0) + [] + >>> gc.get_objects(generation=1) + [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] +``` + +Collecting the oldest generation +-------------------------------- + +In addition to the various configurable thresholds, the GC only triggers a full +collection of the oldest generation if the ratio ``long_lived_pending / long_lived_total`` +is above a given value (hardwired to 25%). The reason is that, while "non-full" +collections (that is, collections of the young and middle generations) will always +examine roughly the same number of objects (determined by the aforementioned +thresholds) the cost of a full collection is proportional to the total +number of long-lived objects, which is virtually unbounded. 
Indeed, it has +been remarked that doing a full collection every <constant number> of object +creations entails a dramatic performance degradation in workloads which consist +of creating and storing lots of long-lived objects (for example, building a large list +of GC-tracked objects would show quadratic performance, instead of linear as +expected). Using the above ratio, instead, yields amortized linear performance +in the total number of objects (the effect of which can be summarized thusly: +"each full garbage collection is more and more costly as the number of objects +grows, but we do fewer and fewer of them"). + +Optimization: reusing fields to save memory +=========================================== + +In order to save memory, the two linked list pointers in every object with GC +support are reused for several purposes. This is a common optimization known +as "fat pointers" or "tagged pointers": pointers that carry additional data, +"folded" into the pointer, meaning stored inline in the data representing the +address, taking advantage of certain properties of memory addressing. This is +possible as most architectures align certain types of data +to the size of the data, often a word or multiple thereof. This discrepancy +leaves a few of the least significant bits of the pointer unused, which can be +used for tags or to keep other information – most often as a bit field (each +bit a separate tag) – as long as code that uses the pointer masks out these +bits before accessing memory. For example, on a 32-bit architecture (for both +addresses and word size), a word is 32 bits = 4 bytes, so word-aligned +addresses are always a multiple of 4, hence end in ``00``, leaving the last 2 bits +available; while on a 64-bit architecture, a word is 64 bits = 8 bytes, so +word-aligned addresses end in ``000``, leaving the last 3 bits available. 
+ +The CPython GC makes use of two fat pointers that correspond to the extra fields +of ``PyGC_Head`` discussed in the [Memory layout and object structure](#memory-layout-and-object-structure) section: + +> [!WARNING] +> Because of the presence of extra information, "tagged" or "fat" pointers cannot be +> dereferenced directly and the extra information must be stripped off before +> obtaining the real memory address. Special care needs to be taken with +> functions that directly manipulate the linked lists, as these functions +> normally assume the pointers inside the lists are in a consistent state. + + +- The ``_gc_prev`` field is normally used as the "previous" pointer to maintain the + doubly linked list but its lowest two bits are used to keep the flags + ``PREV_MASK_COLLECTING`` and ``_PyGC_PREV_MASK_FINALIZED``. Between collections, + the only flag that can be present is ``_PyGC_PREV_MASK_FINALIZED`` that indicates + if an object has been already finalized. During collections ``_gc_prev`` is + temporarily used for storing a copy of the reference count (``gc_ref``), in + addition to two flags, and the GC linked list becomes a singly linked list until + ``_gc_prev`` is restored. + +- The ``_gc_next`` field is used as the "next" pointer to maintain the doubly linked + list but during collection its lowest bit is used to keep the + ``NEXT_MASK_UNREACHABLE`` flag that indicates if an object is tentatively + unreachable during the cycle detection algorithm. This is a drawback to using only + doubly linked lists to implement partitions: while most needed operations are + constant-time, there is no efficient way to determine which partition an object is + currently in. Instead, when that's needed, ad hoc tricks (like the + ``NEXT_MASK_UNREACHABLE`` flag) are employed. + +Optimization: delay tracking containers +======================================= + +Certain types of containers cannot participate in a reference cycle, and so do +not need to be tracked by the garbage collector. 
Untracking these objects +reduces the cost of garbage collection. However, determining which objects may +be untracked is not free, and the costs must be weighed against the benefits +for garbage collection. There are two possible strategies for when to untrack +a container: + +1. When the container is created. +2. When the container is examined by the garbage collector. + +As a general rule, instances of atomic types aren't tracked and instances of +non-atomic types (containers, user-defined objects...) are. However, some +type-specific optimizations can be present in order to suppress the garbage +collector footprint of simple instances. Some examples of native types that +benefit from delayed tracking: + +- Tuples containing only immutable objects (integers, strings etc, + and recursively, tuples of immutable objects) do not need to be tracked. The + interpreter creates a large number of tuples, many of which will not survive + until garbage collection. It is therefore not worthwhile to untrack eligible + tuples at creation time. Instead, all tuples except the empty tuple are tracked + when created. During garbage collection it is determined whether any surviving + tuples can be untracked. A tuple can be untracked if all of its contents are + already not tracked. Tuples are examined for untracking in all garbage collection + cycles. It may take more than one cycle to untrack a tuple. + +- Dictionaries containing only immutable objects also do not need to be tracked. + Dictionaries are untracked when created. If a tracked item is inserted into a + dictionary (either as a key or value), the dictionary becomes tracked. During a + full garbage collection (all generations), the collector will untrack any dictionaries + whose contents are not tracked. + +The garbage collector module provides the Python function ``is_tracked(obj)``, which returns +the current tracking status of the object. Subsequent garbage collections may change the +tracking status of the object. 
+ +```pycon + >>> gc.is_tracked(0) + False + >>> gc.is_tracked("a") + False + >>> gc.is_tracked([]) + True + >>> gc.is_tracked({}) + False + >>> gc.is_tracked({"a": 1}) + False + >>> gc.is_tracked({"a": []}) + True +``` + +Differences between GC implementations +====================================== + +This section summarizes the differences between the GC implementation in the +default build and the implementation in the free-threaded build. + +The default build implementation makes extensive use of the ``PyGC_Head`` data +structure, while the free-threaded build implementation does not use that +data structure. + +- The default build implementation stores all tracked objects in a doubly + linked list using ``PyGC_Head``. The free-threaded build implementation + instead relies on the embedded mimalloc memory allocator to scan the heap + for tracked objects. +- The default build implementation uses ``PyGC_Head`` for the unreachable + object list. The free-threaded build implementation repurposes the + ``ob_tid`` field to store an unreachable objects linked list. +- The default build implementation stores flags in the ``_gc_prev`` field of + ``PyGC_Head``. The free-threaded build implementation stores these flags + in ``ob_gc_bits``. + + +The default build implementation relies on the +[global interpreter lock](https://docs.python.org/3/glossary.html#term-global-interpreter-lock) +for thread safety. The free-threaded build implementation has two "stop the +world" pauses, in which all other executing threads are temporarily paused so +that the GC can safely access reference counts and object attributes. + +The default build implementation is a generational collector. The +free-threaded build is non-generational; each collection scans the entire +heap. + +- Keeping track of object generations is simple and inexpensive in the default + build.
The free-threaded build relies on mimalloc for finding tracked + objects; identifying "young" objects without scanning the entire heap would + be more difficult. + + +> [!NOTE] +> **Document history** +> +> Pablo Galindo Salgado - Original author +> +> Irit Katriel - Convert to Markdown diff --git a/InternalDocs/images/python-cyclic-gc-1-new-page.png b/InternalDocs/images/python-cyclic-gc-1-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..2ddac50f4b5575888d8d19cd9b6f2e3863132776 GIT binary patch literal 4415 zcmZ`-cUV)~vfn9?Pz*(Sks?SF4DHY{v_R-ZsUjdq5iyj2R83TRQ>ubMBya=-k*@S2 z3Mdj0L~1}mA_z$D@Zve|-FwgdzW2whS$p=LHM3{dH*0Xq9R2J5_h(<=`5s@q#(=Bn-Jf z-S2tM!qOTyM=GQ_F{MtvGeD=0kzPv=j*Bz!5MIOG|JTF)DzhYW(esjJ z8f05Ovk=8t?*$Q51LL3qw;pW~)XD##-`3zt|49Z~^5l;x(40MO)Z%!tAv7mko!A0n zmG#6YS#c$AIH392w1k_$~Q!)rSP0RDWQ!sH=4h`Nb>*26BS5k7PBYnJF~ zbT4h4YeC+M?a%Iny|4wBj~)hRJUqkjTZ&4xuGN!2BhK(*%O4{t+91coptKo@;$BTd z8tby(2ExM#R#22;W_ASN3no4_?B;Et>+u|1uR`eH#BUM!{+2a9O!2SDGaL z+x{Le<&h{En)7esAe?j22{GF+P z8UCG4tAF_PcUmjyag17G>?K%Ygo#Mdu;kiXRH~!owO1KUka-VjL$^T~iu-|^u@FXV z*t)x+@Ra1!*_1hMv}rjUt#m~jP)}G?nl@feRtmFl^L;= z$w$i9gtOE2uM7y16@5beh=!|xal#WO^1>CK3c3wRQPDQmnCt>Q7sNH$b#l;) zb)+(&o){N*nz=H6uIKq_vQ?(XgJ1~pri#cb=ka%tsO0yaD6c9ES|SKSB*m4xoHb>m z$_9BqP2~1b(B(d}1jpSZTH$@p%HrR&7YP!UPvAn@1#CgiS#>w|7F~|cuC?9XU99#x z$Ghr;dli0#a>^~k9QBm<6${xGj~lH05#tcZbhGmK?PS*qcRgf-H#pL+UL?|%V(#{G z2t*ZTug-}8@;so^DG_%kkv4?~h09NmYa(`53@k#7>4#RGi|03&*kkMp#~^v#%ugi8 zVN|f|WyD79+FkpDyJhG7UTz1r=5@BeD6gcRA-165an3iQxUae=c-m{u@q88<9TY1d zVULl6)!4o>M8jSY;fHcTREPMUfU3z8isR+49X@S`b$x;2*nBH&sw~D3yFabBtx!&T z+E|`(IYttvd@BxUOacm%qfsZRSw2l^cFZ~a`1)#@SN?VQEc|w^)J*8LlYu5Lv^VlP zD=zu+Q#4av36X_j0`^}w(C@vtH|w!vEkl%u@A=ABGz3vElJN@3$n|>t$FF`S#v{ia z(KvSA2ym1#Gm*+KapVLNWJVh6T%OAQw)M!2x74+-Nzuug{(c4)v7ziD;r_=Ur9o2p zZ93zl?Pq4T;F-FG znp{`A*bDVz|?V`}Lei!y3+2TSH}ph&!A zqox-2Aik@nulVg@6stEGr=~R$?}brw=3a8WNqbPFO#abgyQu-3IQMDXl5LMrx2?>w z)?K8J3+ExnBJ%-ipr2LO2uOYI@cit($04`Xa2K(v*0-i}Pt7t3V{@BE7$I|S@&<2& 
zd3&{qlKEddrbr;%za?J$9saHoq;8Wr-4UulLT9nhTbY z9SN)Jx0p)gW9q~zR1dP;uG321FcX!EgNsl#2wYVGTmi;w*G;8S@_jPOULQ%ByN90T zt9Rem1pENF0O;*+?-P1v^bW21RVGh8NssFpsG8X7E>Aj{EEzI%ZF}l-r}W7r-L!yw z%9fH7%f{C(EZ-YX;q zyHp$UHq+e_sIbgZM)L_y^y8AW%r(cWs&mP= zVDI9!p>wm5%dMV>s5WdZ`n@cLWuBbf>QJh4_KdtaFUl7?q=9jqn8{tN$VwNCSn8Ln zltHvz0N@b_Ua7XLt zzuuy7V$k}qZN^HneP)UTI&xm_ht zoVnB^x#18saQk(B+R-HN+#ek^h&34t(t)<>88N$x}nsZE#MGvQXeL9`VO$Vv~|fgxZUxG(+O(b+i^Jc6irFph_OD& zd5tV7qf*%xa&Q@f(S5)cdK_qf5sA_H*fm-&^kEp}TZ~mE+U?bY8x46b;`4`*QnFEw zgo?@Lqp?+|#0Ke!Pi@Ot(`BCypCHD?uhaYHVYUA6^WbU$H4*;p$GIae8b=*K&ArG* zzuLN{vNw|*EfNDmmePc}z6MdUOq3I7@^q51Pg9KnWuP|fuI`fmx^2fv>X|_XHTKLlue~_{KBZ$4PXp$#_#s+x*UPa zwMM%4Ar|Wf!v}}16|3E($eVS;^&qlVXGwajZC^~edaw{D+i3G!azPsw71~7cTNguU-QSiIGhDAhI#41P8wN=Z;8Jr4N3QG@%5e zw&Ao_myO|hIm%$EP}6Zdnq#vbKKmlFw;r|_9Vx^~+XpqF^JuLQ&{(2<$>%zO6NMT{P!G%_z@~aqz z3x2?YHLXaGd`21UD+LSO~EC;)9T#smRQaXX?+8EXbl_n_`chHawo* zh`N#(Wmj6bLNPy8zn9OV?$`Otm}3mGgnKGwXcTtr>t8!<{yDh&!1A~;V(01A0b6ZS z%8L5%dPPL@JPl8E6>p&odBe@s0;PdMm+Q7y>9pQ#l!xNmCyxQ$zj_0}aj;22_vQ!)t#z_Gs7bqTm{2b&`T*6Y zNj@A55xJ+s;$|r~0#kRv9SD;v;Tw^yLI!$7$|sdlZE_QTej6*58y4Ocn~ZSoGh93r zxI~>_MtOq!+0=_w1qQU8o=a`$(Hi2Z%9yCq*MfwjS@FiVqN7E$2FY>(vb2pl79&H$ zRp^$04WfI8rg1q-ZEN*_5aE^^G0%quH=3@ggc+|Z|0+&)VWzSdcZrU^T+!6V+<@Y4 ziRaf)y;OshyGpX>?w`s_cZ6uQ+R7Tf9P4S>FMY;~rXy}EUzBnDu7vDU$vR2C;%eK literal 0 HcmV?d00001 diff --git a/InternalDocs/images/python-cyclic-gc-2-new-page.png b/InternalDocs/images/python-cyclic-gc-2-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..159aeeb05024a3247381e1c3aec2b986035b5e44 GIT binary patch literal 4337 zcmai2c|26z|GzV1UlU^N*_SX`LzplkQPwOGiO9~_w~?}rl$3;wY$aRPjEpP|m2?rJ zmR-!@|}8q%k%v4d;R`+pL1U4-uLI6d*1h+^LgL&^H!%h*@V~t0N^w?GqC{x zC`fM)v(O8`)bGhCG@A?yWf?=9IGv3e#VmmljFh{G#RiPKn)u5jej&5;;o z8)=E~t&T2Aeq=+&9f}n5;8Aw~cqzZYcC67;rB*^zaJuDiizk4>< zkypd{HI5{l{iu!+E4ww&`yu@rH~L6v^a0C;#QkF4Q8Ax;&#ka-P(%4D9neZqraj+Q zW0Mc($GG5_SwfG4WBmty56JNFpLpyhNPcn$X?uy;M@}@U!ax)}3Z!MI`Sv4OtRaBY 
zOKGTOycVtl^4GCKM=8d^Gzh@KG{)<**a#`QVs!s_I|OBVLg&h$_bYccI@^USK;QI@ zd{wgimy)Z7W=4co$(u;qI_4%JwEkb%D*3{KgJxcywn!H<+|zNG?mKzNDcLe!&^$%3 zbh=Gh8vM8f$r==tNaG%SfCeTlAm_1i;K!>l-8AMisw(HkL@@@pmpSR<2!nOo1qx70 z+&{qM^LrWpi3}vK&;R20O0#ItLBWci>v~K$1fHzCy<>yikak+MfMlIAIU;)cV0?G3 zGt+ILk?>Rwl2sis&_T?*$^N%$e`5WP_upD2$%1@;@LN!8 zhKD=e$kgd(u-nvjv3b;Ra-v<;yJVMx>~rI>Lw_%G@U(d2njA8A!S?RSB!#}b^UJRd zf3|QVVi~X-D9gN{&PI`YNZWBW@4cPOY(y*vc0X#E$`9_Wh1x+aUHzim88=Qp&}Sl2 zsHhN5G=1r+%8V%x+}yN#n$&;ILl*qE)_-*8CC@|kpH^L?Gx7j@@#hD*bnzkNP^t5D zrKjPOR`iJ*Ay4PAXxQ_Ut*4u9$6?M@-@vJmd`QUlH1gbR(e^NlHgt+iK;Qd}fq z9}=u|!6ri)!vvINzVwvCZEWfy7Gt4Pk&OLTA3l^+Go!@{!%P(iz`jB$p43%N^n>FA z0p31Wb)V-AtuRsw!*(S&Ng`x_Ql6gTSc&IhlM67i&OF(?C$cV7;%f>r`!>`?>f3`b zE-`KkT+nSNPeVD1J{?tNo-}`|-Vmup+Y2PrbjxRUe|lgn>{p|& zdgvus6?CDdb7E-AtLGC7dU%jaxk}`<9G_<3Bq#X+4=JZXq#$YYTihij><63F6`3xJ z$Tq`YvN%)-K0-hC2ieB5KHLA+_WO=OL*#H1yA+gUvcE|XQvEl7bHk2Wz#AGf$lz~X(ebpht!FA-G%PP zXCf`aP1-Z;K-S5tVb&c{S?^!z?r^DH3@9BTY72*W&Of>rD$?Vqb~mnE7Y)PlTw%bi zYKc7K80#80cq7o(N}R8R#B*s{t`>R>~q{WFM91BD}6(_0CSe9 zQY!U@qJi_r==;?s#;T%t05$IUI=AQ6aY}5Nh}UrV=LFXuK^yXp+}rmnx81a-S7GP^ zgqDL&sL4v)-3HgiExZE9?8+_wlW>7ZSBVJAxI4AK6qI({mfPW2I{^UCn0ytq6HI#AuHG>9V2f&L`;r@dQn?sE zpXpv^A)kEDU{Gp7q)<4^IxS7F0U_gRF`V(P4o>vO$t`@Yc03h4TiEXWv7X6ih_9Tz zNcx2_Z*qZde$IB;;dAb7=3durk>_Yxo#eXRr95HM+4cDuvH3cO}C9oW9>YXC# zlHO`24BX0H2FbT_+41q4NYjWFUs~9}tGafYN`A@wh(-!}Tp$?|dMwaQZp}0&KVGF; z3=%W*?E9dEd9opOC^T^YgH!u}UOn+yO>zQTQ`)Z1ewa4t zH8Qh5D5h^~+n5hSUT9A+#N}N~7)8{##WyVpm5XqHNg}`|#ShO!oArG8xT1_1{Gj@R z^S<`TX3)N)9&H+kNnTw`tYOZ$Avry@O!p@?O64_ie#YxpdJV)(Thh8M9!N{R zV_33YhC8ZART#JVcFW?#DN`Jja#h*%^1CNLVI^cNkU8o(DWTW)oln~w2zkV{G)zU{ zGI^CMUdn`Be*O5wLN7nZuu~+|;@oJ;zH8+=vqE3z9;L1*ExpI9}&va*iwfp#1wAGRCS@cn6sruI%E+egyACipOqeFPO&jPVT)Wk{Our zU-~AT_fmgT~-chP>VgCCMMsy~E3Yp9#qXJ&cnyM4) z{yYkENC_`0h-jr^dW6WBX!boNI_7+{L%2Y_1I<|{(OQpmAjne|-gqqm^6U zN8qP3HeHJOOy?Wr-}?Lr*5R4&a=e5Kb*4si>|Zvf`DdwKUr>Mamy2DX!O(n_59y}a1%DtW@5GEg!`_iv3d{*&|RO_WoiAM3A(Rjn~R#&T3 
zed;2t2z3-A%Bqo6%hdERe9PCxn>>9);S?MF+!)1J5zifewCZ~?M9-Cn2TnhJ&>e96 z!l3!x6s#t4UKdu-JG=$MDc!td$mLlqqB~f{g+6n_v8dR7`}xH7t8(Q+1!#OC8@d@` zN8GW$1sfhsidbpt@@fUE)3({}qCz9RGgiOHGNQ{74FU?INh&SYZ$q?>Tvu|<;(?4EBJIyTKq;GWM4jJ}#1Lo-yg*&Xha^Jsk} zfsRiqQ4;lF4=ibuB)zj+%?0wke>0yp1?z@-=j{YB|EVMiN3Y*Bg^;*{#~M zkQBDPRjUD;q{3O09OCPSf&|_LvQQ!!V;Y8a=AAXYx&-IKfH|-mX}qZyd0^_T7)r7z8UaBAyYKJ-@ss0Wt`kSe~+jlY17oB%%{>_Vx!- z2O~jt-yO#?+W6k?vUw~6=#1mU6U`?)Wki`RiF-+pI=u+?qk&FJ-kT+#)vlvO?V^e}-zAd-rILnC+_S|gFq8e+tJ&n|HZU>X)^NSG6c*b}kTvC~@ zxOxV%6AHHH(_PfR%<8}W?B?wXx`o+gAnO&SGd#rK@4Wc2$xPgXD(E>AAhq$V=vbBl z!$W6vO@sO?e@P99c~-}|P=Yt9r)(WWo1%pV$T7c literal 0 HcmV?d00001 diff --git a/InternalDocs/images/python-cyclic-gc-3-new-page.png b/InternalDocs/images/python-cyclic-gc-3-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..29fab0498e5b106f813e08aa2799602269cd74d1 GIT binary patch literal 4876 zcma)AcT`i$w?1hgARUw{9f{J5KxmO50qIqw$(14qh(JsbDTxXJq)U?;L_n&b6afQr z0YPa&q)JgiiPEkRq)6li@9)05)_d!%_5L{Lo3-cLduH~WS!ebaZ*7TYXBA=v0D#@p z#K;x^z!XON?GeTcF!q0ZnE@=I1^lB3#Xj=55SyDZAlKN{sm-} zJ!`}HQU+LYVz}2x);DtOQ-JVC`l|Pn?_Z53u?fOgU7U=cA}lNEgk!9c(^U93WCfNv zx5BQ@1(V>Y2c^0vdAZ%QVT=KDCOE<=jBo@;lEdFo`Ft5eAy5C)cWR6mQXmbdR6c>I z-?t+no<$G6!9ftS7>WN=Nm21(6Aj*m6v zG2hDuqamMe-%5tDQD8b8dg`&H6ie9asgw+x+5!_BlEW}_gTQl;hm;}s%kGopIO?=# zF@vj5VuK~bsY{3vhSbY47T^5G_%RVAx!d<@5$0>fkJNDgJ$jn1HS;Ici|%-LlOM*v zEyFvn|JUOFULzy9Bz|8;a*bH>u~!>IB84v;#f9V zNQ%3rHTl<*-On9WbJe)WsuNK4gh_|ykptC^{lKxjz{d#lN+Lk+sa14+%!tlN>;K5^ z;nw{-kAJpx{wVd-3!z6;5{PvYSHck}5l`TR43srigzkDc_A5_*TL^<{I$?Oy`Zcr&|kzYj#A( zNYIw8U}&K&?%gcy@XecV)vS0>1ox_js6DRptTcsb*YtCYN%1pw{{I8*!c-w9pD#8H zZU>Z-R?SFdv?z&q7R3SEV*}~cgJ(h1l|Fvh9te3{#}ZyTGq=8WUF4$}kqR zh=Ll1VI$~A(QqJyoiUcrzmk6tg{p#$F+T=e;)eff_YqI;$IA7vaTK85e-adGoA);q^dHQ|Y~0^jk8QZk|`$x&Eo zg;gSU-CnNQsgP?f5(*4XyO^(+IBw&Xcy7A_MJ7DIdKyk?0|t-Q%|5NSb#zpu(|q_f?BK_{umQ}r1r-xiD$by9ao*7+s?1p9L5{XI8wJk zOP(zk%r;AGf==55*XTzT$mwePpJkTOQ{)@5P5DQ5y|{`->u+f+*v5}!q3h&TMwvR) 
z#`HxMv#+ExIQ+g6XmjBv53%vig52PWZr^_5pYpnQpZ2V$+m6n zVCrIA-#H9c9(XxjfJ(N?m5|j3Q5;kKz};tlv_Y8IiUW3%?&;xw`Qm+gzfr9qDh>CaRVzuCj?JGXjg)+ zQE?gCedj;DHk)Rr-kD7^PHS7ypFq%v9LG>1A_k`xwbVTaGqyDF#*gqQg6tG0wWpT+ zel1cbrUHwVf8&v)#0lA>Br37dME9IqL>|Ls{4`P9%O7~vQG}={QV$pR)I;PtJBA~j zpZW!vC-MAxE3_854r-8*yxQFQ2&ps*&GfRa29wn&ycWo(X9?GIQKP{5&oGgnJ@^y4 zfKe4vlPQ-aqLp6E@S^4Sd=w6doG@+5q*qq3P&6lNZdHdEAzfcWL~a#^%`;I82YoME zX3xmYT)qM4xwLv6F}vjzGAK86+o;QH(wGcze#ZR*&VYTW=WT*Wm z*`k=Y-13EYW$S>D{f(Xbmts4jb{EmPzg#2eKRaBe^qBW}+bptH$M1^7Q;9|67%twV z5o>ni4J10fubgm7uK300>Hwy2a?wxq5<$w>RFv8r0tG37sK%6b@p`|r?*^0?xT}w* z;;kl4%3QyW%{(_ifuaI^a7BXQai3Hu7&+#ha}&*Jomd6uAW0LzX0b@ag32gf+Aam`T{eZkZZ zK_R7gh{t&V>E8K0Eb__s&?27(*UwH{{b4M=9ne2RtrqhA*A#%ICCvzqEX~o~lb0N& z+KK?Wbn9eiS9A^YsU9PixDNma6xU-ec^`^z&VVz1^x1-{li%RKU4eP8-H z?~rbODeZoA-Qx3U<%mA}cCL{9eyf83uM6Xofn2BymRjV*ueN>PyL$aatIe}?pK4?o zzf4=e*-43IAK!@}r#^AioBgn)I2a~GT4J7-w1a)(;hs<)jFtydQ5LtQH}m|@6USm( zILul(Xelj*Gf$FM$~3Spx_q|AD)tuudikP;sL_K_$x+ueY(gBj%8;8PTrBJOr<*at z?ao`6dqua@OZ@6zZCoq^gth$5}!nij!AJ@`(Hq0S#r z$Ic#l9XD9Vm}D7hpWdi_ZEAr}*5XqOgZH*agrrwoPWOhjs!-+81RK(eso{s4_GS;#ngx?sTjvB1d}_M zpnJf?>gBqgi&55BzMTCQg&ba9X?C;}LVGB(_DAhT`}myjEa>M`**rqpuob*2G}YT` zFRr7I62VF7?@j6x$vQy$_&fj-LvgZY(m2M*JG@{9mVjMx4PznqWshGgr&~jnDNv6``*I|Iul@n~?MxN9aV^=i(&%0_wtzmuYk@qT`;WTj;JF}j&zcd? 
zy{d`QNi~<9VW)Y;N>SqY=0F90Je5FRPopTok}nzAC8qWW1kJ5FrqtXsP8_5l&wFd@ zg)&nN`3ca)w3PTjbHNxxi_n_w&5^4B?O2=PPO9Q4udl?Bz&2nblw?PO4x8}VvKoEn z3h{dB!p9mU4ifJl2tWWyo%T)y>+`SgN3v-HPAMjWU)>6HdI$Ca(o+BOZaeW$0>Lod zikCVGA{ceIS|Wq$PctDWIeq!WlbUN$9n3-jRez=TF7kYlb9_X~cOa9FJM=`Q#+KZ3J$p-~2#B_2$*`y`1PX&zkmIi}m3f*%3A`Fr@6Ot?Hy51)D z#)zl_X72l|$P}g=6A$pW?|mQgZF2~Thm*~N7tH$29jAK5WvS|GYMrs})ctM?z8fnO zYuRs;e{sAtBmH|Qy$X>Y#7^@7c_o-Y@iq5RD_O}4PJQJWlIIf3fSBy=gI<8VwXruG zfc``^QO@~5g~|&gN5y+DH9EvOoqh|YT3{8((RK$nd>69SlA5)e(gl00a|yLs9Go$Q z8y}a@S+4q4{8_GbUU^FIExhiWw7Wu5BkS=wyKqXNGj>fo5niY}0zxOWP#64j*k@Z% zd6UO^-g&HsA-J!AmoLG z>sl(Gvt8hF!0Y?MW}B&y`<92S&obG&NeIW8G(aJXOjezuXW`(E(Olo_b(d zF$;pZjt-W#q&!IADK;bCeHX~!nMWD64y0`uVwCzK=P|OmuvqxgiwfkE2sshViRpSJxM!2BN33vB1Ghrxmfy7kFsq{_ZVu*~ z*}I?DtL6eOtPrak+Ds=_=W_c0Xc~vltxR?_e*J*X`4MEi?8;$^RIH-6O9VWOyD(8B zK@4DSoq99iE;d#vXn|v8e>TPSEpQ~r=vNz9FZJS(DV%Z(yc*bZ@lB*!#}&!F?`{P5 z%V=2m6_R(m&`s*c?X)<#XgiXDFbvjeKpC?vm30ZQ_h~QbUubCoLb^=80+QJ#Mt5DJsz~qyZn7m+C#ZWkHH&RY`Hg2ikKXapFy2~KYn6zhDBSgSmsi22X& zG1HeqSiPTROyGM{Y`T2Z6N^Ke1@``O^)hY5y;)ZDd{-x!bU|BW%lY7dHT;chvQpWX zWy(2vEi#&gG!j&9&8R)azzutD|JJKxd548NL%2AEs0;sAvkv~Gg1yMbPsd0lh#$wM zbO>MWjj6s{zcO^*e?E7seEpSJ=SO|gLJy~PN5=$4@U30Ty}znFqaPB=e&S;#)XIwD z=^lbK1_hOXduZ1m6yPup(-*PFjO9{;S{?(mC5IWarm}j=>y)|Xr zBRmxB_gC4T?QcQhw_5X$ymF`tRQxRt90EgZAV2>l*7sDPT7U=so}n|zV9q&Vy#By= zBRoUA833p%tEej|YbvNJ+p8)gG?Wo4XHF|CBb1ei4-yVltA8B=gS`BFZvNj6Ke|;L Q7!H8xIZLBjLyz141yIB0(EtDd literal 0 HcmV?d00001 diff --git a/InternalDocs/images/python-cyclic-gc-4-new-page.png b/InternalDocs/images/python-cyclic-gc-4-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..51a2b1065ea64eaf009a96ce6fecd3132424ea0f GIT binary patch literal 4863 zcma)AXIN8Nw?0WE)W8S|NY{Xl^r8ZRK$LC)DN+R-YA{j_C}1ENg@{5B5k^!X6k+HJ z0Y+jdA{`}y6oCY000}}2NNACA<9Fw4KkoC~ALqRL+2^di)_T|8d+oJTF526`MC3#O z0D#%qS~>v$n9gqx3h`fnHRg9eJ_twHdR*tD?LQvt9*h~k0rop*V+rgL)B9ThK=QPm z82#6q+FnvrGKR}ilyOU0y 
zY+V+9bjatnv44;l36k}t$h^E*-KqRb^RdToT1q``4)%zjQR5@b*weiPHH>@Y2F`{^8;(ZxEGL1ZeRPC@)!%C-*AK#zFskKZ|H-ou3>c?44}EH&+LWMIAxb5(i}$3dChM z?u`r63*AJGT^zoUyxq_U-b}OlaGjeKUYaILw>LQa;PyuE-~eVFB5Cj|pcSV_hC9{R zrZX|;%nd_<=}yW;yfjjo{*e~b&$5dKBMQ>>c#A`4_{#5~Hbhbm1z|;gfsPkE(Cp=_ zk%1sAGMBPQpRPc8s9I+*h1G#i=+wBs56x%t-%kFgum5!NKYc~<1%B}QzjWli$aH>T z=_`abF1^Toj?c~vu<{jUk%78dBc8c~a^hud4DDfEv(XjQ_CW@946;K;FMHEb+L=q}txm9*+-at9*R-lvs|Z zDHpH4%Qk@THrX_p3Q|K`Wiyuc^EP;cUcKBv zYl`&7?`qmp-_L}Sch=%nQNFutI?RL5YVxx*P>D)qbAkjCm>VU(6tzwRaOtku0sHy! z{xKK+KY#%gOV>#JS+b*s*S`1zqw(V9w%vzOi3R&TMd>iSwxY`c@&?e^fknEfqf{;LTXW)tP| zTBuxr1;d+}9+pp~U+>quWkWXE_t46k#U`xF0WJ`-gq{tLgi05AI5#H1N_{_WKPOGv z695vnvpvMKRZtu-_M(EO#nhV>Bwi=Nv~sq(#DvU|YoY^v+Kr~?mI{`z%PvF3x} zV^$V9tY3NK>V`e}l7l6Js(>bHv;C%}6PkVeDC+c#;?esytg)++)_>&HGAkJIgqU&_ zr=$+l{K@F}3b{Yh@@ukWs}5P1$X2{COfY{af$)mG3r?l;$jSzYV zn2x(ZHa|)CcJTiwVQj{-e!D4#k{>kEABM^$L89KGNhuW<@VjL1!Bi7~ou%BB&>>nu zg;88#@?CTe)e+lu3_!GbrQTY-SPf9r;C^@7j)C0ZWOH<*d!0?TMf}61%YkHbUApHV zRfL4mG@svsfhUU7BJtcbyjgr;-En$pOy;RT2{H|Crzj7?t!Q8SV^_cUF#WorB!Ux( zeE0H#5+eBFF9C4!fKt;_Fz%XR#n(2ko8j4z+>OpzYm_pK(xB@*y>+_ioihqSeETWDiZgv{QiSDCMe+=L>a_6u_$7ps2}+1dF<>5_VTb)u6hXuVi{iu^ z@$wKH+Rc3YtADtNa~PD{);cJU&7gvE?bBOzgt-u&AI4bLUtuwg4ueg$$b-475Tw1p zEo)F5hJ1aw2=G>9y-yYLrpai4xo60wB3ORcA@sq!mbn6~P=a=EaLIcidKRIf$=$cf z9?z6!k?r)|yq>9|@~gk-_$CU`y@1>YTkFMHCInK#bB(i=V6GxN8&<2Jcl`ve48UR_ z8yxF*vg7D^lQ|^`WBq@yo(k#WQXX`KYokM;nWqpQzol1LxDrL5Ae*1}@ zqY5$OEo>=k_I%yWnM}GLr?Nj{QfE%{vcp|0~DGD^%qpU zm(TGTya+dzeXL7Eek0p_WT8R*_&WrJchMD-A>YI5V}@iKf$`&w0bTcu!}7=Z3u5PT zoiPH-kU#zA2=Bn%n}>t2 zb{fGm9K1NuZ$a>!#2)rZapaw2j4r6j$>vwzqKB_;(2DXc*iLaBx9{v16g6KPz~i+8CD(>WRG04|)W1HoEBY9_yaJ=C<~f~q4z|GK6d zk48(2LcGS>(ee6Y`9W=riL73?GbCl|a(?~zn&@~{1~CS*0G3HsV9*C!2f}Y8%k{b@ zBqWe6RC@I9obQ}zW49Y2d-d%~3M|cc9F#r$o&~pYL|)#4QWPbp&bumw7;2*%JBxpU zgy?Dmy!nJ{*RH&dG9j|JUklUKYQ_GvCs%x_*<1eFyFSI+3U1k5-Mcm#CP1!I^Y~ch zJ;nNL$*tMjY_x{@Py?j{HO#~JQ5sI*abR-G*i~xC=tdpK{pHQ%Fc;)VXsRr4ZvKD> zy$>%Q!Sp%+it{2D6<+EeRzD_8uZx3W`wZ=c=+Sul7ZFR2`&hlG%p`?EfEFo+RL{K` 
zTb&GeLs&~R&S5?72+AOHoAZYx8dDW*rhNAESQ)gT(OESQIswZO%T~?2pgO}9FKZn6 zo)KUks!ZEllSXX>Kd*6~?+ZdXloox$9;0U_{+b*78_s;ITq4O-L3J!sb6r%!QuVq1KM1U=m5FS>Wb9}noiWkq~c-baijNv(*p@K z7=5{Gq1vJpJE__C;o$5qvFof0hD$5WS9%Y4SsQU+2Noh#k1H^jjDKgo=|3o?V}t`( zX-$q2Xw0Mfk@8_w{6i39YwsH&3d}+uA7mO~0vNT&v6Qh^Ky8fizv(71LMD{S@OO@jO$IB3FA`v@1IO znTTKbi8B?~sOY9Hg3_fMM^gTzoX81y!*aB(_#A>Z_%ZYsQdSmmR2d(mM-pjQFt`&^ zQ2DaBeW7B+6Z4ZDTlRr%MdhXJa0^m|GJQW>O0(*CumHpq>O{Rf+_<8JWB*JO9}bzOTiQ}Y7t=6%Ev4DBm(@i&h!DtSmhr3Owd;4 z%_iao)5Ov`wK$3&g&Qw|QTeCTs|4c!yU;RTU$}HhJ0|V^UG#2RXsyC?-HjXb1p{`4 z!yR7M06wM89~b1fGrKvY5#!FGjl%F@V`)#|6#W2K%#F&{Oyry9>n%PpM+6zKD(%L|6(d26|LCG|-`)5adrs-6C+dNE&?gAwXDFrvLfw!J?xy+E7}mlV~c zk`%TDKFfCBiga>3^Gy8FCn&3sVDvcN7bnsE>~e~29F{nJQmNstLW$PVPT8!PyBV5D ze(p5<(wRpb&A%XGvl;s{*W<>}960{>W=N+LJ;77G5LUI`ET~^)mP#i2wurLC6f0{L z=+q%&xRR;~a??UgQ5~qm7#cQr0{_V{hbH(7{_#~qYmlI2RdpVzjY3Z59;z@Er zbb9?zjC^#_`k!mz_K&?pPMFNzbeG$m03Bpxe8R9h>okcC#Q0~LS1Qk~4(`Yf#Mn({ z%yqv%>fduEt4{{?+vy5zy}YXP!56)mOC9&tEoXP?*-kZC^07vdI!bm=z7-H@_0DY- zcL=HCB?I*qB>u{XMp8J}d^xFEf)pjzU+ojQ4Z{<(7I*mmvV2+tL*~}ng=WUF?+A>s z0yo6WB;R^l0VQZ;1n;vk6pW+1II(h)@Oj!l&Yrvgd!(t%P(nleLq6UIGn#*#_i_T? 
zWjf%iv*v7>uBNYj@OgF~uEuuwYBBEJ4-*!rJn_{+6gOO|UJxj5=7EKLFcdDjU6D@D zl3*Sn%YYS60GY0U;`o@?iEmO3@jCS5o%L|;i`xWcfYCkhdN8digsRYY&s|5Wr3JHx zSImFGAc`|DL$IA|5YmL~rZ_7ofy5e9Kr634Taxip*QFOxlxd59fNbzuO z+f#zHRHdas%p)1B2%{38=M)X||NouRD08~fs7FgmDk5z2UoYP`HR`qa3;?wz!G=EmDUuNZx%pi$CZIprNv1WZM zCJrK@$M^$aB^X5e!=r+6k7v5MIKuk}6h;GTad*3@eCGDXuFmH0_iY#aFe4>U%`A{$0@ko)B3wL z`e#ayX0qka>ACrcM#OG}z3vxcpx@0mym2_!vc=gPt&llWuU50>WOKW@(d!`95t-~- zyC8I+rUK`*P2O#)ju=CxP=0qxM|x%Z$lPOR5FjL6g*{N)SNp0ANMvs zUV9RccxLc;?>f@1$~0rl&0%X|UA&dT1h`plJI{|Ce<0u%rM literal 0 HcmV?d00001 diff --git a/InternalDocs/images/python-cyclic-gc-5-new-page.png b/InternalDocs/images/python-cyclic-gc-5-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..fe67a6896fe4b07c03291ca8b21781fde8785757 GIT binary patch literal 5712 zcmZ{oc|25W{Ksd(U>H&MeTi&kD-6RRYgw`{4T`ZO6oaB+CgB#@ibBH?ne3G1rjSZx zJ@y(GNf>)HBujpC@9*CG+g`sv&iOp&`99z0oaghL=e(ZtzHMh?&dn*p34uVkEiF#l zLm)7aSsr9(o*-sHPcJfsUr3MiX71YkS z46$;&&OfOq;KkV&VAeD?n6jEu#b}*Z-deqM^y;-f+eey7Npi>dh~_sRV&$;Or(ty# ze+x-XsESiz@uTdi-t6t>$w z?tqJflHqqz=fsXi>wp3X)|aM<0RvPBaoE)TL;G*cej%#ZWR?J^yBrn?$}oiwFoo4I zy9ZeRiDl4llWn9qz{hKd0HiE7*%L(=Sj$wrCK|Z07epI#mLCA%?Z=F3hX(xC6KHE1K*+6MMs3EA3G_fVf7p{ zopk@Yk-5&Z$QnEt$r3I1N%5~5!x`=WJ}!~~(=Vv|pTcyOB=FBD|Ic8VvHxrm^6`VB z>*F={09~pb0|!-A_;9C18CZZ;pi_TQx8`}uCsq83$p#z&*?isjtt#SQ%+;dOS2DFwE<5o-+B=AxsXVWaCp= z>uK#VnD~V+CSO=Mbe)xQgOOnL!)eyumioLPa)XO4*Ac+bNI0^4{AAaeT%wP*3ye}- zu%LvEOXTVjINl_9aZWghyCAVQ9Ky+Tg86L7f8;NFnA85JOaBU; z3P&d$GPYng-N!*D8fBhJF*BxE!KQM)Fm`Z?2Rv@YrHYL+$Bs2d)WaA^C{EQ;0PqJ^ zE~qmCol;&$OM%Xa&AH|HYAynoIbQdDM=wRGiIpCOaaFHzy;dTF?JvXqF(rm)X%ciz zgEvW~FY~8$IW&86O?U)tO3w-Tq^i_N2!#o8$t9#;n*$E5wE5IP{+%ZV(AxBoaIc&mHr&Em0-=x)oWFQeB*m-=|q-OvgM*1<0eb6?4 zo|Rro&R?#a!o&L4s&*?Zc0X^bD&|M-^Ohj}i)YTpY4%7q!E^6MNCKLquTHwJ z}eWu5d}US28D?~qLlHJ)ZF6RE|R!%`wp#f%FnG3piiS#QU56_MC1|Hs1uSEH`7QG~=s;SL4?8@2ZHAE9Y(MErPitJDJ_t(^F24pmdi^G*yy>7k1HP;Lg|#G zVmRiSK8T4nqks2UP3j@n9P8yy{;_pR5?NPY7rFXnZ}Mw!r#Y08E)d)+_Ye=^UyG;8;o%muPF(SRyRID8m(z8 
z$%=%a6D$|Jc`{WN-=Ab=TF6BYj*Iv5@|h?aoC^P%ZYh6Q5<*BxDNStAkgiQ6%jsn; z43n`B?5w;uN}-rrR$(N6#|amW_a{5G)2(_Kz?|{Ot-H~veS;#3@d$0tIAkJ{~2Vz3@gi9(ZmU^{1ER9l+K3i-@yi%yla|zcZc~=i&FwoAB+n4 zw=6qr%{$K})dX_{S;s&rZ_Jc&!v|9c!fg-B)~5$kLG4x-Lbs0D(3ab}cMjH;3f10B zYMJN8IIQ3!w?hDA&&o(*a!JmKMX5D7t@LE~mf*qaiLPe_!R=ilJQ~L*`S|lu^z%92 z&4)G;UL7-{c%J|D>%;7EDio*hWlQ7%pXlDq%k8U?WNAtl+{DTS5yvlio_D8i@?z(0 zh&M{U;dnf0wV7WpioWyAnTONlGo2;^QLcDl?qzIpgZ$B;r{v}FR|kP_6axu$5P9wQ z`C>iR;W`CV^$90M&WaN%E`-g~O1`Iuz%Y$3>zs{~?BWx<^(jeT$yFM9PtFIP(Lcn# zR+z*hb#`jfC58ob-sM+z?s=86m5mVQKkK(Cg8QP+K~v^ue{ECAqpL3p4k$fex407? zF7=h=F!#pqdryz)32_M@M-=BM>9KItk-5T&$$H)SzZlP>^4{{udc^H@9C);>vKb?i zV%-($3Kx*T!Cv+Zf2PZ%kUv))BI>LEGtjJQ&Ut4(V@72b4u#h871(rK!uSlidkFxLm` zXMOZoW>>!>o^Sd_oMg%K+>Rf;1F4kVHvbL7KC=KPUEtYbh{jT4$y5p$cfA!Ss(cW3 z$`~6CcR;emUYMGkr@mp6K8NE(||!$-fpFm?WiRI|*PvIber#KT}&&dSB8 zNAulb_tW(4rOsBZnT4m|>KgWa&dqEjmi@X|UaX9TZ?_#R34@Vk*X$kgL~}inTBOx} ze=PO(-B?)DzNLk)F&Zi^0V8m>DL2Y|)p45L|9G(2grFL-%kgGMxKu+J9r+o&7XI0j zcY-}6sFosQliv)SRyST*#Dr_UMgkoT^3SV6cT_=_2q@$JVG*y;1GhsTphp2p{gxEq zlAV_QUBYwoGoyf(!CxEkQyD6L?qQQwZjlTqjj~V?dMaM-X@~E2MDkC)i_Ie+|4jw= zsHdrEfd{s77Qfb|m^gY=*?Pzc{r0yvu^a?n-J{Jb3Qty!H;Ohd8lfykfNMCRb4VFY z0qm~3?YP*D2Ur&*x05Zekd{_?0=bf*M(cA-&>aDrguolooRGx)7u3e(HxR60B!Zb6$wJIiCd_@55hyfgfL zdQ4m2RaHyuxwA;VmRh8`@wjgHg{c&ceh7i6iEo6`1zu#SIclVys@w-XRpfGZb72kv zp;e03rHPH>MSH-#X%VxIB`W4i>#Nw9)AqQ$C2qPdiZelwCt>Y+`jeG+8MLaGCcY^twGpkw`gmPh zEp7G%$wmp!+B_be#6zcgy&C%?E%w=K$%BPB)TN*O2kDaydcoBjW331M@wpRINIq1#+HfGkO z<_?VTl5I%ZMRm651X8Ox5j{m`D8nh-J?%%5Ap7pvdkM?;AQ(S38QtZ^=@*dwD?m8= z^kUl$FHXAi_VWm+ZNsSh)L27dYW?UvU+qzZLMX1H_=lXZs^wOslx5J%LmHOjVl{i) zXrQuwG(p2NUR>di4&%u`#;Z;^(ss2+C{*#^skD##>VjSof4q8~;(PpGfzqD0YB`F=+QB)vGaF0-SA9(v2m zD7)v%!zd0)X~7LYu?QzickxFz^{mHK4O-X&%1yp;=HD{l4fM9qP!_7@rE*x@id4Pr zi~S}No$u&z;|BF)LW@*PTqxH>%LjLPsqjbfoYKb##ZC%a7=~koIYM zmop%$v>&!H6j>ts>Fro#j5h2piQNFh+hXu41F>}BO1;FV0y1*BV+8w|&LD6jir zy*q@`a7%17vnRQ1U@I=&Qi)>W?v#xcrA~ZoyOiM%yzP@lu-zQ~IfD(-gHFUg!K=F% 
z=G4B$4$2xTfDWBTlW@W#^wjFh-*^+2-S65>^U?bpXLYksU(;ZeEF51c&PZ-IGU?cP z*hJAvNswxe(}6Tl-j-=WEfvohUnv}&Wv8gK4D$vIt?K}H%dTk4JC8ysU_W-Kmw)h4 zuTDO{+s_JYsKKjPmL}U$#KsX#-$7+(mM~BM;Sg7m%9PI1^+dhVqsL8BUqJ&N3<997 zy2<4P@O>ugy9e`?kjkLdd!;*#Ds^|K%T}?{itlqZDf)f$HMOl)nkTMwteuvk_!&)A zoi2|S4%^-*tvcO-L<$vtDzy?9O1BRKsp;_wnkDT9QI*yGY=kLPwPXW~8zNuZ?je0Y z5|S}gY@jM$kG^$CrIXZ^@=?Y+z6KIqg>dE2f;`oEzb-BJY^d_$<;ms-Vx`H&H{kK! z#Y5Rv%1V)4VfVY1hfwNT z<%(24Jfird_~p8}*k?aLcISR( zfzGM#J5P2=RWy~~xPcaud|-@NBe2KYrVfhtTwezL=fu_epP{7nBmfqLFxC36^5HbD9{^+0UkD5UyMIH=sPI4l(<6B?wq-}UIHOP zz%^XE3oF_^2M<+yRIVcz-16yE!62_qYSrLwN zA{pp^6;dgKOq`t_{xbXrhFmoz0-hro7$hD~PcqIdzPZ?A3cNfF&X|dNDPR2&$diC( z2jvi4a^)@v2u3{hhc%N8(CaRU#;*y}*Dq&1D|{Wd|0L&FU4NowN}Xq4nq06 zh0RUVXHjD^m8I-Nd4Ss49k~m)mT)cIE`IidI$@YSt2QR^A@(&gNXfW>gTCd41;HmdEn~#_NihACS>I&lmmz4Qi;myqczM4#*1lG3RG~EIHVNI_Y z_;0ig=AO_+DVApk+=!PYw~QeXDwlQ8!DFJ3uGtwTOM`>epL+F3@@EWRrRp*P)Q$jn z0+0Ha_56+h{u6%vONICVFL=NStYO~X7=$JXW!#ln`Zt>VTmFWl{Y3|ef%~pM z3N_a7Ojdi8VCqWny+FXDeZufefoQ0yYpJSft7Ulni@p(`R@Nn d2oCiP^t=B5CtNi{nllq1mZmnRtBk#G{s-d0T~Yu5 literal 0 HcmV?d00001 From 21ac0a7f4cf6d11da728b33ed5e8cfa65a5a8ae7 Mon Sep 17 00:00:00 2001 From: Victorien <65306057+Viicos@users.noreply.github.com> Date: Sat, 12 Oct 2024 01:05:13 +0200 Subject: [PATCH 053/114] gh-116938: Clarify documentation of `dict` and `dict.update` regarding the positional argument they accept (#125213) Co-authored-by: Alex Waygood --- Doc/library/stdtypes.rst | 25 +++++++++++++------------ Lib/_collections_abc.py | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 833c71c4ce4b9a..a6e2e3b8928ebe 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -4505,14 +4505,14 @@ can be used interchangeably to index the same dictionary entry. ``dict([('foo', 100), ('bar', 200)])``, ``dict(foo=100, bar=200)`` If no positional argument is given, an empty dictionary is created. 
- If a positional argument is given and it is a mapping object, a dictionary - is created with the same key-value pairs as the mapping object. Otherwise, - the positional argument must be an :term:`iterable` object. Each item in - the iterable must itself be an iterable with exactly two objects. The - first object of each item becomes a key in the new dictionary, and the - second object the corresponding value. If a key occurs more than once, the - last value for that key becomes the corresponding value in the new - dictionary. + If a positional argument is given and it defines a ``keys()`` method, a + dictionary is created by calling :meth:`~object.__getitem__` on the argument with + each returned key from the method. Otherwise, the positional argument must be an + :term:`iterable` object. Each item in the iterable must itself be an iterable + with exactly two elements. The first element of each item becomes a key in the + new dictionary, and the second element the corresponding value. If a key occurs + more than once, the last value for that key becomes the corresponding value in + the new dictionary. If keyword arguments are given, the keyword arguments and their values are added to the dictionary created from the positional argument. If a key @@ -4669,10 +4669,11 @@ can be used interchangeably to index the same dictionary entry. Update the dictionary with the key/value pairs from *other*, overwriting existing keys. Return ``None``. - :meth:`update` accepts either another dictionary object or an iterable of - key/value pairs (as tuples or other iterables of length two). If keyword - arguments are specified, the dictionary is then updated with those - key/value pairs: ``d.update(red=1, blue=2)``. + :meth:`update` accepts either another object with a ``keys()`` method (in + which case :meth:`~object.__getitem__` is called with every key returned from + the method), or an iterable of key/value pairs (as tuples or other iterables + of length two).
If keyword arguments are specified, the dictionary is then + updated with those key/value pairs: ``d.update(red=1, blue=2)``. .. method:: values() diff --git a/Lib/_collections_abc.py b/Lib/_collections_abc.py index c2edf6c8856c21..06667b7434ccef 100644 --- a/Lib/_collections_abc.py +++ b/Lib/_collections_abc.py @@ -962,7 +962,7 @@ def clear(self): def update(self, other=(), /, **kwds): ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. - If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and has a .keys() method, does: for k in E.keys(): D[k] = E[k] If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v In either case, this is followed by: for k, v in F.items(): D[k] = v ''' From 979c0df7c0adfb744159a5fc184043dc733d8534 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Sat, 12 Oct 2024 00:31:06 +0100 Subject: [PATCH 054/114] gh-124309: fix staggered race on eager tasks (#124847) This patch is entirely by Thomas and Peter Co-authored-by: Thomas Grainger Co-authored-by: Peter Bierma --- Lib/asyncio/staggered.py | 17 +++++-- .../test_asyncio/test_eager_task_factory.py | 46 +++++++++++++++++++ Lib/test/test_asyncio/test_staggered.py | 27 +++++++++++ ...-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst | 1 + 4 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst diff --git a/Lib/asyncio/staggered.py b/Lib/asyncio/staggered.py index 326c6f708944af..0f4df8855a80b9 100644 --- a/Lib/asyncio/staggered.py +++ b/Lib/asyncio/staggered.py @@ -69,7 +69,11 @@ async def staggered_race(coro_fns, delay, *, loop=None): exceptions = [] running_tasks = [] - async def run_one_coro(previous_failed) -> None: + async def run_one_coro(ok_to_start, previous_failed) -> None: + # in eager tasks this waits for the calling task to append this task + # to running_tasks, in regular tasks this wait is a no-op that does + # not yield a future. 
See gh-124309. + await ok_to_start.wait() # Wait for the previous task to finish, or for delay seconds if previous_failed is not None: with contextlib.suppress(exceptions_mod.TimeoutError): @@ -85,8 +89,12 @@ async def run_one_coro(previous_failed) -> None: return # Start task that will run the next coroutine this_failed = locks.Event() - next_task = loop.create_task(run_one_coro(this_failed)) + next_ok_to_start = locks.Event() + next_task = loop.create_task(run_one_coro(next_ok_to_start, this_failed)) running_tasks.append(next_task) + # next_task has been appended to running_tasks so next_task is ok to + # start. + next_ok_to_start.set() assert len(running_tasks) == this_index + 2 # Prepare place to put this coroutine's exceptions if not won exceptions.append(None) @@ -116,8 +124,11 @@ async def run_one_coro(previous_failed) -> None: if i != this_index: t.cancel() - first_task = loop.create_task(run_one_coro(None)) + ok_to_start = locks.Event() + first_task = loop.create_task(run_one_coro(ok_to_start, None)) running_tasks.append(first_task) + # first_task has been appended to running_tasks so first_task is ok to start. 
+ ok_to_start.set() try: # Wait for a growing list of tasks to all finish: poor man's version of # curio's TaskGroup or trio's nursery diff --git a/Lib/test/test_asyncio/test_eager_task_factory.py b/Lib/test/test_asyncio/test_eager_task_factory.py index 0777f39b572486..31d2a00dbb8c9c 100644 --- a/Lib/test/test_asyncio/test_eager_task_factory.py +++ b/Lib/test/test_asyncio/test_eager_task_factory.py @@ -213,6 +213,52 @@ async def run(): self.run_coro(run()) + def test_staggered_race_with_eager_tasks(self): + # See https://github.com/python/cpython/issues/124309 + + async def fail(): + await asyncio.sleep(0) + raise ValueError("no good") + + async def run(): + winner, index, excs = await asyncio.staggered.staggered_race( + [ + lambda: asyncio.sleep(2, result="sleep2"), + lambda: asyncio.sleep(1, result="sleep1"), + lambda: fail() + ], + delay=0.25 + ) + self.assertEqual(winner, 'sleep1') + self.assertEqual(index, 1) + self.assertIsNone(excs[index]) + self.assertIsInstance(excs[0], asyncio.CancelledError) + self.assertIsInstance(excs[2], ValueError) + + self.run_coro(run()) + + def test_staggered_race_with_eager_tasks_no_delay(self): + # See https://github.com/python/cpython/issues/124309 + async def fail(): + raise ValueError("no good") + + async def run(): + winner, index, excs = await asyncio.staggered.staggered_race( + [ + lambda: fail(), + lambda: asyncio.sleep(1, result="sleep1"), + lambda: asyncio.sleep(0, result="sleep0"), + ], + delay=None + ) + self.assertEqual(winner, 'sleep1') + self.assertEqual(index, 1) + self.assertIsNone(excs[index]) + self.assertIsInstance(excs[0], ValueError) + self.assertEqual(len(excs), 2) + + self.run_coro(run()) + class PyEagerTaskFactoryLoopTests(EagerTaskFactoryLoopTests, test_utils.TestCase): Task = tasks._PyTask diff --git a/Lib/test/test_asyncio/test_staggered.py b/Lib/test/test_asyncio/test_staggered.py index e6e32f7dbbbcba..74941f704c4890 100644 --- a/Lib/test/test_asyncio/test_staggered.py +++ 
b/Lib/test/test_asyncio/test_staggered.py @@ -95,3 +95,30 @@ async def coro(index): self.assertEqual(len(excs), 2) self.assertIsInstance(excs[0], ValueError) self.assertIsInstance(excs[1], ValueError) + + + async def test_multiple_winners(self): + event = asyncio.Event() + + async def coro(index): + await event.wait() + return index + + async def do_set(): + event.set() + await asyncio.Event().wait() + + winner, index, excs = await staggered_race( + [ + lambda: coro(0), + lambda: coro(1), + do_set, + ], + delay=0.1, + ) + self.assertIs(winner, 0) + self.assertIs(index, 0) + self.assertEqual(len(excs), 3) + self.assertIsNone(excs[0], None) + self.assertIsInstance(excs[1], asyncio.CancelledError) + self.assertIsInstance(excs[2], asyncio.CancelledError) diff --git a/Misc/NEWS.d/next/Library/2024-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst b/Misc/NEWS.d/next/Library/2024-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst new file mode 100644 index 00000000000000..89610fa44bf743 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst @@ -0,0 +1 @@ +Fixed :exc:`AssertionError` when using :func:`!asyncio.staggered.staggered_race` with :attr:`asyncio.eager_task_factory`. 
From 5a074aab845f82f4a150c27b905dae05c337d381 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Fri, 11 Oct 2024 21:40:33 -0300 Subject: [PATCH 055/114] Doc: Fix a typo in "Function Examples" in the control-flow tutorial (#125338) --- Doc/tutorial/controlflow.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index fd765e58ff2485..9b73ac475c78d5 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -832,7 +832,7 @@ parameters as there is a ``/`` in the function definition:: File "", line 1, in TypeError: pos_only_arg() got some positional-only arguments passed as keyword arguments: 'arg' -The third function ``kwd_only_args`` only allows keyword arguments as indicated +The third function ``kwd_only_arg`` only allows keyword arguments as indicated by a ``*`` in the function definition:: >>> kwd_only_arg(3) From 5d8739e956cd20d3860133b384518a3c5c74e5ae Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sat, 12 Oct 2024 12:40:34 +0530 Subject: [PATCH 056/114] gh-111924: use atomics for interp id refcounting (#125321) --- Include/internal/pycore_interp.h | 6 ++-- Python/pystate.c | 54 ++++---------------------------- 2 files changed, 8 insertions(+), 52 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index d7e584094f7839..36cd71e5a007d5 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -102,9 +102,8 @@ struct _is { PyInterpreterState *next; int64_t id; - int64_t id_refcount; + Py_ssize_t id_refcount; int requires_idref; - PyThread_type_lock id_mutex; #define _PyInterpreterState_WHENCE_NOTSET -1 #define _PyInterpreterState_WHENCE_UNKNOWN 0 @@ -318,8 +317,7 @@ _PyInterpreterState_SetFinalizing(PyInterpreterState *interp, PyThreadState *tst PyAPI_FUNC(int64_t) _PyInterpreterState_ObjectToID(PyObject *); PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpID(int64_t); 
PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpIDObject(PyObject *); -PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); -PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); +PyAPI_FUNC(void) _PyInterpreterState_IDIncref(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IsReady(PyInterpreterState *interp); diff --git a/Python/pystate.c b/Python/pystate.c index 45e79ade7b6035..5d94b7714bd607 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -523,12 +523,6 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) _PyTypes_AfterFork(); - /* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does - * not force the default allocator. */ - if (_PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex) < 0) { - return _PyStatus_ERR("Failed to reinitialize runtime locks"); - } - PyStatus status = gilstate_tss_reinit(runtime); if (_PyStatus_EXCEPTION(status)) { return status; @@ -629,6 +623,8 @@ init_interpreter(PyInterpreterState *interp, assert(id > 0 || (id == 0 && interp == runtime->interpreters.main)); interp->id = id; + interp->id_refcount = 0; + assert(runtime->interpreters.head == interp); assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; @@ -989,10 +985,6 @@ PyInterpreterState_Delete(PyInterpreterState *interp) } HEAD_UNLOCK(runtime); - if (interp->id_mutex != NULL) { - PyThread_free_lock(interp->id_mutex); - } - _Py_qsbr_fini(interp); _PyObject_FiniState(interp); @@ -1031,9 +1023,6 @@ _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime) // the "current" tstate to be set? PyInterpreterState_Clear(interp); // XXX must activate? 
zapthreads(interp); - if (interp->id_mutex != NULL) { - PyThread_free_lock(interp->id_mutex); - } PyInterpreterState *prev_interp = interp; interp = interp->next; free_interpreter(prev_interp); @@ -1247,9 +1236,6 @@ PyInterpreterState_GetID(PyInterpreterState *interp) PyObject * _PyInterpreterState_GetIDObject(PyInterpreterState *interp) { - if (_PyInterpreterState_IDInitref(interp) != 0) { - return NULL; - }; int64_t interpid = interp->id; if (interpid < 0) { return NULL; @@ -1259,50 +1245,22 @@ _PyInterpreterState_GetIDObject(PyInterpreterState *interp) } -int -_PyInterpreterState_IDInitref(PyInterpreterState *interp) -{ - if (interp->id_mutex != NULL) { - return 0; - } - interp->id_mutex = PyThread_allocate_lock(); - if (interp->id_mutex == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "failed to create init interpreter ID mutex"); - return -1; - } - interp->id_refcount = 0; - return 0; -} - -int +void _PyInterpreterState_IDIncref(PyInterpreterState *interp) { - if (_PyInterpreterState_IDInitref(interp) < 0) { - return -1; - } - - PyThread_acquire_lock(interp->id_mutex, WAIT_LOCK); - interp->id_refcount += 1; - PyThread_release_lock(interp->id_mutex); - return 0; + _Py_atomic_add_ssize(&interp->id_refcount, 1); } void _PyInterpreterState_IDDecref(PyInterpreterState *interp) { - assert(interp->id_mutex != NULL); _PyRuntimeState *runtime = interp->runtime; - PyThread_acquire_lock(interp->id_mutex, WAIT_LOCK); - assert(interp->id_refcount != 0); - interp->id_refcount -= 1; - int64_t refcount = interp->id_refcount; - PyThread_release_lock(interp->id_mutex); + Py_ssize_t refcount = _Py_atomic_add_ssize(&interp->id_refcount, -1); - if (refcount == 0 && interp->requires_idref) { + if (refcount == 1 && interp->requires_idref) { PyThreadState *tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_FINI); From 4a943c3251d1b3fdf50cfb9264ae74e5bc845c3c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 12 Oct 2024 09:28:34 +0200 Subject: [PATCH 057/114] 
gh-125196: Use PyUnicodeWriter in parser (#125271) Replace the private _PyUnicodeWriter API with the public PyUnicodeWriter API in _PyPegen_concatenate_strings(). --- Parser/action_helpers.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 24b817c6f8ff27..cb21777f566189 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1615,7 +1615,6 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, } /* build folded list */ - _PyUnicodeWriter writer; current_pos = 0; for (i = 0; i < n_flattened_elements; i++) { expr_ty elem = asdl_seq_GET(flattened, i); @@ -1635,14 +1634,17 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, "abc" u"abc" -> "abcabc" */ PyObject *kind = elem->v.Constant.kind; - _PyUnicodeWriter_Init(&writer); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + return NULL; + } expr_ty last_elem = elem; for (j = i; j < n_flattened_elements; j++) { expr_ty current_elem = asdl_seq_GET(flattened, j); if (current_elem->kind == Constant_kind) { - if (_PyUnicodeWriter_WriteStr( - &writer, current_elem->v.Constant.value)) { - _PyUnicodeWriter_Dealloc(&writer); + if (PyUnicodeWriter_WriteStr(writer, + current_elem->v.Constant.value)) { + PyUnicodeWriter_Discard(writer); return NULL; } last_elem = current_elem; @@ -1652,9 +1654,8 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, } i = j - 1; - PyObject *concat_str = _PyUnicodeWriter_Finish(&writer); + PyObject *concat_str = PyUnicodeWriter_Finish(writer); if (concat_str == NULL) { - _PyUnicodeWriter_Dealloc(&writer); return NULL; } if (_PyArena_AddPyObject(p->arena, concat_str) < 0) { From eb2d268ac7480b5e2b4ffb9a644cad7ac75ae954 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 13:10:50 +0300 Subject: [PATCH 058/114] gh-65865: Raise early errors for invalid help strings in argparse (GH-124899) --- Lib/argparse.py | 31 
++++++++++++++----- Lib/test/test_argparse.py | 31 +++++++++++++++++++ ...4-10-02-16-35-07.gh-issue-65865.S2D4wq.rst | 3 ++ 3 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-02-16-35-07.gh-issue-65865.S2D4wq.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 2d8a7ef343a4ef..208c1827f9aca7 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -588,17 +588,20 @@ def _format_args(self, action, default_metavar): return result def _expand_help(self, action): + help_string = self._get_help_string(action) + if '%' not in help_string: + return help_string params = dict(vars(action), prog=self._prog) for name in list(params): - if params[name] is SUPPRESS: + value = params[name] + if value is SUPPRESS: del params[name] - for name in list(params): - if hasattr(params[name], '__name__'): - params[name] = params[name].__name__ + elif hasattr(value, '__name__'): + params[name] = value.__name__ if params.get('choices') is not None: choices_str = ', '.join([str(c) for c in params['choices']]) params['choices'] = choices_str - return self._get_help_string(action) % params + return help_string % params def _iter_indented_subactions(self, action): try: @@ -1180,9 +1183,13 @@ def add_parser(self, name, *, deprecated=False, **kwargs): help = kwargs.pop('help') choice_action = self._ChoicesPseudoAction(name, aliases, help) self._choices_actions.append(choice_action) + else: + choice_action = None # create the parser and add it to the map parser = self._parser_class(**kwargs) + if choice_action is not None: + parser._check_help(choice_action) self._name_parser_map[name] = parser # make parser available under aliases also @@ -1449,11 +1456,12 @@ def add_argument(self, *args, **kwargs): # raise an error if the metavar does not match the type if hasattr(self, "_get_formatter"): + formatter = self._get_formatter() try: - self._get_formatter()._format_args(action, None) + formatter._format_args(action, None) except TypeError: raise 
ValueError("length of metavar tuple does not match nargs") - + self._check_help(action) return self._add_action(action) def add_argument_group(self, *args, **kwargs): @@ -1635,6 +1643,14 @@ def _handle_conflict_resolve(self, action, conflicting_actions): if not action.option_strings: action.container._remove_action(action) + def _check_help(self, action): + if action.help and hasattr(self, "_get_formatter"): + formatter = self._get_formatter() + try: + formatter._expand_help(action) + except (ValueError, TypeError, KeyError) as exc: + raise ValueError('badly formed help string') from exc + class _ArgumentGroup(_ActionsContainer): @@ -1852,6 +1868,7 @@ def add_subparsers(self, **kwargs): # create the parsers action and add it to the positionals list parsers_class = self._pop_action_class(kwargs, 'parsers') action = parsers_class(option_strings=[], **kwargs) + self._check_help(action) self._subparsers._add_action(action) # return the created parsers action diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 1ebbc21bc1755b..000b810454f584 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2623,6 +2623,29 @@ def test_parser_command_help(self): --foo foo help ''')) + def assert_bad_help(self, context_type, func, *args, **kwargs): + with self.assertRaisesRegex(ValueError, 'badly formed help string') as cm: + func(*args, **kwargs) + self.assertIsInstance(cm.exception.__context__, context_type) + + def test_invalid_subparsers_help(self): + parser = ErrorRaisingArgumentParser(prog='PROG') + self.assert_bad_help(ValueError, parser.add_subparsers, help='%Y-%m-%d') + parser = ErrorRaisingArgumentParser(prog='PROG') + self.assert_bad_help(KeyError, parser.add_subparsers, help='%(spam)s') + parser = ErrorRaisingArgumentParser(prog='PROG') + self.assert_bad_help(TypeError, parser.add_subparsers, help='%(prog)d') + + def test_invalid_subparser_help(self): + parser = ErrorRaisingArgumentParser(prog='PROG') + subparsers = 
parser.add_subparsers() + self.assert_bad_help(ValueError, subparsers.add_parser, '1', + help='%Y-%m-%d') + self.assert_bad_help(KeyError, subparsers.add_parser, '1', + help='%(spam)s') + self.assert_bad_help(TypeError, subparsers.add_parser, '1', + help='%(prog)d') + def test_subparser_title_help(self): parser = ErrorRaisingArgumentParser(prog='PROG', description='main description') @@ -5375,6 +5398,14 @@ def test_invalid_action(self): self.assertValueError('--foo', action="store-true", errmsg='unknown action') + def test_invalid_help(self): + self.assertValueError('--foo', help='%Y-%m-%d', + errmsg='badly formed help string') + self.assertValueError('--foo', help='%(spam)s', + errmsg='badly formed help string') + self.assertValueError('--foo', help='%(prog)d', + errmsg='badly formed help string') + def test_multiple_dest(self): parser = argparse.ArgumentParser() parser.add_argument(dest='foo') diff --git a/Misc/NEWS.d/next/Library/2024-10-02-16-35-07.gh-issue-65865.S2D4wq.rst b/Misc/NEWS.d/next/Library/2024-10-02-16-35-07.gh-issue-65865.S2D4wq.rst new file mode 100644 index 00000000000000..106a8b81140520 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-02-16-35-07.gh-issue-65865.S2D4wq.rst @@ -0,0 +1,3 @@ +:mod:`argparse` now raises early error for invalid ``help`` arguments to +:meth:`~argparse.ArgumentParser.add_argument`, +:meth:`~argparse.ArgumentParser.add_subparsers` and :meth:`!add_parser`. From a6c0c64de0ade400df7995f1e9480b6fc0f863aa Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 14:46:28 +0300 Subject: [PATCH 059/114] gh-59330: Improve error message for dest= for positionals (GH-125215) Also improve the documentation. Specify how dest and metavar are derived from add_argument() positional arguments. 
Co-authored-by: Simon Law --- Doc/library/argparse.rst | 19 +++++++++++++++++++ Lib/argparse.py | 3 ++- Lib/test/test_argparse.py | 3 ++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index d337de87ca8f39..19f832051a9ee8 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -636,6 +636,25 @@ be positional:: usage: PROG [-h] [-f FOO] bar PROG: error: the following arguments are required: bar +By default, argparse automatically handles the internal naming and +display names of arguments, simplifying the process without requiring +additional configuration. +As such, you do not need to specify the dest_ and metavar_ parameters. +The dest_ parameter defaults to the argument name with underscores ``_`` +replacing hyphens ``-``. The metavar_ parameter defaults to the +upper-cased name. For example:: + + >>> parser = argparse.ArgumentParser(prog='PROG') + >>> parser.add_argument('--foo-bar') + >>> parser.parse_args(['--foo-bar', 'FOO-BAR']) + Namespace(foo_bar='FOO-BAR') + >>> parser.print_help() + usage: PROG [-h] [--foo-bar FOO-BAR] + + options: + -h, --help show this help message and exit + --foo-bar FOO-BAR + .. 
_action: diff --git a/Lib/argparse.py b/Lib/argparse.py index 208c1827f9aca7..64dbd7149e769c 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1424,7 +1424,8 @@ def add_argument(self, *args, **kwargs): chars = self.prefix_chars if not args or len(args) == 1 and args[0][0] not in chars: if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') + raise ValueError('dest supplied twice for positional argument,' + ' did you mean metavar?') kwargs = self._get_positional_kwargs(*args, **kwargs) # otherwise, we're adding an optional argument diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 000b810454f584..61ddb5f16cc44f 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -5411,7 +5411,8 @@ def test_multiple_dest(self): parser.add_argument(dest='foo') with self.assertRaises(ValueError) as cm: parser.add_argument('bar', dest='baz') - self.assertIn('dest supplied twice for positional argument', + self.assertIn('dest supplied twice for positional argument,' + ' did you mean metavar?', str(cm.exception)) def test_no_argument_actions(self): From 07c2d15977738165e9dc4248e7edda7c75ecc14b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 14:53:00 +0300 Subject: [PATCH 060/114] gh-85935: Explicitly document the case nargs=0 in argparse (GH-125302) --- Doc/library/argparse.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 19f832051a9ee8..d58c75eef3e739 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -751,6 +751,9 @@ how the command-line arguments should be handled. The supplied actions are: .. versionadded:: 3.8 +Only actions that consume command-line arguments (e.g. ``'store'``, +``'append'`` or ``'extend'``) can be used with positional arguments. + You may also specify an arbitrary action by passing an Action subclass or other object that implements the same interface. 
The ``BooleanOptionalAction`` is available in ``argparse`` and adds support for boolean actions such as @@ -878,6 +881,8 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: If the ``nargs`` keyword argument is not provided, the number of arguments consumed is determined by the action_. Generally this means a single command-line argument will be consumed and a single item (not a list) will be produced. +Actions that do not consume command-line arguments (e.g. +``'store_const'``) set ``nargs=0``. .. _const: From 63cf4e914f879ee28a75c02e867baa7c6047ea2b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 15:15:37 +0300 Subject: [PATCH 061/114] gh-125254: Fix error report about ambiguous option in argparse (GH-125273) This was a regression introduced in gh-58573. It was only tested for the case when the ambiguous option is the last argument in the command line. --- Lib/argparse.py | 2 +- Lib/test/test_argparse.py | 14 ++++++++++++-- .../2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 64dbd7149e769c..cbecb3b753c2b9 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -2019,7 +2019,7 @@ def consume_optional(start_index): if len(option_tuples) > 1: options = ', '.join([option_string for action, option_string, sep, explicit_arg in option_tuples]) - args = {'option': arg_string, 'matches': options} + args = {'option': arg_strings[start_index], 'matches': options} msg = _('ambiguous option: %(option)s could match %(matches)s') raise ArgumentError(None, msg % args) diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 61ddb5f16cc44f..1fc97de78f7f89 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -6730,9 +6730,19 @@ def test_conflicting_mutually_exclusive_args_zero_or_more_with_metavar2(self): 
def test_ambiguous_option(self): self.parser.add_argument('--foobaz') self.parser.add_argument('--fooble', action='store_true') + self.parser.add_argument('--foogle') self.assertRaisesRegex(argparse.ArgumentError, - "ambiguous option: --foob could match --foobaz, --fooble", - self.parser.parse_args, ['--foob']) + "ambiguous option: --foob could match --foobaz, --fooble", + self.parser.parse_args, ['--foob']) + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob=1 could match --foobaz, --fooble$", + self.parser.parse_args, ['--foob=1']) + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob could match --foobaz, --fooble$", + self.parser.parse_args, ['--foob', '1', '--foogle', '2']) + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob=1 could match --foobaz, --fooble$", + self.parser.parse_args, ['--foob=1', '--foogle', '2']) def test_os_error(self): self.parser.add_argument('file') diff --git a/Misc/NEWS.d/next/Library/2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst b/Misc/NEWS.d/next/Library/2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst new file mode 100644 index 00000000000000..abe37fefedc3be --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst @@ -0,0 +1 @@ +Fix a bug where ArgumentError includes the incorrect ambiguous option in :mod:`argparse`. From 9944ad388c457325456152257b977410c4ec3593 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 16:04:17 +0300 Subject: [PATCH 062/114] gh-85935: Check for nargs=0 for positional arguments in argparse (GH-124839) Raise ValueError in add_argument() if either explicit nargs=0 or action that does not consume arguments (like 'store_const' or 'store_true') is specified for positional argument. 
--- Lib/argparse.py | 10 +++++++++- Lib/test/test_argparse.py | 7 +++++-- .../2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst | 4 ++++ 3 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index cbecb3b753c2b9..550415dc93478b 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1441,11 +1441,17 @@ def add_argument(self, *args, **kwargs): kwargs['default'] = self.argument_default # create the action object, and add it to the parser + action_name = kwargs.get('action') action_class = self._pop_action_class(kwargs) if not callable(action_class): raise ValueError('unknown action "%s"' % (action_class,)) action = action_class(**kwargs) + # raise an error if action for positional argument does not + # consume arguments + if not action.option_strings and action.nargs == 0: + raise ValueError(f'action {action_name!r} is not valid for positional arguments') + # raise an error if the action type is not callable type_func = self._registry_get('type', action.type, action.type) if not callable(type_func): @@ -1554,7 +1560,9 @@ def _get_positional_kwargs(self, dest, **kwargs): # mark positional arguments as required if at least one is # always required nargs = kwargs.get('nargs') - if nargs not in [OPTIONAL, ZERO_OR_MORE, REMAINDER, SUPPRESS, 0]: + if nargs == 0: + raise ValueError('nargs for positionals must be != 0') + if nargs not in [OPTIONAL, ZERO_OR_MORE, REMAINDER, SUPPRESS]: kwargs['required'] = True # return the keyword arguments with no option strings diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 1fc97de78f7f89..f52a4b6bdd8aca 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -5424,8 +5424,11 @@ def test_no_argument_actions(self): with self.subTest(attrs=attrs): self.assertTypeError('-x', action=action, **attrs) self.assertTypeError('x', action=action, **attrs) + 
self.assertValueError('x', action=action, + errmsg=f"action '{action}' is not valid for positional arguments") self.assertTypeError('-x', action=action, nargs=0) - self.assertTypeError('x', action=action, nargs=0) + self.assertValueError('x', action=action, nargs=0, + errmsg='nargs for positionals must be != 0') def test_no_argument_no_const_actions(self): # options with zero arguments @@ -5445,7 +5448,7 @@ def test_more_than_one_argument_actions(self): self.assertValueError('-x', nargs=0, action=action, errmsg=f'nargs for {action_name} actions must be != 0') self.assertValueError('spam', nargs=0, action=action, - errmsg=f'nargs for {action_name} actions must be != 0') + errmsg='nargs for positionals must be != 0') # const is disallowed with non-optional arguments for nargs in [1, '*', '+']: diff --git a/Misc/NEWS.d/next/Library/2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst b/Misc/NEWS.d/next/Library/2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst new file mode 100644 index 00000000000000..553f206bf26337 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst @@ -0,0 +1,4 @@ +:meth:`argparse.ArgumentParser.add_argument` now raises an exception if +an :ref:`action` that does not consume arguments (like 'store_const' or +'store_true') or explicit ``nargs=0`` are specified for positional +arguments. From dcd58c50844dae0d83517e88518a677914ea594b Mon Sep 17 00:00:00 2001 From: "Bernhard M. Wiedemann" Date: Sat, 12 Oct 2024 19:18:48 +0200 Subject: [PATCH 063/114] gh-125260: Change the default ``gzip.compress()`` mtime to 0 (#125261) This follows GNU gzip, which defaults to using 0 as the mtime for compressing stdin, where no file mtime is involved. This makes the output of gzip.compress() deterministic by default, greatly helping reproducible builds. 
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/gzip.rst | 9 +++++++-- Lib/gzip.py | 6 +++--- Lib/test/test_gzip.py | 11 +++++++++++ .../2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst | 2 ++ 4 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 6b6e158f6eba2c..f24e73517e5767 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -184,11 +184,12 @@ The module defines the following items: attribute instead. -.. function:: compress(data, compresslevel=9, *, mtime=None) +.. function:: compress(data, compresslevel=9, *, mtime=0) Compress the *data*, returning a :class:`bytes` object containing the compressed data. *compresslevel* and *mtime* have the same meaning as in - the :class:`GzipFile` constructor above. + the :class:`GzipFile` constructor above, + but *mtime* defaults to 0 for reproducible output. .. versionadded:: 3.2 .. versionchanged:: 3.8 @@ -203,6 +204,10 @@ The module defines the following items: .. versionchanged:: 3.13 The gzip header OS byte is guaranteed to be set to 255 when this function is used as was the case in 3.10 and earlier. + .. versionchanged:: 3.14 + The *mtime* parameter now defaults to 0 for reproducible output. + For the previous behaviour of using the current time, + pass ``None`` to *mtime*. .. function:: decompress(data) diff --git a/Lib/gzip.py b/Lib/gzip.py index ba753ce3050dd8..1a3c82ce7e0711 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -580,12 +580,12 @@ def _rewind(self): self._new_member = True -def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): +def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0): """Compress data in one shot and return the compressed string. compresslevel sets the compression level in range of 0-9. - mtime can be used to set the modification time. 
The modification time is - set to the current time by default. + mtime can be used to set the modification time. + The modification time is set to 0 by default, for reproducibility. """ # Wbits=31 automatically includes a gzip header and trailer. gzip_data = zlib.compress(data, level=compresslevel, wbits=31) diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index ae384c3849d49e..bf6e1703db8451 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -713,6 +713,17 @@ def test_compress_mtime(self): f.read(1) # to set mtime attribute self.assertEqual(f.mtime, mtime) + def test_compress_mtime_default(self): + # test for gh-125260 + datac = gzip.compress(data1, mtime=0) + datac2 = gzip.compress(data1) + self.assertEqual(datac, datac2) + datac3 = gzip.compress(data1, mtime=None) + self.assertNotEqual(datac, datac3) + with gzip.GzipFile(fileobj=io.BytesIO(datac3), mode="rb") as f: + f.read(1) # to set mtime attribute + self.assertGreater(f.mtime, 1) + def test_compress_correct_level(self): for mtime in (0, 42): with self.subTest(mtime=mtime): diff --git a/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst b/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst new file mode 100644 index 00000000000000..fab524ea0185c2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst @@ -0,0 +1,2 @@ +The :func:`gzip.compress` *mtime* parameter now defaults to 0 for reproducible output. +Patch by Bernhard M. Wiedemann and Adam Turner. From c05f9dde8a12dfd63d3ade93da616042df2dc925 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 20:46:21 +0300 Subject: [PATCH 064/114] gh-53203: Fix strptime() for %c and %x formats on many locales (GH-124946) In some locales (like French or Hebrew) the full or abbreviated names of the default month and weekday used in __calc_date_time can be part of other name or constant part of the %c format. 
The month name can also match %m with constant suffix (like in Japanese). So the code failed to correctly distinguish formats %a, %A, %b, %B and %m. Cycle through all months and all days of the week to find the variable part and distinguish %a from %A and %b from %B or %m. Fixed locales for the following languages: Arabic, Bislama, Breton, Bodo, Kashubian, Chuvash, Estonian, French, Irish, Ge'ez, Gujarati, Manx Gaelic, Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, Japanese, Kannada, Korean, Marathi, Malay, Norwegian, Nynorsk, Punjabi, Rajasthani, Tok Pisin, Yoruba, Yue Chinese, Yau/Nungon and Chinese. Co-authored-by: Eli Bendersky --- Lib/_strptime.py | 127 +++++++++++++++--- Lib/test/test_strptime.py | 36 +++-- ...4-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst | 5 + 3 files changed, 134 insertions(+), 34 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index a3f8bb544d518d..89adc174e5ad30 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -28,6 +28,18 @@ def _getlang(): # Figure out what the current language is set to. return locale.getlocale(locale.LC_TIME) +def _findall(haystack, needle): + # Find all positions of needle in haystack. + if not needle: + return + i = 0 + while True: + i = haystack.find(needle, i) + if i < 0: + break + yield i + i += len(needle) + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -102,7 +114,8 @@ def __calc_am_pm(self): am_pm = [] for hour in (1, 22): time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0)) - am_pm.append(time.strftime("%p", time_tuple).lower()) + # br_FR has AM/PM info (' ',' '). + am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm def __calc_date_time(self): @@ -114,42 +127,114 @@ def __calc_date_time(self): # values within the format string is very important; it eliminates # possible ambiguity for what something represents. 
time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) - date_time = [None, None, None] - date_time[0] = time.strftime("%c", time_tuple).lower() - date_time[1] = time.strftime("%x", time_tuple).lower() - date_time[2] = time.strftime("%X", time_tuple).lower() - replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], '%B'), (self.a_weekday[2], '%a'), - (self.a_month[3], '%b'), (self.am_pm[1], '%p'), + time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) + replacement_pairs = [ ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I')] - replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) - for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): - current_format = date_time[offset] - for old, new in replacement_pairs: + date_time = [] + for directive in ('%c', '%x', '%X'): + current_format = time.strftime(directive, time_tuple).lower() + current_format = current_format.replace('%', '%%') + # The month and the day of the week formats are treated specially + # because of a possible ambiguity in some locales where the full + # and abbreviated names are equal or names of different types + # are equal. See doc of __find_month_format for more details. + lst, fmt = self.__find_weekday_format(directive) + if lst: + current_format = current_format.replace(lst[2], fmt, 1) + lst, fmt = self.__find_month_format(directive) + if lst: + current_format = current_format.replace(lst[3], fmt, 1) + if self.am_pm[1]: # Must deal with possible lack of locale info # manifesting itself as the empty string (e.g., Swedish's # lack of AM/PM info) or a platform returning a tuple of empty # strings (e.g., MacOS 9 having timezone as ('','')). 
- if old: - current_format = current_format.replace(old, new) + current_format = current_format.replace(self.am_pm[1], '%p') + for tz_values in self.timezone: + for tz in tz_values: + if tz: + current_format = current_format.replace(tz, "%Z") + for old, new in replacement_pairs: + current_format = current_format.replace(old, new) # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since # 2005-01-03 occurs before the first Monday of the year. Otherwise # %U is used. - time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0)) - if '00' in time.strftime(directive, time_tuple): + if '00' in time.strftime(directive, time_tuple2): U_W = '%W' else: U_W = '%U' - date_time[offset] = current_format.replace('11', U_W) + current_format = current_format.replace('11', U_W) + date_time.append(current_format) self.LC_date_time = date_time[0] self.LC_date = date_time[1] self.LC_time = date_time[2] + def __find_month_format(self, directive): + """Find the month format appropriate for the current locale. + + In some locales (for example French and Hebrew), the default month + used in __calc_date_time has the same name in full and abbreviated + form. Also, the month name can by accident match another part of the + representation: the day of the week name (for example in Morisyen) + or the month number (for example in Japanese). Thus, cycle months + of the year and find all positions that match the month name for + each month. If no common positions are found, the representation + does not use the month name. 
+ """ + full_indices = abbr_indices = None + for m in range(1, 13): + time_tuple = time.struct_time((1999, m, 17, 22, 44, 55, 2, 76, 0)) + datetime = time.strftime(directive, time_tuple).lower() + indices = set(_findall(datetime, self.f_month[m])) + if full_indices is None: + full_indices = indices + else: + full_indices &= indices + indices = set(_findall(datetime, self.a_month[m])) + if abbr_indices is None: + abbr_indices = indices + else: + abbr_indices &= indices + if not full_indices and not abbr_indices: + return None, None + if full_indices: + return self.f_month, '%B' + if abbr_indices: + return self.a_month, '%b' + return None, None + + def __find_weekday_format(self, directive): + """Find the day of the week format appropriate for the current locale. + + Similar to __find_month_format(). + """ + full_indices = abbr_indices = None + for wd in range(7): + time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, wd, 76, 0)) + datetime = time.strftime(directive, time_tuple).lower() + indices = set(_findall(datetime, self.f_weekday[wd])) + if full_indices is None: + full_indices = indices + else: + full_indices &= indices + if self.f_weekday[wd] != self.a_weekday[wd]: + indices = set(_findall(datetime, self.a_weekday[wd])) + if abbr_indices is None: + abbr_indices = indices + else: + abbr_indices &= indices + if not full_indices and not abbr_indices: + return None, None + if full_indices: + return self.f_weekday, '%A' + if abbr_indices: + return self.a_weekday, '%a' + return None, None + def __calc_timezone(self): # Set self.timezone by using time.tzname. 
# Do not worry about possibility of time.tzname[0] == time.tzname[1] @@ -187,7 +272,7 @@ def __init__(self, locale_time=None): 'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P<f>[0-9]{1,6})", 'H': r"(?P<H>2[0-3]|[0-1]\d|\d)", - 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])", + 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P<G>\d\d\d\d)", 'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])", @@ -349,8 +434,8 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): _regex_cache[format] = format_regex found = format_regex.match(data_string) if not found: - raise ValueError("time data %r does not match format %r" % - (data_string, format)) + raise ValueError("time data %r does not match format %r :: /%s/" % + (data_string, format, format_regex.pattern)) if len(data_string) != found.end(): raise ValueError("unconverted data remains: %s" % data_string[found.end():]) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 37f6b08db28b3e..79f48dfe44abde 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -5,6 +5,7 @@ import locale import re import os +import platform import sys from test import support from test.support import warnings_helper @@ -13,6 +14,13 @@ import _strptime +libc_ver = platform.libc_ver() +if libc_ver[0] == 'glibc': + glibc_ver = tuple(map(int, libc_ver[1].split('.'))) +else: + glibc_ver = None + + class getlang_Tests(unittest.TestCase): """Test _getlang""" def test_basic(self): @@ -478,16 +486,16 @@ def test_bad_timezone(self): # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # - # BUG: Generates invalid regexp for br_FR, csb_PL, Arabic. - # BUG: Generates regexp that does not match the current date and time - # for fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG. 
# BUG: Generates regexp that does not match the current date and time - # for fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG, - # fr_FR, ja_JP, he_IL, ko_KR, zh_CN, etc. - @run_with_locales('LC_TIME', 'C', 'en_US', 'de_DE', - 'eu_ES', 'mfe_MU') + # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') def test_date_time_locale(self): # Test %c directive + loc = locale.getlocale(locale.LC_TIME)[0] + if glibc_ver and glibc_ver < (2, 31) and loc == 'br_FR': + self.skipTest('%c in locale br_FR does not include time') now = time.time() self.roundtrip('%c', slice(0, 6), time.localtime(now)) # 1 hour 20 minutes 30 seconds ago @@ -505,7 +513,9 @@ def test_date_time_locale(self): # NB: Dates before 1969 do not roundtrip on some locales: # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR. - @run_with_locales('LC_TIME', 'C', 'en_US', 'de_DE', 'ja_JP') + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG', + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') def test_date_time_locale2(self): # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) @@ -513,10 +523,9 @@ def test_date_time_locale2(self): # NB: Does not roundtrip because use non-Gregorian calendar: # lo_LA, thai, th_TH. # BUG: Generates regexp that does not match the current date - # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM, - # Arabic, ja_JP, ko_KR, zh_CN, etc. - @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', - 'he_IL', 'eu_ES') + # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. 
+ @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'he_IL', 'eu_ES', 'ar_AE') def test_date_locale(self): # Test %x directive now = time.time() @@ -535,7 +544,8 @@ def test_date_locale(self): support.is_emscripten or support.is_wasi, "musl libc issue on Emscripten, bpo-46390" ) - @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'eu_ES', 'ar_AE') def test_date_locale2(self): # Test %x directive self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) diff --git a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst new file mode 100644 index 00000000000000..6895cffcf545fd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst @@ -0,0 +1,5 @@ +Fix :func:`time.strptime` for ``%c`` and ``%x`` formats in many locales: +Arabic, Bislama, Breton, Bodo, Kashubian, Chuvash, Estonian, French, Irish, +Ge'ez, Gujarati, Manx Gaelic, Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, +Japanese, Kannada, Korean, Marathi, Malay, Norwegian, Nynorsk, Punjabi, +Rajasthani, Tok Pisin, Yoruba, Yue Chinese, Yau/Nungon and Chinese. 
From 62d5a53a0b2a5262a86984cfe9817aeb653ebfca Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sat, 12 Oct 2024 15:33:00 -0400 Subject: [PATCH 065/114] gh-124872: Move PyThreadState to first argument for consistency (#124774) --- Python/context.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Python/context.c b/Python/context.c index 36e2677c398f59..9b742136b0726d 100644 --- a/Python/context.c +++ b/Python/context.c @@ -112,7 +112,8 @@ context_event_name(PyContextEvent event) { Py_UNREACHABLE(); } -static void notify_context_watchers(PyContextEvent event, PyContext *ctx, PyThreadState *ts) +static void +notify_context_watchers(PyThreadState *ts, PyContextEvent event, PyContext *ctx) { assert(Py_REFCNT(ctx) > 0); PyInterpreterState *interp = ts->interp; @@ -192,7 +193,7 @@ _PyContext_Enter(PyThreadState *ts, PyObject *octx) ts->context = Py_NewRef(ctx); ts->context_ver++; - notify_context_watchers(Py_CONTEXT_EVENT_ENTER, ctx, ts); + notify_context_watchers(ts, Py_CONTEXT_EVENT_ENTER, ctx); return 0; } @@ -226,7 +227,7 @@ _PyContext_Exit(PyThreadState *ts, PyObject *octx) return -1; } - notify_context_watchers(Py_CONTEXT_EVENT_EXIT, ctx, ts); + notify_context_watchers(ts, Py_CONTEXT_EVENT_EXIT, ctx); Py_SETREF(ts->context, (PyObject *)ctx->ctx_prev); ts->context_ver++; From 4a2282b0679bbf7b7fbd36aae1b1565145238961 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Sat, 12 Oct 2024 15:21:55 -0500 Subject: [PATCH 066/114] Prefer "similar" over "equivalent" in tutorial (#125343) In the datastructures tutorial doc, some operations are described as "equivalent to" others. This has led to some user-confusion -- at least in the Discourse forums -- about cases in which the operations differ. This change doesn't systematically eliminate the word "equivalent" from the tutorial. It just substitutes "similar to" in several cases in which "equivalent to" could mislead users into expecting exact equivalence. 
--- Doc/tutorial/datastructures.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index 73f17adeea72de..31941bc112a135 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -19,13 +19,13 @@ objects: .. method:: list.append(x) :noindex: - Add an item to the end of the list. Equivalent to ``a[len(a):] = [x]``. + Add an item to the end of the list. Similar to ``a[len(a):] = [x]``. .. method:: list.extend(iterable) :noindex: - Extend the list by appending all the items from the iterable. Equivalent to + Extend the list by appending all the items from the iterable. Similar to ``a[len(a):] = iterable``. @@ -56,7 +56,7 @@ objects: .. method:: list.clear() :noindex: - Remove all items from the list. Equivalent to ``del a[:]``. + Remove all items from the list. Similar to ``del a[:]``. .. method:: list.index(x[, start[, end]]) @@ -93,7 +93,7 @@ objects: .. method:: list.copy() :noindex: - Return a shallow copy of the list. Equivalent to ``a[:]``. + Return a shallow copy of the list. Similar to ``a[:]``. An example that uses most of the list methods:: From fa52b82c91a8e1a0971bd5fef656473ec93f41e3 Mon Sep 17 00:00:00 2001 From: "Ghorban M. Tavakoly" <58617996+galmyk@users.noreply.github.com> Date: Sun, 13 Oct 2024 00:08:13 +0330 Subject: [PATCH 067/114] gh-125289: Update sample code in asyncio-task.rst (GH-125292) * Update sample code in asyncio-task.rst This will change **coroutines** sample code in the **Awaitables** section and make the example clearer. 
* Update Doc/library/asyncio-task.rst Revert the added print Co-authored-by: Carol Willing * Update Doc/library/asyncio-task.rst Co-authored-by: Carol Willing --------- Co-authored-by: Carol Willing --- Doc/library/asyncio-task.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/asyncio-task.rst b/Doc/library/asyncio-task.rst index 4716a3f9c8ac79..f27e858cf420f4 100644 --- a/Doc/library/asyncio-task.rst +++ b/Doc/library/asyncio-task.rst @@ -158,7 +158,7 @@ other coroutines:: # Nothing happens if we just call "nested()". # A coroutine object is created but not awaited, # so it *won't run at all*. - nested() + nested() # will raise a "RuntimeWarning". # Let's do it differently now and await it: print(await nested()) # will print "42". From 330c527299a5380f39c658bfa9321706cabc445d Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sat, 12 Oct 2024 16:57:27 -0400 Subject: [PATCH 068/114] gh-124872: Change PyContext_WatchCallback to take PyObject (#124737) The PyContext struct is not intended to be public, and users of the API don't need anything more specific than PyObject. Also see gh-78943. --- Doc/c-api/contextvars.rst | 2 +- Include/cpython/context.h | 2 +- Modules/_testcapi/watchers.c | 10 +++++----- Python/context.c | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/c-api/contextvars.rst b/Doc/c-api/contextvars.rst index 59e74ba1ac7022..8eba54a80dc80d 100644 --- a/Doc/c-api/contextvars.rst +++ b/Doc/c-api/contextvars.rst @@ -136,7 +136,7 @@ Context object management functions: .. versionadded:: 3.14 -.. c:type:: int (*PyContext_WatchCallback)(PyContextEvent event, PyContext* ctx) +.. c:type:: int (*PyContext_WatchCallback)(PyContextEvent event, PyObject *obj) Context object watcher callback function. The object passed to the callback is event-specific; see :c:type:`PyContextEvent` for details. 
diff --git a/Include/cpython/context.h b/Include/cpython/context.h index d722b4d93134f7..3c9be7873b9399 100644 --- a/Include/cpython/context.h +++ b/Include/cpython/context.h @@ -52,7 +52,7 @@ typedef enum { * if the callback returns with an exception set, it must return -1. Otherwise * it should return 0 */ -typedef int (*PyContext_WatchCallback)(PyContextEvent, PyContext *); +typedef int (*PyContext_WatchCallback)(PyContextEvent, PyObject *); /* * Register a per-interpreter callback that will be invoked for context object diff --git a/Modules/_testcapi/watchers.c b/Modules/_testcapi/watchers.c index 689863d098ad8a..b4233d07134aea 100644 --- a/Modules/_testcapi/watchers.c +++ b/Modules/_testcapi/watchers.c @@ -630,7 +630,7 @@ static int num_context_object_enter_events[NUM_CONTEXT_WATCHERS] = {0, 0}; static int num_context_object_exit_events[NUM_CONTEXT_WATCHERS] = {0, 0}; static int -handle_context_watcher_event(int which_watcher, PyContextEvent event, PyContext *ctx) { +handle_context_watcher_event(int which_watcher, PyContextEvent event, PyObject *ctx) { if (event == Py_CONTEXT_EVENT_ENTER) { num_context_object_enter_events[which_watcher]++; } @@ -644,22 +644,22 @@ handle_context_watcher_event(int which_watcher, PyContextEvent event, PyContext } static int -first_context_watcher_callback(PyContextEvent event, PyContext *ctx) { +first_context_watcher_callback(PyContextEvent event, PyObject *ctx) { return handle_context_watcher_event(0, event, ctx); } static int -second_context_watcher_callback(PyContextEvent event, PyContext *ctx) { +second_context_watcher_callback(PyContextEvent event, PyObject *ctx) { return handle_context_watcher_event(1, event, ctx); } static int -noop_context_event_handler(PyContextEvent event, PyContext *ctx) { +noop_context_event_handler(PyContextEvent event, PyObject *ctx) { return 0; } static int -error_context_event_handler(PyContextEvent event, PyContext *ctx) { +error_context_event_handler(PyContextEvent event, PyObject *ctx) { 
PyErr_SetString(PyExc_RuntimeError, "boom!"); return -1; } diff --git a/Python/context.c b/Python/context.c index 9b742136b0726d..8bc487a33c890b 100644 --- a/Python/context.c +++ b/Python/context.c @@ -113,7 +113,7 @@ context_event_name(PyContextEvent event) { } static void -notify_context_watchers(PyThreadState *ts, PyContextEvent event, PyContext *ctx) +notify_context_watchers(PyThreadState *ts, PyContextEvent event, PyObject *ctx) { assert(Py_REFCNT(ctx) > 0); PyInterpreterState *interp = ts->interp; @@ -193,7 +193,7 @@ _PyContext_Enter(PyThreadState *ts, PyObject *octx) ts->context = Py_NewRef(ctx); ts->context_ver++; - notify_context_watchers(ts, Py_CONTEXT_EVENT_ENTER, ctx); + notify_context_watchers(ts, Py_CONTEXT_EVENT_ENTER, octx); return 0; } @@ -227,7 +227,7 @@ _PyContext_Exit(PyThreadState *ts, PyObject *octx) return -1; } - notify_context_watchers(ts, Py_CONTEXT_EVENT_EXIT, ctx); + notify_context_watchers(ts, Py_CONTEXT_EVENT_EXIT, octx); Py_SETREF(ts->context, (PyObject *)ctx->ctx_prev); ts->context_ver++; From a8fa4ad9e9f7aa0cba8b23af2c583d17bb1d1847 Mon Sep 17 00:00:00 2001 From: Ruoyu Zhong Date: Sun, 13 Oct 2024 08:53:28 +0800 Subject: [PATCH 069/114] Doc: Fix suggested usage of `-X gil=0` in the glossary (#125366) Currently, the "global interpreter lock" entry in the glossary mentions that `-X gil 0` can be used to disable the GIL [1]. However, this is invalid; the correct usage should be `-X gil=0`. 
$ python -X gil 0 -c 'print("Hello, world")' Fatal Python error: config_read_gil: PYTHON_GIL / -X gil must be "0" or "1" Python runtime state: preinitialized $ python -X gil=0 -c 'print("Hello, world")' Hello, world [1]: https://docs.python.org/3/using/cmdline.html#cmdoption-X Signed-off-by: Ruoyu Zhong --- Doc/glossary.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 1d407732eef576..f67f3ecad0bc40 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -617,7 +617,7 @@ Glossary As of Python 3.13, the GIL can be disabled using the :option:`--disable-gil` build configuration. After building Python with this option, code must be - run with :option:`-X gil 0 <-X>` or after setting the :envvar:`PYTHON_GIL=0 <PYTHON_GIL>` + run with :option:`-X gil=0 <-X>` or after setting the :envvar:`PYTHON_GIL=0 <PYTHON_GIL>` environment variable. This feature enables improved performance for multi-threaded applications and makes it easier to use multi-core CPUs efficiently. For more details, see :pep:`703`. From 82bcaf15890cf85b76b4f62d2dd1710bb49c3ed1 Mon Sep 17 00:00:00 2001 From: Andrew Athan <24279435+aathan@users.noreply.github.com> Date: Sun, 13 Oct 2024 00:22:05 -0700 Subject: [PATCH 070/114] Trivial change: Update comments in activate about what running hash -r does (GH-125385) Update comments about what running hash -r does The old comment said "hash -r" forgets "past commands." However, the documentation for "hash" states that it forgets past locations. The old comment was, in my opinion, confusing. This is because it could be interpreted to mean it does something to the command history (HISTORY/HISTFILE etc) vs the cache of locations. 
--- Lib/venv/scripts/common/activate | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/venv/scripts/common/activate b/Lib/venv/scripts/common/activate index cbd4873f012246..4593799b7e9b0e 100644 --- a/Lib/venv/scripts/common/activate +++ b/Lib/venv/scripts/common/activate @@ -14,8 +14,9 @@ deactivate () { unset _OLD_VIRTUAL_PYTHONHOME fi - # Call hash to forget past commands. Without forgetting - # past commands the $PATH changes we made may not be respected + # Call hash to forget past locations. Without forgetting + # past locations the $PATH changes we made may not be respected. + # See "man bash" for more details. hash is usually a builtin of your shell hash -r 2> /dev/null if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then From 4197a796ecf3a751ad7245b8d4f980d6d444b614 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 13 Oct 2024 09:38:47 +0200 Subject: [PATCH 071/114] gh-86673: Loosen test_ttk.test_identify() requirements (#125335) In aeca373b3 (PR gh-12011, issue gh-71500), test_identify() was changed to expect different results on Darwin. Ned's fix was later adjusted by e52f9bee8. This workaround is only needed for some variants of Tk/Tcl on macOS, so we now allow both the workaround and the generic results for these tests. --- Lib/test/test_ttk/test_widgets.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_ttk/test_widgets.py b/Lib/test/test_ttk/test_widgets.py index 88740b18864006..10bec33be617a1 100644 --- a/Lib/test/test_ttk/test_widgets.py +++ b/Lib/test/test_ttk/test_widgets.py @@ -336,7 +336,8 @@ class EntryTest(AbstractWidgetTest, unittest.TestCase): 'show', 'state', 'style', 'takefocus', 'textvariable', 'validate', 'validatecommand', 'width', 'xscrollcommand', ) - IDENTIFY_AS = 'Entry.field' if sys.platform == 'darwin' else 'textarea' + # bpo-27313: macOS Tk/Tcl may or may not report 'Entry.field'. 
+ IDENTIFY_AS = {'Entry.field', 'textarea'} def setUp(self): super().setUp() @@ -373,8 +374,7 @@ def test_identify(self): self.entry.pack() self.entry.update() - # bpo-27313: macOS Cocoa widget differs from X, allow either - self.assertEqual(self.entry.identify(5, 5), self.IDENTIFY_AS) + self.assertIn(self.entry.identify(5, 5), self.IDENTIFY_AS) self.assertEqual(self.entry.identify(-1, -1), "") self.assertRaises(tkinter.TclError, self.entry.identify, None, 5) @@ -461,7 +461,7 @@ class ComboboxTest(EntryTest, unittest.TestCase): 'validate', 'validatecommand', 'values', 'width', 'xscrollcommand', ) - IDENTIFY_AS = 'Combobox.button' if sys.platform == 'darwin' else 'textarea' + IDENTIFY_AS = {'Combobox.button', 'textarea'} def setUp(self): super().setUp() @@ -1204,7 +1204,7 @@ class SpinboxTest(EntryTest, unittest.TestCase): 'takefocus', 'textvariable', 'to', 'validate', 'validatecommand', 'values', 'width', 'wrap', 'xscrollcommand', ) - IDENTIFY_AS = 'Spinbox.field' if sys.platform == 'darwin' else 'textarea' + IDENTIFY_AS = {'Spinbox.field', 'textarea'} def setUp(self): super().setUp() From ce740d46246b28bb675ba9d62214b59be9b8411e Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 13 Oct 2024 10:22:31 +0200 Subject: [PATCH 072/114] gh-61698: Use launchctl to detect macOS window manager in tests (#118390) --- Lib/test/support/__init__.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 72ce5dacd1be4c..d768bead7120c7 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -253,22 +253,16 @@ class USEROBJECTFLAGS(ctypes.Structure): # process not running under the same user id as the current console # user. To avoid that, raise an exception if the window manager # connection is not available. 
- from ctypes import cdll, c_int, pointer, Structure - from ctypes.util import find_library - - app_services = cdll.LoadLibrary(find_library("ApplicationServices")) - - if app_services.CGMainDisplayID() == 0: - reason = "gui tests cannot run without OS X window manager" + import subprocess + try: + rc = subprocess.run(["launchctl", "managername"], + capture_output=True, check=True) + managername = rc.stdout.decode("utf-8").strip() + except subprocess.CalledProcessError: + reason = "unable to detect macOS launchd job manager" else: - class ProcessSerialNumber(Structure): - _fields_ = [("highLongOfPSN", c_int), - ("lowLongOfPSN", c_int)] - psn = ProcessSerialNumber() - psn_p = pointer(psn) - if ( (app_services.GetCurrentProcess(psn_p) < 0) or - (app_services.SetFrontProcess(psn_p) < 0) ): - reason = "cannot run without OS X gui process" + if managername != "Aqua": + reason = f"{managername=} -- can only run in a macOS GUI session" # check on every platform whether tkinter can actually do anything if not reason: From 283ea5f3b2b6a18605b8598a979afe263b0f21ce Mon Sep 17 00:00:00 2001 From: Wulian Date: Sun, 13 Oct 2024 19:10:59 +0800 Subject: [PATCH 073/114] gh-125383: Update `fib` function comment for accuracy (#125386) `Doc/tutorial/controlflow.rst`: fix comment for `fib` function --- Doc/tutorial/controlflow.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index 9b73ac475c78d5..b830ce94ba4f47 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -461,8 +461,8 @@ Defining Functions We can create a function that writes the Fibonacci series to an arbitrary boundary:: - >>> def fib(n): # write Fibonacci series up to n - ... """Print a Fibonacci series up to n.""" + >>> def fib(n): # write Fibonacci series less than n + ... """Print a Fibonacci series less than n.""" ... a, b = 0, 1 ... while a < n: ... 
print(a, end=' ') From 022c50d190e14affb952a244c4eb6e4a644ad0c9 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sun, 13 Oct 2024 20:59:07 +0530 Subject: [PATCH 074/114] fix comment in _PyMutex_TryUnlock (#125319) --- Include/internal/pycore_lock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index e6da083b807ce5..cd7deda00c7bee 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -64,8 +64,8 @@ PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags) } } -// Unlock a mutex, returns 0 if the mutex is not locked (used for improved -// error messages). +// Unlock a mutex, returns -1 if the mutex is not locked (used for improved +// error messages) otherwise returns 0. extern int _PyMutex_TryUnlock(PyMutex *m); From 08489325d1cd94eba97c5f5f8cac49521fd0b0d7 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sun, 13 Oct 2024 20:59:41 +0530 Subject: [PATCH 075/114] gh-125161: return non zero value in pthread_self on wasi (#125303) --- Python/thread_pthread_stubs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/thread_pthread_stubs.h b/Python/thread_pthread_stubs.h index 4741e594e52e65..458f8fc5951720 100644 --- a/Python/thread_pthread_stubs.h +++ b/Python/thread_pthread_stubs.h @@ -1,5 +1,9 @@ #include "cpython/pthread_stubs.h" +typedef struct py_stub_tls_entry py_tls_entry; + +#define py_tls_entries (_PyRuntime.threads.stubs.tls_entries) + // mutex int pthread_mutex_init(pthread_mutex_t *restrict mutex, @@ -105,7 +109,7 @@ pthread_join(pthread_t thread, void** value_ptr) PyAPI_FUNC(pthread_t) pthread_self(void) { - return 0; + return (pthread_t)(uintptr_t)&py_tls_entries; } int @@ -134,10 +138,6 @@ pthread_attr_destroy(pthread_attr_t *attr) } -typedef struct py_stub_tls_entry py_tls_entry; - -#define py_tls_entries (_PyRuntime.threads.stubs.tls_entries) - int pthread_key_create(pthread_key_t *key, void 
(*destr_function)(void *)) { From cd0f9d111a040ad863c680e9f464419640c8c3fd Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sun, 13 Oct 2024 21:05:05 +0530 Subject: [PATCH 076/114] gh-89967: make WeakKeyDictionary and WeakValueDictionary thread safe (#125325) Make `WeakKeyDictionary` and `WeakValueDictionary` thread safe by copying the underlying dict before iterating over it. --- Lib/_weakrefset.py | 25 --- Lib/weakref.py | 198 +++++------------- ...4-10-11-16-19-46.gh-issue-89967.vhWUOR.rst | 1 + 3 files changed, 50 insertions(+), 174 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-11-16-19-46.gh-issue-89967.vhWUOR.rst diff --git a/Lib/_weakrefset.py b/Lib/_weakrefset.py index 2071755d71dfc8..d1c7fcaeec9821 100644 --- a/Lib/_weakrefset.py +++ b/Lib/_weakrefset.py @@ -8,31 +8,6 @@ __all__ = ['WeakSet'] -class _IterationGuard: - # This context manager registers itself in the current iterators of the - # weak container, such as to delay all removals until the context manager - # exits. - # This technique should be relatively thread-safe (since sets are). - - def __init__(self, weakcontainer): - # Don't create cycles - self.weakcontainer = ref(weakcontainer) - - def __enter__(self): - w = self.weakcontainer() - if w is not None: - w._iterating.add(self) - return self - - def __exit__(self, e, t, b): - w = self.weakcontainer() - if w is not None: - s = w._iterating - s.remove(self) - if not s: - w._commit_removals() - - class WeakSet: def __init__(self, data=None): self.data = set() diff --git a/Lib/weakref.py b/Lib/weakref.py index 25b70927e29c31..94e4278143c987 100644 --- a/Lib/weakref.py +++ b/Lib/weakref.py @@ -19,7 +19,7 @@ ReferenceType, _remove_dead_weakref) -from _weakrefset import WeakSet, _IterationGuard +from _weakrefset import WeakSet import _collections_abc # Import after _weakref to avoid circular import. 
import sys @@ -105,34 +105,14 @@ def __init__(self, other=(), /, **kw): def remove(wr, selfref=ref(self), _atomic_removal=_remove_dead_weakref): self = selfref() if self is not None: - if self._iterating: - self._pending_removals.append(wr.key) - else: - # Atomic removal is necessary since this function - # can be called asynchronously by the GC - _atomic_removal(self.data, wr.key) + # Atomic removal is necessary since this function + # can be called asynchronously by the GC + _atomic_removal(self.data, wr.key) self._remove = remove - # A list of keys to be removed - self._pending_removals = [] - self._iterating = set() self.data = {} self.update(other, **kw) - def _commit_removals(self, _atomic_removal=_remove_dead_weakref): - pop = self._pending_removals.pop - d = self.data - # We shouldn't encounter any KeyError, because this method should - # always be called *before* mutating the dict. - while True: - try: - key = pop() - except IndexError: - return - _atomic_removal(d, key) - def __getitem__(self, key): - if self._pending_removals: - self._commit_removals() o = self.data[key]() if o is None: raise KeyError(key) @@ -140,18 +120,12 @@ def __getitem__(self, key): return o def __delitem__(self, key): - if self._pending_removals: - self._commit_removals() del self.data[key] def __len__(self): - if self._pending_removals: - self._commit_removals() return len(self.data) def __contains__(self, key): - if self._pending_removals: - self._commit_removals() try: o = self.data[key]() except KeyError: @@ -162,38 +136,28 @@ def __repr__(self): return "<%s at %#x>" % (self.__class__.__name__, id(self)) def __setitem__(self, key, value): - if self._pending_removals: - self._commit_removals() self.data[key] = KeyedRef(value, self._remove, key) def copy(self): - if self._pending_removals: - self._commit_removals() new = WeakValueDictionary() - with _IterationGuard(self): - for key, wr in self.data.items(): - o = wr() - if o is not None: - new[key] = o + for key, wr in 
self.data.copy().items(): + o = wr() + if o is not None: + new[key] = o return new __copy__ = copy def __deepcopy__(self, memo): from copy import deepcopy - if self._pending_removals: - self._commit_removals() new = self.__class__() - with _IterationGuard(self): - for key, wr in self.data.items(): - o = wr() - if o is not None: - new[deepcopy(key, memo)] = o + for key, wr in self.data.copy().items(): + o = wr() + if o is not None: + new[deepcopy(key, memo)] = o return new def get(self, key, default=None): - if self._pending_removals: - self._commit_removals() try: wr = self.data[key] except KeyError: @@ -207,21 +171,15 @@ def get(self, key, default=None): return o def items(self): - if self._pending_removals: - self._commit_removals() - with _IterationGuard(self): - for k, wr in self.data.items(): - v = wr() - if v is not None: - yield k, v + for k, wr in self.data.copy().items(): + v = wr() + if v is not None: + yield k, v def keys(self): - if self._pending_removals: - self._commit_removals() - with _IterationGuard(self): - for k, wr in self.data.items(): - if wr() is not None: - yield k + for k, wr in self.data.copy().items(): + if wr() is not None: + yield k __iter__ = keys @@ -235,23 +193,15 @@ def itervaluerefs(self): keep the values around longer than needed. 
""" - if self._pending_removals: - self._commit_removals() - with _IterationGuard(self): - yield from self.data.values() + yield from self.data.copy().values() def values(self): - if self._pending_removals: - self._commit_removals() - with _IterationGuard(self): - for wr in self.data.values(): - obj = wr() - if obj is not None: - yield obj + for wr in self.data.copy().values(): + obj = wr() + if obj is not None: + yield obj def popitem(self): - if self._pending_removals: - self._commit_removals() while True: key, wr = self.data.popitem() o = wr() @@ -259,8 +209,6 @@ def popitem(self): return key, o def pop(self, key, *args): - if self._pending_removals: - self._commit_removals() try: o = self.data.pop(key)() except KeyError: @@ -279,16 +227,12 @@ def setdefault(self, key, default=None): except KeyError: o = None if o is None: - if self._pending_removals: - self._commit_removals() self.data[key] = KeyedRef(default, self._remove, key) return default else: return o def update(self, other=None, /, **kwargs): - if self._pending_removals: - self._commit_removals() d = self.data if other is not None: if not hasattr(other, "items"): @@ -308,9 +252,7 @@ def valuerefs(self): keep the values around longer than needed. 
""" - if self._pending_removals: - self._commit_removals() - return list(self.data.values()) + return list(self.data.copy().values()) def __ior__(self, other): self.update(other) @@ -369,57 +311,22 @@ def __init__(self, dict=None): def remove(k, selfref=ref(self)): self = selfref() if self is not None: - if self._iterating: - self._pending_removals.append(k) - else: - try: - del self.data[k] - except KeyError: - pass + try: + del self.data[k] + except KeyError: + pass self._remove = remove - # A list of dead weakrefs (keys to be removed) - self._pending_removals = [] - self._iterating = set() - self._dirty_len = False if dict is not None: self.update(dict) - def _commit_removals(self): - # NOTE: We don't need to call this method before mutating the dict, - # because a dead weakref never compares equal to a live weakref, - # even if they happened to refer to equal objects. - # However, it means keys may already have been removed. - pop = self._pending_removals.pop - d = self.data - while True: - try: - key = pop() - except IndexError: - return - - try: - del d[key] - except KeyError: - pass - - def _scrub_removals(self): - d = self.data - self._pending_removals = [k for k in self._pending_removals if k in d] - self._dirty_len = False - def __delitem__(self, key): - self._dirty_len = True del self.data[ref(key)] def __getitem__(self, key): return self.data[ref(key)] def __len__(self): - if self._dirty_len and self._pending_removals: - # self._pending_removals may still contain keys which were - # explicitly removed, we have to scrub them (see issue #21173). 
- self._scrub_removals() - return len(self.data) - len(self._pending_removals) + return len(self.data) def __repr__(self): return "<%s at %#x>" % (self.__class__.__name__, id(self)) @@ -429,11 +336,10 @@ def __setitem__(self, key, value): def copy(self): new = WeakKeyDictionary() - with _IterationGuard(self): - for key, value in self.data.items(): - o = key() - if o is not None: - new[o] = value + for key, value in self.data.copy().items(): + o = key() + if o is not None: + new[o] = value return new __copy__ = copy @@ -441,11 +347,10 @@ def copy(self): def __deepcopy__(self, memo): from copy import deepcopy new = self.__class__() - with _IterationGuard(self): - for key, value in self.data.items(): - o = key() - if o is not None: - new[o] = deepcopy(value, memo) + for key, value in self.data.copy().items(): + o = key() + if o is not None: + new[o] = deepcopy(value, memo) return new def get(self, key, default=None): @@ -459,26 +364,23 @@ def __contains__(self, key): return wr in self.data def items(self): - with _IterationGuard(self): - for wr, value in self.data.items(): - key = wr() - if key is not None: - yield key, value + for wr, value in self.data.copy().items(): + key = wr() + if key is not None: + yield key, value def keys(self): - with _IterationGuard(self): - for wr in self.data: - obj = wr() - if obj is not None: - yield obj + for wr in self.data.copy(): + obj = wr() + if obj is not None: + yield obj __iter__ = keys def values(self): - with _IterationGuard(self): - for wr, value in self.data.items(): - if wr() is not None: - yield value + for wr, value in self.data.copy().items(): + if wr() is not None: + yield value def keyrefs(self): """Return a list of weak references to the keys. 
@@ -493,7 +395,6 @@ def keyrefs(self): return list(self.data) def popitem(self): - self._dirty_len = True while True: key, value = self.data.popitem() o = key() @@ -501,7 +402,6 @@ def popitem(self): return o, value def pop(self, key, *args): - self._dirty_len = True return self.data.pop(ref(key), *args) def setdefault(self, key, default=None): diff --git a/Misc/NEWS.d/next/Library/2024-10-11-16-19-46.gh-issue-89967.vhWUOR.rst b/Misc/NEWS.d/next/Library/2024-10-11-16-19-46.gh-issue-89967.vhWUOR.rst new file mode 100644 index 00000000000000..d0860457c8e813 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-11-16-19-46.gh-issue-89967.vhWUOR.rst @@ -0,0 +1 @@ +Make :class:`~weakref.WeakKeyDictionary` and :class:`~weakref.WeakValueDictionary` safe against concurrent mutations from other threads. Patch by Kumar Aditya. From 6c386b703d19aaec9a34fd1e843a4d0a144ad14b Mon Sep 17 00:00:00 2001 From: partev Date: Sun, 13 Oct 2024 12:46:15 -0400 Subject: [PATCH 077/114] gh-125403: fix console formatting in Chapter 12 of the tutorial (#125404) --- Doc/tutorial/venv.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/tutorial/venv.rst b/Doc/tutorial/venv.rst index 91e4ce18acef1d..f362e1943b666f 100644 --- a/Doc/tutorial/venv.rst +++ b/Doc/tutorial/venv.rst @@ -76,7 +76,7 @@ virtual environment you're using, and modify the environment so that running ``python`` will get you that particular version and installation of Python. For example: -.. code-block:: bash +.. code-block:: console $ source ~/envs/tutorial-env/bin/activate (tutorial-env) $ python @@ -108,7 +108,7 @@ complete documentation for ``pip``.) You can install the latest version of a package by specifying a package's name: -.. code-block:: bash +.. 
code-block:: console (tutorial-env) $ python -m pip install novas Collecting novas @@ -120,7 +120,7 @@ You can install the latest version of a package by specifying a package's name: You can also install a specific version of a package by giving the package name followed by ``==`` and the version number: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip install requests==2.6.0 Collecting requests==2.6.0 @@ -133,7 +133,7 @@ version is already installed and do nothing. You can supply a different version number to get that version, or you can run ``python -m pip install --upgrade`` to upgrade the package to the latest version: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip install --upgrade requests Collecting requests @@ -148,7 +148,7 @@ remove the packages from the virtual environment. ``python -m pip show`` will display information about a particular package: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip show requests --- @@ -166,7 +166,7 @@ remove the packages from the virtual environment. ``python -m pip list`` will display all of the packages installed in the virtual environment: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip list novas (3.1.1.3) @@ -179,7 +179,7 @@ the virtual environment: but the output uses the format that ``python -m pip install`` expects. A common convention is to put this list in a ``requirements.txt`` file: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip freeze > requirements.txt (tutorial-env) $ cat requirements.txt @@ -191,7 +191,7 @@ The ``requirements.txt`` can then be committed to version control and shipped as part of an application. Users can then install all the necessary packages with ``install -r``: -.. code-block:: bash +.. 
code-block:: console (tutorial-env) $ python -m pip install -r requirements.txt Collecting novas==3.1.1.3 (from -r requirements.txt (line 1)) From e79bbd147fd58e825572f1aa93c5398953289fb2 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sun, 13 Oct 2024 22:29:27 +0530 Subject: [PATCH 078/114] add Kumar Aditya as codeowner for weakref (#125405) --- .github/CODEOWNERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7e9c3caf23f079..221008717b29b1 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -280,3 +280,5 @@ Lib/test/test_configparser.py @jaraco # Doc sections Doc/reference/ @willingc + +**/*weakref* @kumaraditya303 \ No newline at end of file From c6d7b644c2425b397cfb641f336bea70eb8a329a Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sun, 13 Oct 2024 20:38:42 +0300 Subject: [PATCH 079/114] gh-101291: Add versionadded directives for PyUnstable_Long_* (#125384) --- Doc/c-api/long.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index e0ae0f77a01db9..02ef8aa7846468 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -608,6 +608,9 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Exactly what values are considered compact is an implementation detail and is subject to change. + .. versionadded:: 3.12 + + .. c:function:: Py_ssize_t PyUnstable_Long_CompactValue(const PyLongObject* op) If *op* is compact, as determined by :c:func:`PyUnstable_Long_IsCompact`, @@ -615,3 +618,5 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Otherwise, the return value is undefined. + .. 
versionadded:: 3.12 + From cb8e5995d89d9b90e83cf43310ec50e177484e70 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 13 Oct 2024 18:46:10 +0100 Subject: [PATCH 080/114] GH-125069: Fix inconsistent joining in `WindowsPath(PosixPath(...))` (#125156) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `PurePath.__init__()` incorrectly uses the `_raw_paths` of a given `PurePath` object with a different flavour, even though the procedure to join path segments can differ between flavours. This change makes the `_raw_paths`-enabled deferred joining apply _only_ when the path flavours match. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/pathlib/_local.py | 4 ++-- Lib/test/test_pathlib/test_pathlib.py | 9 +++++++++ .../2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst | 4 ++++ 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 1c02e4168d3a9e..a78997179820b1 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -119,9 +119,9 @@ def __init__(self, *args): paths = [] for arg in args: if isinstance(arg, PurePath): - if arg.parser is ntpath and self.parser is posixpath: + if arg.parser is not self.parser: # GH-103631: Convert separators for backwards compatibility. 
- paths.extend(path.replace('\\', '/') for path in arg._raw_paths) + paths.append(arg.as_posix()) else: paths.extend(arg._raw_paths) else: diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index b47b4a194cfaa9..c7104bfda90f6c 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -131,6 +131,15 @@ def test_constructor_nested(self): self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c"))) self.assertEqual(P(P('./a:b')), P('./a:b')) + @needs_windows + def test_constructor_nested_foreign_flavour(self): + # See GH-125069. + p1 = pathlib.PurePosixPath('b/c:\\d') + p2 = pathlib.PurePosixPath('b/', 'c:\\d') + self.assertEqual(p1, p2) + self.assertEqual(self.cls(p1), self.cls('b/c:/d')) + self.assertEqual(self.cls(p2), self.cls('b/c:/d')) + def _check_parse_path(self, raw_path, *expected): sep = self.parser.sep actual = self.cls._parse_path(raw_path.replace('/', sep)) diff --git a/Misc/NEWS.d/next/Library/2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst b/Misc/NEWS.d/next/Library/2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst new file mode 100644 index 00000000000000..9f1fd871e1d0b5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst @@ -0,0 +1,4 @@ +Fix an issue where providing a :class:`pathlib.PurePath` object as an +initializer argument to a second :class:`~pathlib.PurePath` object with a +different :attr:`~pathlib.PurePath.parser` resulted in arguments to the +former object's initializer being joined by the latter object's parser. From f1d33dbddd3496b062e1fbe024fb6d7b023a35f5 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sun, 13 Oct 2024 16:17:51 -0400 Subject: [PATCH 081/114] gh-125243: Fix ZoneInfo data race in free threading build (#125281) Lock `ZoneInfoType` to protect accesses to `ZONEINFO_STRONG_CACHE`. 
Refactor the `tp_new` handler to use Argument Clinic so that we can just use `@critical_section` annotations on the relevant functions. Also use `PyDict_SetDefaultRef` instead of `PyDict_SetDefault` when inserting into the `TIMEDELTA_CACHE`. --- ...-10-10-20-39-57.gh-issue-125243.eUbbtu.rst | 2 + Modules/_zoneinfo.c | 44 +++++++------ Modules/clinic/_zoneinfo.c.h | 61 ++++++++++++++++++- 3 files changed, 87 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-10-20-39-57.gh-issue-125243.eUbbtu.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-10-20-39-57.gh-issue-125243.eUbbtu.rst b/Misc/NEWS.d/next/Library/2024-10-10-20-39-57.gh-issue-125243.eUbbtu.rst new file mode 100644 index 00000000000000..49f84d9711819f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-10-20-39-57.gh-issue-125243.eUbbtu.rst @@ -0,0 +1,2 @@ +Fix data race when creating :class:`zoneinfo.ZoneInfo` objects in the free +threading build. diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index 902ece795b575b..c5292575c22f23 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -3,6 +3,7 @@ #endif #include "Python.h" +#include "pycore_critical_section.h" // _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED() #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() @@ -298,15 +299,20 @@ get_weak_cache(zoneinfo_state *state, PyTypeObject *type) } } +/*[clinic input] +@critical_section +@classmethod +zoneinfo.ZoneInfo.__new__ + + key: object + +Create a new ZoneInfo instance. 
+[clinic start generated code]*/ + static PyObject * -zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw) +zoneinfo_ZoneInfo_impl(PyTypeObject *type, PyObject *key) +/*[clinic end generated code: output=95e61dab86bb95c3 input=ef73d7a83bf8790e]*/ { - PyObject *key = NULL; - static char *kwlist[] = {"key", NULL}; - if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) { - return NULL; - } - zoneinfo_state *state = zoneinfo_get_state_by_self(type); PyObject *instance = zone_from_strong_cache(state, type, key); if (instance != NULL || PyErr_Occurred()) { @@ -467,6 +473,7 @@ zoneinfo_ZoneInfo_no_cache_impl(PyTypeObject *type, PyTypeObject *cls, } /*[clinic input] +@critical_section @classmethod zoneinfo.ZoneInfo.clear_cache @@ -481,7 +488,7 @@ Clear the ZoneInfo cache. static PyObject * zoneinfo_ZoneInfo_clear_cache_impl(PyTypeObject *type, PyTypeObject *cls, PyObject *only_keys) -/*[clinic end generated code: output=114d9b7c8a22e660 input=e32ca3bb396788ba]*/ +/*[clinic end generated code: output=114d9b7c8a22e660 input=35944715df26d24e]*/ { zoneinfo_state *state = zoneinfo_get_state_by_cls(cls); PyObject *weak_cache = get_weak_cache(state, type); @@ -816,14 +823,10 @@ zoneinfo_ZoneInfo__unpickle_impl(PyTypeObject *type, PyTypeObject *cls, /*[clinic end generated code: output=556712fc709deecb input=6ac8c73eed3de316]*/ { if (from_cache) { - PyObject *val_args = PyTuple_Pack(1, key); - if (val_args == NULL) { - return NULL; - } - - PyObject *rv = zoneinfo_new(type, val_args, NULL); - - Py_DECREF(val_args); + PyObject *rv; + Py_BEGIN_CRITICAL_SECTION(type); + rv = zoneinfo_ZoneInfo_impl(type, key); + Py_END_CRITICAL_SECTION(); return rv; } else { @@ -858,8 +861,7 @@ load_timedelta(zoneinfo_state *state, long seconds) 0, seconds, 0, 1, PyDateTimeAPI->DeltaType); if (tmp != NULL) { - rv = PyDict_SetDefault(state->TIMEDELTA_CACHE, pyoffset, tmp); - Py_XINCREF(rv); + PyDict_SetDefaultRef(state->TIMEDELTA_CACHE, pyoffset, tmp, &rv); Py_DECREF(tmp); } } @@ 
-2368,6 +2370,7 @@ strong_cache_free(StrongCacheNode *root) static void remove_from_strong_cache(zoneinfo_state *state, StrongCacheNode *node) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType); if (state->ZONEINFO_STRONG_CACHE == node) { state->ZONEINFO_STRONG_CACHE = node->next; } @@ -2422,6 +2425,7 @@ eject_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type, return 0; } + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType); StrongCacheNode *cache = state->ZONEINFO_STRONG_CACHE; StrongCacheNode *node = find_in_strong_cache(cache, key); if (node != NULL) { @@ -2478,6 +2482,7 @@ zone_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type, return NULL; // Strong cache currently only implemented for base class } + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType); StrongCacheNode *cache = state->ZONEINFO_STRONG_CACHE; StrongCacheNode *node = find_in_strong_cache(cache, key); @@ -2504,6 +2509,7 @@ update_strong_cache(zoneinfo_state *state, const PyTypeObject *const type, return; } + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType); StrongCacheNode *new_node = strong_cache_node_new(key, zone); if (new_node == NULL) { return; @@ -2631,7 +2637,7 @@ static PyType_Slot zoneinfo_slots[] = { {Py_tp_getattro, PyObject_GenericGetAttr}, {Py_tp_methods, zoneinfo_methods}, {Py_tp_members, zoneinfo_members}, - {Py_tp_new, zoneinfo_new}, + {Py_tp_new, zoneinfo_ZoneInfo}, {Py_tp_dealloc, zoneinfo_dealloc}, {Py_tp_traverse, zoneinfo_traverse}, {Py_tp_clear, zoneinfo_clear}, diff --git a/Modules/clinic/_zoneinfo.c.h b/Modules/clinic/_zoneinfo.c.h index 9905b6425e2f79..bde88b5c4fa65b 100644 --- a/Modules/clinic/_zoneinfo.c.h +++ b/Modules/clinic/_zoneinfo.c.h @@ -6,8 +6,65 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() 
+PyDoc_STRVAR(zoneinfo_ZoneInfo__doc__, +"ZoneInfo(key)\n" +"--\n" +"\n" +"Create a new ZoneInfo instance."); + +static PyObject * +zoneinfo_ZoneInfo_impl(PyTypeObject *type, PyObject *key); + +static PyObject * +zoneinfo_ZoneInfo(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(key), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"key", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "ZoneInfo", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + PyObject *key; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 0, argsbuf); + if (!fastargs) { + goto exit; + } + key = fastargs[0]; + Py_BEGIN_CRITICAL_SECTION(type); + return_value = zoneinfo_ZoneInfo_impl(type, key); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + PyDoc_STRVAR(zoneinfo_ZoneInfo_from_file__doc__, "from_file($type, file_obj, /, key=None)\n" "--\n" @@ -182,7 +239,9 @@ zoneinfo_ZoneInfo_clear_cache(PyTypeObject *type, PyTypeObject *cls, PyObject *c } only_keys = args[0]; skip_optional_kwonly: + Py_BEGIN_CRITICAL_SECTION(type); return_value = zoneinfo_ZoneInfo_clear_cache_impl(type, cls, only_keys); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -372,4 +431,4 @@ zoneinfo_ZoneInfo__unpickle(PyTypeObject *type, PyTypeObject *cls, PyObject *con exit: return return_value; } -/*[clinic end generated code: 
output=2a15f32fdd2ab6cd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b4fdc0b30247110a input=a9049054013a1b77]*/ From cfc27bc50fe165330f2295f9ac0ad56ca5b0f31c Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 14 Oct 2024 07:44:48 +0300 Subject: [PATCH 082/114] gh-123133: clarify p=0 case for "f" and "e" formatting types (GH-125426) Co-authored-by: Serhiy Storchaka --- Doc/library/string.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 57a1f920523035..49aeb28d57c8d1 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -509,9 +509,8 @@ The available presentation types for :class:`float` and | | significant digits. With no precision given, uses a | | | precision of ``6`` digits after the decimal point for | | | :class:`float`, and shows all coefficient digits | - | | for :class:`~decimal.Decimal`. If no digits follow the | - | | decimal point, the decimal point is also removed unless | - | | the ``#`` option is used. | + | | for :class:`~decimal.Decimal`. If ``p=0``, the decimal | + | | point is omitted unless the ``#`` option is used. | +---------+----------------------------------------------------------+ | ``'E'`` | Scientific notation. Same as ``'e'`` except it uses | | | an upper case 'E' as the separator character. | @@ -522,9 +521,8 @@ The available presentation types for :class:`float` and | | precision given, uses a precision of ``6`` digits after | | | the decimal point for :class:`float`, and uses a | | | precision large enough to show all coefficient digits | - | | for :class:`~decimal.Decimal`. If no digits follow the | - | | decimal point, the decimal point is also removed unless | - | | the ``#`` option is used. | + | | for :class:`~decimal.Decimal`. If ``p=0``, the decimal | + | | point is omitted unless the ``#`` option is used. 
| +---------+----------------------------------------------------------+ | ``'F'`` | Fixed-point notation. Same as ``'f'``, but converts | | | ``nan`` to ``NAN`` and ``inf`` to ``INF``. | From 66b3922b97388c328c9bd8df050eef11c0261fae Mon Sep 17 00:00:00 2001 From: rindeal Date: Mon, 14 Oct 2024 06:36:53 +0000 Subject: [PATCH 083/114] gh-86357: argparse: use str() consistently and explicitly to print choices (GH-117766) Signed-off-by: Jan Chren ~rindeal --- Lib/argparse.py | 12 +++---- Lib/test/test_argparse.py | 31 ++++++++++++++++++- ...-04-19-05-58-50.gh-issue-117766.J3xepp.rst | 1 + 3 files changed, 36 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-19-05-58-50.gh-issue-117766.J3xepp.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 550415dc93478b..fa9f5211257e96 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -547,8 +547,7 @@ def _metavar_formatter(self, action, default_metavar): if action.metavar is not None: result = action.metavar elif action.choices is not None: - choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) + result = '{%s}' % ','.join(map(str, action.choices)) else: result = default_metavar @@ -599,8 +598,7 @@ def _expand_help(self, action): elif hasattr(value, '__name__'): params[name] = value.__name__ if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str + params['choices'] = ', '.join(map(str, params['choices'])) return help_string % params def _iter_indented_subactions(self, action): @@ -717,7 +715,7 @@ def _get_action_name(argument): elif argument.dest not in (None, SUPPRESS): return argument.dest elif argument.choices: - return '{' + ','.join(argument.choices) + '}' + return '{%s}' % ','.join(map(str, argument.choices)) else: return None @@ -2607,8 +2605,8 @@ def _check_value(self, action, value): if isinstance(choices, str): choices = iter(choices) if value not in 
choices: - args = {'value': value, - 'choices': ', '.join(map(repr, action.choices))} + args = {'value': str(value), + 'choices': ', '.join(map(str, action.choices))} msg = _('invalid choice: %(value)r (choose from %(choices)s)') raise ArgumentError(action, msg % args) diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index f52a4b6bdd8aca..78692fd3474782 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -16,6 +16,7 @@ import argparse import warnings +from enum import StrEnum from test.support import captured_stderr from test.support import import_helper from test.support import os_helper @@ -985,6 +986,34 @@ class TestDisallowLongAbbreviationAllowsShortGroupingPrefix(ParserTestCase): ] +class TestStrEnumChoices(TestCase): + class Color(StrEnum): + RED = "red" + GREEN = "green" + BLUE = "blue" + + def test_parse_enum_value(self): + parser = argparse.ArgumentParser() + parser.add_argument('--color', choices=self.Color) + args = parser.parse_args(['--color', 'red']) + self.assertEqual(args.color, self.Color.RED) + + def test_help_message_contains_enum_choices(self): + parser = argparse.ArgumentParser() + parser.add_argument('--color', choices=self.Color, help='Choose a color') + self.assertIn('[--color {red,green,blue}]', parser.format_usage()) + self.assertIn(' --color {red,green,blue}', parser.format_help()) + + def test_invalid_enum_value_raises_error(self): + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('--color', choices=self.Color) + self.assertRaisesRegex( + argparse.ArgumentError, + r"invalid choice: 'yellow' \(choose from red, green, blue\)", + parser.parse_args, + ['--color', 'yellow'], + ) + # ================ # Positional tests # ================ @@ -2485,7 +2514,7 @@ def test_wrong_argument_subparsers_no_destination_error(self): parser.parse_args(('baz',)) self.assertRegex( excinfo.exception.stderr, - r"error: argument {foo,bar}: invalid choice: 'baz' \(choose from 'foo', 'bar'\)\n$" 
+ r"error: argument {foo,bar}: invalid choice: 'baz' \(choose from foo, bar\)\n$" ) def test_optional_subparsers(self): diff --git a/Misc/NEWS.d/next/Library/2024-04-19-05-58-50.gh-issue-117766.J3xepp.rst b/Misc/NEWS.d/next/Library/2024-04-19-05-58-50.gh-issue-117766.J3xepp.rst new file mode 100644 index 00000000000000..d090f931f0238d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-19-05-58-50.gh-issue-117766.J3xepp.rst @@ -0,0 +1 @@ +Always use :func:`str` to print ``choices`` in :mod:`argparse`. From b52c7306ea4470f9d7548655c2a1b89a07ff5504 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 14 Oct 2024 10:54:59 +0300 Subject: [PATCH 084/114] gh-121797: Add class method Fraction.from_number() (GH-121800) It is an alternative constructor which only accepts a single numeric argument. Unlike to Fraction.from_float() and Fraction.from_decimal() it accepts any real numbers supported by the standard constructor (int, float, Decimal, Rational numbers, objects with as_integer_ratio()). Unlike to the standard constructor, it does not accept strings. --- Doc/library/fractions.rst | 10 ++++ Doc/whatsnew/3.14.rst | 4 ++ Lib/fractions.py | 25 +++++++++- Lib/test/test_fractions.py | 49 ++++++++++++++++--- ...-07-15-19-34-56.gh-issue-121797.qDqj59.rst | 2 + 5 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-15-19-34-56.gh-issue-121797.qDqj59.rst diff --git a/Doc/library/fractions.rst b/Doc/library/fractions.rst index 2ee154952549ac..fc7f9a6301a915 100644 --- a/Doc/library/fractions.rst +++ b/Doc/library/fractions.rst @@ -166,6 +166,16 @@ another rational number, or from a string. instance. + .. classmethod:: from_number(number) + + Alternative constructor which only accepts instances of + :class:`numbers.Integral`, :class:`numbers.Rational`, + :class:`float` or :class:`decimal.Decimal`, and objects with + the :meth:`!as_integer_ratio` method, but not strings. + + .. versionadded:: 3.14 + + .. 
method:: limit_denominator(max_denominator=1000000) Finds and returns the closest :class:`Fraction` to ``self`` that has diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index c62a3ca5872eef..b22d1bd1e99d4e 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -263,6 +263,10 @@ fractions :meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`. (Contributed by Serhiy Storchaka in :gh:`82017`.) +* Add alternative :class:`~fractions.Fraction` constructor + :meth:`Fraction.from_number() `. + (Contributed by Serhiy Storchaka in :gh:`121797`.) + functools --------- diff --git a/Lib/fractions.py b/Lib/fractions.py index 34fd0803d1b1ab..f0cbc8c2e6c012 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -279,7 +279,8 @@ def __new__(cls, numerator=0, denominator=None): numerator = -numerator else: - raise TypeError("argument should be a string or a number") + raise TypeError("argument should be a string or a Rational " + "instance or have the as_integer_ratio() method") elif type(numerator) is int is type(denominator): pass # *very* normal case @@ -305,6 +306,28 @@ def __new__(cls, numerator=0, denominator=None): self._denominator = denominator return self + @classmethod + def from_number(cls, number): + """Converts a finite real number to a rational number, exactly. + + Beware that Fraction.from_number(0.3) != Fraction(3, 10). + + """ + if type(number) is int: + return cls._from_coprime_ints(number, 1) + + elif isinstance(number, numbers.Rational): + return cls._from_coprime_ints(number.numerator, number.denominator) + + elif (isinstance(number, float) or + (not isinstance(number, type) and + hasattr(number, 'as_integer_ratio'))): + return cls._from_coprime_ints(*number.as_integer_ratio()) + + else: + raise TypeError("argument should be a Rational instance or " + "have the as_integer_ratio() method") + @classmethod def from_float(cls, f): """Converts a finite float to a rational number, exactly. 
diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index 4907f4093f52c9..98dccbec9566ac 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -283,6 +283,13 @@ def __repr__(self): class RectComplex(Rect, complex): pass +class Ratio: + def __init__(self, ratio): + self._ratio = ratio + def as_integer_ratio(self): + return self._ratio + + class FractionTest(unittest.TestCase): def assertTypedEquals(self, expected, actual): @@ -355,14 +362,9 @@ def testInitFromDecimal(self): self.assertRaises(OverflowError, F, Decimal('-inf')) def testInitFromIntegerRatio(self): - class Ratio: - def __init__(self, ratio): - self._ratio = ratio - def as_integer_ratio(self): - return self._ratio - self.assertEqual((7, 3), _components(F(Ratio((7, 3))))) - errmsg = "argument should be a string or a number" + errmsg = (r"argument should be a string or a Rational instance or " + r"have the as_integer_ratio\(\) method") # the type also has an "as_integer_ratio" attribute. self.assertRaisesRegex(TypeError, errmsg, F, Ratio) # bad ratio @@ -388,6 +390,8 @@ class B(metaclass=M): pass self.assertRaisesRegex(TypeError, errmsg, F, B) self.assertRaisesRegex(TypeError, errmsg, F, B()) + self.assertRaises(TypeError, F.from_number, B) + self.assertRaises(TypeError, F.from_number, B()) def testFromString(self): self.assertEqual((5, 1), _components(F("5"))) @@ -594,6 +598,37 @@ def testFromDecimal(self): ValueError, "cannot convert NaN to integer ratio", F.from_decimal, Decimal("snan")) + def testFromNumber(self, cls=F): + def check(arg, numerator, denominator): + f = cls.from_number(arg) + self.assertIs(type(f), cls) + self.assertEqual(f.numerator, numerator) + self.assertEqual(f.denominator, denominator) + + check(10, 10, 1) + check(2.5, 5, 2) + check(Decimal('2.5'), 5, 2) + check(F(22, 7), 22, 7) + check(DummyFraction(22, 7), 22, 7) + check(Rat(22, 7), 22, 7) + check(Ratio((22, 7)), 22, 7) + self.assertRaises(TypeError, cls.from_number, 3+4j) + 
self.assertRaises(TypeError, cls.from_number, '5/2') + self.assertRaises(TypeError, cls.from_number, []) + self.assertRaises(OverflowError, cls.from_number, float('inf')) + self.assertRaises(OverflowError, cls.from_number, Decimal('inf')) + + # as_integer_ratio not defined in a class + class A: + pass + a = A() + a.as_integer_ratio = lambda: (9, 5) + check(a, 9, 5) + + def testFromNumber_subclass(self): + self.testFromNumber(DummyFraction) + + def test_is_integer(self): self.assertTrue(F(1, 1).is_integer()) self.assertTrue(F(-1, 1).is_integer()) diff --git a/Misc/NEWS.d/next/Library/2024-07-15-19-34-56.gh-issue-121797.qDqj59.rst b/Misc/NEWS.d/next/Library/2024-07-15-19-34-56.gh-issue-121797.qDqj59.rst new file mode 100644 index 00000000000000..9525379587f6cd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-15-19-34-56.gh-issue-121797.qDqj59.rst @@ -0,0 +1,2 @@ +Add alternative :class:`~fractions.Fraction` constructor +:meth:`Fraction.from_number() `. From 4b358ee647809019813f106eb901f466a3846d98 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Mon, 14 Oct 2024 16:17:51 +0800 Subject: [PATCH 085/114] gh-125323: Remove some unsafe Py_DECREFs in bytecodes.c, replacing them with PyStackRef_CLOSEs (GH-125324) --- Include/internal/pycore_stackref.h | 3 ++ Python/bytecodes.c | 40 +++++++++++----------- Python/executor_cases.c.h | 40 +++++++++++----------- Python/generated_cases.c.h | 40 +++++++++++----------- Tools/cases_generator/analyzer.py | 1 + Tools/cases_generator/generators_common.py | 1 + 6 files changed, 65 insertions(+), 60 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 7d1eb11aa5ecb8..0e6410466b924b 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -153,6 +153,8 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) return PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(stackref)); } +#define PyStackRef_CLOSE_SPECIALIZED(stackref, dealloc) 
PyStackRef_CLOSE(stackref) + #else // Py_GIL_DISABLED @@ -177,6 +179,7 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; #define PyStackRef_DUP(stackref) PyStackRef_FromPyObjectSteal(Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref))) +#define PyStackRef_CLOSE_SPECIALIZED(stackref, dealloc) _Py_DECREF_SPECIALIZED(PyStackRef_AsPyObjectBorrow(stackref), dealloc) #endif // Py_GIL_DISABLED diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 34fdfcb05e3c18..299608f252c546 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -474,8 +474,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -487,8 +487,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -500,8 +500,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = 
PyStackRef_FromPyObjectSteal(res_o); @@ -594,8 +594,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -636,12 +636,12 @@ dummy_func( * that the string is safe to mutate. */ assert(Py_REFCNT(left_o) >= 2); - _Py_DECREF_NO_DEALLOC(left_o); + PyStackRef_CLOSE(left); DEAD(left); PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); PyUnicode_Append(&temp, right_o); *target_local = PyStackRef_FromPyObjectSteal(temp); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); DEAD(right); ERROR_IF(PyStackRef_IsNull(*target_local), error); #if TIER_ONE @@ -755,7 +755,7 @@ dummy_func( PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -775,7 +775,7 @@ dummy_func( DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -796,7 +796,7 @@ dummy_func( PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); 
PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -908,7 +908,7 @@ dummy_func( PyList_SET_ITEM(list, index, PyStackRef_AsPyObjectSteal(value)); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(list_st); } @@ -2398,9 +2398,9 @@ dummy_func( double dright = PyFloat_AS_DOUBLE(right_o); // 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches low four bits of the oparg int sign_ish = COMPARISON_BIT(dleft, dright); - _Py_DECREF_SPECIALIZED(left_o, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); DEAD(left); - _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); DEAD(right); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. @@ -2420,9 +2420,9 @@ dummy_func( Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); DEAD(left); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); DEAD(right); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. 
@@ -2436,9 +2436,9 @@ dummy_func( STAT_INC(COMPARE_OP, hit); int eq = _PyUnicode_Equal(left_o, right_o); assert((oparg >> 5) == Py_EQ || (oparg >> 5) == Py_NE); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); DEAD(left); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); DEAD(right); assert(eq == 0 || eq == 1); assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ef110e2e2a794a..5532c04e497a75 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -541,8 +541,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -561,8 +561,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -581,8 +581,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - 
_Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -722,8 +722,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -764,11 +764,11 @@ * that the string is safe to mutate. */ assert(Py_REFCNT(left_o) >= 2); - _Py_DECREF_NO_DEALLOC(left_o); + PyStackRef_CLOSE(left); PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); PyUnicode_Append(&temp, right_o); *target_local = PyStackRef_FromPyObjectSteal(temp); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) JUMP_TO_ERROR(); #if TIER_ONE // The STORE_FAST is already done. 
This is done here in tier one, @@ -904,7 +904,7 @@ PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -946,7 +946,7 @@ } STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -985,7 +985,7 @@ PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1160,7 +1160,7 @@ PyList_SET_ITEM(list, index, PyStackRef_AsPyObjectSteal(value)); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); @@ -2912,8 +2912,8 @@ double dright = PyFloat_AS_DOUBLE(right_o); // 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches low four bits of the oparg int sign_ish = COMPARISON_BIT(dleft, dright); - _Py_DECREF_SPECIALIZED(left_o, _PyFloat_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. 
stack_pointer[-2] = res; @@ -2946,8 +2946,8 @@ Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. stack_pointer[-2] = res; @@ -2968,8 +2968,8 @@ STAT_INC(COMPARE_OP, hit); int eq = _PyUnicode_Equal(left_o, right_o); assert((oparg >> 5) == Py_EQ || (oparg >> 5) == Py_NE); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); assert(eq == 0 || eq == 1); assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS); assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7023aea369db49..0eeb566a0adadc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -118,8 +118,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -153,8 +153,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); 
PyObject *res_o = PyUnicode_Concat(left_o, right_o); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -207,11 +207,11 @@ * that the string is safe to mutate. */ assert(Py_REFCNT(left_o) >= 2); - _Py_DECREF_NO_DEALLOC(left_o); + PyStackRef_CLOSE(left); PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); PyUnicode_Append(&temp, right_o); *target_local = PyStackRef_FromPyObjectSteal(temp); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) goto pop_2_error; #if TIER_ONE // The STORE_FAST is already done. This is done here in tier one, @@ -285,8 +285,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -356,8 +356,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -582,7 
+582,7 @@ PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -614,7 +614,7 @@ DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c, BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -646,7 +646,7 @@ PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -3284,8 +3284,8 @@ double dright = PyFloat_AS_DOUBLE(right_o); // 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches low four bits of the oparg int sign_ish = COMPARISON_BIT(dleft, dright); - _Py_DECREF_SPECIALIZED(left_o, _PyFloat_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. 
} @@ -3326,8 +3326,8 @@ Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. } @@ -3362,8 +3362,8 @@ STAT_INC(COMPARE_OP, hit); int eq = _PyUnicode_Equal(left_o, right_o); assert((oparg >> 5) == Py_EQ || (oparg >> 5) == Py_NE); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); assert(eq == 0 || eq == 1); assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS); assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS); @@ -7720,7 +7720,7 @@ PyList_SET_ITEM(list, index, PyStackRef_AsPyObjectSteal(value)); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 60f5d010a7a083..19fdeac65cf2df 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -540,6 +540,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "PyStackRef_AsPyObjectSteal", "PyStackRef_CLEAR", "PyStackRef_CLOSE", + "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", "PyStackRef_False", "PyStackRef_FromPyObjectImmortal", diff --git 
a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 0bfa1a3b56fbc2..7e032c21d2485c 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -116,6 +116,7 @@ def __init__(self, out: CWriter): "SAVE_STACK": self.save_stack, "RELOAD_STACK": self.reload_stack, "PyStackRef_CLOSE": self.stackref_close, + "PyStackRef_CLOSE_SPECIALIZED": self.stackref_close, "PyStackRef_AsPyObjectSteal": self.stackref_steal, "DISPATCH": self.dispatch } From 5217328f93f599755bd70418952392c54f705a71 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 14 Oct 2024 11:24:01 +0300 Subject: [PATCH 086/114] gh-121798: Add class method Decimal.from_number() (GH-121801) It is an alternate constructor which only accepts a single numeric argument. Unlike to Decimal.from_float() it accepts also Decimal. Unlike to the standard constructor, it does not accept strings and tuples. --- Doc/library/decimal.rst | 17 +++++++ Doc/whatsnew/3.14.rst | 6 +++ Lib/_pydecimal.py | 15 ++++++ Lib/test/test_decimal.py | 23 ++++++++++ ...-07-15-19-25-25.gh-issue-121798.GmuBDu.rst | 2 + Modules/_decimal/_decimal.c | 46 +++++++++++++++++++ Modules/_decimal/docstrings.h | 13 ++++++ 7 files changed, 122 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-07-15-19-25-25.gh-issue-121798.GmuBDu.rst diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index 916f17cadfaa7e..c9a3e448cad063 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -598,6 +598,23 @@ Decimal objects .. versionadded:: 3.1 + .. classmethod:: from_number(number) + + Alternative constructor that only accepts instances of + :class:`float`, :class:`int` or :class:`Decimal`, but not strings + or tuples. + + .. 
doctest:: + + >>> Decimal.from_number(314) + Decimal('314') + >>> Decimal.from_number(0.1) + Decimal('0.1000000000000000055511151231257827021181583404541015625') + >>> Decimal.from_number(Decimal('3.14')) + Decimal('3.14') + + .. versionadded:: 3.14 + .. method:: fma(other, third, context=None) Fused multiply-add. Return self*other+third with no rounding of the diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b22d1bd1e99d4e..25e69a59bdec62 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -239,6 +239,12 @@ ctypes to help match a non-default ABI. (Contributed by Petr Viktorin in :gh:`97702`.) +decimal +------- + +* Add alternative :class:`~decimal.Decimal` constructor + :meth:`Decimal.from_number() <decimal.Decimal.from_number>`. + (Contributed by Serhiy Storchaka in :gh:`121798`.) dis --- diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index 75df3db262470b..5b60570c6c592a 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -582,6 +582,21 @@ def __new__(cls, value="0", context=None): raise TypeError("Cannot convert %r to Decimal" % value) + @classmethod + def from_number(cls, number): + """Converts a real number to a decimal number, exactly. + + >>> Decimal.from_number(314) # int + Decimal('314') + >>> Decimal.from_number(0.1) # float + Decimal('0.1000000000000000055511151231257827021181583404541015625') + >>> Decimal.from_number(Decimal('3.14')) # another decimal instance + Decimal('3.14') + """ + if isinstance(number, (int, Decimal, float)): + return cls(number) + raise TypeError("Cannot convert %r to Decimal" % number) + @classmethod def from_float(cls, f): """Converts a float to a decimal number, exactly.
diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index d1e7e69e7e951b..bc6c6427740949 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -812,6 +812,29 @@ def test_explicit_context_create_from_float(self): x = random.expovariate(0.01) * (random.random() * 2.0 - 1.0) self.assertEqual(x, float(nc.create_decimal(x))) # roundtrip + def test_from_number(self, cls=None): + Decimal = self.decimal.Decimal + if cls is None: + cls = Decimal + + def check(arg, expected): + d = cls.from_number(arg) + self.assertIs(type(d), cls) + self.assertEqual(d, expected) + + check(314, Decimal(314)) + check(3.14, Decimal.from_float(3.14)) + check(Decimal('3.14'), Decimal('3.14')) + self.assertRaises(TypeError, cls.from_number, 3+4j) + self.assertRaises(TypeError, cls.from_number, '314') + self.assertRaises(TypeError, cls.from_number, (0, (3, 1, 4), 0)) + self.assertRaises(TypeError, cls.from_number, object()) + + def test_from_number_subclass(self, cls=None): + class DecimalSubclass(self.decimal.Decimal): + pass + self.test_from_number(DecimalSubclass) + def test_unicode_digits(self): Decimal = self.decimal.Decimal diff --git a/Misc/NEWS.d/next/Library/2024-07-15-19-25-25.gh-issue-121798.GmuBDu.rst b/Misc/NEWS.d/next/Library/2024-07-15-19-25-25.gh-issue-121798.GmuBDu.rst new file mode 100644 index 00000000000000..5706e4bffeb4a1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-15-19-25-25.gh-issue-121798.GmuBDu.rst @@ -0,0 +1,2 @@ +Add alternative :class:`~decimal.Decimal` constructor +:meth:`Decimal.from_number() <decimal.Decimal.from_number>`. diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index a33c9793b5ad17..c564813036e504 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -2857,6 +2857,51 @@ dec_from_float(PyObject *type, PyObject *pyfloat) return result; } +/* 'v' can have any numeric type accepted by the Decimal constructor. Attempt + an exact conversion.
If the result does not meet the restrictions + for an mpd_t, fail with InvalidOperation. */ +static PyObject * +PyDecType_FromNumberExact(PyTypeObject *type, PyObject *v, PyObject *context) +{ + decimal_state *state = get_module_state_by_def(type); + assert(v != NULL); + if (PyDec_Check(state, v)) { + return PyDecType_FromDecimalExact(type, v, context); + } + else if (PyLong_Check(v)) { + return PyDecType_FromLongExact(type, v, context); + } + else if (PyFloat_Check(v)) { + if (dec_addstatus(context, MPD_Float_operation)) { + return NULL; + } + return PyDecType_FromFloatExact(type, v, context); + } + else { + PyErr_Format(PyExc_TypeError, + "conversion from %s to Decimal is not supported", + Py_TYPE(v)->tp_name); + return NULL; + } +} + +/* class method */ +static PyObject * +dec_from_number(PyObject *type, PyObject *number) +{ + PyObject *context; + PyObject *result; + + decimal_state *state = get_module_state_by_def((PyTypeObject *)type); + CURRENT_CONTEXT(state, context); + result = PyDecType_FromNumberExact(state->PyDec_Type, number, context); + if (type != (PyObject *)state->PyDec_Type && result != NULL) { + Py_SETREF(result, PyObject_CallFunctionObjArgs(type, result, NULL)); + } + + return result; +} + /* create_decimal_from_float */ static PyObject * ctx_from_float(PyObject *context, PyObject *v) @@ -5052,6 +5097,7 @@ static PyMethodDef dec_methods [] = /* Miscellaneous */ { "from_float", dec_from_float, METH_O|METH_CLASS, doc_from_float }, + { "from_number", dec_from_number, METH_O|METH_CLASS, doc_from_number }, { "as_tuple", PyDec_AsTuple, METH_NOARGS, doc_as_tuple }, { "as_integer_ratio", dec_as_integer_ratio, METH_NOARGS, doc_as_integer_ratio }, diff --git a/Modules/_decimal/docstrings.h b/Modules/_decimal/docstrings.h index a1823cdd32b74c..b34bff83d3f4e9 100644 --- a/Modules/_decimal/docstrings.h +++ b/Modules/_decimal/docstrings.h @@ -189,6 +189,19 @@ Decimal.from_float(0.1) is not the same as Decimal('0.1').\n\ \n\ \n"); +PyDoc_STRVAR(doc_from_number, 
+"from_number($type, number, /)\n--\n\n\ +Class method that converts a real number to a decimal number, exactly.\n\ +\n\ + >>> Decimal.from_number(314) # int\n\ + Decimal('314')\n\ + >>> Decimal.from_number(0.1) # float\n\ + Decimal('0.1000000000000000055511151231257827021181583404541015625')\n\ + >>> Decimal.from_number(Decimal('3.14')) # another decimal instance\n\ + Decimal('3.14')\n\ +\n\ +\n"); + PyDoc_STRVAR(doc_fma, "fma($self, /, other, third, context=None)\n--\n\n\ Fused multiply-add. Return self*other+third with no rounding of the\n\ From 67f6e08147bc005e460d82fcce85bf5d56009cf5 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 14 Oct 2024 14:06:31 +0530 Subject: [PATCH 087/114] gh-125139: use `_PyRecursiveMutex` in `_thread.RLock` (#125144) --- Include/internal/pycore_lock.h | 3 +- Modules/_threadmodule.c | 151 +++++++-------------------------- Python/lock.c | 31 ++++++- 3 files changed, 63 insertions(+), 122 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index cd7deda00c7bee..57cbce8f126aca 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -160,8 +160,9 @@ typedef struct { PyAPI_FUNC(int) _PyRecursiveMutex_IsLockedByCurrentThread(_PyRecursiveMutex *m); PyAPI_FUNC(void) _PyRecursiveMutex_Lock(_PyRecursiveMutex *m); +extern PyLockStatus _PyRecursiveMutex_LockTimed(_PyRecursiveMutex *m, PyTime_t timeout, _PyLockFlags flags); PyAPI_FUNC(void) _PyRecursiveMutex_Unlock(_PyRecursiveMutex *m); - +extern int _PyRecursiveMutex_TryUnlock(_PyRecursiveMutex *m); // A readers-writer (RW) lock. The lock supports multiple concurrent readers or // a single writer. 
The lock is write-preferring: if a writer is waiting while diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 9617f9cafe76ff..d4408aa9e42d9d 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -726,11 +726,6 @@ lock_dealloc(PyObject *op) Py_DECREF(tp); } -static inline PyLockStatus -acquire_timed(PyThread_type_lock lock, PyTime_t timeout) -{ - return PyThread_acquire_lock_timed_with_retries(lock, timeout); -} static int lock_acquire_parse_args(PyObject *args, PyObject *kwds, @@ -973,10 +968,7 @@ static PyType_Spec lock_type_spec = { typedef struct { PyObject_HEAD - PyThread_type_lock rlock_lock; - PyThread_ident_t rlock_owner; - unsigned long rlock_count; - PyObject *in_weakreflist; + _PyRecursiveMutex lock; } rlockobject; static int @@ -992,59 +984,26 @@ rlock_dealloc(PyObject *op) { rlockobject *self = (rlockobject*)op; PyObject_GC_UnTrack(self); - if (self->in_weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) self); - /* self->rlock_lock can be NULL if PyThread_allocate_lock() failed - in rlock_new() */ - if (self->rlock_lock != NULL) { - /* Unlock the lock so it's safe to free it */ - if (self->rlock_count > 0) - PyThread_release_lock(self->rlock_lock); - - PyThread_free_lock(self->rlock_lock); - } + PyObject_ClearWeakRefs((PyObject *) self); PyTypeObject *tp = Py_TYPE(self); tp->tp_free(self); Py_DECREF(tp); } -static bool -rlock_is_owned_by(rlockobject *self, PyThread_ident_t tid) -{ - PyThread_ident_t owner_tid = - _Py_atomic_load_ullong_relaxed(&self->rlock_owner); - return owner_tid == tid && self->rlock_count > 0; -} static PyObject * rlock_acquire(PyObject *op, PyObject *args, PyObject *kwds) { rlockobject *self = (rlockobject*)op; PyTime_t timeout; - PyThread_ident_t tid; - PyLockStatus r = PY_LOCK_ACQUIRED; - if (lock_acquire_parse_args(args, kwds, &timeout) < 0) + if (lock_acquire_parse_args(args, kwds, &timeout) < 0) { return NULL; - - tid = PyThread_get_thread_ident_ex(); - if (rlock_is_owned_by(self, 
tid)) { - unsigned long count = self->rlock_count + 1; - if (count <= self->rlock_count) { - PyErr_SetString(PyExc_OverflowError, - "Internal lock count overflowed"); - return NULL; - } - self->rlock_count = count; - Py_RETURN_TRUE; - } - r = acquire_timed(self->rlock_lock, timeout); - if (r == PY_LOCK_ACQUIRED) { - assert(self->rlock_count == 0); - _Py_atomic_store_ullong_relaxed(&self->rlock_owner, tid); - self->rlock_count = 1; } - else if (r == PY_LOCK_INTR) { + + PyLockStatus r = _PyRecursiveMutex_LockTimed(&self->lock, timeout, + _PY_LOCK_HANDLE_SIGNALS | _PY_LOCK_DETACH); + if (r == PY_LOCK_INTR) { return NULL; } @@ -1078,17 +1037,12 @@ static PyObject * rlock_release(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t tid = PyThread_get_thread_ident_ex(); - if (!rlock_is_owned_by(self, tid)) { + if (_PyRecursiveMutex_TryUnlock(&self->lock) < 0) { PyErr_SetString(PyExc_RuntimeError, "cannot release un-acquired lock"); return NULL; } - if (--self->rlock_count == 0) { - _Py_atomic_store_ullong_relaxed(&self->rlock_owner, 0); - PyThread_release_lock(self->rlock_lock); - } Py_RETURN_NONE; } @@ -1116,25 +1070,15 @@ rlock_acquire_restore(PyObject *op, PyObject *args) { rlockobject *self = (rlockobject*)op; PyThread_ident_t owner; - unsigned long count; - int r = 1; + Py_ssize_t count; - if (!PyArg_ParseTuple(args, "(k" Py_PARSE_THREAD_IDENT_T "):_acquire_restore", + if (!PyArg_ParseTuple(args, "(n" Py_PARSE_THREAD_IDENT_T "):_acquire_restore", &count, &owner)) return NULL; - if (!PyThread_acquire_lock(self->rlock_lock, 0)) { - Py_BEGIN_ALLOW_THREADS - r = PyThread_acquire_lock(self->rlock_lock, 1); - Py_END_ALLOW_THREADS - } - if (!r) { - PyErr_SetString(ThreadError, "couldn't acquire lock"); - return NULL; - } - assert(self->rlock_count == 0); - _Py_atomic_store_ullong_relaxed(&self->rlock_owner, owner); - self->rlock_count = count; + _PyRecursiveMutex_Lock(&self->lock); + 
_Py_atomic_store_ullong_relaxed(&self->lock.thread, owner); + self->lock.level = (size_t)count - 1; Py_RETURN_NONE; } @@ -1148,21 +1092,18 @@ static PyObject * rlock_release_save(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t owner; - unsigned long count; - if (self->rlock_count == 0) { + if (!_PyRecursiveMutex_IsLockedByCurrentThread(&self->lock)) { PyErr_SetString(PyExc_RuntimeError, "cannot release un-acquired lock"); return NULL; } - owner = self->rlock_owner; - count = self->rlock_count; - self->rlock_count = 0; - _Py_atomic_store_ullong_relaxed(&self->rlock_owner, 0); - PyThread_release_lock(self->rlock_lock); - return Py_BuildValue("k" Py_PARSE_THREAD_IDENT_T, count, owner); + PyThread_ident_t owner = self->lock.thread; + Py_ssize_t count = self->lock.level + 1; + self->lock.level = 0; // ensure the unlock releases the lock + _PyRecursiveMutex_Unlock(&self->lock); + return Py_BuildValue("n" Py_PARSE_THREAD_IDENT_T, count, owner); } PyDoc_STRVAR(rlock_release_save_doc, @@ -1175,10 +1116,10 @@ static PyObject * rlock_recursion_count(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t tid = PyThread_get_thread_ident_ex(); - PyThread_ident_t owner = - _Py_atomic_load_ullong_relaxed(&self->rlock_owner); - return PyLong_FromUnsignedLong(owner == tid ? 
self->rlock_count : 0UL); + if (_PyRecursiveMutex_IsLockedByCurrentThread(&self->lock)) { + return PyLong_FromSize_t(self->lock.level + 1); + } + return PyLong_FromLong(0); } PyDoc_STRVAR(rlock_recursion_count_doc, @@ -1191,12 +1132,8 @@ static PyObject * rlock_is_owned(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t tid = PyThread_get_thread_ident_ex(); - - if (rlock_is_owned_by(self, tid)) { - Py_RETURN_TRUE; - } - Py_RETURN_FALSE; + long owned = _PyRecursiveMutex_IsLockedByCurrentThread(&self->lock); + return PyBool_FromLong(owned); } PyDoc_STRVAR(rlock_is_owned_doc, @@ -1212,16 +1149,7 @@ rlock_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (self == NULL) { return NULL; } - self->in_weakreflist = NULL; - self->rlock_owner = 0; - self->rlock_count = 0; - - self->rlock_lock = PyThread_allocate_lock(); - if (self->rlock_lock == NULL) { - Py_DECREF(self); - PyErr_SetString(ThreadError, "can't allocate lock"); - return NULL; - } + self->lock = (_PyRecursiveMutex){0}; return (PyObject *) self; } @@ -1229,13 +1157,13 @@ static PyObject * rlock_repr(PyObject *op) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t owner = - _Py_atomic_load_ullong_relaxed(&self->rlock_owner); + PyThread_ident_t owner = self->lock.thread; + size_t count = self->lock.level + 1; return PyUnicode_FromFormat( - "<%s %s object owner=%" PY_FORMAT_THREAD_IDENT_T " count=%lu at %p>", - self->rlock_count ? "locked" : "unlocked", + "<%s %s object owner=%" PY_FORMAT_THREAD_IDENT_T " count=%zu at %p>", + owner ? 
"locked" : "unlocked", Py_TYPE(self)->tp_name, owner, - self->rlock_count, self); + count, self); } @@ -1243,14 +1171,7 @@ rlock_repr(PyObject *op) static PyObject * rlock__at_fork_reinit(rlockobject *self, PyObject *Py_UNUSED(args)) { - if (_PyThread_at_fork_reinit(&self->rlock_lock) < 0) { - PyErr_SetString(ThreadError, "failed to reinitialize lock at fork"); - return NULL; - } - - self->rlock_owner = 0; - self->rlock_count = 0; - + self->lock = (_PyRecursiveMutex){0}; Py_RETURN_NONE; } #endif /* HAVE_FORK */ @@ -1281,18 +1202,12 @@ static PyMethodDef rlock_methods[] = { }; -static PyMemberDef rlock_type_members[] = { - {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(rlockobject, in_weakreflist), Py_READONLY}, - {NULL}, -}; - static PyType_Slot rlock_type_slots[] = { {Py_tp_dealloc, rlock_dealloc}, {Py_tp_repr, rlock_repr}, {Py_tp_methods, rlock_methods}, {Py_tp_alloc, PyType_GenericAlloc}, {Py_tp_new, rlock_new}, - {Py_tp_members, rlock_type_members}, {Py_tp_traverse, rlock_traverse}, {0, 0}, }; @@ -1301,7 +1216,7 @@ static PyType_Spec rlock_type_spec = { .name = "_thread.RLock", .basicsize = sizeof(rlockobject), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_IMMUTABLETYPE), + Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_MANAGED_WEAKREF), .slots = rlock_type_slots, }; diff --git a/Python/lock.c b/Python/lock.c index 57675fe1873fa2..554c51d7780322 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -377,21 +377,46 @@ _PyRecursiveMutex_Lock(_PyRecursiveMutex *m) assert(m->level == 0); } +PyLockStatus +_PyRecursiveMutex_LockTimed(_PyRecursiveMutex *m, PyTime_t timeout, _PyLockFlags flags) +{ + PyThread_ident_t thread = PyThread_get_thread_ident_ex(); + if (recursive_mutex_is_owned_by(m, thread)) { + m->level++; + return PY_LOCK_ACQUIRED; + } + PyLockStatus s = _PyMutex_LockTimed(&m->mutex, timeout, flags); + if (s == PY_LOCK_ACQUIRED) { + _Py_atomic_store_ullong_relaxed(&m->thread, thread); + assert(m->level == 0); 
+ } + return s; +} + void _PyRecursiveMutex_Unlock(_PyRecursiveMutex *m) +{ + if (_PyRecursiveMutex_TryUnlock(m) < 0) { + Py_FatalError("unlocking a recursive mutex that is not " + "owned by the current thread"); + } +} + +int +_PyRecursiveMutex_TryUnlock(_PyRecursiveMutex *m) { PyThread_ident_t thread = PyThread_get_thread_ident_ex(); if (!recursive_mutex_is_owned_by(m, thread)) { - Py_FatalError("unlocking a recursive mutex that is not owned by the" - " current thread"); + return -1; } if (m->level > 0) { m->level--; - return; + return 0; } assert(m->level == 0); _Py_atomic_store_ullong_relaxed(&m->thread, 0); PyMutex_Unlock(&m->mutex); + return 0; } #define _Py_WRITE_LOCKED 1 From 06ca33020e1168459fc6c3e0df93664daf801339 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 14 Oct 2024 14:18:57 +0100 Subject: [PATCH 088/114] GH-125323: Convert DECREF_INPUTS_AND_REUSE_FLOAT into a function that takes PyStackRefs. (GH-125439) --- Include/internal/pycore_ceval.h | 2 ++ Include/internal/pycore_opcode_metadata.h | 6 ++-- Include/internal/pycore_stackref.h | 7 +++++ Include/internal/pycore_uop_metadata.h | 6 ++-- Objects/floatobject.c | 35 +++++++++++++++++++++++ Python/bytecodes.c | 12 ++++---- Python/ceval_macros.h | 20 ------------- Python/executor_cases.c.h | 12 ++++---- Python/generated_cases.c.h | 12 ++++---- Tools/cases_generator/analyzer.py | 1 + 10 files changed, 69 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 594fbb1c8e443b..cff2b1f7114793 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -316,6 +316,8 @@ _Py_eval_breaker_bit_is_set(PyThreadState *tstate, uintptr_t bit) void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); void _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); +PyAPI_FUNC(PyObject *) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value); + #ifdef 
__cplusplus } diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 8fec45b1e8d5c3..c18423476d3962 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1015,13 +1015,13 @@ extern const struct opcode_metadata _PyOpcode_opcode_metadata[266]; #ifdef NEED_OPCODE_METADATA const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG }, + [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG }, + [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG }, + [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 0e6410466b924b..588e57f6cd97e0 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -76,6 +76,13 @@ PyStackRef_AsPyObjectBorrow(_PyStackRef stackref) #define PyStackRef_IsDeferred(ref) (((ref).bits & Py_TAG_BITS) == Py_TAG_DEFERRED) +static inline PyObject * 
+PyStackRef_NotDeferred_AsPyObject(_PyStackRef stackref) +{ + assert(!PyStackRef_IsDeferred(stackref)); + return (PyObject *)stackref.bits; +} + static inline PyObject * PyStackRef_AsPyObjectSteal(_PyStackRef stackref) { diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index fd41e9a5fe862b..2f0a7fb2f6e549 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -69,9 +69,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_BOTH_FLOAT] = HAS_EXIT_FLAG, [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG, [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG, - [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG, - [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, + [_BINARY_OP_MULTIPLY_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_GUARD_BOTH_UNICODE] = HAS_EXIT_FLAG, [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG, diff --git a/Objects/floatobject.c b/Objects/floatobject.c index a48a210adee3b9..d66863febe8c86 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -134,6 +134,41 @@ PyFloat_FromDouble(double fval) return (PyObject *) op; } +#ifdef Py_GIL_DISABLED + +PyObject *_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) +{ + PyStackRef_CLOSE(left); + PyStackRef_CLOSE(right); + return PyFloat_FromDouble(value); +} + +#else // Py_GIL_DISABLED + +PyObject *_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) +{ + PyObject *left_o = PyStackRef_AsPyObjectSteal(left); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); + if (Py_REFCNT(left_o) == 1) { + ((PyFloatObject *)left_o)->ob_fval = value; + _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); + return left_o; + } + else if 
(Py_REFCNT(right_o) == 1) { + ((PyFloatObject *)right_o)->ob_fval = value; + _Py_DECREF_NO_DEALLOC(left_o); + return right_o; + } + else { + PyObject *result = PyFloat_FromDouble(value); + _Py_DECREF_NO_DEALLOC(left_o); + _Py_DECREF_NO_DEALLOC(right_o); + return result; + } +} + +#endif // Py_GIL_DISABLED + static PyObject * float_from_string_inner(const char *s, Py_ssize_t len, void *obj) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 299608f252c546..b22916aeaa248b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -539,9 +539,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); + ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -553,9 +553,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); + ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -567,9 +567,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); + ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); } diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index e0e9cc156ed62f..6674c4ccf9f693 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -327,26 +327,6 @@ GETITEM(PyObject *v, Py_ssize_t i) { " in enclosing scope" #define NAME_ERROR_MSG "name '%.200s' is not defined" -#define DECREF_INPUTS_AND_REUSE_FLOAT(left, right, 
dval, result) \ -do { \ - if (Py_REFCNT(left) == 1) { \ - ((PyFloatObject *)left)->ob_fval = (dval); \ - _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc);\ - result = (left); \ - } \ - else if (Py_REFCNT(right) == 1) {\ - ((PyFloatObject *)right)->ob_fval = (dval); \ - _Py_DECREF_NO_DEALLOC(left); \ - result = (right); \ - }\ - else { \ - result = PyFloat_FromDouble(dval); \ - if ((result) == NULL) GOTO_ERROR(error); \ - _Py_DECREF_NO_DEALLOC(left); \ - _Py_DECREF_NO_DEALLOC(right); \ - } \ -} while (0) - // If a trace function sets a new f_lineno and // *then* raises, we use the destination when searching // for an exception handler, displaying the traceback, and so on diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5532c04e497a75..0ed361a2ee7fb0 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -643,8 +643,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; @@ -664,8 +664,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; @@ -685,8 +685,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); 
stack_pointer[-2] = res; stack_pointer += -1; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 0eeb566a0adadc..7bd1b7dd5aba27 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -84,8 +84,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; @@ -251,8 +251,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; @@ -322,8 +322,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 19fdeac65cf2df..381ad3a4e2082c 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -575,6 +575,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_PyDictValues_AddToInsertionOrder", "_PyErr_Occurred", "_PyEval_FrameClearAndPop", + "_PyFloat_FromDouble_ConsumeInputs", "_PyFrame_GetCode", "_PyFrame_IsIncomplete", "_PyFrame_PushUnchecked", From 5f4e5b598cab86d5fd5727d423c9728221889ed0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 14 Oct 2024 16:29:20 +0300 Subject: [PATCH 089/114] gh-53203: Fix strptime() 
for %c, %x and %X formats on many locales (GH-125406) Fixed most locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan. --- Lib/_strptime.py | 68 ++++++++++++------- Lib/test/test_strptime.py | 34 ++++++---- Lib/test/test_time.py | 2 +- ...4-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst | 2 + 4 files changed, 66 insertions(+), 40 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 89adc174e5ad30..5f4d2475c0169b 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -15,6 +15,7 @@ import locale import calendar from re import compile as re_compile +from re import sub as re_sub from re import IGNORECASE from re import escape as re_escape from datetime import (date as datetime_date, @@ -129,11 +130,23 @@ def __calc_date_time(self): time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) replacement_pairs = [ - ('1999', '%Y'), ('99', '%y'), ('22', '%H'), - ('44', '%M'), ('55', '%S'), ('76', '%j'), - ('17', '%d'), ('03', '%m'), ('3', '%m'), - # '3' needed for when no leading zero. - ('2', '%w'), ('10', '%I')] + ('1999', '%Y'), ('99', '%y'), ('22', '%H'), + ('44', '%M'), ('55', '%S'), ('76', '%j'), + ('17', '%d'), ('03', '%m'), ('3', '%m'), + # '3' needed for when no leading zero. 
+ ('2', '%w'), ('10', '%I'), + # Non-ASCII digits + ('\u0661\u0669\u0669\u0669', '%Y'), + ('\u0669\u0669', '%Oy'), + ('\u0662\u0662', '%OH'), + ('\u0664\u0664', '%OM'), + ('\u0665\u0665', '%OS'), + ('\u0661\u0667', '%Od'), + ('\u0660\u0663', '%Om'), + ('\u0663', '%Om'), + ('\u0662', '%Ow'), + ('\u0661\u0660', '%OI'), + ] date_time = [] for directive in ('%c', '%x', '%X'): current_format = time.strftime(directive, time_tuple).lower() @@ -158,6 +171,10 @@ def __calc_date_time(self): for tz in tz_values: if tz: current_format = current_format.replace(tz, "%Z") + # Transform all non-ASCII digits to digits in range U+0660 to U+0669. + current_format = re_sub(r'\d(?3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", @@ -296,11 +313,15 @@ def __init__(self, locale_time=None): 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), 'Z'), - '%': '%'}) - base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) - base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + '%': '%'} + for d in 'dmyHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P\d)' + mapping['W'] = mapping['U'].replace('U', 'W') + base.__init__(mapping) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) + base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) def __seqToRE(self, to_convert, directive): """Convert a list to a regex string for matching a directive. @@ -328,28 +349,25 @@ def pattern(self, format): regex syntax are escaped. """ - processed_format = '' # The sub() call escapes all characters that might be misconstrued # as regex syntax. Cannot use re.escape since we have to deal with # format directives (%m, etc.). 
- regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") - format = regex_chars.sub(r"\\\1", format) - whitespace_replacement = re_compile(r'\s+') - format = whitespace_replacement.sub(r'\\s+', format) + format = re_sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", format) + format = re_sub(r'\s+', r'\\s+', format) + format = re_sub(r"'", "['\u02bc]", format) # needed for br_FR year_in_format = False day_of_month_in_format = False - while '%' in format: - directive_index = format.index('%')+1 - format_char = format[directive_index] - processed_format = "%s%s%s" % (processed_format, - format[:directive_index-1], - self[format_char]) - format = format[directive_index+1:] + def repl(m): + format_char = m[1] match format_char: case 'Y' | 'y' | 'G': + nonlocal year_in_format year_in_format = True case 'd': + nonlocal day_of_month_in_format day_of_month_in_format = True + return self[format_char] + format = re_sub(r'%(O?.)', repl, format) if day_of_month_in_format and not year_in_format: import warnings warnings.warn("""\ @@ -360,7 +378,7 @@ def pattern(self, format): See https://github.com/python/cpython/issues/70647.""", DeprecationWarning, skip_file_prefixes=(os.path.dirname(__file__),)) - return "%s%s" % (processed_format, format) + return format def compile(self, format): """Return a compiled re object for the format string.""" @@ -434,8 +452,8 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): _regex_cache[format] = format_regex found = format_regex.match(data_string) if not found: - raise ValueError("time data %r does not match format %r :: /%s/" % - (data_string, format, format_regex.pattern)) + raise ValueError("time data %r does not match format %r" % + (data_string, format)) if len(data_string) != found.end(): raise ValueError("unconverted data remains: %s" % data_string[found.end():]) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 79f48dfe44abde..12366b053a2fc1 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ 
-292,7 +292,7 @@ def test_strptime_exception_context(self): # additional check for IndexError branch (issue #19545) with self.assertRaises(ValueError) as e: _strptime._strptime_time('19', '%Y %') - self.assertIs(e.exception.__suppress_context__, True) + self.assertIsNone(e.exception.__context__) def test_unconverteddata(self): # Check ValueError is raised when there is unconverted data @@ -485,12 +485,14 @@ def test_bad_timezone(self): # id_ID, ms_MY. # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. + # On Windows: ar_IN, ar_SA, fa_IR, ps_AF. # # BUG: Generates regexp that does not match the current date and time - # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. + # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', + 'my_MM', 'or_IN', 'shn_MM', 'az_IR') def test_date_time_locale(self): # Test %c directive loc = locale.getlocale(locale.LC_TIME)[0] @@ -512,20 +514,23 @@ def test_date_time_locale(self): self.roundtrip('%c', slice(0, 6), time.localtime(now - 366*24*3600)) # NB: Dates before 1969 do not roundtrip on some locales: - # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR. + # az_IR, bo_CN, bo_IN, dz_BT, eu_ES, eu_FR, fa_IR, or_IN. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', + 'my_MM', 'shn_MM') def test_date_time_locale2(self): # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) + self.roundtrip('%c', slice(0, 6), (1800, 1, 1, 0, 0, 0, 0, 1, 0)) # NB: Does not roundtrip because use non-Gregorian calendar: - # lo_LA, thai, th_TH. + # lo_LA, thai, th_TH. On Windows: ar_IN, ar_SA, fa_IR, ps_AF. # BUG: Generates regexp that does not match the current date - # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. 
+ # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'he_IL', 'eu_ES', 'ar_AE') + 'he_IL', 'eu_ES', 'ar_AE', + 'az_IR', 'my_MM', 'or_IN', 'shn_MM') def test_date_locale(self): # Test %x directive now = time.time() @@ -545,10 +550,11 @@ def test_date_locale(self): "musl libc issue on Emscripten, bpo-46390" ) @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'eu_ES', 'ar_AE') + 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM') def test_date_locale2(self): # Test %x directive self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) + self.roundtrip('%x', slice(0, 3), (1800, 1, 1, 0, 0, 0, 0, 1, 0)) # NB: Does not roundtrip in some locales due to the ambiguity of # the time representation (bugs in locales?): @@ -556,11 +562,11 @@ def test_date_locale2(self): # norwegian, nynorsk. # * Hours are in 12-hour notation without AM/PM indication: hy_AM, # ms_MY, sm_WS. - # BUG: Generates regexp that does not match the current time for - # aa_DJ, aa_ER, aa_ET, am_ET, az_IR, byn_ER, fa_IR, gez_ER, gez_ET, - # lzh_TW, my_MM, om_ET, om_KE, or_IN, shn_MM, sid_ET, so_DJ, so_ET, - # so_SO, ti_ER, ti_ET, tig_ER, wal_ET. - @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + # BUG: Generates regexp that does not match the current time for lzh_TW. 
+ @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'aa_ET', 'am_ET', 'az_IR', 'byn_ER', 'fa_IR', 'gez_ET', + 'my_MM', 'om_ET', 'or_IN', 'shn_MM', 'sid_ET', 'so_SO', + 'ti_ET', 'tig_ER', 'wal_ET') def test_time_locale(self): # Test %X directive now = time.time() diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 5b5779231f06ce..27c0f51acc58ab 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -298,7 +298,7 @@ def test_strptime_exception_context(self): # additional check for IndexError branch (issue #19545) with self.assertRaises(ValueError) as e: time.strptime('19', '%Y %') - self.assertIs(e.exception.__suppress_context__, True) + self.assertIsNone(e.exception.__context__) def test_strptime_leap_year(self): # GH-70647: warns if parsing a format with a day and no year. diff --git a/Misc/NEWS.d/next/Library/2024-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst b/Misc/NEWS.d/next/Library/2024-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst new file mode 100644 index 00000000000000..cdfa8c191e8242 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst @@ -0,0 +1,2 @@ +Fix :func:`time.strptime` for ``%c``, ``%x`` and ``%X`` formats in many +locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan. 
From 6a08a753b702ac63c9b6ac58dd204d1fe9662e9d Mon Sep 17 00:00:00 2001 From: Wulian Date: Mon, 14 Oct 2024 21:53:50 +0800 Subject: [PATCH 090/114] gh-124960: Fixed `barry_as_FLUFL` future flag does not work in new REPL (#124999) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nice Zombies Co-authored-by: Łukasz Langa --- Lib/_pyrepl/console.py | 10 +++++-- Lib/codeop.py | 7 +++-- Lib/test/test_pyrepl/test_interact.py | 27 ++++++++++++++++++- ...-10-05-15-49-53.gh-issue-124960.Bol9hT.rst | 1 + 4 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-05-15-49-53.gh-issue-124960.Bol9hT.rst diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index 3e72a56807f6fb..03266c4dfc2dd8 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -174,7 +174,13 @@ def _excepthook(self, typ, value, tb): def runsource(self, source, filename="", symbol="single"): try: - tree = ast.parse(source) + tree = self.compile.compiler( + source, + filename, + "exec", + ast.PyCF_ONLY_AST, + incomplete_input=False, + ) except (SyntaxError, OverflowError, ValueError): self.showsyntaxerror(filename, source=source) return False @@ -185,7 +191,7 @@ def runsource(self, source, filename="", symbol="single"): the_symbol = symbol if stmt is last_stmt else "exec" item = wrapper([stmt]) try: - code = self.compile.compiler(item, filename, the_symbol, dont_inherit=True) + code = self.compile.compiler(item, filename, the_symbol) except SyntaxError as e: if e.args[0] == "'await' outside function": python = os.path.basename(sys.executable) diff --git a/Lib/codeop.py b/Lib/codeop.py index a0276b52d484e3..adf000ba29f88c 100644 --- a/Lib/codeop.py +++ b/Lib/codeop.py @@ -44,6 +44,7 @@ # Caveat emptor: These flags are undocumented on purpose and depending # on their effect outside the standard library is **unsupported**. 
PyCF_DONT_IMPLY_DEDENT = 0x200 +PyCF_ONLY_AST = 0x400 PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000 def _maybe_compile(compiler, source, filename, symbol): @@ -109,12 +110,14 @@ class Compile: def __init__(self): self.flags = PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT - def __call__(self, source, filename, symbol, **kwargs): - flags = self.flags + def __call__(self, source, filename, symbol, flags=0, **kwargs): + flags |= self.flags if kwargs.get('incomplete_input', True) is False: flags &= ~PyCF_DONT_IMPLY_DEDENT flags &= ~PyCF_ALLOW_INCOMPLETE_INPUT codeob = compile(source, filename, symbol, flags, True) + if flags & PyCF_ONLY_AST: + return codeob # this is an ast.Module in this case for feature in _features: if codeob.co_flags & feature.compiler_flag: self.flags |= feature.compiler_flag diff --git a/Lib/test/test_pyrepl/test_interact.py b/Lib/test/test_pyrepl/test_interact.py index b7adaffbac0e22..0c6df4e5dae869 100644 --- a/Lib/test/test_pyrepl/test_interact.py +++ b/Lib/test/test_pyrepl/test_interact.py @@ -119,13 +119,38 @@ def test_runsource_shows_syntax_error_for_failed_compilation(self): def test_no_active_future(self): console = InteractiveColoredConsole() - source = "x: int = 1; print(__annotate__(1))" + source = dedent("""\ + x: int = 1 + print(__annotate__(1)) + """) f = io.StringIO() with contextlib.redirect_stdout(f): result = console.runsource(source) self.assertFalse(result) self.assertEqual(f.getvalue(), "{'x': }\n") + def test_future_annotations(self): + console = InteractiveColoredConsole() + source = dedent("""\ + from __future__ import annotations + def g(x: int): ... 
+ print(g.__annotations__) + """) + f = io.StringIO() + with contextlib.redirect_stdout(f): + result = console.runsource(source) + self.assertFalse(result) + self.assertEqual(f.getvalue(), "{'x': 'int'}\n") + + def test_future_barry_as_flufl(self): + console = InteractiveColoredConsole() + f = io.StringIO() + with contextlib.redirect_stdout(f): + result = console.runsource("from __future__ import barry_as_FLUFL\n") + result = console.runsource("""print("black" <> 'blue')\n""") + self.assertFalse(result) + self.assertEqual(f.getvalue(), "True\n") + class TestMoreLines(unittest.TestCase): def test_invalid_syntax_single_line(self): diff --git a/Misc/NEWS.d/next/Library/2024-10-05-15-49-53.gh-issue-124960.Bol9hT.rst b/Misc/NEWS.d/next/Library/2024-10-05-15-49-53.gh-issue-124960.Bol9hT.rst new file mode 100644 index 00000000000000..332d6bb54d80c7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-05-15-49-53.gh-issue-124960.Bol9hT.rst @@ -0,0 +1 @@ +Fix support for the ``barry_as_FLUFL`` future flag in the new REPL. 
From c77121e9f19702b1ab280299394e38e8f15c0fd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:23:05 +0200 Subject: [PATCH 091/114] gh-111178: fix USAN failures for `partialobject` (#124733) --- Modules/_functoolsmodule.c | 59 ++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index 4ab3adc0fe44cc..802b1cf792c555 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -144,10 +144,13 @@ typedef struct { vectorcallfunc vectorcall; } partialobject; +// cast a PyObject pointer PTR to a partialobject pointer (no type checks) +#define _PyPartialObject_CAST(PTR) ((partialobject *)(PTR)) + static void partial_setvectorcall(partialobject *pto); static struct PyModuleDef _functools_module; static PyObject * -partial_call(partialobject *pto, PyObject *args, PyObject *kwargs); +partial_call(PyObject *pto, PyObject *args, PyObject *kwargs); static inline _functools_state * get_functools_state_by_type(PyTypeObject *type) @@ -307,8 +310,9 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw) } static int -partial_clear(partialobject *pto) +partial_clear(PyObject *self) { + partialobject *pto = _PyPartialObject_CAST(self); Py_CLEAR(pto->fn); Py_CLEAR(pto->args); Py_CLEAR(pto->kw); @@ -317,8 +321,9 @@ partial_clear(partialobject *pto) } static int -partial_traverse(partialobject *pto, visitproc visit, void *arg) +partial_traverse(PyObject *self, visitproc visit, void *arg) { + partialobject *pto = _PyPartialObject_CAST(self); Py_VISIT(Py_TYPE(pto)); Py_VISIT(pto->fn); Py_VISIT(pto->args); @@ -328,16 +333,16 @@ partial_traverse(partialobject *pto, visitproc visit, void *arg) } static void -partial_dealloc(partialobject *pto) +partial_dealloc(PyObject *self) { - PyTypeObject *tp = Py_TYPE(pto); + PyTypeObject *tp = Py_TYPE(self); /* bpo-31095: UnTrack is 
needed before calling any callbacks */ - PyObject_GC_UnTrack(pto); - if (pto->weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *) pto); + PyObject_GC_UnTrack(self); + if (_PyPartialObject_CAST(self)->weakreflist != NULL) { + PyObject_ClearWeakRefs(self); } - (void)partial_clear(pto); - tp->tp_free(pto); + (void)partial_clear(self); + tp->tp_free(self); Py_DECREF(tp); } @@ -360,14 +365,14 @@ partial_vectorcall_fallback(PyThreadState *tstate, partialobject *pto, { pto->vectorcall = NULL; Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); - return _PyObject_MakeTpCall(tstate, (PyObject *)pto, - args, nargs, kwnames); + return _PyObject_MakeTpCall(tstate, (PyObject *)pto, args, nargs, kwnames); } static PyObject * -partial_vectorcall(partialobject *pto, PyObject *const *args, +partial_vectorcall(PyObject *self, PyObject *const *args, size_t nargsf, PyObject *kwnames) { + partialobject *pto = _PyPartialObject_CAST(self);; PyThreadState *tstate = _PyThreadState_GET(); Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); @@ -468,15 +473,16 @@ partial_setvectorcall(partialobject *pto) * but that is unlikely (why use partial without arguments?), * so we don't optimize that */ else { - pto->vectorcall = (vectorcallfunc)partial_vectorcall; + pto->vectorcall = partial_vectorcall; } } // Not converted to argument clinic, because of `*args, **kwargs` arguments. 
static PyObject * -partial_call(partialobject *pto, PyObject *args, PyObject *kwargs) +partial_call(PyObject *self, PyObject *args, PyObject *kwargs) { + partialobject *pto = _PyPartialObject_CAST(self); assert(PyCallable_Check(pto->fn)); assert(PyTuple_Check(pto->args)); assert(PyDict_Check(pto->kw)); @@ -587,8 +593,9 @@ static PyGetSetDef partial_getsetlist[] = { }; static PyObject * -partial_repr(partialobject *pto) +partial_repr(PyObject *self) { + partialobject *pto = _PyPartialObject_CAST(self); PyObject *result = NULL; PyObject *arglist; PyObject *mod; @@ -597,7 +604,7 @@ partial_repr(partialobject *pto) PyObject *key, *value; int status; - status = Py_ReprEnter((PyObject *)pto); + status = Py_ReprEnter(self); if (status != 0) { if (status < 0) return NULL; @@ -608,7 +615,7 @@ partial_repr(partialobject *pto) if (arglist == NULL) goto done; /* Pack positional arguments */ - assert (PyTuple_Check(pto->args)); + assert(PyTuple_Check(pto->args)); n = PyTuple_GET_SIZE(pto->args); for (i = 0; i < n; i++) { Py_SETREF(arglist, PyUnicode_FromFormat("%U, %R", arglist, @@ -643,11 +650,11 @@ partial_repr(partialobject *pto) Py_DECREF(arglist); done: - Py_ReprLeave((PyObject *)pto); + Py_ReprLeave(self); return result; error: Py_DECREF(arglist); - Py_ReprLeave((PyObject *)pto); + Py_ReprLeave(self); return NULL; } @@ -659,16 +666,18 @@ partial_repr(partialobject *pto) */ static PyObject * -partial_reduce(partialobject *pto, PyObject *unused) +partial_reduce(PyObject *self, PyObject *Py_UNUSED(args)) { + partialobject *pto = _PyPartialObject_CAST(self); return Py_BuildValue("O(O)(OOOO)", Py_TYPE(pto), pto->fn, pto->fn, pto->args, pto->kw, pto->dict ? 
pto->dict : Py_None); } static PyObject * -partial_setstate(partialobject *pto, PyObject *state) +partial_setstate(PyObject *self, PyObject *state) { + partialobject *pto = _PyPartialObject_CAST(self); PyObject *fn, *fnargs, *kw, *dict; if (!PyTuple_Check(state)) { @@ -730,8 +739,8 @@ partial_setstate(partialobject *pto, PyObject *state) } static PyMethodDef partial_methods[] = { - {"__reduce__", (PyCFunction)partial_reduce, METH_NOARGS}, - {"__setstate__", (PyCFunction)partial_setstate, METH_O}, + {"__reduce__", partial_reduce, METH_NOARGS}, + {"__setstate__", partial_setstate, METH_O}, {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL} /* sentinel */ @@ -749,7 +758,7 @@ static PyType_Slot partial_type_slots[] = { {Py_tp_methods, partial_methods}, {Py_tp_members, partial_memberlist}, {Py_tp_getset, partial_getsetlist}, - {Py_tp_descr_get, (descrgetfunc)partial_descr_get}, + {Py_tp_descr_get, partial_descr_get}, {Py_tp_new, partial_new}, {Py_tp_free, PyObject_GC_Del}, {0, 0} From 45df264f3ffbc0893cbfd257131d3abe21043786 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Mon, 14 Oct 2024 17:53:08 +0300 Subject: [PATCH 092/114] gh-112088: aclocal version is updated to 1.16.5 in docs (#125457) --- Doc/using/configure.rst | 4 ++-- Doc/whatsnew/3.13.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 4976418ba33cf8..10cdf2376229ff 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -29,7 +29,7 @@ Features and minimum versions required to build CPython: * Tcl/Tk 8.5.12 for the :mod:`tkinter` module. -* Autoconf 2.71 and aclocal 1.16.4 are required to regenerate the +* Autoconf 2.71 and aclocal 1.16.5 are required to regenerate the :file:`configure` script. .. versionchanged:: 3.1 @@ -56,7 +56,7 @@ Features and minimum versions required to build CPython: Tcl/Tk version 8.5.12 is now required for the :mod:`tkinter` module. .. 
versionchanged:: 3.13 - Autoconf 2.71, aclocal 1.16.4 and SQLite 3.15.2 are now required. + Autoconf 2.71, aclocal 1.16.5 and SQLite 3.15.2 are now required. See also :pep:`7` "Style Guide for C Code" and :pep:`11` "CPython platform support". diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index a2897097aaba57..f9e74a9b8ff9c6 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -2495,9 +2495,9 @@ Build Changes * Building CPython now requires a compiler with support for the C11 atomic library, GCC built-in atomic functions, or MSVC interlocked intrinsics. -* Autoconf 2.71 and aclocal 1.16.4 are now required to regenerate +* Autoconf 2.71 and aclocal 1.16.5 are now required to regenerate the :file:`configure` script. - (Contributed by Christian Heimes in :gh:`89886`.) + (Contributed by Christian Heimes in :gh:`89886` and by Victor Stinner in :gh:`112090`.) * SQLite 3.15.2 or newer is required to build the :mod:`sqlite3` extension module. From d5dbbf4372cd3dbf3eead1cc70ddc4261c061fd9 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Mon, 14 Oct 2024 16:19:56 +0100 Subject: [PATCH 093/114] gh-124958: fix asyncio.TaskGroup and _PyFuture refcycles (#124959) --- Lib/asyncio/futures.py | 6 +- Lib/asyncio/taskgroups.py | 41 +++++++-- Lib/test/test_asyncio/test_futures.py | 22 +++++ Lib/test/test_asyncio/test_taskgroups.py | 92 ++++++++++++++++++- ...-10-04-08-46-00.gh-issue-124958.rea9-x.rst | 1 + 5 files changed, 147 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index 5f6fa2348726cf..c95fce035cd548 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -190,8 +190,7 @@ def result(self): the future is done and has an exception set, this exception is raised. 
""" if self._state == _CANCELLED: - exc = self._make_cancelled_error() - raise exc + raise self._make_cancelled_error() if self._state != _FINISHED: raise exceptions.InvalidStateError('Result is not ready.') self.__log_traceback = False @@ -208,8 +207,7 @@ def exception(self): InvalidStateError. """ if self._state == _CANCELLED: - exc = self._make_cancelled_error() - raise exc + raise self._make_cancelled_error() if self._state != _FINISHED: raise exceptions.InvalidStateError('Exception is not set.') self.__log_traceback = False diff --git a/Lib/asyncio/taskgroups.py b/Lib/asyncio/taskgroups.py index f2ee9648c43876..9fa772ca9d02cc 100644 --- a/Lib/asyncio/taskgroups.py +++ b/Lib/asyncio/taskgroups.py @@ -66,6 +66,20 @@ async def __aenter__(self): return self async def __aexit__(self, et, exc, tb): + tb = None + try: + return await self._aexit(et, exc) + finally: + # Exceptions are heavy objects that can have object + # cycles (bad for GC); let's not keep a reference to + # a bunch of them. It would be nicer to use a try/finally + # in __aexit__ directly but that introduced some diff noise + self._parent_task = None + self._errors = None + self._base_error = None + exc = None + + async def _aexit(self, et, exc): self._exiting = True if (exc is not None and @@ -122,7 +136,10 @@ async def __aexit__(self, et, exc, tb): assert not self._tasks if self._base_error is not None: - raise self._base_error + try: + raise self._base_error + finally: + exc = None if self._parent_cancel_requested: # If this flag is set we *must* call uncancel(). @@ -133,8 +150,14 @@ async def __aexit__(self, et, exc, tb): # Propagate CancelledError if there is one, except if there # are other errors -- those have priority. 
- if propagate_cancellation_error is not None and not self._errors: - raise propagate_cancellation_error + try: + if propagate_cancellation_error is not None and not self._errors: + try: + raise propagate_cancellation_error + finally: + exc = None + finally: + propagate_cancellation_error = None if et is not None and not issubclass(et, exceptions.CancelledError): self._errors.append(exc) @@ -146,14 +169,14 @@ async def __aexit__(self, et, exc, tb): if self._parent_task.cancelling(): self._parent_task.uncancel() self._parent_task.cancel() - # Exceptions are heavy objects that can have object - # cycles (bad for GC); let's not keep a reference to - # a bunch of them. try: - me = BaseExceptionGroup('unhandled errors in a TaskGroup', self._errors) - raise me from None + raise BaseExceptionGroup( + 'unhandled errors in a TaskGroup', + self._errors, + ) from None finally: - self._errors = None + exc = None + def create_task(self, coro, *, name=None, context=None): """Create a new task in this group and return it. 
diff --git a/Lib/test/test_asyncio/test_futures.py b/Lib/test/test_asyncio/test_futures.py index 458b70451a306a..c566b28adb2408 100644 --- a/Lib/test/test_asyncio/test_futures.py +++ b/Lib/test/test_asyncio/test_futures.py @@ -659,6 +659,28 @@ def __del__(self): fut = self._new_future(loop=self.loop) fut.set_result(Evil()) + def test_future_cancelled_result_refcycles(self): + f = self._new_future(loop=self.loop) + f.cancel() + exc = None + try: + f.result() + except asyncio.CancelledError as e: + exc = e + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + + def test_future_cancelled_exception_refcycles(self): + f = self._new_future(loop=self.loop) + f.cancel() + exc = None + try: + f.exception() + except asyncio.CancelledError as e: + exc = e + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + @unittest.skipUnless(hasattr(futures, '_CFuture'), 'requires the C _asyncio module') diff --git a/Lib/test/test_asyncio/test_taskgroups.py b/Lib/test/test_asyncio/test_taskgroups.py index 4852536defc93d..138f59ebf57ef7 100644 --- a/Lib/test/test_asyncio/test_taskgroups.py +++ b/Lib/test/test_asyncio/test_taskgroups.py @@ -1,7 +1,7 @@ # Adapted with permission from the EdgeDB project; # license: PSFL. 
- +import gc import asyncio import contextvars import contextlib @@ -11,7 +11,6 @@ from test.test_asyncio.utils import await_without_task - # To prevent a warning "test altered the execution environment" def tearDownModule(): asyncio.set_event_loop_policy(None) @@ -899,6 +898,95 @@ async def outer(): await outer() + async def test_exception_refcycles_direct(self): + """Test that TaskGroup doesn't keep a reference to the raised ExceptionGroup""" + tg = asyncio.TaskGroup() + exc = None + + class _Done(Exception): + pass + + try: + async with tg: + raise _Done + except ExceptionGroup as e: + exc = e + + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + + + async def test_exception_refcycles_errors(self): + """Test that TaskGroup deletes self._errors, and __aexit__ args""" + tg = asyncio.TaskGroup() + exc = None + + class _Done(Exception): + pass + + try: + async with tg: + raise _Done + except* _Done as excs: + exc = excs.exceptions[0] + + self.assertIsInstance(exc, _Done) + self.assertListEqual(gc.get_referrers(exc), []) + + + async def test_exception_refcycles_parent_task(self): + """Test that TaskGroup deletes self._parent_task""" + tg = asyncio.TaskGroup() + exc = None + + class _Done(Exception): + pass + + async def coro_fn(): + async with tg: + raise _Done + + try: + async with asyncio.TaskGroup() as tg2: + tg2.create_task(coro_fn()) + except* _Done as excs: + exc = excs.exceptions[0].exceptions[0] + + self.assertIsInstance(exc, _Done) + self.assertListEqual(gc.get_referrers(exc), []) + + async def test_exception_refcycles_propagate_cancellation_error(self): + """Test that TaskGroup deletes propagate_cancellation_error""" + tg = asyncio.TaskGroup() + exc = None + + try: + async with asyncio.timeout(-1): + async with tg: + await asyncio.sleep(0) + except TimeoutError as e: + exc = e.__cause__ + + self.assertIsInstance(exc, asyncio.CancelledError) + self.assertListEqual(gc.get_referrers(exc), []) + + async def 
test_exception_refcycles_base_error(self): + """Test that TaskGroup deletes self._base_error""" + class MyKeyboardInterrupt(KeyboardInterrupt): + pass + + tg = asyncio.TaskGroup() + exc = None + + try: + async with tg: + raise MyKeyboardInterrupt + except MyKeyboardInterrupt as e: + exc = e + + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst b/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst new file mode 100644 index 00000000000000..534d5bb8c898da --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst @@ -0,0 +1 @@ +Fix refcycles in exceptions raised from :class:`asyncio.TaskGroup` and the python implementation of :class:`asyncio.Future` From 5dac0dceda9097d46a0b5a6ad7c927e002c6c7a5 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 14 Oct 2024 08:26:57 -0700 Subject: [PATCH 094/114] gh-125461: Remove Python 2 from identifiers in doc (GH-125462) Remove Python 2 from identifiers in doc --- Doc/reference/lexical_analysis.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index ae5408ee386bbd..f7167032ad7df9 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -284,11 +284,10 @@ UAX-31, with elaboration and changes as defined below; see also :pep:`3131` for further details. Within the ASCII range (U+0001..U+007F), the valid characters for identifiers -are the same as in Python 2.x: the uppercase and lowercase letters ``A`` through +include the uppercase and lowercase letters ``A`` through ``Z``, the underscore ``_`` and, except for the first character, the digits ``0`` through ``9``. 
- -Python 3.0 introduces additional characters from outside the ASCII range (see +Python 3.0 introduced additional characters from outside the ASCII range (see :pep:`3131`). For these characters, the classification uses the version of the Unicode Character Database as included in the :mod:`unicodedata` module. From 187580d95c8339a3b6e2b012f98d86101c346cfa Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Mon, 14 Oct 2024 20:24:54 +0300 Subject: [PATCH 095/114] gh-119786: [doc] broken link and typo fix in interpreter_definition.md (#125455) --- InternalDocs/README.md | 2 ++ Tools/cases_generator/interpreter_definition.md | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 805e2f97937e1e..0a6ecf899458ed 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -11,6 +11,8 @@ The core dev team attempts to keep this documentation up to date. If it is not, please report that through the [issue tracker](https://github.com/python/cpython/issues). +Index: +----- [Guide to the parser](parser.md) diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index ba09931c541646..6cf36f343d5fa7 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -74,7 +74,7 @@ We update it as the need arises. ### Syntax Each op definition has a kind, a name, a stack and instruction stream effect, -and a piece of C code describing its semantics:: +and a piece of C code describing its semantics: ``` file: @@ -245,7 +245,8 @@ The same is true for all members of a pseudo instruction ## Examples -(Another source of examples can be found in the [tests](test_generator.py).) +(Another source of examples can be found in the +[tests](https://github.com/python/cpython/blob/main/Lib/test/test_generated_cases.py).) 
Some examples: From e99650b80ace3893c2a80b3f2a4aca99cb305191 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 14 Oct 2024 20:59:13 +0300 Subject: [PATCH 096/114] =?UTF-8?q?gh-125472:=20Revert=20"gh-124958:=20fix?= =?UTF-8?q?=20asyncio.TaskGroup=20and=20=5FPyFuture=20refcycles=20(#12?= =?UTF-8?q?=E2=80=A6=20(#125476)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "gh-124958: fix asyncio.TaskGroup and _PyFuture refcycles (#124959)" This reverts commit d5dbbf4372cd3dbf3eead1cc70ddc4261c061fd9. --- Lib/asyncio/futures.py | 6 +- Lib/asyncio/taskgroups.py | 41 ++------- Lib/test/test_asyncio/test_futures.py | 22 ----- Lib/test/test_asyncio/test_taskgroups.py | 92 +------------------ ...-10-04-08-46-00.gh-issue-124958.rea9-x.rst | 1 - 5 files changed, 15 insertions(+), 147 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index c95fce035cd548..5f6fa2348726cf 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -190,7 +190,8 @@ def result(self): the future is done and has an exception set, this exception is raised. """ if self._state == _CANCELLED: - raise self._make_cancelled_error() + exc = self._make_cancelled_error() + raise exc if self._state != _FINISHED: raise exceptions.InvalidStateError('Result is not ready.') self.__log_traceback = False @@ -207,7 +208,8 @@ def exception(self): InvalidStateError. 
""" if self._state == _CANCELLED: - raise self._make_cancelled_error() + exc = self._make_cancelled_error() + raise exc if self._state != _FINISHED: raise exceptions.InvalidStateError('Exception is not set.') self.__log_traceback = False diff --git a/Lib/asyncio/taskgroups.py b/Lib/asyncio/taskgroups.py index 9fa772ca9d02cc..f2ee9648c43876 100644 --- a/Lib/asyncio/taskgroups.py +++ b/Lib/asyncio/taskgroups.py @@ -66,20 +66,6 @@ async def __aenter__(self): return self async def __aexit__(self, et, exc, tb): - tb = None - try: - return await self._aexit(et, exc) - finally: - # Exceptions are heavy objects that can have object - # cycles (bad for GC); let's not keep a reference to - # a bunch of them. It would be nicer to use a try/finally - # in __aexit__ directly but that introduced some diff noise - self._parent_task = None - self._errors = None - self._base_error = None - exc = None - - async def _aexit(self, et, exc): self._exiting = True if (exc is not None and @@ -136,10 +122,7 @@ async def _aexit(self, et, exc): assert not self._tasks if self._base_error is not None: - try: - raise self._base_error - finally: - exc = None + raise self._base_error if self._parent_cancel_requested: # If this flag is set we *must* call uncancel(). @@ -150,14 +133,8 @@ async def _aexit(self, et, exc): # Propagate CancelledError if there is one, except if there # are other errors -- those have priority. 
- try: - if propagate_cancellation_error is not None and not self._errors: - try: - raise propagate_cancellation_error - finally: - exc = None - finally: - propagate_cancellation_error = None + if propagate_cancellation_error is not None and not self._errors: + raise propagate_cancellation_error if et is not None and not issubclass(et, exceptions.CancelledError): self._errors.append(exc) @@ -169,14 +146,14 @@ async def _aexit(self, et, exc): if self._parent_task.cancelling(): self._parent_task.uncancel() self._parent_task.cancel() + # Exceptions are heavy objects that can have object + # cycles (bad for GC); let's not keep a reference to + # a bunch of them. try: - raise BaseExceptionGroup( - 'unhandled errors in a TaskGroup', - self._errors, - ) from None + me = BaseExceptionGroup('unhandled errors in a TaskGroup', self._errors) + raise me from None finally: - exc = None - + self._errors = None def create_task(self, coro, *, name=None, context=None): """Create a new task in this group and return it. 
diff --git a/Lib/test/test_asyncio/test_futures.py b/Lib/test/test_asyncio/test_futures.py index c566b28adb2408..458b70451a306a 100644 --- a/Lib/test/test_asyncio/test_futures.py +++ b/Lib/test/test_asyncio/test_futures.py @@ -659,28 +659,6 @@ def __del__(self): fut = self._new_future(loop=self.loop) fut.set_result(Evil()) - def test_future_cancelled_result_refcycles(self): - f = self._new_future(loop=self.loop) - f.cancel() - exc = None - try: - f.result() - except asyncio.CancelledError as e: - exc = e - self.assertIsNotNone(exc) - self.assertListEqual(gc.get_referrers(exc), []) - - def test_future_cancelled_exception_refcycles(self): - f = self._new_future(loop=self.loop) - f.cancel() - exc = None - try: - f.exception() - except asyncio.CancelledError as e: - exc = e - self.assertIsNotNone(exc) - self.assertListEqual(gc.get_referrers(exc), []) - @unittest.skipUnless(hasattr(futures, '_CFuture'), 'requires the C _asyncio module') diff --git a/Lib/test/test_asyncio/test_taskgroups.py b/Lib/test/test_asyncio/test_taskgroups.py index 138f59ebf57ef7..4852536defc93d 100644 --- a/Lib/test/test_asyncio/test_taskgroups.py +++ b/Lib/test/test_asyncio/test_taskgroups.py @@ -1,7 +1,7 @@ # Adapted with permission from the EdgeDB project; # license: PSFL. 
-import gc + import asyncio import contextvars import contextlib @@ -11,6 +11,7 @@ from test.test_asyncio.utils import await_without_task + # To prevent a warning "test altered the execution environment" def tearDownModule(): asyncio.set_event_loop_policy(None) @@ -898,95 +899,6 @@ async def outer(): await outer() - async def test_exception_refcycles_direct(self): - """Test that TaskGroup doesn't keep a reference to the raised ExceptionGroup""" - tg = asyncio.TaskGroup() - exc = None - - class _Done(Exception): - pass - - try: - async with tg: - raise _Done - except ExceptionGroup as e: - exc = e - - self.assertIsNotNone(exc) - self.assertListEqual(gc.get_referrers(exc), []) - - - async def test_exception_refcycles_errors(self): - """Test that TaskGroup deletes self._errors, and __aexit__ args""" - tg = asyncio.TaskGroup() - exc = None - - class _Done(Exception): - pass - - try: - async with tg: - raise _Done - except* _Done as excs: - exc = excs.exceptions[0] - - self.assertIsInstance(exc, _Done) - self.assertListEqual(gc.get_referrers(exc), []) - - - async def test_exception_refcycles_parent_task(self): - """Test that TaskGroup deletes self._parent_task""" - tg = asyncio.TaskGroup() - exc = None - - class _Done(Exception): - pass - - async def coro_fn(): - async with tg: - raise _Done - - try: - async with asyncio.TaskGroup() as tg2: - tg2.create_task(coro_fn()) - except* _Done as excs: - exc = excs.exceptions[0].exceptions[0] - - self.assertIsInstance(exc, _Done) - self.assertListEqual(gc.get_referrers(exc), []) - - async def test_exception_refcycles_propagate_cancellation_error(self): - """Test that TaskGroup deletes propagate_cancellation_error""" - tg = asyncio.TaskGroup() - exc = None - - try: - async with asyncio.timeout(-1): - async with tg: - await asyncio.sleep(0) - except TimeoutError as e: - exc = e.__cause__ - - self.assertIsInstance(exc, asyncio.CancelledError) - self.assertListEqual(gc.get_referrers(exc), []) - - async def 
test_exception_refcycles_base_error(self): - """Test that TaskGroup deletes self._base_error""" - class MyKeyboardInterrupt(KeyboardInterrupt): - pass - - tg = asyncio.TaskGroup() - exc = None - - try: - async with tg: - raise MyKeyboardInterrupt - except MyKeyboardInterrupt as e: - exc = e - - self.assertIsNotNone(exc) - self.assertListEqual(gc.get_referrers(exc), []) - if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst b/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst deleted file mode 100644 index 534d5bb8c898da..00000000000000 --- a/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst +++ /dev/null @@ -1 +0,0 @@ -Fix refcycles in exceptions raised from :class:`asyncio.TaskGroup` and the python implementation of :class:`asyncio.Future` From 843d28f59d2616d052d9d45f31823976da07f0f3 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Mon, 14 Oct 2024 15:28:41 -0400 Subject: [PATCH 097/114] gh-124872: Replace enter/exit events with "switched" (#124776) Users want to know when the current context switches to a different context object. Right now this happens when and only when a context is entered or exited, so the enter and exit events are synonymous with "switched". However, if the changes proposed for gh-99633 are implemented, the current context will also switch for reasons other than context enter or exit. Since users actually care about context switches and not enter or exit, replace the enter and exit events with a single switched event. The former exit event was emitted just before exiting the context. The new switched event is emitted after the context is exited to match the semantics users expect of an event with a past-tense name. If users need the ability to clean up before the switch takes effect, another event type can be added in the future. It is not added here because YAGNI. I skipped 0 in the enum as a matter of practice. 
Skipping 0 makes it easier to troubleshoot when code forgets to set zeroed memory, and it aligns with best practices for other tools (e.g., https://protobuf.dev/programming-guides/dos-donts/#unspecified-enum). --- Doc/c-api/contextvars.rst | 14 ++--- Include/cpython/context.h | 17 ++---- Lib/test/test_capi/test_watchers.py | 89 ++++++++++++++-------------- Modules/_testcapi/watchers.c | 79 ++++++++++++------------ Python/context.c | 31 ++++++---- Tools/c-analyzer/cpython/ignored.tsv | 4 +- 6 files changed, 117 insertions(+), 117 deletions(-) diff --git a/Doc/c-api/contextvars.rst b/Doc/c-api/contextvars.rst index 8eba54a80dc80d..b7c6550ff34aac 100644 --- a/Doc/c-api/contextvars.rst +++ b/Doc/c-api/contextvars.rst @@ -123,16 +123,10 @@ Context object management functions: Enumeration of possible context object watcher events: - - ``Py_CONTEXT_EVENT_ENTER``: A context has been entered, causing the - :term:`current context` to switch to it. The object passed to the watch - callback is the now-current :class:`contextvars.Context` object. Each - enter event will eventually have a corresponding exit event for the same - context object after any subsequently entered contexts have themselves been - exited. - - ``Py_CONTEXT_EVENT_EXIT``: A context is about to be exited, which will - cause the :term:`current context` to switch back to what it was before the - context was entered. The object passed to the watch callback is the - still-current :class:`contextvars.Context` object. + - ``Py_CONTEXT_SWITCHED``: The :term:`current context` has switched to a + different context. The object passed to the watch callback is the + now-current :class:`contextvars.Context` object, or None if no context is + current. .. 
versionadded:: 3.14 diff --git a/Include/cpython/context.h b/Include/cpython/context.h index 3c9be7873b9399..3a7a4b459c09ad 100644 --- a/Include/cpython/context.h +++ b/Include/cpython/context.h @@ -29,20 +29,11 @@ PyAPI_FUNC(int) PyContext_Exit(PyObject *); typedef enum { /* - * A context has been entered, causing the "current context" to switch to - * it. The object passed to the watch callback is the now-current - * contextvars.Context object. Each enter event will eventually have a - * corresponding exit event for the same context object after any - * subsequently entered contexts have themselves been exited. + * The current context has switched to a different context. The object + * passed to the watch callback is the now-current contextvars.Context + * object, or None if no context is current. */ - Py_CONTEXT_EVENT_ENTER, - /* - * A context is about to be exited, which will cause the "current context" - * to switch back to what it was before the context was entered. The - * object passed to the watch callback is the still-current - * contextvars.Context object. 
- */ - Py_CONTEXT_EVENT_EXIT, + Py_CONTEXT_SWITCHED = 1, } PyContextEvent; /* diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index f21d2627c6094b..4680d6765de122 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -577,68 +577,62 @@ class TestContextObjectWatchers(unittest.TestCase): def context_watcher(self, which_watcher): wid = _testcapi.add_context_watcher(which_watcher) try: - yield wid + switches = _testcapi.get_context_switches(which_watcher) + except ValueError: + switches = None + try: + yield switches finally: _testcapi.clear_context_watcher(wid) - def assert_event_counts(self, exp_enter_0, exp_exit_0, - exp_enter_1, exp_exit_1): - self.assertEqual( - exp_enter_0, _testcapi.get_context_watcher_num_enter_events(0)) - self.assertEqual( - exp_exit_0, _testcapi.get_context_watcher_num_exit_events(0)) - self.assertEqual( - exp_enter_1, _testcapi.get_context_watcher_num_enter_events(1)) - self.assertEqual( - exp_exit_1, _testcapi.get_context_watcher_num_exit_events(1)) + def assert_event_counts(self, want_0, want_1): + self.assertEqual(len(_testcapi.get_context_switches(0)), want_0) + self.assertEqual(len(_testcapi.get_context_switches(1)), want_1) def test_context_object_events_dispatched(self): # verify that all counts are zero before any watchers are registered - self.assert_event_counts(0, 0, 0, 0) + self.assert_event_counts(0, 0) # verify that all counts remain zero when a context object is # entered and exited with no watchers registered ctx = contextvars.copy_context() - ctx.run(self.assert_event_counts, 0, 0, 0, 0) - self.assert_event_counts(0, 0, 0, 0) + ctx.run(self.assert_event_counts, 0, 0) + self.assert_event_counts(0, 0) # verify counts are as expected when first watcher is registered with self.context_watcher(0): - self.assert_event_counts(0, 0, 0, 0) - ctx.run(self.assert_event_counts, 1, 0, 0, 0) - self.assert_event_counts(1, 1, 0, 0) + self.assert_event_counts(0, 
0) + ctx.run(self.assert_event_counts, 1, 0) + self.assert_event_counts(2, 0) # again with second watcher registered with self.context_watcher(1): - self.assert_event_counts(1, 1, 0, 0) - ctx.run(self.assert_event_counts, 2, 1, 1, 0) - self.assert_event_counts(2, 2, 1, 1) + self.assert_event_counts(2, 0) + ctx.run(self.assert_event_counts, 3, 1) + self.assert_event_counts(4, 2) # verify counts are reset and don't change after both watchers are cleared - ctx.run(self.assert_event_counts, 0, 0, 0, 0) - self.assert_event_counts(0, 0, 0, 0) - - def test_enter_error(self): - with self.context_watcher(2): - with catch_unraisable_exception() as cm: - ctx = contextvars.copy_context() - ctx.run(int, 0) - self.assertEqual( - cm.unraisable.err_msg, - "Exception ignored in " - f"Py_CONTEXT_EVENT_EXIT watcher callback for {ctx!r}" - ) - self.assertEqual(str(cm.unraisable.exc_value), "boom!") - - def test_exit_error(self): - ctx = contextvars.copy_context() - def _in_context(stack): - stack.enter_context(self.context_watcher(2)) - - with catch_unraisable_exception() as cm: - with ExitStack() as stack: - ctx.run(_in_context, stack) - self.assertEqual(str(cm.unraisable.exc_value), "boom!") + ctx.run(self.assert_event_counts, 0, 0) + self.assert_event_counts(0, 0) + + def test_callback_error(self): + ctx_outer = contextvars.copy_context() + ctx_inner = contextvars.copy_context() + unraisables = [] + + def _in_outer(): + with self.context_watcher(2): + with catch_unraisable_exception() as cm: + ctx_inner.run(lambda: unraisables.append(cm.unraisable)) + unraisables.append(cm.unraisable) + + ctx_outer.run(_in_outer) + self.assertEqual([x.err_msg for x in unraisables], + ["Exception ignored in Py_CONTEXT_SWITCHED " + f"watcher callback for {ctx!r}" + for ctx in [ctx_inner, ctx_outer]]) + self.assertEqual([str(x.exc_value) for x in unraisables], + ["boom!", "boom!"]) def test_clear_out_of_range_watcher_id(self): with self.assertRaisesRegex(ValueError, r"Invalid context watcher ID -1"): 
@@ -654,5 +648,12 @@ def test_allocate_too_many_watchers(self): with self.assertRaisesRegex(RuntimeError, r"no more context watcher IDs available"): _testcapi.allocate_too_many_context_watchers() + def test_exit_base_context(self): + ctx = contextvars.Context() + _testcapi.clear_context_stack() + with self.context_watcher(0) as switches: + ctx.run(lambda: None) + self.assertEqual(switches, [ctx, None]) + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testcapi/watchers.c b/Modules/_testcapi/watchers.c index b4233d07134aea..321d3aeffb6ad1 100644 --- a/Modules/_testcapi/watchers.c +++ b/Modules/_testcapi/watchers.c @@ -626,16 +626,12 @@ allocate_too_many_func_watchers(PyObject *self, PyObject *args) // Test contexct object watchers #define NUM_CONTEXT_WATCHERS 2 static int context_watcher_ids[NUM_CONTEXT_WATCHERS] = {-1, -1}; -static int num_context_object_enter_events[NUM_CONTEXT_WATCHERS] = {0, 0}; -static int num_context_object_exit_events[NUM_CONTEXT_WATCHERS] = {0, 0}; +static PyObject *context_switches[NUM_CONTEXT_WATCHERS]; static int handle_context_watcher_event(int which_watcher, PyContextEvent event, PyObject *ctx) { - if (event == Py_CONTEXT_EVENT_ENTER) { - num_context_object_enter_events[which_watcher]++; - } - else if (event == Py_CONTEXT_EVENT_EXIT) { - num_context_object_exit_events[which_watcher]++; + if (event == Py_CONTEXT_SWITCHED) { + PyList_Append(context_switches[which_watcher], ctx); } else { return -1; @@ -667,31 +663,28 @@ error_context_event_handler(PyContextEvent event, PyObject *ctx) { static PyObject * add_context_watcher(PyObject *self, PyObject *which_watcher) { - int watcher_id; + static const PyContext_WatchCallback callbacks[] = { + &first_context_watcher_callback, + &second_context_watcher_callback, + &error_context_event_handler, + }; assert(PyLong_Check(which_watcher)); long which_l = PyLong_AsLong(which_watcher); - if (which_l == 0) { - watcher_id = PyContext_AddWatcher(first_context_watcher_callback); - 
context_watcher_ids[0] = watcher_id; - num_context_object_enter_events[0] = 0; - num_context_object_exit_events[0] = 0; - } - else if (which_l == 1) { - watcher_id = PyContext_AddWatcher(second_context_watcher_callback); - context_watcher_ids[1] = watcher_id; - num_context_object_enter_events[1] = 0; - num_context_object_exit_events[1] = 0; - } - else if (which_l == 2) { - watcher_id = PyContext_AddWatcher(error_context_event_handler); - } - else { + if (which_l < 0 || which_l >= (long)Py_ARRAY_LENGTH(callbacks)) { PyErr_Format(PyExc_ValueError, "invalid watcher %d", which_l); return NULL; } + int watcher_id = PyContext_AddWatcher(callbacks[which_l]); if (watcher_id < 0) { return NULL; } + if (which_l >= 0 && which_l < NUM_CONTEXT_WATCHERS) { + context_watcher_ids[which_l] = watcher_id; + Py_XSETREF(context_switches[which_l], PyList_New(0)); + if (context_switches[which_l] == NULL) { + return NULL; + } + } return PyLong_FromLong(watcher_id); } @@ -708,8 +701,7 @@ clear_context_watcher(PyObject *self, PyObject *watcher_id) for (int i = 0; i < NUM_CONTEXT_WATCHERS; i++) { if (watcher_id_l == context_watcher_ids[i]) { context_watcher_ids[i] = -1; - num_context_object_enter_events[i] = 0; - num_context_object_exit_events[i] = 0; + Py_CLEAR(context_switches[i]); } } } @@ -717,21 +709,34 @@ clear_context_watcher(PyObject *self, PyObject *watcher_id) } static PyObject * -get_context_watcher_num_enter_events(PyObject *self, PyObject *watcher_id) +clear_context_stack(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) { - assert(PyLong_Check(watcher_id)); - long watcher_id_l = PyLong_AsLong(watcher_id); - assert(watcher_id_l >= 0 && watcher_id_l < NUM_CONTEXT_WATCHERS); - return PyLong_FromLong(num_context_object_enter_events[watcher_id_l]); + PyThreadState *tstate = PyThreadState_Get(); + if (tstate->context == NULL) { + Py_RETURN_NONE; + } + if (((PyContext *)tstate->context)->ctx_prev != NULL) { + PyErr_SetString(PyExc_RuntimeError, + "must first exit all non-base 
contexts"); + return NULL; + } + Py_CLEAR(tstate->context); + Py_RETURN_NONE; } static PyObject * -get_context_watcher_num_exit_events(PyObject *self, PyObject *watcher_id) +get_context_switches(PyObject *Py_UNUSED(self), PyObject *watcher_id) { assert(PyLong_Check(watcher_id)); long watcher_id_l = PyLong_AsLong(watcher_id); - assert(watcher_id_l >= 0 && watcher_id_l < NUM_CONTEXT_WATCHERS); - return PyLong_FromLong(num_context_object_exit_events[watcher_id_l]); + if (watcher_id_l < 0 || watcher_id_l >= NUM_CONTEXT_WATCHERS) { + PyErr_Format(PyExc_ValueError, "invalid watcher %ld", watcher_id_l); + return NULL; + } + if (context_switches[watcher_id_l] == NULL) { + return PyList_New(0); + } + return Py_NewRef(context_switches[watcher_id_l]); } static PyObject * @@ -835,10 +840,8 @@ static PyMethodDef test_methods[] = { // Code object watchers. {"add_context_watcher", add_context_watcher, METH_O, NULL}, {"clear_context_watcher", clear_context_watcher, METH_O, NULL}, - {"get_context_watcher_num_enter_events", - get_context_watcher_num_enter_events, METH_O, NULL}, - {"get_context_watcher_num_exit_events", - get_context_watcher_num_exit_events, METH_O, NULL}, + {"clear_context_stack", clear_context_stack, METH_NOARGS, NULL}, + {"get_context_switches", get_context_switches, METH_O, NULL}, {"allocate_too_many_context_watchers", (PyCFunction) allocate_too_many_context_watchers, METH_NOARGS, NULL}, {NULL}, diff --git a/Python/context.c b/Python/context.c index 8bc487a33c890b..95aa82206270f9 100644 --- a/Python/context.c +++ b/Python/context.c @@ -102,10 +102,8 @@ PyContext_CopyCurrent(void) static const char * context_event_name(PyContextEvent event) { switch (event) { - case Py_CONTEXT_EVENT_ENTER: - return "Py_CONTEXT_EVENT_ENTER"; - case Py_CONTEXT_EVENT_EXIT: - return "Py_CONTEXT_EVENT_EXIT"; + case Py_CONTEXT_SWITCHED: + return "Py_CONTEXT_SWITCHED"; default: return "?"; } @@ -115,6 +113,13 @@ context_event_name(PyContextEvent event) { static void 
notify_context_watchers(PyThreadState *ts, PyContextEvent event, PyObject *ctx) { + if (ctx == NULL) { + // This will happen after exiting the last context in the stack, which + // can occur if context_get was never called before entering a context + // (e.g., called `contextvars.Context().run()` on a fresh thread, as + // PyContext_Enter doesn't call context_get). + ctx = Py_None; + } assert(Py_REFCNT(ctx) > 0); PyInterpreterState *interp = ts->interp; assert(interp->_initialized); @@ -175,6 +180,16 @@ PyContext_ClearWatcher(int watcher_id) } +static inline void +context_switched(PyThreadState *ts) +{ + ts->context_ver++; + // ts->context is used instead of context_get() because context_get() might + // throw if ts->context is NULL. + notify_context_watchers(ts, Py_CONTEXT_SWITCHED, ts->context); +} + + static int _PyContext_Enter(PyThreadState *ts, PyObject *octx) { @@ -191,9 +206,7 @@ _PyContext_Enter(PyThreadState *ts, PyObject *octx) ctx->ctx_entered = 1; ts->context = Py_NewRef(ctx); - ts->context_ver++; - - notify_context_watchers(ts, Py_CONTEXT_EVENT_ENTER, octx); + context_switched(ts); return 0; } @@ -227,13 +240,11 @@ _PyContext_Exit(PyThreadState *ts, PyObject *octx) return -1; } - notify_context_watchers(ts, Py_CONTEXT_EVENT_EXIT, octx); Py_SETREF(ts->context, (PyObject *)ctx->ctx_prev); - ts->context_ver++; ctx->ctx_prev = NULL; ctx->ctx_entered = 0; - + context_switched(ts); return 0; } diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index e6c599a2ac4a46..2605825d3d0078 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -455,8 +455,8 @@ Modules/_testcapi/watchers.c - pyfunc_watchers - Modules/_testcapi/watchers.c - func_watcher_ids - Modules/_testcapi/watchers.c - func_watcher_callbacks - Modules/_testcapi/watchers.c - context_watcher_ids - -Modules/_testcapi/watchers.c - num_context_object_enter_events - -Modules/_testcapi/watchers.c - 
num_context_object_exit_events - +Modules/_testcapi/watchers.c - context_switches - +Modules/_testcapi/watchers.c add_context_watcher callbacks - Modules/_testcapimodule.c - BasicStaticTypes - Modules/_testcapimodule.c - num_basic_static_types_used - Modules/_testcapimodule.c - ContainerNoGC_members - From 0c8c665581ede95fe119f902b070e395614b78ed Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 14 Oct 2024 23:46:17 +0300 Subject: [PATCH 098/114] gh-125470: Fix warning in `Python/generated_cases.c.h` (#125471) Co-authored-by: Kirill Podoprigora --- Python/bytecodes.c | 2 ++ Python/executor_cases.c.h | 2 ++ Python/generated_cases.c.h | 2 ++ 3 files changed, 6 insertions(+) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b22916aeaa248b..e6525657cabc2b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -611,7 +611,9 @@ dummy_func( // specializations, but there is no output. // At the end we just skip over the STORE_FAST. op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { + #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0ed361a2ee7fb0..15a6c7bc1a7966 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -737,7 +737,9 @@ _PyStackRef left; right = stack_pointer[-1]; left = stack_pointer[-2]; + #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7bd1b7dd5aba27..a9290986c24f45 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -183,7 +183,9 @@ /* Skip 1 cache entry */ // _BINARY_OP_INPLACE_ADD_UNICODE { + #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int 
next_oparg; #if TIER_ONE From 3fea1d000ef0a74062fd3fe218ad94618b08d9f2 Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Mon, 14 Oct 2024 17:11:58 -0400 Subject: [PATCH 099/114] Fix idlelib typos (#125484) Propagate fixes in Doc/library/idle.rst to help.html. Change 'interruptable' to 'interruptible' in run.py. The latter was reported by ember91 in PR 125473. --- Lib/idlelib/help.html | 21 ++++++++++++--------- Lib/idlelib/run.py | 12 ++++++------ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/Lib/idlelib/help.html b/Lib/idlelib/help.html index 827d230b54e159..2a4adc6a4d395f 100644 --- a/Lib/idlelib/help.html +++ b/Lib/idlelib/help.html @@ -5,7 +5,7 @@ - IDLE — Python 3.13.0a2 documentation + IDLE — Python 3.14.0a0 documentation @@ -18,7 +18,7 @@ @@ -26,6 +26,7 @@ + @@ -45,6 +46,8 @@ + + @@ -184,7 +187,7 @@

Navigation

  • - 3.13.0a2 Documentation » + 3.14.0a0 Documentation »
  • @@ -554,7 +557,7 @@

    Key bindingsControl key on Windows and -Unix and the Command key on macOS. (And all such dicussions +Unix and the Command key on macOS. (And all such discussions assume that the keys have not been re-bound to something else.)

    @@ -694,7 +697,7 @@

    Shell window -
  • C-c attemps to interrupt statement execution (but may fail).

  • +
  • C-c attempts to interrupt statement execution (but may fail).

  • C-d closes Shell if typed at a >>> prompt.

  • Alt-p and Alt-n (C-p and C-n on macOS) retrieve to the current prompt the previous or next previously @@ -1136,7 +1139,7 @@

    Navigation

  • - 3.13.0a2 Documentation » + 3.14.0a0 Documentation »
  • @@ -1180,7 +1183,7 @@

    Navigation



    - Last updated on Jan 17, 2024 (06:57 UTC). + Last updated on Oct 14, 2024 (20:27 UTC). Found a bug?
    diff --git a/Lib/idlelib/run.py b/Lib/idlelib/run.py index 8f98e73258e778..a30db99a619a93 100644 --- a/Lib/idlelib/run.py +++ b/Lib/idlelib/run.py @@ -108,11 +108,11 @@ def handle_tk_events(tcl=tcl): # Thread shared globals: Establish a queue between a subthread (which handles # the socket) and the main thread (which runs user code), plus global -# completion, exit and interruptable (the main thread) flags: +# completion, exit and interruptible (the main thread) flags: exit_now = False quitting = False -interruptable = False +interruptible = False def main(del_exitfunc=False): """Start the Python execution server in a subprocess @@ -582,14 +582,14 @@ def __init__(self, rpchandler): self.locals = {} def runcode(self, code): - global interruptable + global interruptible try: self.user_exc_info = None - interruptable = True + interruptible = True try: exec(code, self.locals) finally: - interruptable = False + interruptible = False except SystemExit as e: if e.args: # SystemExit called with an argument. ob = e.args[0] @@ -615,7 +615,7 @@ def runcode(self, code): flush_stdout() def interrupt_the_server(self): - if interruptable: + if interruptible: thread.interrupt_main() def start_the_debugger(self, gui_adap_oid): From 2a5cdb251674ce8d9a824c102f7cd846d944cfa4 Mon Sep 17 00:00:00 2001 From: edson duarte Date: Mon, 14 Oct 2024 19:02:58 -0300 Subject: [PATCH 100/114] gh-85453: Improve variable mark up for datetime.rst (#120702) Variables and literals are marked up using backticks. --- Doc/library/datetime.rst | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index f0b465bc9ce39c..2f81080d525f86 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -180,19 +180,19 @@ Objects of the :class:`date` type are always naive. An object of type :class:`.time` or :class:`.datetime` may be aware or naive. 
-A :class:`.datetime` object *d* is aware if both of the following hold: +A :class:`.datetime` object ``d`` is aware if both of the following hold: 1. ``d.tzinfo`` is not ``None`` 2. ``d.tzinfo.utcoffset(d)`` does not return ``None`` -Otherwise, *d* is naive. +Otherwise, ``d`` is naive. -A :class:`.time` object *t* is aware if both of the following hold: +A :class:`.time` object ``t`` is aware if both of the following hold: 1. ``t.tzinfo`` is not ``None`` 2. ``t.tzinfo.utcoffset(None)`` does not return ``None``. -Otherwise, *t* is naive. +Otherwise, ``t`` is naive. The distinction between aware and naive doesn't apply to :class:`timedelta` objects. @@ -358,8 +358,8 @@ Supported operations: +--------------------------------+-----------------------------------------------+ | ``q, r = divmod(t1, t2)`` | Computes the quotient and the remainder: | | | ``q = t1 // t2`` (3) and ``r = t1 % t2``. | -| | q is an integer and r is a :class:`timedelta` | -| | object. | +| | ``q`` is an integer and ``r`` is a | +| | :class:`timedelta` object. | +--------------------------------+-----------------------------------------------+ | ``+t1`` | Returns a :class:`timedelta` object with the | | | same value. (2) | @@ -526,7 +526,7 @@ Other constructors, all class methods: January 1 of year 1 has ordinal 1. :exc:`ValueError` is raised unless ``1 <= ordinal <= - date.max.toordinal()``. For any date *d*, + date.max.toordinal()``. For any date ``d``, ``date.fromordinal(d.toordinal()) == d``. @@ -730,7 +730,7 @@ Instance methods: .. method:: date.toordinal() Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 - has ordinal 1. For any :class:`date` object *d*, + has ordinal 1. For any :class:`date` object ``d``, ``date.fromordinal(d.toordinal()) == d``. @@ -782,7 +782,7 @@ Instance methods: .. method:: date.__str__() - For a date *d*, ``str(d)`` is equivalent to ``d.isoformat()``. + For a date ``d``, ``str(d)`` is equivalent to ``d.isoformat()``. .. 
method:: date.ctime() @@ -1063,7 +1063,7 @@ Other constructors, all class methods: is used. If the *date* argument is a :class:`.datetime` object, its time components and :attr:`.tzinfo` attributes are ignored. - For any :class:`.datetime` object *d*, + For any :class:`.datetime` object ``d``, ``d == datetime.combine(d.date(), d.time(), d.tzinfo)``. .. versionchanged:: 3.6 @@ -1270,11 +1270,11 @@ Supported operations: If both are naive, or both are aware and have the same :attr:`~.datetime.tzinfo` attribute, the :attr:`~.datetime.tzinfo` attributes are ignored, and the result is a :class:`timedelta` - object *t* such that ``datetime2 + t == datetime1``. No time zone adjustments + object ``t`` such that ``datetime2 + t == datetime1``. No time zone adjustments are done in this case. If both are aware and have different :attr:`~.datetime.tzinfo` attributes, ``a-b`` acts - as if *a* and *b* were first converted to naive UTC datetimes. The + as if ``a`` and ``b`` were first converted to naive UTC datetimes. The result is ``(a.replace(tzinfo=None) - a.utcoffset()) - (b.replace(tzinfo=None) - b.utcoffset())`` except that the implementation never overflows. @@ -1454,11 +1454,11 @@ Instance methods: .. method:: datetime.utctimetuple() - If :class:`.datetime` instance *d* is naive, this is the same as + If :class:`.datetime` instance ``d`` is naive, this is the same as ``d.timetuple()`` except that :attr:`~.time.struct_time.tm_isdst` is forced to 0 regardless of what ``d.dst()`` returns. DST is never in effect for a UTC time. - If *d* is aware, *d* is normalized to UTC time, by subtracting + If ``d`` is aware, ``d`` is normalized to UTC time, by subtracting ``d.utcoffset()``, and a :class:`time.struct_time` for the normalized time is returned. :attr:`!tm_isdst` is forced to 0. Note that an :exc:`OverflowError` may be raised if ``d.year`` was @@ -1606,7 +1606,7 @@ Instance methods: .. 
method:: datetime.__str__() - For a :class:`.datetime` instance *d*, ``str(d)`` is equivalent to + For a :class:`.datetime` instance ``d``, ``str(d)`` is equivalent to ``d.isoformat(' ')``. @@ -1853,7 +1853,7 @@ Instance attributes (read-only): .. versionadded:: 3.6 :class:`.time` objects support equality and order comparisons, -where *a* is considered less than *b* when *a* precedes *b* in time. +where ``a`` is considered less than ``b`` when ``a`` precedes ``b`` in time. Naive and aware :class:`!time` objects are never equal. Order comparison between naive and aware :class:`!time` objects raises @@ -2000,7 +2000,7 @@ Instance methods: .. method:: time.__str__() - For a time *t*, ``str(t)`` is equivalent to ``t.isoformat()``. + For a time ``t``, ``str(t)`` is equivalent to ``t.isoformat()``. .. method:: time.strftime(format) From 1bffd7a2a738506a4ad50c6c3c2c32926cce6d14 Mon Sep 17 00:00:00 2001 From: Steven Jin Date: Mon, 14 Oct 2024 18:36:38 -0400 Subject: [PATCH 101/114] gh-124944: Add socket.SO_ORIGINAL_DST (#124945) --- .../Library/2024-10-03-17-13-22.gh-issue-124944.YyLAzf.rst | 1 + Modules/socketmodule.c | 3 +++ Modules/socketmodule.h | 4 ++++ configure | 6 ++++++ configure.ac | 2 +- pyconfig.h.in | 3 +++ 6 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-03-17-13-22.gh-issue-124944.YyLAzf.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-03-17-13-22.gh-issue-124944.YyLAzf.rst b/Misc/NEWS.d/next/Library/2024-10-03-17-13-22.gh-issue-124944.YyLAzf.rst new file mode 100644 index 00000000000000..66af712c5ae5a8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-03-17-13-22.gh-issue-124944.YyLAzf.rst @@ -0,0 +1 @@ +Add ``SO_ORIGINAL_DST`` to the :mod:`socket` module. 
diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 0829d2358129d2..744e5e0c0b2b54 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -7921,6 +7921,9 @@ socket_exec(PyObject *m) #ifdef SO_OOBINLINE ADD_INT_MACRO(m, SO_OOBINLINE); #endif +#ifdef SO_ORIGINAL_DST + ADD_INT_MACRO(m, SO_ORIGINAL_DST); +#endif #ifndef __GNU__ #ifdef SO_REUSEPORT ADD_INT_MACRO(m, SO_REUSEPORT); diff --git a/Modules/socketmodule.h b/Modules/socketmodule.h index a77c620c2ef630..e1d96377728eb9 100644 --- a/Modules/socketmodule.h +++ b/Modules/socketmodule.h @@ -172,6 +172,10 @@ typedef int socklen_t; # undef AF_VSOCK #endif +#ifdef HAVE_LINUX_NETFILTER_IPV4_H +# include +#endif + #ifdef HAVE_SOCKADDR_ALG # include diff --git a/configure b/configure index 0cc73e4e66552d..c5bec6a1b0d7c2 100755 --- a/configure +++ b/configure @@ -11092,6 +11092,12 @@ if test "x$ac_cv_header_linux_memfd_h" = xyes then : printf "%s\n" "#define HAVE_LINUX_MEMFD_H 1" >>confdefs.h +fi +ac_fn_c_check_header_compile "$LINENO" "linux/netfilter_ipv4.h" "ac_cv_header_linux_netfilter_ipv4_h" "$ac_includes_default" +if test "x$ac_cv_header_linux_netfilter_ipv4_h" = xyes +then : + printf "%s\n" "#define HAVE_LINUX_NETFILTER_IPV4_H 1" >>confdefs.h + fi ac_fn_c_check_header_compile "$LINENO" "linux/random.h" "ac_cv_header_linux_random_h" "$ac_includes_default" if test "x$ac_cv_header_linux_random_h" = xyes diff --git a/configure.ac b/configure.ac index 1864e94ace9243..d4b7942190207a 100644 --- a/configure.ac +++ b/configure.ac @@ -3013,7 +3013,7 @@ AC_DEFINE([STDC_HEADERS], [1], AC_CHECK_HEADERS([ \ alloca.h asm/types.h bluetooth.h conio.h direct.h dlfcn.h endian.h errno.h fcntl.h grp.h \ io.h langinfo.h libintl.h libutil.h linux/auxvec.h sys/auxv.h linux/fs.h linux/limits.h linux/memfd.h \ - linux/random.h linux/soundcard.h \ + linux/netfilter_ipv4.h linux/random.h linux/soundcard.h \ linux/tipc.h linux/wait.h netdb.h net/ethernet.h netinet/in.h netpacket/packet.h poll.h process.h pthread.h 
pty.h \ sched.h setjmp.h shadow.h signal.h spawn.h stropts.h sys/audioio.h sys/bsdtty.h sys/devpoll.h \ sys/endian.h sys/epoll.h sys/event.h sys/eventfd.h sys/file.h sys/ioctl.h sys/kern_control.h \ diff --git a/pyconfig.h.in b/pyconfig.h.in index 7f02603e26f5d0..1947d8ee14f83e 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -739,6 +739,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_LINUX_MEMFD_H +/* Define to 1 if you have the header file. */ +#undef HAVE_LINUX_NETFILTER_IPV4_H + /* Define to 1 if you have the header file. */ #undef HAVE_LINUX_NETLINK_H From 0b28ea4a35dc7c68c97127f7aad8f0175d77c520 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Mon, 14 Oct 2024 23:45:58 +0100 Subject: [PATCH 102/114] gh-124958: Revert "gh-125472: Revert "gh-124958: fix asyncio.TaskGroup and _PyFuture refcycles ... (#125486) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Revert "gh-125472: Revert "gh-124958: fix asyncio.TaskGroup and _PyFuture refcycles (#12… (#125476)" This reverts commit e99650b80ace3893c2a80b3f2a4aca99cb305191. * fix incompatability with gh-124392 --- Lib/asyncio/futures.py | 6 +- Lib/asyncio/taskgroups.py | 41 +++++-- Lib/test/test_asyncio/test_futures.py | 22 ++++ Lib/test/test_asyncio/test_taskgroups.py | 102 +++++++++++++++++- ...-10-04-08-46-00.gh-issue-124958.rea9-x.rst | 1 + 5 files changed, 157 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index 5f6fa2348726cf..c95fce035cd548 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -190,8 +190,7 @@ def result(self): the future is done and has an exception set, this exception is raised. 
""" if self._state == _CANCELLED: - exc = self._make_cancelled_error() - raise exc + raise self._make_cancelled_error() if self._state != _FINISHED: raise exceptions.InvalidStateError('Result is not ready.') self.__log_traceback = False @@ -208,8 +207,7 @@ def exception(self): InvalidStateError. """ if self._state == _CANCELLED: - exc = self._make_cancelled_error() - raise exc + raise self._make_cancelled_error() if self._state != _FINISHED: raise exceptions.InvalidStateError('Exception is not set.') self.__log_traceback = False diff --git a/Lib/asyncio/taskgroups.py b/Lib/asyncio/taskgroups.py index f2ee9648c43876..9fa772ca9d02cc 100644 --- a/Lib/asyncio/taskgroups.py +++ b/Lib/asyncio/taskgroups.py @@ -66,6 +66,20 @@ async def __aenter__(self): return self async def __aexit__(self, et, exc, tb): + tb = None + try: + return await self._aexit(et, exc) + finally: + # Exceptions are heavy objects that can have object + # cycles (bad for GC); let's not keep a reference to + # a bunch of them. It would be nicer to use a try/finally + # in __aexit__ directly but that introduced some diff noise + self._parent_task = None + self._errors = None + self._base_error = None + exc = None + + async def _aexit(self, et, exc): self._exiting = True if (exc is not None and @@ -122,7 +136,10 @@ async def __aexit__(self, et, exc, tb): assert not self._tasks if self._base_error is not None: - raise self._base_error + try: + raise self._base_error + finally: + exc = None if self._parent_cancel_requested: # If this flag is set we *must* call uncancel(). @@ -133,8 +150,14 @@ async def __aexit__(self, et, exc, tb): # Propagate CancelledError if there is one, except if there # are other errors -- those have priority. 
- if propagate_cancellation_error is not None and not self._errors: - raise propagate_cancellation_error + try: + if propagate_cancellation_error is not None and not self._errors: + try: + raise propagate_cancellation_error + finally: + exc = None + finally: + propagate_cancellation_error = None if et is not None and not issubclass(et, exceptions.CancelledError): self._errors.append(exc) @@ -146,14 +169,14 @@ async def __aexit__(self, et, exc, tb): if self._parent_task.cancelling(): self._parent_task.uncancel() self._parent_task.cancel() - # Exceptions are heavy objects that can have object - # cycles (bad for GC); let's not keep a reference to - # a bunch of them. try: - me = BaseExceptionGroup('unhandled errors in a TaskGroup', self._errors) - raise me from None + raise BaseExceptionGroup( + 'unhandled errors in a TaskGroup', + self._errors, + ) from None finally: - self._errors = None + exc = None + def create_task(self, coro, *, name=None, context=None): """Create a new task in this group and return it. 
diff --git a/Lib/test/test_asyncio/test_futures.py b/Lib/test/test_asyncio/test_futures.py index 458b70451a306a..c566b28adb2408 100644 --- a/Lib/test/test_asyncio/test_futures.py +++ b/Lib/test/test_asyncio/test_futures.py @@ -659,6 +659,28 @@ def __del__(self): fut = self._new_future(loop=self.loop) fut.set_result(Evil()) + def test_future_cancelled_result_refcycles(self): + f = self._new_future(loop=self.loop) + f.cancel() + exc = None + try: + f.result() + except asyncio.CancelledError as e: + exc = e + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + + def test_future_cancelled_exception_refcycles(self): + f = self._new_future(loop=self.loop) + f.cancel() + exc = None + try: + f.exception() + except asyncio.CancelledError as e: + exc = e + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + @unittest.skipUnless(hasattr(futures, '_CFuture'), 'requires the C _asyncio module') diff --git a/Lib/test/test_asyncio/test_taskgroups.py b/Lib/test/test_asyncio/test_taskgroups.py index 4852536defc93d..1b4de96a572fb9 100644 --- a/Lib/test/test_asyncio/test_taskgroups.py +++ b/Lib/test/test_asyncio/test_taskgroups.py @@ -1,7 +1,8 @@ # Adapted with permission from the EdgeDB project; # license: PSFL. 
- +import sys +import gc import asyncio import contextvars import contextlib @@ -11,7 +12,6 @@ from test.test_asyncio.utils import await_without_task - # To prevent a warning "test altered the execution environment" def tearDownModule(): asyncio.set_event_loop_policy(None) @@ -29,6 +29,15 @@ def get_error_types(eg): return {type(exc) for exc in eg.exceptions} +def no_other_refs(): + # due to gh-124392 coroutines now refer to their locals + coro = asyncio.current_task().get_coro() + frame = sys._getframe(1) + while coro.cr_frame != frame: + coro = coro.cr_await + return [coro] + + class TestTaskGroup(unittest.IsolatedAsyncioTestCase): async def test_taskgroup_01(self): @@ -899,6 +908,95 @@ async def outer(): await outer() + async def test_exception_refcycles_direct(self): + """Test that TaskGroup doesn't keep a reference to the raised ExceptionGroup""" + tg = asyncio.TaskGroup() + exc = None + + class _Done(Exception): + pass + + try: + async with tg: + raise _Done + except ExceptionGroup as e: + exc = e + + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), no_other_refs()) + + + async def test_exception_refcycles_errors(self): + """Test that TaskGroup deletes self._errors, and __aexit__ args""" + tg = asyncio.TaskGroup() + exc = None + + class _Done(Exception): + pass + + try: + async with tg: + raise _Done + except* _Done as excs: + exc = excs.exceptions[0] + + self.assertIsInstance(exc, _Done) + self.assertListEqual(gc.get_referrers(exc), no_other_refs()) + + + async def test_exception_refcycles_parent_task(self): + """Test that TaskGroup deletes self._parent_task""" + tg = asyncio.TaskGroup() + exc = None + + class _Done(Exception): + pass + + async def coro_fn(): + async with tg: + raise _Done + + try: + async with asyncio.TaskGroup() as tg2: + tg2.create_task(coro_fn()) + except* _Done as excs: + exc = excs.exceptions[0].exceptions[0] + + self.assertIsInstance(exc, _Done) + self.assertListEqual(gc.get_referrers(exc), no_other_refs()) + + 
async def test_exception_refcycles_propagate_cancellation_error(self): + """Test that TaskGroup deletes propagate_cancellation_error""" + tg = asyncio.TaskGroup() + exc = None + + try: + async with asyncio.timeout(-1): + async with tg: + await asyncio.sleep(0) + except TimeoutError as e: + exc = e.__cause__ + + self.assertIsInstance(exc, asyncio.CancelledError) + self.assertListEqual(gc.get_referrers(exc), no_other_refs()) + + async def test_exception_refcycles_base_error(self): + """Test that TaskGroup deletes self._base_error""" + class MyKeyboardInterrupt(KeyboardInterrupt): + pass + + tg = asyncio.TaskGroup() + exc = None + + try: + async with tg: + raise MyKeyboardInterrupt + except MyKeyboardInterrupt as e: + exc = e + + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), no_other_refs()) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst b/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst new file mode 100644 index 00000000000000..534d5bb8c898da --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst @@ -0,0 +1 @@ +Fix refcycles in exceptions raised from :class:`asyncio.TaskGroup` and the python implementation of :class:`asyncio.Future` From 8d42e2d915c3096e7eac1c649751d1da567bb7c3 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 14 Oct 2024 20:09:48 -0400 Subject: [PATCH 103/114] gh-125269: Use `AC_LINK_IF_ELSE` to detect if `-latomic` is needed (#125416) We previously used `AC_RUN_IF_ELSE` with a short test program to detect if `-latomic` is needed, but that requires choosing a specific default value when cross-compiling because the test program is not run. Some cross compilation targets like `wasm32-emscripten` do not support `-latomic`, while other cross compilation targets, like `arm-linux-gnueabi` require it. 
--- .../2024-10-13-21-11-30.gh-issue-125269.BC-fdo.rst | 2 ++ configure | 12 +++--------- configure.ac | 7 +++---- 3 files changed, 8 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-10-13-21-11-30.gh-issue-125269.BC-fdo.rst diff --git a/Misc/NEWS.d/next/Build/2024-10-13-21-11-30.gh-issue-125269.BC-fdo.rst b/Misc/NEWS.d/next/Build/2024-10-13-21-11-30.gh-issue-125269.BC-fdo.rst new file mode 100644 index 00000000000000..24f5469e8a664b --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-10-13-21-11-30.gh-issue-125269.BC-fdo.rst @@ -0,0 +1,2 @@ +Fix detection of whether ``-latomic`` is needed when cross-compiling CPython +using the configure script. diff --git a/configure b/configure index c5bec6a1b0d7c2..be119f108a060b 100755 --- a/configure +++ b/configure @@ -28999,10 +28999,6 @@ printf %s "checking whether libatomic is needed by ... " >&6; } if test ${ac_cv_libatomic_needed+y} then : printf %s "(cached) " >&6 -else $as_nop - if test "$cross_compiling" = yes -then : - ac_cv_libatomic_needed=no else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -29044,16 +29040,14 @@ int main() } _ACEOF -if ac_fn_c_try_run "$LINENO" +if ac_fn_c_try_link "$LINENO" then : ac_cv_libatomic_needed=no else $as_nop ac_cv_libatomic_needed=yes fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext -fi - +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_libatomic_needed" >&5 printf "%s\n" "$ac_cv_libatomic_needed" >&6; } diff --git a/configure.ac b/configure.ac index d4b7942190207a..582851695e400f 100644 --- a/configure.ac +++ b/configure.ac @@ -7497,7 +7497,7 @@ CPPFLAGS="${BASECPPFLAGS} -I. 
-I${srcdir}/Include ${CPPFLAGS}" AC_CACHE_CHECK([whether libatomic is needed by ], [ac_cv_libatomic_needed], -[AC_RUN_IFELSE([AC_LANG_SOURCE([[ +[AC_LINK_IFELSE([AC_LANG_SOURCE([[ // pyatomic.h needs uint64_t and Py_ssize_t types #include // int64_t, intptr_t #ifdef HAVE_SYS_TYPES_H @@ -7534,9 +7534,8 @@ int main() return 0; // all good } ]])], - [ac_cv_libatomic_needed=no], dnl build succeeded - [ac_cv_libatomic_needed=yes], dnl build failed - [ac_cv_libatomic_needed=no]) dnl cross compilation + [ac_cv_libatomic_needed=no], dnl build and link succeeded + [ac_cv_libatomic_needed=yes]) dnl build and link failed ]) AS_VAR_IF([ac_cv_libatomic_needed], [yes], From a2fe9ff11d6104074c19bf328a104caa99becc11 Mon Sep 17 00:00:00 2001 From: Mariusz Felisiak Date: Tue, 15 Oct 2024 09:29:08 +0200 Subject: [PATCH 104/114] Doc: Update CVE URL (#125489) --- Doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/conf.py b/Doc/conf.py index d7197b17865854..839beaad08bebd 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -614,7 +614,7 @@ # Sphinx 8.1 has in-built CVE and CWE roles. 
extlinks |= { "cve": ( - "https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-%s", + "https://www.cve.org/CVERecord?id=CVE-%s", "CVE-%s", ), "cwe": ("https://cwe.mitre.org/data/definitions/%s.html", "CWE-%s"), From 66064c342c6fb54b443aae8ccf8db74bb9d8bc50 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 15 Oct 2024 10:39:15 +0300 Subject: [PATCH 105/114] gh-123299: Copyedit "What's New in Python 3.14" (#125438) --- .../c-api-pending-removal-in-3.14.rst | 2 +- .../c-api-pending-removal-in-3.15.rst | 2 +- .../c-api-pending-removal-in-future.rst | 2 +- Doc/deprecations/index.rst | 2 +- Doc/deprecations/pending-removal-in-3.13.rst | 2 +- Doc/deprecations/pending-removal-in-3.14.rst | 2 +- Doc/deprecations/pending-removal-in-3.15.rst | 4 +- Doc/deprecations/pending-removal-in-3.16.rst | 29 +++-- .../pending-removal-in-future.rst | 2 +- Doc/whatsnew/3.14.rst | 110 +++++++++--------- 10 files changed, 79 insertions(+), 78 deletions(-) diff --git a/Doc/deprecations/c-api-pending-removal-in-3.14.rst b/Doc/deprecations/c-api-pending-removal-in-3.14.rst index d16da66c29abe7..9e10bf2691e5c8 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.14.rst +++ b/Doc/deprecations/c-api-pending-removal-in-3.14.rst @@ -1,4 +1,4 @@ -Pending Removal in Python 3.14 +Pending removal in Python 3.14 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * The ``ma_version_tag`` field in :c:type:`PyDictObject` for extension modules diff --git a/Doc/deprecations/c-api-pending-removal-in-3.15.rst b/Doc/deprecations/c-api-pending-removal-in-3.15.rst index e3974415e0cc89..1bb49e5b4874f2 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.15.rst +++ b/Doc/deprecations/c-api-pending-removal-in-3.15.rst @@ -1,4 +1,4 @@ -Pending Removal in Python 3.15 +Pending removal in Python 3.15 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * The bundled copy of ``libmpdecimal``. 
diff --git a/Doc/deprecations/c-api-pending-removal-in-future.rst b/Doc/deprecations/c-api-pending-removal-in-future.rst index 0c3ae52b87ff74..8fc1c80c35d092 100644 --- a/Doc/deprecations/c-api-pending-removal-in-future.rst +++ b/Doc/deprecations/c-api-pending-removal-in-future.rst @@ -1,4 +1,4 @@ -Pending Removal in Future Versions +Pending removal in future versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The following APIs are deprecated and will be removed, diff --git a/Doc/deprecations/index.rst b/Doc/deprecations/index.rst index a9efb0bc744335..bac6e3f18d4594 100644 --- a/Doc/deprecations/index.rst +++ b/Doc/deprecations/index.rst @@ -7,7 +7,7 @@ Deprecations .. include:: pending-removal-in-future.rst -C API Deprecations +C API deprecations ------------------ .. include:: c-api-pending-removal-in-3.15.rst diff --git a/Doc/deprecations/pending-removal-in-3.13.rst b/Doc/deprecations/pending-removal-in-3.13.rst index 89790497816e83..2fd2f12cc6a2c4 100644 --- a/Doc/deprecations/pending-removal-in-3.13.rst +++ b/Doc/deprecations/pending-removal-in-3.13.rst @@ -1,4 +1,4 @@ -Pending Removal in Python 3.13 +Pending removal in Python 3.13 ------------------------------ Modules (see :pep:`594`): diff --git a/Doc/deprecations/pending-removal-in-3.14.rst b/Doc/deprecations/pending-removal-in-3.14.rst index de30f4695059ed..b8791b8d6c387e 100644 --- a/Doc/deprecations/pending-removal-in-3.14.rst +++ b/Doc/deprecations/pending-removal-in-3.14.rst @@ -1,4 +1,4 @@ -Pending Removal in Python 3.14 +Pending removal in Python 3.14 ------------------------------ * The import system: diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index a55fb6bea3fdaa..17029b8d4773bd 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -1,4 +1,4 @@ -Pending Removal in Python 3.15 +Pending removal in Python 3.15 ------------------------------ * The import system: @@ -63,7 +63,7 @@ Pending 
Removal in Python 3.15 * The undocumented keyword argument syntax for creating :class:`~typing.NamedTuple` classes - (e.g. ``Point = NamedTuple("Point", x=int, y=int)``) + (for example, ``Point = NamedTuple("Point", x=int, y=int)``) has been deprecated since Python 3.13. Use the class-based syntax or the functional syntax instead. diff --git a/Doc/deprecations/pending-removal-in-3.16.rst b/Doc/deprecations/pending-removal-in-3.16.rst index fc2ef33de5e5cc..fac500d34742ca 100644 --- a/Doc/deprecations/pending-removal-in-3.16.rst +++ b/Doc/deprecations/pending-removal-in-3.16.rst @@ -1,15 +1,6 @@ -Pending Removal in Python 3.16 +Pending removal in Python 3.16 ------------------------------ -* :mod:`builtins`: - - * Bitwise inversion on boolean types, ``~True`` or ``~False`` - has been deprecated since Python 3.12, - as it produces surprising and unintuitive results (``-2`` and ``-1``). - Use ``not x`` instead for the logical negation of a Boolean. - In the rare case that you need the bitwise inversion of - the underlying integer, convert to ``int`` explicitly (``~int(x)``). - * :mod:`array`: * The ``'u'`` format code (:c:type:`wchar_t`) @@ -20,11 +11,19 @@ Pending Removal in Python 3.16 * :mod:`asyncio`: - * :mod:`asyncio`: - :func:`!asyncio.iscoroutinefunction` is deprecated - and will be removed in Python 3.16, - use :func:`inspect.iscoroutinefunction` instead. - (Contributed by Jiahao Li and Kumar Aditya in :gh:`122875`.) + * :func:`!asyncio.iscoroutinefunction` is deprecated + and will be removed in Python 3.16, + use :func:`inspect.iscoroutinefunction` instead. + (Contributed by Jiahao Li and Kumar Aditya in :gh:`122875`.) + +* :mod:`builtins`: + + * Bitwise inversion on boolean types, ``~True`` or ``~False`` + has been deprecated since Python 3.12, + as it produces surprising and unintuitive results (``-2`` and ``-1``). + Use ``not x`` instead for the logical negation of a Boolean. 
+ In the rare case that you need the bitwise inversion of + the underlying integer, convert to ``int`` explicitly (``~int(x)``). * :mod:`shutil`: diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index 3f9cf6f208221a..f916797c07a068 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -1,4 +1,4 @@ -Pending Removal in Future Versions +Pending removal in future versions ---------------------------------- The following APIs will be removed in the future, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 25e69a59bdec62..b106578fe9e8b0 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1,6 +1,6 @@ **************************** - What's New In Python 3.14 + What's new in Python 3.14 **************************** :Editor: TBD @@ -56,7 +56,7 @@ For full details, see the :ref:`changelog `. so it's worth checking back even after reading earlier versions. -Summary -- Release highlights +Summary -- release highlights ============================= .. This section singles out the most important changes in Python 3.14. @@ -67,12 +67,12 @@ Summary -- Release highlights -New Features +New features ============ -.. _whatsnew-314-pep649: +.. _whatsnew314-pep649: -PEP 649: Deferred Evaluation of Annotations +PEP 649: deferred evaluation of annotations ------------------------------------------- The :term:`annotations ` on functions, classes, and modules are no @@ -150,12 +150,12 @@ In Python 3.7, :pep:`563` introduced the ``from __future__ import annotations`` directive, which turns all annotations into strings. This directive is now considered deprecated and it is expected to be removed in a future version of Python. However, this removal will not happen until after Python 3.13, the last version of -Python without deferred evaluation of annotations, reaches its end of life. 
+Python without deferred evaluation of annotations, reaches its end of life in 2029. In Python 3.14, the behavior of code using ``from __future__ import annotations`` is unchanged. -Improved Error Messages +Improved error messages ----------------------- * When unpacking assignment fails due to incorrect number of variables, the @@ -172,16 +172,16 @@ Improved Error Messages ValueError: too many values to unpack (expected 3, got 4) -Other Language Changes +Other language changes ====================== * Incorrect usage of :keyword:`await` and asynchronous comprehensions is now detected even if the code is optimized away by the :option:`-O` - command line option. For example, ``python -O -c 'assert await 1'`` + command-line option. For example, ``python -O -c 'assert await 1'`` now produces a :exc:`SyntaxError`. (Contributed by Jelle Zijlstra in :gh:`121637`.) * Writes to ``__debug__`` are now detected even if the code is optimized - away by the :option:`-O` command line option. For example, + away by the :option:`-O` command-line option. For example, ``python -O -c 'assert (__debug__ := 1)'`` now produces a :exc:`SyntaxError`. (Contributed by Irit Katriel in :gh:`122245`.) @@ -191,7 +191,7 @@ Other Language Changes (Contributed by Serhiy Storchaka in :gh:`84978`.) -New Modules +New modules =========== * :mod:`annotationlib`: For introspecting :term:`annotations `. @@ -199,7 +199,7 @@ New Modules (Contributed by Jelle Zijlstra in :gh:`119180`.) -Improved Modules +Improved modules ================ argparse @@ -214,7 +214,7 @@ ast --- * Add :func:`ast.compare` for comparing two ASTs. - (Contributed by Batuhan Taskaya and Jeremy Hylton in :issue:`15987`.) + (Contributed by Batuhan Taskaya and Jeremy Hylton in :gh:`60191`.) * Add support for :func:`copy.replace` for AST nodes. (Contributed by Bénédikt Tran in :gh:`121141`.) @@ -246,6 +246,12 @@ decimal :meth:`Decimal.from_number() `. (Contributed by Serhiy Storchaka in :gh:`121798`.) 
+datetime +-------- + +* Add :meth:`datetime.time.strptime` and :meth:`datetime.date.strptime`. + (Contributed by Wannes Boeykens in :gh:`41431`.) + dis --- @@ -254,9 +260,10 @@ dis This feature is added to the following interfaces via the *show_positions* keyword argument: - - :class:`dis.Bytecode`, - - :func:`dis.dis`, :func:`dis.distb`, and - - :func:`dis.disassemble`. + - :class:`dis.Bytecode` + - :func:`dis.dis` + - :func:`dis.distb` + - :func:`dis.disassemble` This feature is also exposed via :option:`dis --show-positions`. (Contributed by Bénédikt Tran in :gh:`123165`.) @@ -310,7 +317,8 @@ json of the error. (Contributed by Serhiy Storchaka in :gh:`122163`.) -* Enable the :mod:`json` module to work as a script using the :option:`-m` switch: ``python -m json``. +* Enable the :mod:`json` module to work as a script using the :option:`-m` + switch: :program:`python -m json`. See the :ref:`JSON command-line interface ` documentation. (Contributed by Trey Hunner in :gh:`122873`.) @@ -325,12 +333,6 @@ operator (Contributed by Raymond Hettinger and Nico Mexis in :gh:`115808`.) -datetime --------- - -* Add :meth:`datetime.time.strptime` and :meth:`datetime.date.strptime`. - (Contributed by Wannes Boeykens in :gh:`41431`.) - os -- @@ -357,11 +359,11 @@ pathlib pdb --- -* Hard-coded breakpoints (:func:`breakpoint` and :func:`pdb.set_trace`) now +* Hardcoded breakpoints (:func:`breakpoint` and :func:`pdb.set_trace`) now reuse the most recent :class:`~pdb.Pdb` instance that calls :meth:`~pdb.Pdb.set_trace`, instead of creating a new one each time. As a result, all the instance specific data like :pdbcmd:`display` and - :pdbcmd:`commands` are preserved across hard-coded breakpoints. + :pdbcmd:`commands` are preserved across hardcoded breakpoints. (Contributed by Tian Gao in :gh:`121450`.) * Add a new argument *mode* to :class:`pdb.Pdb`. 
Disable the ``restart`` @@ -391,9 +393,9 @@ symtable * Expose the following :class:`symtable.Symbol` methods: - * :meth:`~symtable.Symbol.is_free_class` - * :meth:`~symtable.Symbol.is_comp_iter` * :meth:`~symtable.Symbol.is_comp_cell` + * :meth:`~symtable.Symbol.is_comp_iter` + * :meth:`~symtable.Symbol.is_free_class` (Contributed by Bénédikt Tran in :gh:`120029`.) @@ -472,11 +474,11 @@ ast * Remove the following classes. They were all deprecated since Python 3.8, and have emitted deprecation warnings since Python 3.12: - * :class:`!ast.Num` - * :class:`!ast.Str` * :class:`!ast.Bytes` - * :class:`!ast.NameConstant` * :class:`!ast.Ellipsis` + * :class:`!ast.NameConstant` + * :class:`!ast.Num` + * :class:`!ast.Str` Use :class:`ast.Constant` instead. As a consequence of these removals, user-defined ``visit_Num``, ``visit_Str``, ``visit_Bytes``, @@ -501,16 +503,16 @@ asyncio * Remove the following classes and functions. They were all deprecated and emitted deprecation warnings since Python 3.12: + * :func:`!asyncio.get_child_watcher` + * :func:`!asyncio.set_child_watcher` + * :meth:`!asyncio.AbstractEventLoopPolicy.get_child_watcher` + * :meth:`!asyncio.AbstractEventLoopPolicy.set_child_watcher` * :class:`!asyncio.AbstractChildWatcher` - * :class:`!asyncio.SafeChildWatcher` - * :class:`!asyncio.MultiLoopChildWatcher` * :class:`!asyncio.FastChildWatcher` - * :class:`!asyncio.ThreadedChildWatcher` + * :class:`!asyncio.MultiLoopChildWatcher` * :class:`!asyncio.PidfdChildWatcher` - * :meth:`!asyncio.AbstractEventLoopPolicy.get_child_watcher` - * :meth:`!asyncio.AbstractEventLoopPolicy.set_child_watcher` - * :func:`!asyncio.get_child_watcher` - * :func:`!asyncio.set_child_watcher` + * :class:`!asyncio.SafeChildWatcher` + * :class:`!asyncio.ThreadedChildWatcher` (Contributed by Kumar Aditya in :gh:`120804`.) @@ -623,14 +625,14 @@ Changes in the Python API (Contributed by Serhiy Storchaka in :gh:`69998`.) 
-Build Changes +Build changes ============= -C API Changes +C API changes ============= -New Features +New features ------------ * Add :c:func:`PyLong_GetSign` function to get the sign of :class:`int` objects. @@ -640,17 +642,17 @@ New Features object: * :c:func:`PyUnicodeWriter_Create` + * :c:func:`PyUnicodeWriter_DecodeUTF8Stateful` * :c:func:`PyUnicodeWriter_Discard` * :c:func:`PyUnicodeWriter_Finish` + * :c:func:`PyUnicodeWriter_Format` * :c:func:`PyUnicodeWriter_WriteChar` - * :c:func:`PyUnicodeWriter_WriteUTF8` - * :c:func:`PyUnicodeWriter_WriteUCS4` - * :c:func:`PyUnicodeWriter_WriteWideChar` - * :c:func:`PyUnicodeWriter_WriteStr` * :c:func:`PyUnicodeWriter_WriteRepr` + * :c:func:`PyUnicodeWriter_WriteStr` * :c:func:`PyUnicodeWriter_WriteSubstring` - * :c:func:`PyUnicodeWriter_Format` - * :c:func:`PyUnicodeWriter_DecodeUTF8Stateful` + * :c:func:`PyUnicodeWriter_WriteUCS4` + * :c:func:`PyUnicodeWriter_WriteUTF8` + * :c:func:`PyUnicodeWriter_WriteWideChar` (Contributed by Victor Stinner in :gh:`119182`.) @@ -671,14 +673,14 @@ New Features * Add new functions to convert C ```` numbers from/to Python :class:`int`: - * :c:func:`PyLong_FromInt32` - * :c:func:`PyLong_FromInt64` - * :c:func:`PyLong_FromUInt32` - * :c:func:`PyLong_FromUInt64` * :c:func:`PyLong_AsInt32` * :c:func:`PyLong_AsInt64` * :c:func:`PyLong_AsUInt32` * :c:func:`PyLong_AsUInt64` + * :c:func:`PyLong_FromInt32` + * :c:func:`PyLong_FromInt64` + * :c:func:`PyLong_FromUInt32` + * :c:func:`PyLong_FromUInt64` (Contributed by Victor Stinner in :gh:`120389`.) 
@@ -701,20 +703,20 @@ New Features * Add functions to configure the Python initialization (:pep:`741`): + * :c:func:`Py_InitializeFromInitConfig` + * :c:func:`PyInitConfig_AddModule` * :c:func:`PyInitConfig_Create` * :c:func:`PyInitConfig_Free` + * :c:func:`PyInitConfig_FreeStrList` * :c:func:`PyInitConfig_GetError` * :c:func:`PyInitConfig_GetExitCode` - * :c:func:`PyInitConfig_HasOption` * :c:func:`PyInitConfig_GetInt` * :c:func:`PyInitConfig_GetStr` * :c:func:`PyInitConfig_GetStrList` - * :c:func:`PyInitConfig_FreeStrList` + * :c:func:`PyInitConfig_HasOption` * :c:func:`PyInitConfig_SetInt` * :c:func:`PyInitConfig_SetStr` * :c:func:`PyInitConfig_SetStrList` - * :c:func:`PyInitConfig_AddModule` - * :c:func:`Py_InitializeFromInitConfig` (Contributed by Victor Stinner in :gh:`107954`.) From 92af191a6a5f266b71373f5374ca0c9c522d62d9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 15 Oct 2024 11:05:40 +0300 Subject: [PATCH 106/114] gh-53203: Fix strptime() tests for %X on glibc < 2.29 (#125469) --- Lib/test/test_strptime.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 12366b053a2fc1..09f6f656bfcb0d 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -569,12 +569,20 @@ def test_date_locale2(self): 'ti_ET', 'tig_ER', 'wal_ET') def test_time_locale(self): # Test %X directive + loc = locale.getlocale(locale.LC_TIME)[0] + pos = slice(3, 6) + if glibc_ver and glibc_ver < (2, 29) and loc in { + 'aa_ET', 'am_ET', 'byn_ER', 'gez_ET', 'om_ET', + 'sid_ET', 'so_SO', 'ti_ET', 'tig_ER', 'wal_ET'}: + # Hours are in 12-hour notation without AM/PM indication. + # Ignore hours. 
+ pos = slice(4, 6) now = time.time() - self.roundtrip('%X', slice(3, 6), time.localtime(now)) + self.roundtrip('%X', pos, time.localtime(now)) # 1 hour 20 minutes 30 seconds ago - self.roundtrip('%X', slice(3, 6), time.localtime(now - 4830)) + self.roundtrip('%X', pos, time.localtime(now - 4830)) # 12 hours ago - self.roundtrip('%X', slice(3, 6), time.localtime(now - 12*3600)) + self.roundtrip('%X', pos, time.localtime(now - 12*3600)) def test_percent(self): # Make sure % signs are handled properly From 546dddca43a2a69dbe33d230e9e540636b403270 Mon Sep 17 00:00:00 2001 From: "RUANG (Roy James)" Date: Tue, 15 Oct 2024 17:21:16 +0800 Subject: [PATCH 107/114] gh-125234: Make PyInitConfig_Free(NULL) a no-op (#125266) --- Doc/c-api/init_config.rst | 2 ++ Programs/_testembed.c | 1 + Python/initconfig.c | 3 +++ 3 files changed, 6 insertions(+) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 6f8962afc7af0d..66e845df2e6aa5 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -1621,6 +1621,8 @@ Create Config Free memory of the initialization configuration *config*. + If *config* is ``NULL``, no operation is performed. 
+ Error Handling -------------- diff --git a/Programs/_testembed.c b/Programs/_testembed.c index ab619e32429d63..0fb45b2265e3c6 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1896,6 +1896,7 @@ static int test_initconfig_api(void) goto error; } PyInitConfig_Free(config); + PyInitConfig_Free(NULL); dump_config(); Py_Finalize(); diff --git a/Python/initconfig.c b/Python/initconfig.c index 58ac5e7d7eaeff..c142438b02bfd9 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -3457,6 +3457,9 @@ PyInitConfig_Create(void) void PyInitConfig_Free(PyInitConfig *config) { + if (config == NULL) { + return; + } free(config->err_msg); free(config); } From aa18fd55d575a04e3aa782fedcd08dced26676e0 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 15 Oct 2024 11:47:36 +0200 Subject: [PATCH 108/114] gh-125196: Use PyUnicodeWriter in HAMT (#125458) --- Python/hamt.c | 81 ++++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 50 deletions(-) diff --git a/Python/hamt.c b/Python/hamt.c index a8fbb00b807934..cfd211f4541446 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -349,7 +349,7 @@ hamt_node_find(PyHamtNode *node, #ifdef Py_DEBUG static int hamt_node_dump(PyHamtNode *node, - _PyUnicodeWriter *writer, int level); + PyUnicodeWriter *writer, int level); #endif static PyHamtNode * @@ -444,7 +444,7 @@ hamt_bitindex(uint32_t bitmap, uint32_t bit) #ifdef Py_DEBUG static int -_hamt_dump_ident(_PyUnicodeWriter *writer, int level) +_hamt_dump_ident(PyUnicodeWriter *writer, int level) { /* Write `' ' * level` to the `writer` */ PyObject *str = NULL; @@ -467,7 +467,7 @@ _hamt_dump_ident(_PyUnicodeWriter *writer, int level) goto error; } - ret = _PyUnicodeWriter_WriteStr(writer, res); + ret = PyUnicodeWriter_WriteStr(writer, res); error: Py_XDECREF(res); @@ -476,29 +476,6 @@ _hamt_dump_ident(_PyUnicodeWriter *writer, int level) return ret; } -static int -_hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...) 
-{ - /* A convenient helper combining _PyUnicodeWriter_WriteStr and - PyUnicode_FromFormatV. - */ - PyObject* msg; - int ret; - - va_list vargs; - va_start(vargs, format); - msg = PyUnicode_FromFormatV(format, vargs); - va_end(vargs); - - if (msg == NULL) { - return -1; - } - - ret = _PyUnicodeWriter_WriteStr(writer, msg); - Py_DECREF(msg); - return ret; -} - #endif /* Py_DEBUG */ /////////////////////////////////// Bitmap Node @@ -1154,7 +1131,7 @@ hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self) #ifdef Py_DEBUG static int hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, - _PyUnicodeWriter *writer, int level) + PyUnicodeWriter *writer, int level) { /* Debug build: __dump__() method implementation for Bitmap nodes. */ @@ -1166,8 +1143,8 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, goto error; } - if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ", - Py_SIZE(node), Py_SIZE(node) / 2)) + if (PyUnicodeWriter_Format(writer, "BitmapNode(size=%zd count=%zd ", + Py_SIZE(node), Py_SIZE(node) / 2) < 0) { goto error; } @@ -1181,7 +1158,9 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, if (tmp2 == NULL) { goto error; } - if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) { + if (PyUnicodeWriter_Format(writer, "bitmap=%S id=%p):\n", + tmp2, node) < 0) + { Py_DECREF(tmp2); goto error; } @@ -1196,7 +1175,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } if (key_or_null == NULL) { - if (_hamt_dump_format(writer, "NULL:\n")) { + if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) { goto error; } @@ -1207,14 +1186,14 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } } else { - if (_hamt_dump_format(writer, "%R: %R", key_or_null, - val_or_node)) + if (PyUnicodeWriter_Format(writer, "%R: %R", + key_or_null, val_or_node) < 0) { goto error; } } - if (_hamt_dump_format(writer, "\n")) { + if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { goto error; } } @@ -1548,7 +1527,7 @@ hamt_node_collision_dealloc(PyHamtNode_Collision *self) #ifdef 
Py_DEBUG static int hamt_node_collision_dump(PyHamtNode_Collision *node, - _PyUnicodeWriter *writer, int level) + PyUnicodeWriter *writer, int level) { /* Debug build: __dump__() method implementation for Collision nodes. */ @@ -1558,8 +1537,8 @@ hamt_node_collision_dump(PyHamtNode_Collision *node, goto error; } - if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n", - Py_SIZE(node), node)) + if (PyUnicodeWriter_Format(writer, "CollisionNode(size=%zd id=%p):\n", + Py_SIZE(node), node) < 0) { goto error; } @@ -1572,7 +1551,7 @@ hamt_node_collision_dump(PyHamtNode_Collision *node, goto error; } - if (_hamt_dump_format(writer, "%R: %R\n", key, val)) { + if (PyUnicodeWriter_Format(writer, "%R: %R\n", key, val) < 0) { goto error; } } @@ -1924,7 +1903,7 @@ hamt_node_array_dealloc(PyHamtNode_Array *self) #ifdef Py_DEBUG static int hamt_node_array_dump(PyHamtNode_Array *node, - _PyUnicodeWriter *writer, int level) + PyUnicodeWriter *writer, int level) { /* Debug build: __dump__() method implementation for Array nodes. */ @@ -1934,7 +1913,7 @@ hamt_node_array_dump(PyHamtNode_Array *node, goto error; } - if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) { + if (PyUnicodeWriter_Format(writer, "ArrayNode(id=%p):\n", node) < 0) { goto error; } @@ -1947,7 +1926,7 @@ hamt_node_array_dump(PyHamtNode_Array *node, goto error; } - if (_hamt_dump_format(writer, "%zd::\n", i)) { + if (PyUnicodeWriter_Format(writer, "%zd::\n", i) < 0) { goto error; } @@ -1955,7 +1934,7 @@ hamt_node_array_dump(PyHamtNode_Array *node, goto error; } - if (_hamt_dump_format(writer, "\n")) { + if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { goto error; } } @@ -2071,7 +2050,7 @@ hamt_node_find(PyHamtNode *node, #ifdef Py_DEBUG static int hamt_node_dump(PyHamtNode *node, - _PyUnicodeWriter *writer, int level) + PyUnicodeWriter *writer, int level) { /* Debug build: __dump__() method implementation for a node. 
@@ -2440,22 +2419,24 @@ _PyHamt_New(void) static PyObject * hamt_dump(PyHamtObject *self) { - _PyUnicodeWriter writer; - - _PyUnicodeWriter_Init(&writer); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + return NULL; + } - if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) { + if (PyUnicodeWriter_Format(writer, "HAMT(len=%zd):\n", + self->h_count) < 0) { goto error; } - if (hamt_node_dump(self->h_root, &writer, 0)) { + if (hamt_node_dump(self->h_root, writer, 0)) { goto error; } - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } #endif /* Py_DEBUG */ From fcef3fc9a593e2aa868d23cf2d91c57d8bf60ac6 Mon Sep 17 00:00:00 2001 From: foreignmeloman Date: Tue, 15 Oct 2024 14:26:19 +0400 Subject: [PATCH 109/114] =?UTF-8?q?gh-119535:=20Support=20=F0=9D=9C=8Bthon?= =?UTF-8?q?=20in=20Python=203.14=20venvs=20(#125035)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Lib/venv/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index a00fa690fa0b88..a5d348ba4cf121 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -306,7 +306,7 @@ def setup_python(self, context): suffixes = ['python', 'python3', f'python3.{sys.version_info[1]}'] if sys.version_info[:2] == (3, 14): - suffixes.append('python𝜋') + suffixes.append('𝜋thon') for suffix in suffixes: path = os.path.join(binpath, suffix) if not os.path.exists(path): From cc5a225cdc2a5d4e035dd08d59cef39182c10a6c Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 15 Oct 2024 13:29:43 +0200 Subject: [PATCH 110/114] gh-125041: test_zlib: For s390x HW acceleration, only skip checking the compressed bytes (#125042) --- Lib/test/support/__init__.py | 6 ++--- Lib/test/test_zlib.py | 25 ++++++++++++------- 
...-10-07-14-13-38.gh-issue-125041.PKLWDf.rst | 3 +++ 3 files changed, 22 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-10-07-14-13-38.gh-issue-125041.PKLWDf.rst diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index d768bead7120c7..f05be2b6bdf496 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2625,9 +2625,9 @@ def exceeds_recursion_limit(): return get_c_recursion_limit() * 3 -#Windows doesn't have os.uname() but it doesn't support s390x. -skip_on_s390x = unittest.skipIf(hasattr(os, 'uname') and os.uname().machine == 's390x', - 'skipped on s390x') +# Windows doesn't have os.uname() but it doesn't support s390x. +is_s390x = hasattr(os, 'uname') and os.uname().machine == 's390x' +skip_on_s390x = unittest.skipIf(is_s390x, 'skipped on s390x') Py_TRACE_REFS = hasattr(sys, 'getobjects') diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index ef02c64f886f8a..8b4bb8750f8f5c 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -6,7 +6,7 @@ import pickle import random import sys -from test.support import bigmemtest, _1G, _4G, skip_on_s390x +from test.support import bigmemtest, _1G, _4G, is_s390x zlib = import_helper.import_module('zlib') @@ -33,8 +33,9 @@ def _zlib_runtime_version_tuple(zlib_version=zlib.ZLIB_RUNTIME_VERSION): ZLIB_RUNTIME_VERSION_TUPLE = _zlib_runtime_version_tuple() -# bpo-46623: On s390x, when a hardware accelerator is used, using different -# ways to compress data with zlib can produce different compressed data. +# bpo-46623: When a hardware accelerator is used (currently only on s390x), +# using different ways to compress data with zlib can produce different +# compressed data. 
# Simplified test_pair() code: # # def func1(data): @@ -57,8 +58,10 @@ def _zlib_runtime_version_tuple(zlib_version=zlib.ZLIB_RUNTIME_VERSION): # # zlib.decompress(func1(data)) == zlib.decompress(func2(data)) == data # -# Make the assumption that s390x always has an accelerator to simplify the skip -# condition. +# To simplify the skip condition, make the assumption that s390x always has an +# accelerator, and nothing else has it. +HW_ACCELERATED = is_s390x + class VersionTestCase(unittest.TestCase): @@ -223,12 +226,14 @@ def test_keywords(self): bufsize=zlib.DEF_BUF_SIZE), HAMLET_SCENE) - @skip_on_s390x def test_speech128(self): # compress more data data = HAMLET_SCENE * 128 x = zlib.compress(data) - self.assertEqual(zlib.compress(bytearray(data)), x) + # With hardware acceleration, the compressed bytes + # might not be identical. + if not HW_ACCELERATED: + self.assertEqual(zlib.compress(bytearray(data)), x) for ob in x, bytearray(x): self.assertEqual(zlib.decompress(ob), data) @@ -275,7 +280,6 @@ def test_64bit_compress(self, size): class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): # Test compression object - @skip_on_s390x def test_pair(self): # straightforward compress/decompress objects datasrc = HAMLET_SCENE * 128 @@ -286,7 +290,10 @@ def test_pair(self): x1 = co.compress(data) x2 = co.flush() self.assertRaises(zlib.error, co.flush) # second flush should not work - self.assertEqual(x1 + x2, datazip) + # With hardware acceleration, the compressed bytes might not + # be identical. 
+ if not HW_ACCELERATED: + self.assertEqual(x1 + x2, datazip) for v1, v2 in ((x1, x2), (bytearray(x1), bytearray(x2))): dco = zlib.decompressobj() y1 = dco.decompress(v1 + v2) diff --git a/Misc/NEWS.d/next/Tests/2024-10-07-14-13-38.gh-issue-125041.PKLWDf.rst b/Misc/NEWS.d/next/Tests/2024-10-07-14-13-38.gh-issue-125041.PKLWDf.rst new file mode 100644 index 00000000000000..c7181eb9c1f3a9 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-10-07-14-13-38.gh-issue-125041.PKLWDf.rst @@ -0,0 +1,3 @@ +Re-enable skipped tests for :mod:`zlib` on the s390x architecture: only skip +checks of the compressed bytes, which can be different between zlib's +software implementation and the hardware-accelerated implementation. From c8a1818fb01937b66b93728c11d68c9f9af688a5 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Tue, 15 Oct 2024 16:12:32 +0300 Subject: [PATCH 111/114] gh-125517: Fix unreachable code warnings in `_testembed.c` (#125518) --- Doc/c-api/init_config.rst | 16 ++++++++++------ Programs/_testembed.c | 20 ++++++++++++-------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 66e845df2e6aa5..6194d7446c73e4 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -1825,14 +1825,18 @@ return ``-1`` on error: PyInitConfig_Free(config); return 0; - // Display the error message - const char *err_msg; error: - (void)PyInitConfig_GetError(config, &err_msg); - printf("PYTHON INIT ERROR: %s\n", err_msg); - PyInitConfig_Free(config); + { + // Display the error message + // This uncommon braces style is used, because you cannot make + // goto targets point to variable declarations. 
+ const char *err_msg; + (void)PyInitConfig_GetError(config, &err_msg); + printf("PYTHON INIT ERROR: %s\n", err_msg); + PyInitConfig_Free(config); - return -1; + return -1; + } } diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 0fb45b2265e3c6..d15dd519dbf6af 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1902,11 +1902,13 @@ static int test_initconfig_api(void) Py_Finalize(); return 0; - const char *err_msg; error: - (void)PyInitConfig_GetError(config, &err_msg); - printf("Python init failed: %s\n", err_msg); - exit(1); + { + const char *err_msg; + (void)PyInitConfig_GetError(config, &err_msg); + printf("Python init failed: %s\n", err_msg); + exit(1); + } } @@ -2050,11 +2052,13 @@ static int test_initconfig_module(void) Py_Finalize(); return 0; - const char *err_msg; error: - (void)PyInitConfig_GetError(config, &err_msg); - printf("Python init failed: %s\n", err_msg); - exit(1); + { + const char *err_msg; + (void)PyInitConfig_GetError(config, &err_msg); + printf("Python init failed: %s\n", err_msg); + exit(1); + } } From 55c4f4c30b49734ce35dc88139b8b4fdc94c66fd Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Tue, 15 Oct 2024 15:11:02 +0100 Subject: [PATCH 112/114] gh-125514: fix bug in test_traceback utility. 
Specify exception types in except: clauses (#125516) --- Lib/test/test_traceback.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 455fea034198a6..77ef0c5b3c480d 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -150,7 +150,7 @@ def test_no_caret_with_no_debug_ranges_flag_python_traceback(self): import traceback try: x = 1 / 0 - except: + except ZeroDivisionError: traceback.print_exc() """) try: @@ -550,9 +550,10 @@ class PurePythonExceptionFormattingMixin: def get_exception(self, callable, slice_start=0, slice_end=-1): try: callable() - self.fail("No exception thrown.") - except: + except BaseException: return traceback.format_exc().splitlines()[slice_start:slice_end] + else: + self.fail("No exception thrown.") callable_line = get_exception.__code__.co_firstlineno + 2 @@ -2237,7 +2238,7 @@ def test_context_suppression(self): try: try: raise Exception - except: + except Exception: raise ZeroDivisionError from None except ZeroDivisionError as _: e = _ @@ -2589,9 +2590,9 @@ def exc(): try: try: raise EG("eg1", [ValueError(1), TypeError(2)]) - except: + except EG: raise EG("eg2", [ValueError(3), TypeError(4)]) - except: + except EG: raise ImportError(5) expected = ( @@ -2641,7 +2642,7 @@ def exc(): except Exception as e: exc = e raise EG("eg", [VE(1), exc, VE(4)]) - except: + except EG: raise EG("top", [VE(5)]) expected = (f' + Exception Group Traceback (most recent call last):\n' @@ -3454,7 +3455,7 @@ def test_long_context_chain(self): def f(): try: 1/0 - except: + except ZeroDivisionError: f() try: @@ -3558,7 +3559,7 @@ def test_comparison_params_variations(self): def raise_exc(): try: raise ValueError('bad value') - except: + except ValueError: raise def raise_with_locals(): From d3c82b9ccedd77fc302f5ab8ab0220b3372f574c Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Tue, 15 Oct 2024 17:42:16 +0300 Subject: [PATCH 113/114] 
gh-125512: Revert "gh-124872: Replace enter/exit events with "switched" (#124776)" (#125513) --- Doc/c-api/contextvars.rst | 14 +++-- Include/cpython/context.h | 17 ++++-- Lib/test/test_capi/test_watchers.py | 89 ++++++++++++++-------------- Modules/_testcapi/watchers.c | 79 ++++++++++++------------ Python/context.c | 31 ++++------ Tools/c-analyzer/cpython/ignored.tsv | 4 +- 6 files changed, 117 insertions(+), 117 deletions(-) diff --git a/Doc/c-api/contextvars.rst b/Doc/c-api/contextvars.rst index b7c6550ff34aac..8eba54a80dc80d 100644 --- a/Doc/c-api/contextvars.rst +++ b/Doc/c-api/contextvars.rst @@ -123,10 +123,16 @@ Context object management functions: Enumeration of possible context object watcher events: - - ``Py_CONTEXT_SWITCHED``: The :term:`current context` has switched to a - different context. The object passed to the watch callback is the - now-current :class:`contextvars.Context` object, or None if no context is - current. + - ``Py_CONTEXT_EVENT_ENTER``: A context has been entered, causing the + :term:`current context` to switch to it. The object passed to the watch + callback is the now-current :class:`contextvars.Context` object. Each + enter event will eventually have a corresponding exit event for the same + context object after any subsequently entered contexts have themselves been + exited. + - ``Py_CONTEXT_EVENT_EXIT``: A context is about to be exited, which will + cause the :term:`current context` to switch back to what it was before the + context was entered. The object passed to the watch callback is the + still-current :class:`contextvars.Context` object. .. versionadded:: 3.14 diff --git a/Include/cpython/context.h b/Include/cpython/context.h index 3a7a4b459c09ad..3c9be7873b9399 100644 --- a/Include/cpython/context.h +++ b/Include/cpython/context.h @@ -29,11 +29,20 @@ PyAPI_FUNC(int) PyContext_Exit(PyObject *); typedef enum { /* - * The current context has switched to a different context. 
The object - * passed to the watch callback is the now-current contextvars.Context - * object, or None if no context is current. + * A context has been entered, causing the "current context" to switch to + * it. The object passed to the watch callback is the now-current + * contextvars.Context object. Each enter event will eventually have a + * corresponding exit event for the same context object after any + * subsequently entered contexts have themselves been exited. */ - Py_CONTEXT_SWITCHED = 1, + Py_CONTEXT_EVENT_ENTER, + /* + * A context is about to be exited, which will cause the "current context" + * to switch back to what it was before the context was entered. The + * object passed to the watch callback is the still-current + * contextvars.Context object. + */ + Py_CONTEXT_EVENT_EXIT, } PyContextEvent; /* diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index 4680d6765de122..f21d2627c6094b 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -577,62 +577,68 @@ class TestContextObjectWatchers(unittest.TestCase): def context_watcher(self, which_watcher): wid = _testcapi.add_context_watcher(which_watcher) try: - switches = _testcapi.get_context_switches(which_watcher) - except ValueError: - switches = None - try: - yield switches + yield wid finally: _testcapi.clear_context_watcher(wid) - def assert_event_counts(self, want_0, want_1): - self.assertEqual(len(_testcapi.get_context_switches(0)), want_0) - self.assertEqual(len(_testcapi.get_context_switches(1)), want_1) + def assert_event_counts(self, exp_enter_0, exp_exit_0, + exp_enter_1, exp_exit_1): + self.assertEqual( + exp_enter_0, _testcapi.get_context_watcher_num_enter_events(0)) + self.assertEqual( + exp_exit_0, _testcapi.get_context_watcher_num_exit_events(0)) + self.assertEqual( + exp_enter_1, _testcapi.get_context_watcher_num_enter_events(1)) + self.assertEqual( + exp_exit_1, _testcapi.get_context_watcher_num_exit_events(1)) 
def test_context_object_events_dispatched(self): # verify that all counts are zero before any watchers are registered - self.assert_event_counts(0, 0) + self.assert_event_counts(0, 0, 0, 0) # verify that all counts remain zero when a context object is # entered and exited with no watchers registered ctx = contextvars.copy_context() - ctx.run(self.assert_event_counts, 0, 0) - self.assert_event_counts(0, 0) + ctx.run(self.assert_event_counts, 0, 0, 0, 0) + self.assert_event_counts(0, 0, 0, 0) # verify counts are as expected when first watcher is registered with self.context_watcher(0): - self.assert_event_counts(0, 0) - ctx.run(self.assert_event_counts, 1, 0) - self.assert_event_counts(2, 0) + self.assert_event_counts(0, 0, 0, 0) + ctx.run(self.assert_event_counts, 1, 0, 0, 0) + self.assert_event_counts(1, 1, 0, 0) # again with second watcher registered with self.context_watcher(1): - self.assert_event_counts(2, 0) - ctx.run(self.assert_event_counts, 3, 1) - self.assert_event_counts(4, 2) + self.assert_event_counts(1, 1, 0, 0) + ctx.run(self.assert_event_counts, 2, 1, 1, 0) + self.assert_event_counts(2, 2, 1, 1) # verify counts are reset and don't change after both watchers are cleared - ctx.run(self.assert_event_counts, 0, 0) - self.assert_event_counts(0, 0) - - def test_callback_error(self): - ctx_outer = contextvars.copy_context() - ctx_inner = contextvars.copy_context() - unraisables = [] - - def _in_outer(): - with self.context_watcher(2): - with catch_unraisable_exception() as cm: - ctx_inner.run(lambda: unraisables.append(cm.unraisable)) - unraisables.append(cm.unraisable) - - ctx_outer.run(_in_outer) - self.assertEqual([x.err_msg for x in unraisables], - ["Exception ignored in Py_CONTEXT_SWITCHED " - f"watcher callback for {ctx!r}" - for ctx in [ctx_inner, ctx_outer]]) - self.assertEqual([str(x.exc_value) for x in unraisables], - ["boom!", "boom!"]) + ctx.run(self.assert_event_counts, 0, 0, 0, 0) + self.assert_event_counts(0, 0, 0, 0) + + def 
test_enter_error(self): + with self.context_watcher(2): + with catch_unraisable_exception() as cm: + ctx = contextvars.copy_context() + ctx.run(int, 0) + self.assertEqual( + cm.unraisable.err_msg, + "Exception ignored in " + f"Py_CONTEXT_EVENT_EXIT watcher callback for {ctx!r}" + ) + self.assertEqual(str(cm.unraisable.exc_value), "boom!") + + def test_exit_error(self): + ctx = contextvars.copy_context() + def _in_context(stack): + stack.enter_context(self.context_watcher(2)) + + with catch_unraisable_exception() as cm: + with ExitStack() as stack: + ctx.run(_in_context, stack) + self.assertEqual(str(cm.unraisable.exc_value), "boom!") def test_clear_out_of_range_watcher_id(self): with self.assertRaisesRegex(ValueError, r"Invalid context watcher ID -1"): @@ -648,12 +654,5 @@ def test_allocate_too_many_watchers(self): with self.assertRaisesRegex(RuntimeError, r"no more context watcher IDs available"): _testcapi.allocate_too_many_context_watchers() - def test_exit_base_context(self): - ctx = contextvars.Context() - _testcapi.clear_context_stack() - with self.context_watcher(0) as switches: - ctx.run(lambda: None) - self.assertEqual(switches, [ctx, None]) - if __name__ == "__main__": unittest.main() diff --git a/Modules/_testcapi/watchers.c b/Modules/_testcapi/watchers.c index 321d3aeffb6ad1..b4233d07134aea 100644 --- a/Modules/_testcapi/watchers.c +++ b/Modules/_testcapi/watchers.c @@ -626,12 +626,16 @@ allocate_too_many_func_watchers(PyObject *self, PyObject *args) // Test contexct object watchers #define NUM_CONTEXT_WATCHERS 2 static int context_watcher_ids[NUM_CONTEXT_WATCHERS] = {-1, -1}; -static PyObject *context_switches[NUM_CONTEXT_WATCHERS]; +static int num_context_object_enter_events[NUM_CONTEXT_WATCHERS] = {0, 0}; +static int num_context_object_exit_events[NUM_CONTEXT_WATCHERS] = {0, 0}; static int handle_context_watcher_event(int which_watcher, PyContextEvent event, PyObject *ctx) { - if (event == Py_CONTEXT_SWITCHED) { - 
PyList_Append(context_switches[which_watcher], ctx); + if (event == Py_CONTEXT_EVENT_ENTER) { + num_context_object_enter_events[which_watcher]++; + } + else if (event == Py_CONTEXT_EVENT_EXIT) { + num_context_object_exit_events[which_watcher]++; } else { return -1; @@ -663,28 +667,31 @@ error_context_event_handler(PyContextEvent event, PyObject *ctx) { static PyObject * add_context_watcher(PyObject *self, PyObject *which_watcher) { - static const PyContext_WatchCallback callbacks[] = { - &first_context_watcher_callback, - &second_context_watcher_callback, - &error_context_event_handler, - }; + int watcher_id; assert(PyLong_Check(which_watcher)); long which_l = PyLong_AsLong(which_watcher); - if (which_l < 0 || which_l >= (long)Py_ARRAY_LENGTH(callbacks)) { + if (which_l == 0) { + watcher_id = PyContext_AddWatcher(first_context_watcher_callback); + context_watcher_ids[0] = watcher_id; + num_context_object_enter_events[0] = 0; + num_context_object_exit_events[0] = 0; + } + else if (which_l == 1) { + watcher_id = PyContext_AddWatcher(second_context_watcher_callback); + context_watcher_ids[1] = watcher_id; + num_context_object_enter_events[1] = 0; + num_context_object_exit_events[1] = 0; + } + else if (which_l == 2) { + watcher_id = PyContext_AddWatcher(error_context_event_handler); + } + else { PyErr_Format(PyExc_ValueError, "invalid watcher %d", which_l); return NULL; } - int watcher_id = PyContext_AddWatcher(callbacks[which_l]); if (watcher_id < 0) { return NULL; } - if (which_l >= 0 && which_l < NUM_CONTEXT_WATCHERS) { - context_watcher_ids[which_l] = watcher_id; - Py_XSETREF(context_switches[which_l], PyList_New(0)); - if (context_switches[which_l] == NULL) { - return NULL; - } - } return PyLong_FromLong(watcher_id); } @@ -701,7 +708,8 @@ clear_context_watcher(PyObject *self, PyObject *watcher_id) for (int i = 0; i < NUM_CONTEXT_WATCHERS; i++) { if (watcher_id_l == context_watcher_ids[i]) { context_watcher_ids[i] = -1; - Py_CLEAR(context_switches[i]); + 
num_context_object_enter_events[i] = 0; + num_context_object_exit_events[i] = 0; } } } @@ -709,34 +717,21 @@ clear_context_watcher(PyObject *self, PyObject *watcher_id) } static PyObject * -clear_context_stack(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) +get_context_watcher_num_enter_events(PyObject *self, PyObject *watcher_id) { - PyThreadState *tstate = PyThreadState_Get(); - if (tstate->context == NULL) { - Py_RETURN_NONE; - } - if (((PyContext *)tstate->context)->ctx_prev != NULL) { - PyErr_SetString(PyExc_RuntimeError, - "must first exit all non-base contexts"); - return NULL; - } - Py_CLEAR(tstate->context); - Py_RETURN_NONE; + assert(PyLong_Check(watcher_id)); + long watcher_id_l = PyLong_AsLong(watcher_id); + assert(watcher_id_l >= 0 && watcher_id_l < NUM_CONTEXT_WATCHERS); + return PyLong_FromLong(num_context_object_enter_events[watcher_id_l]); } static PyObject * -get_context_switches(PyObject *Py_UNUSED(self), PyObject *watcher_id) +get_context_watcher_num_exit_events(PyObject *self, PyObject *watcher_id) { assert(PyLong_Check(watcher_id)); long watcher_id_l = PyLong_AsLong(watcher_id); - if (watcher_id_l < 0 || watcher_id_l >= NUM_CONTEXT_WATCHERS) { - PyErr_Format(PyExc_ValueError, "invalid watcher %ld", watcher_id_l); - return NULL; - } - if (context_switches[watcher_id_l] == NULL) { - return PyList_New(0); - } - return Py_NewRef(context_switches[watcher_id_l]); + assert(watcher_id_l >= 0 && watcher_id_l < NUM_CONTEXT_WATCHERS); + return PyLong_FromLong(num_context_object_exit_events[watcher_id_l]); } static PyObject * @@ -840,8 +835,10 @@ static PyMethodDef test_methods[] = { // Code object watchers. 
{"add_context_watcher", add_context_watcher, METH_O, NULL}, {"clear_context_watcher", clear_context_watcher, METH_O, NULL}, - {"clear_context_stack", clear_context_stack, METH_NOARGS, NULL}, - {"get_context_switches", get_context_switches, METH_O, NULL}, + {"get_context_watcher_num_enter_events", + get_context_watcher_num_enter_events, METH_O, NULL}, + {"get_context_watcher_num_exit_events", + get_context_watcher_num_exit_events, METH_O, NULL}, {"allocate_too_many_context_watchers", (PyCFunction) allocate_too_many_context_watchers, METH_NOARGS, NULL}, {NULL}, diff --git a/Python/context.c b/Python/context.c index 95aa82206270f9..8bc487a33c890b 100644 --- a/Python/context.c +++ b/Python/context.c @@ -102,8 +102,10 @@ PyContext_CopyCurrent(void) static const char * context_event_name(PyContextEvent event) { switch (event) { - case Py_CONTEXT_SWITCHED: - return "Py_CONTEXT_SWITCHED"; + case Py_CONTEXT_EVENT_ENTER: + return "Py_CONTEXT_EVENT_ENTER"; + case Py_CONTEXT_EVENT_EXIT: + return "Py_CONTEXT_EVENT_EXIT"; default: return "?"; } @@ -113,13 +115,6 @@ context_event_name(PyContextEvent event) { static void notify_context_watchers(PyThreadState *ts, PyContextEvent event, PyObject *ctx) { - if (ctx == NULL) { - // This will happen after exiting the last context in the stack, which - // can occur if context_get was never called before entering a context - // (e.g., called `contextvars.Context().run()` on a fresh thread, as - // PyContext_Enter doesn't call context_get). - ctx = Py_None; - } assert(Py_REFCNT(ctx) > 0); PyInterpreterState *interp = ts->interp; assert(interp->_initialized); @@ -180,16 +175,6 @@ PyContext_ClearWatcher(int watcher_id) } -static inline void -context_switched(PyThreadState *ts) -{ - ts->context_ver++; - // ts->context is used instead of context_get() because context_get() might - // throw if ts->context is NULL. 
- notify_context_watchers(ts, Py_CONTEXT_SWITCHED, ts->context); -} - - static int _PyContext_Enter(PyThreadState *ts, PyObject *octx) { @@ -206,7 +191,9 @@ _PyContext_Enter(PyThreadState *ts, PyObject *octx) ctx->ctx_entered = 1; ts->context = Py_NewRef(ctx); - context_switched(ts); + ts->context_ver++; + + notify_context_watchers(ts, Py_CONTEXT_EVENT_ENTER, octx); return 0; } @@ -240,11 +227,13 @@ _PyContext_Exit(PyThreadState *ts, PyObject *octx) return -1; } + notify_context_watchers(ts, Py_CONTEXT_EVENT_EXIT, octx); Py_SETREF(ts->context, (PyObject *)ctx->ctx_prev); + ts->context_ver++; ctx->ctx_prev = NULL; ctx->ctx_entered = 0; - context_switched(ts); + return 0; } diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 2605825d3d0078..e6c599a2ac4a46 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -455,8 +455,8 @@ Modules/_testcapi/watchers.c - pyfunc_watchers - Modules/_testcapi/watchers.c - func_watcher_ids - Modules/_testcapi/watchers.c - func_watcher_callbacks - Modules/_testcapi/watchers.c - context_watcher_ids - -Modules/_testcapi/watchers.c - context_switches - -Modules/_testcapi/watchers.c add_context_watcher callbacks - +Modules/_testcapi/watchers.c - num_context_object_enter_events - +Modules/_testcapi/watchers.c - num_context_object_exit_events - Modules/_testcapimodule.c - BasicStaticTypes - Modules/_testcapimodule.c - num_basic_static_types_used - Modules/_testcapimodule.c - ContainerNoGC_members - From 703227dd021491ceb9343f69fa48f4b6a05adbb3 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Tue, 15 Oct 2024 07:51:37 -0700 Subject: [PATCH 114/114] gh-125422: Don't set the caller's f_trace if it's botframe (#125427) --- Lib/bdb.py | 5 +++-- Lib/test/test_bdb.py | 13 +++++++++++++ Lib/test/test_pdb.py | 14 ++++++++++++++ .../2024-10-14-04-44-12.gh-issue-125422.MlVuC6.rst | 1 + 4 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 
Misc/NEWS.d/next/Library/2024-10-14-04-44-12.gh-issue-125422.MlVuC6.rst diff --git a/Lib/bdb.py b/Lib/bdb.py index 666f9714eb9b7a..9755d61dd2696c 100644 --- a/Lib/bdb.py +++ b/Lib/bdb.py @@ -350,9 +350,10 @@ def _set_caller_tracefunc(self, current_frame): # Issue #13183: pdb skips frames after hitting a breakpoint and running # step commands. # Restore the trace function in the caller (that may not have been set - # for performance reasons) when returning from the current frame. + # for performance reasons) when returning from the current frame, unless + # the caller is the botframe. caller_frame = current_frame.f_back - if caller_frame and not caller_frame.f_trace: + if caller_frame and not caller_frame.f_trace and caller_frame is not self.botframe: caller_frame.f_trace = self.trace_dispatch # Derived classes and clients can call the following methods diff --git a/Lib/test/test_bdb.py b/Lib/test/test_bdb.py index 10c58c04dfd25e..f15dae13eb384e 100644 --- a/Lib/test/test_bdb.py +++ b/Lib/test/test_bdb.py @@ -1217,6 +1217,19 @@ def main(): with TracerRun(self) as tracer: tracer.runcall(tfunc_import) + def test_next_to_botframe(self): + # gh-125422 + # Check that next command won't go to the bottom frame. 
+ code = """ + lno = 2 + """ + self.expect_set = [ + ('line', 2, ''), ('step', ), + ('return', 2, ''), ('next', ), + ] + with TracerRun(self) as tracer: + tracer.run(compile(textwrap.dedent(code), '', 'exec')) + class TestRegressions(unittest.TestCase): def test_format_stack_entry_no_lineno(self): diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 46eb00261042bc..3dc65fdfc03409 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -3393,6 +3393,20 @@ def test_issue26053(self): self.assertRegex(res, "Restarting .* with arguments:\na b c") self.assertRegex(res, "Restarting .* with arguments:\nd e f") + def test_step_into_botframe(self): + # gh-125422 + # pdb should not be able to step into the botframe (bdb.py) + script = "x = 1" + commands = """ + step + step + step + quit + """ + stdout, _ = self.run_pdb_script(script, commands) + self.assertIn("The program finished", stdout) + self.assertNotIn("bdb.py", stdout) + def test_pdbrc_basic(self): script = textwrap.dedent(""" a = 1 diff --git a/Misc/NEWS.d/next/Library/2024-10-14-04-44-12.gh-issue-125422.MlVuC6.rst b/Misc/NEWS.d/next/Library/2024-10-14-04-44-12.gh-issue-125422.MlVuC6.rst new file mode 100644 index 00000000000000..c890ecec8beaf8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-14-04-44-12.gh-issue-125422.MlVuC6.rst @@ -0,0 +1 @@ +Fixed the bug where :mod:`pdb` and :mod:`bdb` can step into the bottom caller frame.