From 7f7e706d78ab968a1221c6179dfdba714860bd12 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 8 May 2020 19:20:26 -0400 Subject: [PATCH] bpo-39791: Add files() to importlib.resources (GH-19722) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bpo-39791: Update importlib.resources to support files() API (importlib_resources 1.5). * 📜🤖 Added by blurb_it. * Add some documentation about the new objects added. Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Doc/library/importlib.rst | 37 ++++ Lib/importlib/_common.py | 72 ++++++++ Lib/importlib/abc.py | 86 ++++++++++ Lib/importlib/resources.py | 161 +++++++----------- Lib/test/test_importlib/test_files.py | 39 +++++ Lib/test/test_importlib/test_path.py | 1 + .../2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst | 1 + 7 files changed, 295 insertions(+), 102 deletions(-) create mode 100644 Lib/importlib/_common.py create mode 100644 Lib/test/test_importlib/test_files.py create mode 100644 Misc/NEWS.d/next/Library/2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst index a612b1e1455a0c..99bfeacbbc7407 100644 --- a/Doc/library/importlib.rst +++ b/Doc/library/importlib.rst @@ -480,6 +480,8 @@ ABC hierarchy:: .. class:: ResourceReader + *Superseded by TraversableReader* + An :term:`abstract base class` to provide the ability to read *resources*. @@ -795,6 +797,28 @@ ABC hierarchy:: itself does not end in ``__init__``. +.. class:: Traversable + + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + + .. versionadded:: 3.9 + + +.. class:: TraversableReader + + An abstract base class for resource readers capable of serving + the ``files`` interface. Subclasses ResourceReader and provides + concrete implementations of the ResourceReader's abstract + methods. Therefore, any loader supplying TraversableReader + also supplies ResourceReader. + + Loaders that wish to support resource reading are expected to + implement this interface. + + .. versionadded:: 3.9 + + :mod:`importlib.resources` -- Resources --------------------------------------- @@ -853,6 +877,19 @@ The following types are defined. The following functions are available. + +.. function:: files(package) + + Returns an :class:`importlib.resources.abc.Traversable` object + representing the resource container for the package (think directory) + and its resources (think files). A Traversable may contain other + containers (think subdirectories). + + *package* is either a name or a module object which conforms to the + ``Package`` requirements. + + .. versionadded:: 3.9 + .. function:: open_binary(package, resource) Open for binary reading the *resource* within *package*. diff --git a/Lib/importlib/_common.py b/Lib/importlib/_common.py new file mode 100644 index 00000000000000..ba7cbac3c9bfda --- /dev/null +++ b/Lib/importlib/_common.py @@ -0,0 +1,72 @@ +import os +import pathlib +import zipfile +import tempfile +import functools +import contextlib + + +def from_package(package): + """ + Return a Traversable object for the given package. + + """ + spec = package.__spec__ + return from_traversable_resources(spec) or fallback_resources(spec) + + +def from_traversable_resources(spec): + """ + If the spec.loader implements TraversableResources, + directly or implicitly, it will have a ``files()`` method. + """ + with contextlib.suppress(AttributeError): + return spec.loader.files() + + +def fallback_resources(spec): + package_directory = pathlib.Path(spec.origin).parent + try: + archive_path = spec.loader.archive + rel_path = package_directory.relative_to(archive_path) + return zipfile.Path(archive_path, str(rel_path) + '/') + except Exception: + pass + return package_directory + + +@contextlib.contextmanager +def _tempfile(reader, suffix=''): + # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' + # blocks due to the need to close the temporary file to work on Windows + # properly. + fd, raw_path = tempfile.mkstemp(suffix=suffix) + try: + os.write(fd, reader()) + os.close(fd) + yield pathlib.Path(raw_path) + finally: + try: + os.remove(raw_path) + except FileNotFoundError: + pass + + +@functools.singledispatch +@contextlib.contextmanager +def as_file(path): + """ + Given a Traversable object, return that object as a + path on the local file system in a context manager. + """ + with _tempfile(path.read_bytes, suffix=path.name) as local: + yield local + + +@as_file.register(pathlib.Path) +@contextlib.contextmanager +def _(path): + """ + Degenerate behavior for pathlib.Path objects. + """ + yield path diff --git a/Lib/importlib/abc.py b/Lib/importlib/abc.py index b1b5ccce4bd35d..b8a9bb1a21ef77 100644 --- a/Lib/importlib/abc.py +++ b/Lib/importlib/abc.py @@ -14,6 +14,7 @@ _frozen_importlib_external = _bootstrap_external import abc import warnings +from typing import Protocol, runtime_checkable def _register(abstract_cls, *classes): @@ -386,3 +387,88 @@ def contents(self): _register(ResourceReader, machinery.SourceFileLoader) + + +@runtime_checkable +class Traversable(Protocol): + """ + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + """ + + @abc.abstractmethod + def iterdir(self): + """ + Yield Traversable objects in self + """ + + @abc.abstractmethod + def read_bytes(self): + """ + Read contents of self as bytes + """ + + @abc.abstractmethod + def read_text(self, encoding=None): + """ + Read contents of self as bytes + """ + + @abc.abstractmethod + def is_dir(self): + """ + Return True if self is a dir + """ + + @abc.abstractmethod + def is_file(self): + """ + Return True if self is a file + """ + + @abc.abstractmethod + def joinpath(self, child): + """ + Return Traversable child in self + """ + + @abc.abstractmethod + def __truediv__(self, child): + """ + Return Traversable child in self + """ + + @abc.abstractmethod + def open(self, mode='r', *args, **kwargs): + """ + mode may be 'r' or 'rb' to open as text or binary. Return a handle + suitable for reading (same as pathlib.Path.open). + + When opening as text, accepts encoding parameters such as those + accepted by io.TextIOWrapper. + """ + + @abc.abstractproperty + def name(self): + # type: () -> str + """ + The base name of this object without any parent references. + """ + + +class TraversableResources(ResourceReader): + @abc.abstractmethod + def files(self): + """Return a Traversable object for the loaded package.""" + + def open_resource(self, resource): + return self.files().joinpath(resource).open('rb') + + def resource_path(self, resource): + raise FileNotFoundError(resource) + + def is_resource(self, path): + return self.files().joinpath(path).isfile() + + def contents(self): + return (item.name for item in self.files().iterdir()) diff --git a/Lib/importlib/resources.py b/Lib/importlib/resources.py index f51886557466c7..b803a01c91d652 100644 --- a/Lib/importlib/resources.py +++ b/Lib/importlib/resources.py @@ -1,14 +1,15 @@ import os -import tempfile from . import abc as resources_abc +from . import _common +from ._common import as_file from contextlib import contextmanager, suppress from importlib import import_module from importlib.abc import ResourceLoader from io import BytesIO, TextIOWrapper from pathlib import Path from types import ModuleType -from typing import Iterable, Iterator, Optional, Union # noqa: F401 +from typing import ContextManager, Iterable, Optional, Union from typing import cast from typing.io import BinaryIO, TextIO @@ -16,7 +17,9 @@ __all__ = [ 'Package', 'Resource', + 'as_file', 'contents', + 'files', 'is_resource', 'open_binary', 'open_text', @@ -30,24 +33,23 @@ Resource = Union[str, os.PathLike] +def _resolve(name) -> ModuleType: + """If name is a string, resolve to a module.""" + if hasattr(name, '__spec__'): + return name + return import_module(name) + + def _get_package(package) -> ModuleType: """Take a package name or module object and return the module. - If a name, the module is imported. If the passed or imported module + If a name, the module is imported. If the resolved module object is not a package, raise an exception. """ - if hasattr(package, '__spec__'): - if package.__spec__.submodule_search_locations is None: - raise TypeError('{!r} is not a package'.format( - package.__spec__.name)) - else: - return package - else: - module = import_module(package) - if module.__spec__.submodule_search_locations is None: - raise TypeError('{!r} is not a package'.format(package)) - else: - return module + module = _resolve(package) + if module.__spec__.submodule_search_locations is None: + raise TypeError('{!r} is not a package'.format(package)) + return module def _normalize_path(path) -> str: @@ -58,8 +60,7 @@ def _normalize_path(path) -> str: parent, file_name = os.path.split(path) if parent: raise ValueError('{!r} must be only a file name'.format(path)) - else: - return file_name + return file_name def _get_resource_reader( @@ -88,8 +89,8 @@ def open_binary(package: Package, resource: Resource) -> BinaryIO: reader = _get_resource_reader(package) if reader is not None: return reader.open_resource(resource) - _check_location(package) - absolute_package_path = os.path.abspath(package.__spec__.origin) + absolute_package_path = os.path.abspath( + package.__spec__.origin or 'non-existent file') package_path = os.path.dirname(absolute_package_path) full_path = os.path.join(package_path, resource) try: @@ -108,8 +109,7 @@ def open_binary(package: Package, resource: Resource) -> BinaryIO: message = '{!r} resource not found in {!r}'.format( resource, package_name) raise FileNotFoundError(message) - else: - return BytesIO(data) + return BytesIO(data) def open_text(package: Package, @@ -117,39 +117,12 @@ def open_text(package: Package, encoding: str = 'utf-8', errors: str = 'strict') -> TextIO: """Return a file-like object opened for text reading of the resource.""" - resource = _normalize_path(resource) - package = _get_package(package) - reader = _get_resource_reader(package) - if reader is not None: - return TextIOWrapper(reader.open_resource(resource), encoding, errors) - _check_location(package) - absolute_package_path = os.path.abspath(package.__spec__.origin) - package_path = os.path.dirname(absolute_package_path) - full_path = os.path.join(package_path, resource) - try: - return open(full_path, mode='r', encoding=encoding, errors=errors) - except OSError: - # Just assume the loader is a resource loader; all the relevant - # importlib.machinery loaders are and an AttributeError for - # get_data() will make it clear what is needed from the loader. - loader = cast(ResourceLoader, package.__spec__.loader) - data = None - if hasattr(package.__spec__.loader, 'get_data'): - with suppress(OSError): - data = loader.get_data(full_path) - if data is None: - package_name = package.__spec__.name - message = '{!r} resource not found in {!r}'.format( - resource, package_name) - raise FileNotFoundError(message) - else: - return TextIOWrapper(BytesIO(data), encoding, errors) + return TextIOWrapper( + open_binary(package, resource), encoding=encoding, errors=errors) def read_binary(package: Package, resource: Resource) -> bytes: """Return the binary contents of the resource.""" - resource = _normalize_path(resource) - package = _get_package(package) with open_binary(package, resource) as fp: return fp.read() @@ -163,14 +136,20 @@ def read_text(package: Package, The decoding-related arguments have the same semantics as those of bytes.decode(). """ - resource = _normalize_path(resource) - package = _get_package(package) with open_text(package, resource, encoding, errors) as fp: return fp.read() -@contextmanager -def path(package: Package, resource: Resource) -> Iterator[Path]: +def files(package: Package) -> resources_abc.Traversable: + """ + Get a Traversable resource from a package + """ + return _common.from_package(_get_package(package)) + + +def path( + package: Package, resource: Resource, + ) -> 'ContextManager[Path]': """A context manager providing a file path object to the resource. If the resource does not already exist on its own on the file system, @@ -179,39 +158,23 @@ def path(package: Package, resource: Resource) -> Iterator[Path]: raised if the file was deleted prior to the context manager exiting). """ - resource = _normalize_path(resource) - package = _get_package(package) - reader = _get_resource_reader(package) - if reader is not None: - try: - yield Path(reader.resource_path(resource)) - return - except FileNotFoundError: - pass - else: - _check_location(package) - # Fall-through for both the lack of resource_path() *and* if - # resource_path() raises FileNotFoundError. - package_directory = Path(package.__spec__.origin).parent - file_path = package_directory / resource - if file_path.exists(): - yield file_path - else: - with open_binary(package, resource) as fp: - data = fp.read() - # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' - # blocks due to the need to close the temporary file to work on - # Windows properly. - fd, raw_path = tempfile.mkstemp() - try: - os.write(fd, data) - os.close(fd) - yield Path(raw_path) - finally: - try: - os.remove(raw_path) - except FileNotFoundError: - pass + reader = _get_resource_reader(_get_package(package)) + return ( + _path_from_reader(reader, resource) + if reader else + _common.as_file(files(package).joinpath(_normalize_path(resource))) + ) + + +@contextmanager +def _path_from_reader(reader, resource): + norm_resource = _normalize_path(resource) + with suppress(FileNotFoundError): + yield Path(reader.resource_path(norm_resource)) + return + opener_reader = reader.open_resource(norm_resource) + with _common._tempfile(opener_reader.read, suffix=norm_resource) as res: + yield res def is_resource(package: Package, name: str) -> bool: @@ -224,17 +187,10 @@ def is_resource(package: Package, name: str) -> bool: reader = _get_resource_reader(package) if reader is not None: return reader.is_resource(name) - try: - package_contents = set(contents(package)) - except (NotADirectoryError, FileNotFoundError): - return False + package_contents = set(contents(package)) if name not in package_contents: return False - # Just because the given file_name lives as an entry in the package's - # contents doesn't necessarily mean it's a resource. Directories are not - # resources, so let's try to find out if it's a directory or not. - path = Path(package.__spec__.origin).parent / name - return path.is_file() + return (_common.from_package(package) / name).is_file() def contents(package: Package) -> Iterable[str]: @@ -249,10 +205,11 @@ def contents(package: Package) -> Iterable[str]: if reader is not None: return reader.contents() # Is the package a namespace package? By definition, namespace packages - # cannot have resources. We could use _check_location() and catch the - # exception, but that's extra work, so just inline the check. - elif package.__spec__.origin is None or not package.__spec__.has_location: + # cannot have resources. + namespace = ( + package.__spec__.origin is None or + package.__spec__.origin == 'namespace' + ) + if namespace or not package.__spec__.has_location: return () - else: - package_directory = Path(package.__spec__.origin).parent - return os.listdir(package_directory) + return list(item.name for item in _common.from_package(package).iterdir()) diff --git a/Lib/test/test_importlib/test_files.py b/Lib/test/test_importlib/test_files.py new file mode 100644 index 00000000000000..fa7af82bf0c28b --- /dev/null +++ b/Lib/test/test_importlib/test_files.py @@ -0,0 +1,39 @@ +import typing +import unittest + +from importlib import resources +from importlib.abc import Traversable +from . import data01 +from . import util + + +class FilesTests: + def test_read_bytes(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_bytes() + assert actual == b'Hello, UTF-8 world!\n' + + def test_read_text(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_text() + assert actual == 'Hello, UTF-8 world!\n' + + @unittest.skipUnless( + hasattr(typing, 'runtime_checkable'), + "Only suitable when typing supports runtime_checkable", + ) + def test_traversable(self): + assert isinstance(resources.files(self.data), Traversable) + + +class OpenDiskTests(FilesTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class OpenZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/test/test_importlib/test_path.py b/Lib/test/test_importlib/test_path.py index 2d3dcda7ed2e79..c4e7285411322c 100644 --- a/Lib/test/test_importlib/test_path.py +++ b/Lib/test/test_importlib/test_path.py @@ -17,6 +17,7 @@ def test_reading(self): # Test also implicitly verifies the returned object is a pathlib.Path # instance. with resources.path(self.data, 'utf-8.file') as path: + self.assertTrue(path.name.endswith("utf-8.file"), repr(path)) # pathlib.Path.read_text() was introduced in Python 3.5. with path.open('r', encoding='utf-8') as file: text = file.read() diff --git a/Misc/NEWS.d/next/Library/2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst b/Misc/NEWS.d/next/Library/2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst new file mode 100644 index 00000000000000..237bcf7f99b0f0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst @@ -0,0 +1 @@ +Added ``files()`` function to importlib.resources with support for subdirectories in package data, matching backport in importlib_resources 1.5. \ No newline at end of file