diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c3f6dcbce2a64..0f669beaa036f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -468,6 +468,7 @@ I/O - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`) - Bug in :func:`read_sql` when reading multiple timezone aware columns with the same column name (:issue:`44421`) - Bug when writing and reading empty Stata dta files where dtype information was lost (:issue:`46240`) +- Bug where ``bz2`` was treated as a hard requirement (:issue:`53857`) Period ^^^^^^ diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py index cf9e7ade15757..49fde6d08fa11 100644 --- a/pandas/_testing/_io.py +++ b/pandas/_testing/_io.py @@ -1,6 +1,5 @@ from __future__ import annotations -import bz2 import gzip import io import pathlib @@ -12,7 +11,10 @@ ) import zipfile -from pandas.compat import get_lzma_file +from pandas.compat import ( + get_bz2_file, + get_lzma_file, +) from pandas.compat._optional import import_optional_dependency import pandas as pd @@ -156,7 +158,7 @@ def write_to_compressed(compression, path, data, dest: str = "test"): elif compression == "gzip": compress_method = gzip.GzipFile elif compression == "bz2": - compress_method = bz2.BZ2File + compress_method = get_bz2_file() elif compression == "zstd": compress_method = import_optional_dependency("zstandard").open elif compression == "xz": diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 00957c45a7fbe..de4b91e44da19 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -154,6 +154,29 @@ def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]: return pandas.compat.compressors.LZMAFile +def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]: + """ + Importing the `BZ2File` class from the `bz2` module. + + Returns + ------- + class + The `BZ2File` class from the `bz2` module. + + Raises + ------ + RuntimeError + If the `bz2` module was not imported correctly, or didn't exist. + """ + if not pandas.compat.compressors.has_bz2: + raise RuntimeError( + "bz2 module not available. " + "A Python re-install with the proper dependencies, " + "might be required to solve this issue." + ) + return pandas.compat.compressors.BZ2File + + __all__ = [ "is_numpy_dev", "pa_version_under7p0", diff --git a/pandas/compat/compressors.py b/pandas/compat/compressors.py index a4f39c4e34bd4..1f31e34c092c9 100644 --- a/pandas/compat/compressors.py +++ b/pandas/compat/compressors.py @@ -4,11 +4,17 @@ from __future__ import annotations -import bz2 from pickle import PickleBuffer from pandas.compat._constants import PY310 +try: + import bz2 + + has_bz2 = True +except ImportError: + has_bz2 = False + try: import lzma @@ -41,17 +47,19 @@ def flatten_buffer( return memoryview(b).tobytes("A") -class BZ2File(bz2.BZ2File): - if not PY310: +if has_bz2: - def write(self, b) -> int: - # Workaround issue where `bz2.BZ2File` expects `len` - # to return the number of bytes in `b` by converting - # `b` into something that meets that constraint with - # minimal copying. - # - # Note: This is fixed in Python 3.10. - return super().write(flatten_buffer(b)) + class BZ2File(bz2.BZ2File): + if not PY310: + + def write(self, b) -> int: + # Workaround issue where `bz2.BZ2File` expects `len` + # to return the number of bytes in `b` by converting + # `b` into something that meets that constraint with + # minimal copying. + # + # Note: This is fixed in Python 3.10. + return super().write(flatten_buffer(b)) if has_lzma: diff --git a/pandas/io/common.py b/pandas/io/common.py index 43780a08a4339..6199491be71a5 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -57,9 +57,11 @@ StorageOptions, WriteBuffer, ) -from pandas.compat import get_lzma_file +from pandas.compat import ( + get_bz2_file, + get_lzma_file, +) from pandas.compat._optional import import_optional_dependency -from pandas.compat.compressors import BZ2File as _BZ2File from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level @@ -766,7 +768,7 @@ def get_handle( elif compression == "bz2": # Overload of "BZ2File" to handle pickle protocol 5 # "Union[str, BaseBuffer]", "str", "Dict[str, Any]" - handle = _BZ2File( # type: ignore[call-overload] + handle = get_bz2_file()( # type: ignore[call-overload] handle, mode=ioargs.mode, **compression_args, diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index fa7750397369b..52fded5a6ee55 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -3,6 +3,7 @@ import string import subprocess import sys +import textwrap import numpy as np import pytest @@ -246,3 +247,21 @@ def test_str_size(): ] result = subprocess.check_output(call).decode()[-4:-1].strip("\n") assert int(result) == int(expected) + + +@pytest.mark.single_cpu +def test_bz2_missing_import(): + # Check whether bz2 missing import is handled correctly (issue #53857) + code = """ + import sys + sys.modules['bz2'] = None + import pytest + import pandas as pd + from pandas.compat import get_bz2_file + msg = 'bz2 module not available.' + with pytest.raises(RuntimeError, match=msg): + get_bz2_file() + """ + code = textwrap.dedent(code) + call = [sys.executable, "-c", code] + subprocess.check_output(call)