Skip to content

Commit

Permalink
BUG: Fix pandas compatibility with Python installations lacking bzip2 headers (pandas-dev#53858)
Browse files: browse the repository at this point in the history

* BUG: Make bz2 import optional

* CLN: Create `get_bz2_file` to match `get_lzma_file`

* DOC: Add bz2 bugfix to changelog

* TST: Test bz2 non-import works

* TST: Test bz2 non-import from subprocess

* TST: Fix bz2 non-import test

* TST: Fix indentation issues in bz2 import test

* MAINT: Clean up merge commit

* Mark bz2 missing test with `single_cpu`
  • Loading branch information
MilesCranmer authored and im-vinicius committed Jul 8, 2023
1 parent 689620a commit 99fb314
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 17 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,7 @@ I/O
- Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
- Bug in :func:`read_sql` when reading multiple timezone aware columns with the same column name (:issue:`44421`)
- Bug when writing and reading empty Stata dta files where dtype information was lost (:issue:`46240`)
- Bug where ``bz2`` was treated as a hard requirement (:issue:`53857`)

Period
^^^^^^
Expand Down
8 changes: 5 additions & 3 deletions pandas/_testing/_io.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import bz2
import gzip
import io
import pathlib
Expand All @@ -12,7 +11,10 @@
)
import zipfile

from pandas.compat import get_lzma_file
from pandas.compat import (
get_bz2_file,
get_lzma_file,
)
from pandas.compat._optional import import_optional_dependency

import pandas as pd
Expand Down Expand Up @@ -156,7 +158,7 @@ def write_to_compressed(compression, path, data, dest: str = "test"):
elif compression == "gzip":
compress_method = gzip.GzipFile
elif compression == "bz2":
compress_method = bz2.BZ2File
compress_method = get_bz2_file()
elif compression == "zstd":
compress_method = import_optional_dependency("zstandard").open
elif compression == "xz":
Expand Down
23 changes: 23 additions & 0 deletions pandas/compat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,29 @@ def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]:
return pandas.compat.compressors.LZMAFile


def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
"""
Importing the `BZ2File` class from the `bz2` module.
Returns
-------
class
The `BZ2File` class from the `bz2` module.
Raises
------
RuntimeError
If the `bz2` module was not imported correctly, or didn't exist.
"""
if not pandas.compat.compressors.has_bz2:
raise RuntimeError(
"bz2 module not available. "
"A Python re-install with the proper dependencies, "
"might be required to solve this issue."
)
return pandas.compat.compressors.BZ2File


__all__ = [
"is_numpy_dev",
"pa_version_under7p0",
Expand Down
30 changes: 19 additions & 11 deletions pandas/compat/compressors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,17 @@

from __future__ import annotations

import bz2
from pickle import PickleBuffer

from pandas.compat._constants import PY310

try:
import bz2

has_bz2 = True
except ImportError:
has_bz2 = False

try:
import lzma

Expand Down Expand Up @@ -41,17 +47,19 @@ def flatten_buffer(
return memoryview(b).tobytes("A")


class BZ2File(bz2.BZ2File):
if not PY310:
if has_bz2:

def write(self, b) -> int:
# Workaround issue where `bz2.BZ2File` expects `len`
# to return the number of bytes in `b` by converting
# `b` into something that meets that constraint with
# minimal copying.
#
# Note: This is fixed in Python 3.10.
return super().write(flatten_buffer(b))
class BZ2File(bz2.BZ2File):
if not PY310:

def write(self, b) -> int:
# Workaround issue where `bz2.BZ2File` expects `len`
# to return the number of bytes in `b` by converting
# `b` into something that meets that constraint with
# minimal copying.
#
# Note: This is fixed in Python 3.10.
return super().write(flatten_buffer(b))


if has_lzma:
Expand Down
8 changes: 5 additions & 3 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@
StorageOptions,
WriteBuffer,
)
from pandas.compat import get_lzma_file
from pandas.compat import (
get_bz2_file,
get_lzma_file,
)
from pandas.compat._optional import import_optional_dependency
from pandas.compat.compressors import BZ2File as _BZ2File
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level

Expand Down Expand Up @@ -766,7 +768,7 @@ def get_handle(
elif compression == "bz2":
# Overload of "BZ2File" to handle pickle protocol 5
# "Union[str, BaseBuffer]", "str", "Dict[str, Any]"
handle = _BZ2File( # type: ignore[call-overload]
handle = get_bz2_file()( # type: ignore[call-overload]
handle,
mode=ioargs.mode,
**compression_args,
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import string
import subprocess
import sys
import textwrap

import numpy as np
import pytest
Expand Down Expand Up @@ -246,3 +247,21 @@ def test_str_size():
]
result = subprocess.check_output(call).decode()[-4:-1].strip("\n")
assert int(result) == int(expected)


@pytest.mark.single_cpu
def test_bz2_missing_import():
    """
    Check that a missing `bz2` module is handled correctly (issue #53857).

    The check runs in a fresh interpreter started via `subprocess` with
    `sys.modules['bz2']` set to None before pandas is imported. In that
    interpreter `import pandas` must succeed and `get_bz2_file()` must raise
    `RuntimeError`; any other outcome makes the child exit non-zero, which
    `subprocess.check_output` turns into `CalledProcessError`.
    """
    # The `with` body must stay indented relative to the `with` line;
    # `textwrap.dedent` only strips the common leading whitespace.
    code = """
        import sys
        sys.modules['bz2'] = None
        import pytest
        import pandas as pd
        from pandas.compat import get_bz2_file
        msg = 'bz2 module not available.'
        with pytest.raises(RuntimeError, match=msg):
            get_bz2_file()
    """
    code = textwrap.dedent(code)
    call = [sys.executable, "-c", code]
    subprocess.check_output(call)

0 comments on commit 99fb314

Please sign in to comment.