Skip to content

Commit 4576909

Browse files
authored
BUG: Fix pandas compatibility with Python installations lacking bzip2 headers (#53858)
* BUG: Make bz2 import optional
* CLN: Create `get_bz2_file` to match `get_lzma_file`
* DOC: Add bz2 bugfix to changelog
* TST: Test bz2 non-import works
* TST: Test bz2 non-import from subprocess
* TST: Fix bz2 non-import test
* TST: Fix indentation issues in bz2 import test
* MAINT: Clean up merge commit
* Mark bz2 missing test with `single_cpu`
1 parent bdbe4d2 commit 4576909

File tree

6 files changed

+72
-17
lines changed

6 files changed

+72
-17
lines changed

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,7 @@ I/O
468468
- Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
469469
- Bug in :func:`read_sql` when reading multiple timezone aware columns with the same column name (:issue:`44421`)
470470
- Bug when writing and reading empty Stata dta files where dtype information was lost (:issue:`46240`)
471+
- Bug where ``bz2`` was treated as a hard requirement (:issue:`53857`)
471472

472473
Period
473474
^^^^^^

pandas/_testing/_io.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import bz2
43
import gzip
54
import io
65
import pathlib
@@ -12,7 +11,10 @@
1211
)
1312
import zipfile
1413

15-
from pandas.compat import get_lzma_file
14+
from pandas.compat import (
15+
get_bz2_file,
16+
get_lzma_file,
17+
)
1618
from pandas.compat._optional import import_optional_dependency
1719

1820
import pandas as pd
@@ -156,7 +158,7 @@ def write_to_compressed(compression, path, data, dest: str = "test"):
156158
elif compression == "gzip":
157159
compress_method = gzip.GzipFile
158160
elif compression == "bz2":
159-
compress_method = bz2.BZ2File
161+
compress_method = get_bz2_file()
160162
elif compression == "zstd":
161163
compress_method = import_optional_dependency("zstandard").open
162164
elif compression == "xz":

pandas/compat/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,29 @@ def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]:
154154
return pandas.compat.compressors.LZMAFile
155155

156156

157+
def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
158+
"""
159+
Importing the `BZ2File` class from the `bz2` module.
160+
161+
Returns
162+
-------
163+
class
164+
The `BZ2File` class from the `bz2` module.
165+
166+
Raises
167+
------
168+
RuntimeError
169+
If the `bz2` module was not imported correctly, or didn't exist.
170+
"""
171+
if not pandas.compat.compressors.has_bz2:
172+
raise RuntimeError(
173+
"bz2 module not available. "
174+
"A Python re-install with the proper dependencies, "
175+
"might be required to solve this issue."
176+
)
177+
return pandas.compat.compressors.BZ2File
178+
179+
157180
__all__ = [
158181
"is_numpy_dev",
159182
"pa_version_under7p0",

pandas/compat/compressors.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,17 @@
44

55
from __future__ import annotations
66

7-
import bz2
87
from pickle import PickleBuffer
98

109
from pandas.compat._constants import PY310
1110

11+
try:
12+
import bz2
13+
14+
has_bz2 = True
15+
except ImportError:
16+
has_bz2 = False
17+
1218
try:
1319
import lzma
1420

@@ -41,17 +47,19 @@ def flatten_buffer(
4147
return memoryview(b).tobytes("A")
4248

4349

44-
class BZ2File(bz2.BZ2File):
45-
if not PY310:
50+
if has_bz2:
4651

47-
def write(self, b) -> int:
48-
# Workaround issue where `bz2.BZ2File` expects `len`
49-
# to return the number of bytes in `b` by converting
50-
# `b` into something that meets that constraint with
51-
# minimal copying.
52-
#
53-
# Note: This is fixed in Python 3.10.
54-
return super().write(flatten_buffer(b))
52+
class BZ2File(bz2.BZ2File):
53+
if not PY310:
54+
55+
def write(self, b) -> int:
56+
# Workaround issue where `bz2.BZ2File` expects `len`
57+
# to return the number of bytes in `b` by converting
58+
# `b` into something that meets that constraint with
59+
# minimal copying.
60+
#
61+
# Note: This is fixed in Python 3.10.
62+
return super().write(flatten_buffer(b))
5563

5664

5765
if has_lzma:

pandas/io/common.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,11 @@
5757
StorageOptions,
5858
WriteBuffer,
5959
)
60-
from pandas.compat import get_lzma_file
60+
from pandas.compat import (
61+
get_bz2_file,
62+
get_lzma_file,
63+
)
6164
from pandas.compat._optional import import_optional_dependency
62-
from pandas.compat.compressors import BZ2File as _BZ2File
6365
from pandas.util._decorators import doc
6466
from pandas.util._exceptions import find_stack_level
6567

@@ -766,7 +768,7 @@ def get_handle(
766768
elif compression == "bz2":
767769
# Overload of "BZ2File" to handle pickle protocol 5
768770
# "Union[str, BaseBuffer]", "str", "Dict[str, Any]"
769-
handle = _BZ2File( # type: ignore[call-overload]
771+
handle = get_bz2_file()( # type: ignore[call-overload]
770772
handle,
771773
mode=ioargs.mode,
772774
**compression_args,

pandas/tests/test_common.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import string
44
import subprocess
55
import sys
6+
import textwrap
67

78
import numpy as np
89
import pytest
@@ -246,3 +247,21 @@ def test_str_size():
246247
]
247248
result = subprocess.check_output(call).decode()[-4:-1].strip("\n")
248249
assert int(result) == int(expected)
250+
251+
252+
@pytest.mark.single_cpu
253+
def test_bz2_missing_import():
254+
# Check whether bz2 missing import is handled correctly (issue #53857)
255+
code = """
256+
import sys
257+
sys.modules['bz2'] = None
258+
import pytest
259+
import pandas as pd
260+
from pandas.compat import get_bz2_file
261+
msg = 'bz2 module not available.'
262+
with pytest.raises(RuntimeError, match=msg):
263+
get_bz2_file()
264+
"""
265+
code = textwrap.dedent(code)
266+
call = [sys.executable, "-c", code]
267+
subprocess.check_output(call)

0 commit comments

Comments
 (0)