Skip to content

Commit df5233c

Browse files
author
Krzysztof Chomski
committed
BUG: GH17778 - DataFrame.to_pickle() fails for .zip format.
GH17778: Add 'zip' format to the unit tests. Add an entry to the Bug Fixes section of doc/source/whatsnew/v0.22.0.txt.
1 parent 9050e38 commit df5233c

File tree

4 files changed

+29
-14
lines changed

4 files changed

+29
-14
lines changed

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ Documentation Changes
8787
Bug Fixes
8888
~~~~~~~~~
8989

90+
- Bug in ``DataFrame.to_pickle()`` where writing to the ``.zip`` format failed (:issue:`17778`)
9091

9192
Conversion
9293
^^^^^^^^^^

pandas/io/common.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -357,17 +357,20 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
357357
# ZIP Compression
358358
elif compression == 'zip':
359359
import zipfile
360-
zip_file = zipfile.ZipFile(path_or_buf)
361-
zip_names = zip_file.namelist()
362-
if len(zip_names) == 1:
363-
f = zip_file.open(zip_names.pop())
364-
elif len(zip_names) == 0:
365-
raise ValueError('Zero files found in ZIP file {}'
366-
.format(path_or_buf))
360+
if mode == 'wb':
361+
f = zipfile.ZipFile(path_or_buf, 'w')
367362
else:
368-
raise ValueError('Multiple files found in ZIP file.'
369-
' Only one file per ZIP: {}'
370-
.format(zip_names))
363+
zip_file = zipfile.ZipFile(path_or_buf)
364+
zip_names = zip_file.namelist()
365+
if len(zip_names) == 1:
366+
f = zip_file.open(zip_names.pop())
367+
elif len(zip_names) == 0:
368+
raise ValueError('Zero files found in ZIP file {}'
369+
.format(path_or_buf))
370+
else:
371+
raise ValueError('Multiple files found in ZIP file.'
372+
' Only one file per ZIP: {}'
373+
.format(zip_names))
371374

372375
# XZ Compression
373376
elif compression == 'xz':

pandas/io/pickle.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,17 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
4242
if protocol < 0:
4343
protocol = pkl.HIGHEST_PROTOCOL
4444
try:
45-
pkl.dump(obj, f, protocol=protocol)
45+
import zipfile
46+
if isinstance(f, zipfile.ZipFile):
47+
import os
48+
import tempfile
49+
tmp_file = tempfile.NamedTemporaryFile(delete=False)
50+
pkl.dump(obj, tmp_file, protocol=protocol)
51+
tmp_file.close()
52+
f.write(tmp_file.name)
53+
os.remove(tmp_file.name)
54+
else:
55+
pkl.dump(obj, f, protocol=protocol)
4656
finally:
4757
for _f in fh:
4858
_f.close()

pandas/tests/io/test_pickle.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def decompress_file(self, src_path, dest_path, compression):
382382
fh.write(f.read())
383383
f.close()
384384

385-
@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz'])
385+
@pytest.mark.parametrize('compression', [None, 'gzip', 'zip', 'bz2', 'xz'])
386386
def test_write_explicit(self, compression, get_random_path):
387387
# issue 11666
388388
if compression == 'xz':
@@ -414,7 +414,8 @@ def test_write_explicit_bad(self, compression, get_random_path):
414414
df = tm.makeDataFrame()
415415
df.to_pickle(path, compression=compression)
416416

417-
@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress'])
417+
@pytest.mark.parametrize('ext', ['', '.gz', '.zip', '.bz2', '.xz',
418+
'.no_compress'])
418419
def test_write_infer(self, ext, get_random_path):
419420
if ext == '.xz':
420421
tm._skip_if_no_lzma()
@@ -442,7 +443,7 @@ def test_write_infer(self, ext, get_random_path):
442443

443444
tm.assert_frame_equal(df, df2)
444445

445-
@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"])
446+
@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', 'zip'])
446447
def test_read_explicit(self, compression, get_random_path):
447448
# issue 11666
448449
if compression == 'xz':

0 commit comments

Comments
 (0)