Skip to content

Commit 2f9a446

Browse files
authored
BUG: read_csv does not recognize the encoding option when given a RawIOBase file-like object (#31596)
1 parent 2f70e41 commit 2f9a446

File tree

5 files changed

+13
-7
lines changed

5 files changed

+13
-7
lines changed

doc/source/whatsnew/v1.0.1.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Fixed regressions
3333
- Fixed regression in :meth:`qcut` when passed a nullable integer. (:issue:`31389`)
3434
- Fixed regression in assigning to a :class:`Series` using a nullable integer dtype (:issue:`31446`)
3535
- Fixed performance regression when indexing a ``DataFrame`` or ``Series`` with a :class:`MultiIndex` for the index using a list of labels (:issue:`31648`)
36+
- Fixed regression in :meth:`read_csv` where the ``encoding`` option was not recognized when reading from a ``RawIOBase`` file-like object (:issue:`31575`)
3637

3738
.. ---------------------------------------------------------------------------
3839

pandas/_libs/parsers.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ cdef class TextReader:
638638
raise ValueError(f'Unrecognized compression type: '
639639
f'{self.compression}')
640640

641-
if self.encoding and isinstance(source, io.BufferedIOBase):
641+
if self.encoding and isinstance(source, (io.BufferedIOBase, io.RawIOBase)):
642642
source = io.TextIOWrapper(
643643
source, self.encoding.decode('utf-8'), newline='')
644644

pandas/io/common.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import bz2
44
from collections import abc
55
import gzip
6-
from io import BufferedIOBase, BytesIO
6+
from io import BufferedIOBase, BytesIO, RawIOBase
77
import mmap
88
import os
99
import pathlib
@@ -359,9 +359,9 @@ def get_handle(
359359
try:
360360
from s3fs import S3File
361361

362-
need_text_wrapping = (BufferedIOBase, S3File)
362+
need_text_wrapping = (BufferedIOBase, RawIOBase, S3File)
363363
except ImportError:
364-
need_text_wrapping = BufferedIOBase # type: ignore
364+
need_text_wrapping = (BufferedIOBase, RawIOBase) # type: ignore
365365

366366
handles: List[IO] = list()
367367
f = path_or_buf
@@ -437,7 +437,7 @@ def get_handle(
437437
from io import TextIOWrapper
438438

439439
g = TextIOWrapper(f, encoding=encoding, newline="")
440-
if not isinstance(f, BufferedIOBase):
440+
if not isinstance(f, (BufferedIOBase, RawIOBase)):
441441
handles.append(g)
442442
f = g
443443

pandas/io/parsers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from collections import abc, defaultdict
66
import csv
77
import datetime
8-
from io import BufferedIOBase, StringIO, TextIOWrapper
8+
from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper
99
import re
1010
import sys
1111
from textwrap import fill
@@ -1872,7 +1872,7 @@ def __init__(self, src, **kwds):
18721872

18731873
# Handle the file object with universal line mode enabled.
18741874
# We will handle the newline character ourselves later on.
1875-
if isinstance(src, BufferedIOBase):
1875+
if isinstance(src, (BufferedIOBase, RawIOBase)):
18761876
src = TextIOWrapper(src, encoding=encoding, newline="")
18771877

18781878
kwds["encoding"] = "utf-8"

pandas/tests/io/parser/test_encoding.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt):
141141
)
142142
def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
143143
# gh-23779: Python csv engine shouldn't error on files opened in binary.
144+
# gh-31575: Python csv engine shouldn't error on files opened in unbuffered (raw) binary mode.
144145
parser = all_parsers
145146

146147
fpath = os.path.join(csv_dir_path, fname)
@@ -154,6 +155,10 @@ def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
154155
result = parser.read_csv(fb, encoding=encoding)
155156
tm.assert_frame_equal(expected, result)
156157

158+
with open(fpath, mode="rb", buffering=0) as fb:
159+
result = parser.read_csv(fb, encoding=encoding)
160+
tm.assert_frame_equal(expected, result)
161+
157162

158163
@pytest.mark.parametrize("pass_encoding", [True, False])
159164
def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding):

0 commit comments

Comments
 (0)