BUG: read_csv used in file like object RawIOBase is not recognize encoding option (#31596)

paihu · web-flow · commit 2f9a44635bd8 · 2020-02-05T07:27:04.000-06:00
diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst
@@ -33,6 +33,7 @@ Fixed regressions
 - Fixed regression in :meth:`qcut` when passed a nullable integer. (:issue:`31389`)
 - Fixed regression in assigning to a :class:`Series` using a nullable integer dtype (:issue:`31446`)
 - Fixed performance regression when indexing a ``DataFrame`` or ``Series`` with a :class:`MultiIndex` for the index using a list of labels (:issue:`31648`)
+- Fixed regression in :meth:`read_csv` used in file like object ``RawIOBase`` is not recognize ``encoding`` option (:issue:`31575`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -638,7 +638,7 @@ cdef class TextReader:
                 raise ValueError(f'Unrecognized compression type: '
                                  f'{self.compression}')
 
-            if self.encoding and isinstance(source, io.BufferedIOBase):
+            if self.encoding and isinstance(source, (io.BufferedIOBase, io.RawIOBase)):
                 source = io.TextIOWrapper(
                     source, self.encoding.decode('utf-8'), newline='')
 
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -3,7 +3,7 @@
 import bz2
 from collections import abc
 import gzip
-from io import BufferedIOBase, BytesIO
+from io import BufferedIOBase, BytesIO, RawIOBase
 import mmap
 import os
 import pathlib
@@ -359,9 +359,9 @@ def get_handle(
     try:
         from s3fs import S3File
 
-        need_text_wrapping = (BufferedIOBase, S3File)
+        need_text_wrapping = (BufferedIOBase, RawIOBase, S3File)
     except ImportError:
-        need_text_wrapping = BufferedIOBase  # type: ignore
+        need_text_wrapping = (BufferedIOBase, RawIOBase)  # type: ignore
 
     handles: List[IO] = list()
     f = path_or_buf
@@ -437,7 +437,7 @@ def get_handle(
         from io import TextIOWrapper
 
         g = TextIOWrapper(f, encoding=encoding, newline="")
-        if not isinstance(f, BufferedIOBase):
+        if not isinstance(f, (BufferedIOBase, RawIOBase)):
             handles.append(g)
         f = g
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -5,7 +5,7 @@
 from collections import abc, defaultdict
 import csv
 import datetime
-from io import BufferedIOBase, StringIO, TextIOWrapper
+from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper
 import re
 import sys
 from textwrap import fill
@@ -1872,7 +1872,7 @@ def __init__(self, src, **kwds):
 
             # Handle the file object with universal line mode enabled.
             # We will handle the newline character ourselves later on.
-            if isinstance(src, BufferedIOBase):
+            if isinstance(src, (BufferedIOBase, RawIOBase)):
                 src = TextIOWrapper(src, encoding=encoding, newline="")
 
             kwds["encoding"] = "utf-8"
diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py
@@ -141,6 +141,7 @@ def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt):
 )
 def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
     # gh-23779: Python csv engine shouldn't error on files opened in binary.
+    # gh-31575: Python csv engine shouldn't error on files opened in raw binary.
     parser = all_parsers
 
     fpath = os.path.join(csv_dir_path, fname)
@@ -154,6 +155,10 @@ def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
         result = parser.read_csv(fb, encoding=encoding)
     tm.assert_frame_equal(expected, result)
 
+    with open(fpath, mode="rb", buffering=0) as fb:
+        result = parser.read_csv(fb, encoding=encoding)
+    tm.assert_frame_equal(expected, result)
+
 
 @pytest.mark.parametrize("pass_encoding", [True, False])
 def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding):