Remove the bytes_encoding parameter

sidhant007 · sidhant007 · commit 549f577db121 · 2020-06-29T22:22:44.000+08:00
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -1030,7 +1030,7 @@ I/O
 - Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`)
 - Bug in :meth:`ujson.encode` was raising an `OverflowError` with numbers larger than sys.maxsize (:issue: `34395`)
 - Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the min_itemsize parameter is set (:issue:`11238`)
-- Bug in :meth:`to_csv` which emitted b'' around bytes. It now has an optional `bytes_encoding` parameter that allows to pass a specific encoding scheme according to which the bytes are decoded. (:issue:`9712`)
+- Bug in :meth:`to_csv` which emitted ``b''`` around bytes. Bytes are now decoded to strings using the ``encoding`` passed to :meth:`to_csv` (:issue:`9712`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3000,7 +3000,6 @@ def to_csv(
         index_label: Optional[Union[bool_t, str, Sequence[Label]]] = None,
         mode: str = "w",
         encoding: Optional[str] = None,
-        bytes_encoding: Optional[str] = None,
         compression: Optional[Union[str, Mapping[str, str]]] = "infer",
         quoting: Optional[int] = None,
         quotechar: str = '"',
@@ -3058,10 +3057,6 @@ def to_csv(
         encoding : str, optional
             A string representing the encoding to use in the output file,
             defaults to 'utf-8'.
-        bytes_encoding : str, optional
-            A string representing the encoding to use to decode the bytes
-            in the output file, defaults to using the 'encoding' parameter or the
-            encoding specified by the file object.
         compression : str or dict, default 'infer'
             If str, represents compression mode. If dict, value at 'method' is
             the compression mode. Compression mode may be any of the following
@@ -3152,7 +3147,6 @@ def to_csv(
             line_terminator=line_terminator,
             sep=sep,
             encoding=encoding,
-            bytes_encoding=bytes_encoding,
             errors=errors,
             compression=compression,
             quoting=quoting,
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -62,7 +62,6 @@ def __init__(
         index_label: Optional[Union[bool, Hashable, Sequence[Hashable]]] = None,
         mode: str = "w",
         encoding: Optional[str] = None,
-        bytes_encoding: Optional[str] = None,
         errors: str = "strict",
         compression: Union[str, Mapping[str, str], None] = "infer",
         quoting: Optional[int] = None,
@@ -115,11 +114,6 @@ def __init__(
         self.errors = errors
         self.compression = infer_compression(self.path_or_buf, compression)
 
-        if bytes_encoding is None:
-            bytes_encoding = self.encoding
-
-        self.bytes_encoding = bytes_encoding
-
         if quoting is None:
             quoting = csvlib.QUOTE_MINIMAL
         self.quoting = quoting
@@ -147,7 +141,7 @@ def __init__(
             if isinstance(cols, ABCIndexClass):
                 cols = cols.to_native_types(
                     na_rep=na_rep,
-                    bytes_encoding=bytes_encoding,
+                    bytes_encoding=self.encoding,
                     float_format=float_format,
                     date_format=date_format,
                     quoting=self.quoting,
@@ -162,7 +156,7 @@ def __init__(
         if isinstance(cols, ABCIndexClass):
             cols = cols.to_native_types(
                 na_rep=na_rep,
-                bytes_encoding=bytes_encoding,
+                bytes_encoding=self.encoding,
                 float_format=float_format,
                 date_format=date_format,
                 quoting=self.quoting,
@@ -384,7 +378,7 @@ def _save_chunk(self, start_i: int, end_i: int) -> None:
             b = blocks[i]
             d = b.to_native_types(
                 na_rep=self.na_rep,
-                bytes_encoding=self.bytes_encoding,
+                bytes_encoding=self.encoding,
                 float_format=self.float_format,
                 decimal=self.decimal,
                 date_format=self.date_format,
@@ -398,7 +392,7 @@ def _save_chunk(self, start_i: int, end_i: int) -> None:
         ix = data_index.to_native_types(
             slicer=slicer,
             na_rep=self.na_rep,
-            bytes_encoding=self.bytes_encoding,
+            bytes_encoding=self.encoding,
             float_format=self.float_format,
             decimal=self.decimal,
             date_format=self.date_format,
@@ -413,4 +407,4 @@ def _bytes_to_str(self, values):
         np_values = np.array(values, dtype=object)
         if lib.is_bytes_array(np_values, skipna=True, mixing_allowed=False):
             for i, value in enumerate(values):
-                values[i] = value.decode(self.bytes_encoding)
+                values[i] = value.decode(self.encoding)
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -743,9 +743,7 @@ def test_to_csv_withcommas(self):
     def test_to_csv_bytes(self):
         # GH 9712
         times = date_range("2013-10-27 23:00", "2013-10-28 00:00", freq="H")
-        df = DataFrame(
-            {b"foo": [b"bar", b"baz"], b"times": times}, index=[b"A", b"B"]
-        )
+        df = DataFrame({b"foo": [b"bar", b"baz"], b"times": times}, index=[b"A", b"B"])
         df.loc[b"C"] = np.nan
         df.index.name = b"idx"
 
@@ -769,14 +767,20 @@ def test_to_csv_bytes(self):
         df_expected = DataFrame({non_unicode_decoded: [non_unicode_decoded, "foo"]})
         df_expected.index.name = "idx"
 
-        with tm.ensure_clean("__tmp_to_csv_bytes__.csv") as path:
-            df.to_csv(path, bytes_encoding="gb18030", header=True)
+        with tm.ensure_clean(
+            "__tmp_to_csv_bytes__.csv",
+            return_filelike=True,
+            mode="w+",
+            encoding="gb18030",
+        ) as path:
+            df.to_csv(path, header=True)
+            path.seek(0)
             df_output = self.read_csv(path)
             tm.assert_frame_equal(df_output, df_expected)
 
         # decoding error, when transcoding fails
         with pytest.raises(UnicodeDecodeError):
-            df.to_csv(bytes_encoding="utf-8")
+            df.to_csv(encoding="utf-8")
 
         # mixing of bytes and non-bytes
         df = DataFrame({"foo": [b"bar", "baz"]})