diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 2243790a663df..5fd90e8726264 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -288,6 +288,7 @@ Other enhancements - :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`). - Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable(:issue:`11704`) - Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`). +- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`) - :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`). - :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`). - :meth `~pandas.io.gbq.read_gbq` now allows to disable progress bar (:issue:`33360`). diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8c57c2b8b851b..87bb4ce7055be 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3049,6 +3049,7 @@ def to_csv( doublequote: bool_t = True, escapechar: Optional[str] = None, decimal: Optional[str] = ".", + errors: str = "strict", ) -> Optional[str]: r""" Write object to a comma-separated values (csv) file. @@ -3143,6 +3144,12 @@ def to_csv( decimal : str, default '.' Character recognized as decimal separator. E.g. use ',' for European data. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + + .. 
versionadded:: 1.1.0 Returns ------- @@ -3180,6 +3187,7 @@ def to_csv( line_terminator=line_terminator, sep=sep, encoding=encoding, + errors=errors, compression=compression, quoting=quoting, na_rep=na_rep, diff --git a/pandas/io/common.py b/pandas/io/common.py index 8349acafca1e3..055f84970e916 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -352,6 +352,7 @@ def get_handle( compression: Optional[Union[str, Mapping[str, Any]]] = None, memory_map: bool = False, is_text: bool = True, + errors=None, ): """ Get file handle for given path/buffer and mode. @@ -390,6 +391,12 @@ def get_handle( is_text : boolean, default True whether file/buffer is in text format (csv, json, etc.), or in binary mode (pickle, etc.). + errors : str, optional, default None (same as 'strict') + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + + .. versionadded:: 1.1.0 Returns ------- @@ -475,7 +482,7 @@ def get_handle( elif is_path: if encoding: # Encoding - f = open(path_or_buf, mode, encoding=encoding, newline="") + f = open(path_or_buf, mode, encoding=encoding, errors=errors, newline="") elif is_text: # No explicit encoding f = open(path_or_buf, mode, errors="replace", newline="") @@ -488,7 +495,7 @@ def get_handle( if is_text and (compression or isinstance(f, need_text_wrapping)): from io import TextIOWrapper - g = TextIOWrapper(f, encoding=encoding, newline="") + g = TextIOWrapper(f, encoding=encoding, errors=errors, newline="") if not isinstance(f, (BufferedIOBase, RawIOBase)): handles.append(g) f = g diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index dcd764bec7426..5bd51dc8351f6 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -44,6 +44,7 @@ def __init__( index_label: Optional[Union[bool, Hashable, Sequence[Hashable]]] = None, mode: str = "w", encoding: Optional[str] = None, + errors: str = "strict", compression: Union[str, Mapping[str, str], None] = "infer", 
quoting: Optional[int] = None, line_terminator="\n", @@ -77,6 +78,7 @@ def __init__( if encoding is None: encoding = "utf-8" self.encoding = encoding + self.errors = errors self.compression = infer_compression(self.path_or_buf, compression) if quoting is None: @@ -184,6 +186,7 @@ def save(self) -> None: self.path_or_buf, self.mode, encoding=self.encoding, + errors=self.errors, compression=dict(self.compression_args, method=self.compression), ) close = True @@ -215,6 +218,7 @@ def save(self) -> None: self.path_or_buf, self.mode, encoding=self.encoding, + errors=self.errors, compression=compression, ) f.write(buf) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index b3ee8da52dece..4c86e3a16b135 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -597,3 +597,13 @@ def test_na_rep_truncated(self): result = pd.Series([1.1, 2.2]).to_csv(na_rep=".") expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"]) assert result == expected + + @pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"]) + def test_to_csv_errors(self, errors): + # GH 22610 + data = ["\ud800foo"] + ser = pd.Series(data, index=pd.Index(data)) + with tm.ensure_clean("test.csv") as path: + ser.to_csv(path, errors=errors) + # No use in reading back the data as it is not the same anymore + # due to the error handling