diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 5695c817b5a3a..baeca87b8c4f8 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -24,6 +24,7 @@ Fixed regressions - Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`) - Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings. This resulted in a regression in some cases as the default for ``float_precision`` was changed in pandas 1.2.0 (:issue:`38753`) - Fixed regression in :meth:`Rolling.skew` and :meth:`Rolling.kurt` modifying the object inplace (:issue:`38908`) +- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/common.py b/pandas/io/common.py index 642684ca61480..8f04724773a8a 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -553,8 +553,7 @@ def get_handle( Returns the dataclass IOHandles """ # Windows does not default to utf-8. Set to utf-8 for a consistent behavior - if encoding is None: - encoding = "utf-8" + encoding_passed, encoding = encoding, encoding or "utf-8" # read_csv does not know whether the buffer is opened in binary/text mode if _is_binary_mode(path_or_buf, mode) and "b" not in mode: @@ -641,6 +640,9 @@ def get_handle( # Check whether the filename is to be opened in binary mode. # Binary mode does not support 'encoding' and 'newline'. 
if ioargs.encoding and "b" not in ioargs.mode: + if errors is None and encoding_passed is None: + # ignore errors when no encoding is specified + errors = "replace" # Encoding handle = open( handle, diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index d445bece593d1..725c14f410357 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -419,3 +419,11 @@ def test_is_fsspec_url(): assert not icom.is_fsspec_url("random:pandas/somethingelse.com") assert not icom.is_fsspec_url("/local/path") assert not icom.is_fsspec_url("relative/local/path") + + +def test_default_errors(): + # GH 38989 + with tm.ensure_clean() as path: + file = Path(path) + file.write_bytes(b"\xe4\na\n1") + tm.assert_frame_equal(pd.read_csv(file, skiprows=[0]), pd.DataFrame({"a": [1]}))