diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7c86ad0f029ed..511e85929f352 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -109,6 +109,7 @@ Other enhancements (:issue:`28368`) - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) - :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`) +- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) Build Changes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 64755b2390eaf..f032a9a919b3c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -755,6 +755,7 @@ def to_string( decimal: str = ".", line_width: Optional[int] = None, max_colwidth: Optional[int] = None, + encoding: Optional[str] = None, ) -> Optional[str]: """ Render a DataFrame to a console-friendly tabular output. @@ -765,6 +766,10 @@ def to_string( Max width to truncate each column in characters. By default, no limit. .. versionadded:: 1.0.0 + encoding : str, default "utf-8" + Set character encoding. + + .. versionadded:: 1.0 %(returns)s See Also -------- @@ -803,7 +808,7 @@ def to_string( decimal=decimal, line_width=line_width, ) - return formatter.to_string(buf=buf) + return formatter.to_string(buf=buf, encoding=encoding) # ---------------------------------------------------------------------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b8c40e3f62221..7c58eafd2ec39 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -485,6 +485,8 @@ def get_buffer( if encoding is None: encoding = "utf-8" + elif not isinstance(buf, str): + raise ValueError("buf is not a file name and encoding is specified.") if hasattr(buf, "write"): yield buf @@ -895,8 +897,12 @@ def _join_multiline(self, *args) -> str: st = ed return "\n\n".join(str_lst) - def to_string(self, buf: Optional[FilePathOrBuffer[str]] = None) -> Optional[str]: - return self.get_result(buf=buf) + def to_string( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + encoding: Optional[str] = None, + ) -> Optional[str]: + return self.get_result(buf=buf, encoding=encoding) def to_latex( self, diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 454e2afb8abe0..9aba4c8aa5019 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -73,17 +73,19 @@ def filepath_or_buffer(filepath_or_buffer_id, tmp_path): @pytest.fixture -def assert_filepath_or_buffer_equals(filepath_or_buffer, filepath_or_buffer_id): +def assert_filepath_or_buffer_equals( + filepath_or_buffer, filepath_or_buffer_id, encoding +): """ Assertion helper for checking filepath_or_buffer. """ def _assert_filepath_or_buffer_equals(expected): if filepath_or_buffer_id == "string": - with open(filepath_or_buffer) as f: + with open(filepath_or_buffer, encoding=encoding) as f: result = f.read() elif filepath_or_buffer_id == "pathlike": - result = filepath_or_buffer.read_text() + result = filepath_or_buffer.read_text(encoding=encoding) elif filepath_or_buffer_id == "buffer": result = filepath_or_buffer.getvalue() assert result == expected @@ -3240,14 +3242,32 @@ def test_repr_html_ipython_config(ip): @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) +@pytest.mark.parametrize( + "encoding, data", + [(None, "abc"), ("utf-8", "abc"), ("gbk", "造成输出中文显示乱码"), ("foo", "abc")], +) def test_filepath_or_buffer_arg( - float_frame, method, filepath_or_buffer, assert_filepath_or_buffer_equals + method, + filepath_or_buffer, + assert_filepath_or_buffer_equals, + encoding, + data, + filepath_or_buffer_id, ): - df = float_frame - expected = getattr(df, method)() + df = DataFrame([data]) - getattr(df, method)(buf=filepath_or_buffer) - assert_filepath_or_buffer_equals(expected) + if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: + with pytest.raises( + ValueError, match="buf is not a file name and encoding is specified." + ): + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + elif encoding == "foo": + with pytest.raises(LookupError, match="unknown encoding"): + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + else: + expected = getattr(df, method)() + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + assert_filepath_or_buffer_equals(expected) @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"])