From 1ac01199051ad0488badb1c5e98faf4b41829bda Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 Jan 2022 11:22:48 -0800 Subject: [PATCH] DEPR: line_terminator->lineterminator GH#9568 --- doc/source/user_guide/io.rst | 2 +- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/_libs/src/parser/tokenizer.c | 4 ++-- pandas/core/generic.py | 14 +++++++++++--- pandas/io/formats/csvs.py | 6 +++--- pandas/io/formats/format.py | 6 ++++-- pandas/tests/frame/methods/test_to_csv.py | 8 ++++---- pandas/tests/io/formats/test_to_csv.py | 20 +++++++++++--------- pandas/tests/io/parser/test_skiprows.py | 8 ++++---- pandas/tests/io/xml/test_to_xml.py | 2 +- 10 files changed, 42 insertions(+), 30 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index f3be3277003ee..d5af33721c354 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1851,7 +1851,7 @@ function takes a number of arguments. Only the first is required. * ``mode`` : Python write mode, default 'w' * ``encoding``: a string representing the encoding to use if the contents are non-ASCII, for Python versions prior to 3 -* ``line_terminator``: Character sequence denoting line end (default ``os.linesep``) +* ``lineterminator``: Character sequence denoting line end (default ``os.linesep``) * ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). 
Note that if you have set a ``float_format`` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric * ``quotechar``: Character used to quote fields (default '"') * ``doublequote``: Control quoting of ``quotechar`` in fields (default True) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e723918ad8b4b..9c4a23dff4121 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -94,7 +94,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ -- +- Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index ade4d4aa4a206..c337c3eaf1318 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -649,7 +649,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes, #define END_LINE() END_LINE_STATE(START_RECORD) #define IS_TERMINATOR(c) \ - (c == line_terminator) + (c == lineterminator) #define IS_QUOTE(c) ((c == self->quotechar && self->quoting != QUOTE_NONE)) @@ -718,7 +718,7 @@ int tokenize_bytes(parser_t *self, char *stream; char *buf = self->data + self->datapos; - const char line_terminator = (self->lineterminator == '\0') ? + const char lineterminator = (self->lineterminator == '\0') ? 
'\n' : self->lineterminator; // 1000 is something that couldn't fit in "char" diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 193be98e904b9..f73861f80023c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -67,6 +67,7 @@ InvalidIndexError, ) from pandas.util._decorators import ( + deprecate_kwarg, doc, rewrite_axis_style_signature, ) @@ -3355,6 +3356,7 @@ def to_latex( storage_options=_shared_docs["storage_options"], compression_options=_shared_docs["compression_options"], ) + @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator") def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, @@ -3370,7 +3372,7 @@ def to_csv( compression: CompressionOptions = "infer", quoting: int | None = None, quotechar: str = '"', - line_terminator: str | None = None, + lineterminator: str | None = None, chunksize: int | None = None, date_format: str | None = None, doublequote: bool_t = True, @@ -3449,10 +3451,16 @@ def to_csv( will treat them as non-numeric. quotechar : str, default '\"' String of length 1. Character used to quote fields. - line_terminator : str, optional + lineterminator : str, optional The newline character or character sequence to use in the output file. Defaults to `os.linesep`, which depends on the OS in which this method is called ('\\n' for linux, '\\r\\n' for Windows, i.e.). + + .. versionchanged:: 1.5.0 + + Previously was line_terminator, changed for consistency with + read_csv and the standard library 'csv' module. + chunksize : int or None Rows to write at a time. 
date_format : str, default None @@ -3527,7 +3535,7 @@ def to_csv( return DataFrameRenderer(formatter).to_csv( path_or_buf, - line_terminator=line_terminator, + lineterminator=lineterminator, sep=sep, encoding=encoding, errors=errors, diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index d2cc77af8eee5..cfbd2d9c9c8da 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -59,7 +59,7 @@ def __init__( errors: str = "strict", compression: CompressionOptions = "infer", quoting: int | None = None, - line_terminator: str | None = "\n", + lineterminator: str | None = "\n", chunksize: int | None = None, quotechar: str | None = '"', date_format: str | None = None, @@ -84,7 +84,7 @@ def __init__( self.quotechar = self._initialize_quotechar(quotechar) self.doublequote = doublequote self.escapechar = escapechar - self.line_terminator = line_terminator or os.linesep + self.lineterminator = lineterminator or os.linesep self.date_format = date_format self.cols = self._initialize_columns(cols) self.chunksize = self._initialize_chunksize(chunksize) @@ -250,7 +250,7 @@ def save(self) -> None: # Note: self.encoding is irrelevant here self.writer = csvlib.writer( handles.handle, - lineterminator=self.line_terminator, + lineterminator=self.lineterminator, delimiter=self.sep, quoting=self.quoting, doublequote=self.doublequote, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 616331bf80a44..e93a14d72dd17 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -57,6 +57,7 @@ StorageOptions, WriteBuffer, ) +from pandas.util._decorators import deprecate_kwarg from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -1128,6 +1129,7 @@ def to_string( string = string_formatter.to_string() return save_to_buffer(string, buf=buf, encoding=encoding) + @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator") def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | 
WriteBuffer[str] | None = None, @@ -1139,7 +1141,7 @@ def to_csv( compression: CompressionOptions = "infer", quoting: int | None = None, quotechar: str = '"', - line_terminator: str | None = None, + lineterminator: str | None = None, chunksize: int | None = None, date_format: str | None = None, doublequote: bool = True, @@ -1160,7 +1162,7 @@ def to_csv( csv_formatter = CSVFormatter( path_or_buf=path_or_buf, - line_terminator=line_terminator, + lineterminator=lineterminator, sep=sep, encoding=encoding, errors=errors, diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 28ffe316e21dc..b7874d51b6f33 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -866,13 +866,13 @@ def test_to_csv_index_no_leading_comma(self): expected = tm.convert_rows_list_to_csv_str(expected_rows) assert buf.getvalue() == expected - def test_to_csv_line_terminators(self): + def test_to_csv_lineterminators(self): # see gh-20353 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"]) with tm.ensure_clean() as path: # case 1: CRLF as line terminator - df.to_csv(path, line_terminator="\r\n") + df.to_csv(path, lineterminator="\r\n") expected = b",A,B\r\none,1,4\r\ntwo,2,5\r\nthree,3,6\r\n" with open(path, mode="rb") as f: @@ -880,7 +880,7 @@ def test_to_csv_line_terminators(self): with tm.ensure_clean() as path: # case 2: LF as line terminator - df.to_csv(path, line_terminator="\n") + df.to_csv(path, lineterminator="\n") expected = b",A,B\none,1,4\ntwo,2,5\nthree,3,6\n" with open(path, mode="rb") as f: @@ -1251,7 +1251,7 @@ def test_to_csv_single_level_multi_index(self): df = DataFrame([[1, 2, 3]], columns=index) df = df.reindex(columns=[(1,), (3,)]) expected = ",1,3\n0,1,3\n" - result = df.to_csv(line_terminator="\n") + result = df.to_csv(lineterminator="\n") tm.assert_almost_equal(result, expected) def test_gz_lineend(self): diff --git 
a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index e039ff263ca3a..afc3ef7a25cc7 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -369,9 +369,11 @@ def test_to_csv_multi_index(self): @pytest.mark.parametrize("klass", [DataFrame, pd.Series]) def test_to_csv_single_level_multi_index(self, ind, expected, klass): # see gh-19589 - result = klass(pd.Series([1], ind, name="data")).to_csv( - line_terminator="\n", header=True - ) + obj = klass(pd.Series([1], ind, name="data")) + + with tm.assert_produces_warning(FutureWarning, match="lineterminator"): + # GH#9568 standardize on lineterminator matching stdlib + result = obj.to_csv(line_terminator="\n", header=True) assert result == expected def test_to_csv_string_array_ascii(self): @@ -425,14 +427,14 @@ def test_to_csv_string_with_lf(self): with tm.ensure_clean("lf_test.csv") as path: # case 2: LF as line terminator expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n' - df.to_csv(path, line_terminator="\n", index=False) + df.to_csv(path, lineterminator="\n", index=False) with open(path, "rb") as f: assert f.read() == expected_lf with tm.ensure_clean("lf_test.csv") as path: # case 3: CRLF as line terminator - # 'line_terminator' should not change inner element + # 'lineterminator' should not change inner element expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n' - df.to_csv(path, line_terminator="\r\n", index=False) + df.to_csv(path, lineterminator="\r\n", index=False) with open(path, "rb") as f: assert f.read() == expected_crlf @@ -459,19 +461,19 @@ def test_to_csv_string_with_crlf(self): with tm.ensure_clean("crlf_test.csv") as path: # case 2: LF as line terminator expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n' - df.to_csv(path, line_terminator="\n", index=False) + df.to_csv(path, lineterminator="\n", index=False) with open(path, "rb") as f: assert f.read() == expected_lf with 
tm.ensure_clean("crlf_test.csv") as path: # case 3: CRLF as line terminator - # 'line_terminator' should not change inner element + # 'lineterminator' should not change inner element expected_crlf = ( b"int,str_crlf\r\n" b"1,abc\r\n" b'2,"d\r\nef"\r\n' b'3,"g\r\nh\r\n\r\ni"\r\n' ) - df.to_csv(path, line_terminator="\r\n", index=False) + df.to_csv(path, lineterminator="\r\n", index=False) with open(path, "rb") as f: assert f.read() == expected_crlf diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 5b722b54da693..e88ccf07353b6 100644 --- a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -180,9 +180,9 @@ def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data): @pytest.mark.parametrize( - "line_terminator", ["\n", "\r\n", "\r"] # "LF" # "CRLF" # "CR" + "lineterminator", ["\n", "\r\n", "\r"] # "LF" # "CRLF" # "CR" ) -def test_skiprows_lineterminator(all_parsers, line_terminator, request): +def test_skiprows_lineterminator(all_parsers, lineterminator, request): # see gh-9079 parser = all_parsers data = "\n".join( @@ -202,11 +202,11 @@ def test_skiprows_lineterminator(all_parsers, line_terminator, request): columns=["date", "time", "var", "flag", "oflag"], ) - if parser.engine == "python" and line_terminator == "\r": + if parser.engine == "python" and lineterminator == "\r": mark = pytest.mark.xfail(reason="'CR' not respect with the Python parser yet") request.node.add_marker(mark) - data = data.replace("\n", line_terminator) + data = data.replace("\n", lineterminator) result = parser.read_csv( StringIO(data), skiprows=1, diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index aeec163ed134a..0666dcacecf39 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -1160,7 +1160,7 @@ def test_style_to_csv(): """ - out_csv = geom_df.to_csv(line_terminator="\n") + out_csv = 
geom_df.to_csv(lineterminator="\n") if out_csv is not None: out_csv = out_csv.strip()