From e0553abbbbb11c887d4c462dde74731d951d2712 Mon Sep 17 00:00:00 2001 From: Thomas Li Date: Fri, 23 Jul 2021 12:42:29 -0700 Subject: [PATCH 1/7] REGR: ValueError raised when both prefix and names are set to None --- doc/source/whatsnew/v1.3.1.rst | 2 +- pandas/io/parsers/readers.py | 18 +++++++++--------- .../io/parser/common/test_common_basic.py | 17 +++++++++++++---- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst index 2ce146660f98c..69d233daf0920 100644 --- a/doc/source/whatsnew/v1.3.1.rst +++ b/doc/source/whatsnew/v1.3.1.rst @@ -24,7 +24,7 @@ Fixed regressions - Fixed regression in indexing with a ``list`` subclass incorrectly raising ``TypeError`` (:issue:`42433`, :issue:`42461`) - Fixed regression in :meth:`DataFrame.isin` and :meth:`Series.isin` raising ``TypeError`` with nullable data containing at least one missing value (:issue:`42405`) - Regression in :func:`concat` between objects with bool dtype and integer dtype casting to object instead of to integer (:issue:`42092`) - +- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) .. --------------------------------------------------------------------------- .. 
_whatsnew_131.bug_fixes: diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 06bdbe3054a15..e0bc5a77246e5 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -505,11 +505,11 @@ def read_csv( delimiter=None, # Column and Index Locations and Names header="infer", - names=lib.no_default, + names=None, index_col=None, usecols=None, squeeze=False, - prefix=lib.no_default, + prefix=None, mangle_dupe_cols=True, # General Parsing Configuration dtype: DtypeArg | None = None, @@ -603,11 +603,11 @@ def read_table( delimiter=None, # Column and Index Locations and Names header="infer", - names=lib.no_default, + names=None, index_col=None, usecols=None, squeeze=False, - prefix=lib.no_default, + prefix=None, mangle_dupe_cols=True, # General Parsing Configuration dtype: DtypeArg | None = None, @@ -1224,8 +1224,8 @@ def _refine_defaults_read( error_bad_lines: bool | None, warn_bad_lines: bool | None, on_bad_lines: str | None, - names: ArrayLike | None | object, - prefix: str | None | object, + names: ArrayLike | None, + prefix: str | None, defaults: dict[str, Any], ): """Validate/refine default values of input parameters of read_csv, read_table. @@ -1302,11 +1302,11 @@ def _refine_defaults_read( if delimiter and (sep is not lib.no_default): raise ValueError("Specified a sep and a delimiter; you can only specify one.") - if names is not lib.no_default and prefix is not lib.no_default: + if names is not None and prefix is not None: raise ValueError("Specified named and prefix; you can only specify one.") - kwds["names"] = None if names is lib.no_default else names - kwds["prefix"] = None if prefix is lib.no_default else prefix + kwds["names"] = names + kwds["prefix"] = prefix # Alias sep -> delimiter. 
if delimiter is None: diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index a1c76e2740dbe..b2e528aa5f8d5 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -764,15 +764,24 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter): @pytest.mark.parametrize("func", ["read_csv", "read_table"]) -@pytest.mark.parametrize("prefix", [None, "x"]) -@pytest.mark.parametrize("names", [None, ["a"]]) -def test_names_and_prefix_not_lib_no_default(all_parsers, names, prefix, func): +def test_names_and_prefix_not_None_raises(all_parsers, func): # GH#39123 f = StringIO("a,b\n1,2") parser = all_parsers msg = "Specified named and prefix; you can only specify one." with pytest.raises(ValueError, match=msg): - getattr(parser, func)(f, names=names, prefix=prefix) + getattr(parser, func)(f, names=["a", "b"], prefix="x") + + +@pytest.mark.parametrize("func", ["read_csv", "read_table"]) +@pytest.mark.parametrize("prefix, names", [(None, ["x0", "x1"]), ("x", None)]) +def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func): + # GH42387 + f = StringIO("a,b\n1,2") + expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]}) + parser = all_parsers + result = getattr(parser, func)(f, names=names, sep=",", prefix=prefix, header=None) + tm.assert_frame_equal(result, expected) def test_dict_keys_as_names(all_parsers): From f12b31186dea1fe18cb59b09b9d25f556063d333 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 29 Jul 2021 14:06:21 -0700 Subject: [PATCH 2/7] Update readers.py --- pandas/io/parsers/readers.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index e0bc5a77246e5..eda0b20a4af71 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ 
-505,11 +505,11 @@ def read_csv( delimiter=None, # Column and Index Locations and Names header="infer", - names=None, + names=lib.no_default, index_col=None, usecols=None, squeeze=False, - prefix=None, + prefix=lib.no_default, mangle_dupe_cols=True, # General Parsing Configuration dtype: DtypeArg | None = None, @@ -603,11 +603,11 @@ def read_table( delimiter=None, # Column and Index Locations and Names header="infer", - names=None, + names=lib.no_default, index_col=None, usecols=None, squeeze=False, - prefix=None, + prefix=lib.no_default, mangle_dupe_cols=True, # General Parsing Configuration dtype: DtypeArg | None = None, @@ -1224,8 +1224,8 @@ def _refine_defaults_read( error_bad_lines: bool | None, warn_bad_lines: bool | None, on_bad_lines: str | None, - names: ArrayLike | None, - prefix: str | None, + names: ArrayLike | None | object, + prefix: str | None | object, defaults: dict[str, Any], ): """Validate/refine default values of input parameters of read_csv, read_table. @@ -1301,12 +1301,13 @@ def _refine_defaults_read( if delimiter and (sep is not lib.no_default): raise ValueError("Specified a sep and a delimiter; you can only specify one.") - - if names is not None and prefix is not None: + + allowed_names_prefix = {None, lib.no_default} + if names not in allowed_names_prefix and prefix not in allowed_names_prefix: raise ValueError("Specified named and prefix; you can only specify one.") - kwds["names"] = names - kwds["prefix"] = prefix + kwds["names"] = None if names is lib.no_default else names + kwds["prefix"] = None if prefix is lib.no_default else prefix # Alias sep -> delimiter. 
if delimiter is None: From e3c10e19de90bcaf0235654a29ee1b7fece71b0b Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 29 Jul 2021 14:07:47 -0700 Subject: [PATCH 3/7] whitespace --- pandas/io/parsers/readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index eda0b20a4af71..0c5aa3c361441 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1301,7 +1301,7 @@ def _refine_defaults_read( if delimiter and (sep is not lib.no_default): raise ValueError("Specified a sep and a delimiter; you can only specify one.") - + allowed_names_prefix = {None, lib.no_default} if names not in allowed_names_prefix and prefix not in allowed_names_prefix: raise ValueError("Specified named and prefix; you can only specify one.") From 75d7083b1a5ee9e77d3a6126360906f6995df107 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 29 Jul 2021 14:08:22 -0700 Subject: [PATCH 4/7] Update v1.3.1.rst --- doc/source/whatsnew/v1.3.1.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst index 321fda32039a2..0297aeecf01a6 100644 --- a/doc/source/whatsnew/v1.3.1.rst +++ b/doc/source/whatsnew/v1.3.1.rst @@ -29,7 +29,6 @@ Fixed regressions - Fixed regression for :func:`merge_asof` raising ``KeyError`` when one of the ``by`` columns is in the index (:issue:`34488`) - Fixed regression in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`) - Fixed regression in :meth:`SeriesGroupBy.value_counts` that resulted in an ``IndexError`` when called on a Series with one row (:issue:`42618`) -- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) .. 
--------------------------------------------------------------------------- From 2729d7152d4f0781d0a3e798d53c3ed3663c2f1f Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 29 Jul 2021 14:09:13 -0700 Subject: [PATCH 5/7] Update v1.3.2.rst --- doc/source/whatsnew/v1.3.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 94d62ae988f0c..289d52138404c 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -19,7 +19,7 @@ Fixed regressions - Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`) - Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) - Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) -- +- Fixed regression where :func:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to ``None`` (:issue:`42387`) .. 
--------------------------------------------------------------------------- From 50067793677bc957266b2a154636325e59ad3838 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 29 Jul 2021 14:54:47 -0700 Subject: [PATCH 6/7] Update readers.py --- pandas/io/parsers/readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 0c5aa3c361441..0c144e414092c 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1302,7 +1302,7 @@ def _refine_defaults_read( if delimiter and (sep is not lib.no_default): raise ValueError("Specified a sep and a delimiter; you can only specify one.") - allowed_names_prefix = {None, lib.no_default} + allowed_names_prefix = [None, lib.no_default] if names not in allowed_names_prefix and prefix not in allowed_names_prefix: raise ValueError("Specified named and prefix; you can only specify one.") From be486b88702bc90b215758526f10e7da50a87f5e Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 29 Jul 2021 16:11:40 -0700 Subject: [PATCH 7/7] Update readers.py --- pandas/io/parsers/readers.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 0c144e414092c..c639a4a9d494e 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1302,8 +1302,12 @@ def _refine_defaults_read( if delimiter and (sep is not lib.no_default): raise ValueError("Specified a sep and a delimiter; you can only specify one.") - allowed_names_prefix = [None, lib.no_default] - if names not in allowed_names_prefix and prefix not in allowed_names_prefix: + if ( + names is not None + and names is not lib.no_default + and prefix is not None + and prefix is not lib.no_default + ): raise ValueError("Specified named and prefix; you can only specify one.") kwds["names"] = None if names 
is lib.no_default else names