BUG: parse_dates=False while passing date_parser tries to use date parser (#44599)

loicdiridollou · web-flow · commit 96eb4c9491cd · 2021-11-25T21:51:45.000+01:00
* BUG: Providing fix for GH#44366 * BUG: fix pre-commit run * BUG: adding other API changes for change of default argument * Rewriting if statement and adding test * BUG: changing pd.Series to Series to validate pre-commit hook * Removing the second whatsnew statement only to keep the bug fix one * BUG: simplifying the test with parser and parse_dates False #44366 * BUG: fixing pre-commit #44366
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -663,6 +663,7 @@ I/O
 - Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`)
 - Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from an nullable integer type (:issue:`44079`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
+- Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -510,9 +510,15 @@ def _read(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
 ):
     """Generic reader of line files."""
-    if kwds.get("date_parser", None) is not None:
-        if isinstance(kwds["parse_dates"], bool):
-            kwds["parse_dates"] = True
+    # if we pass a date_parser and parse_dates=False, we should not parse the
+    # dates GH#44366
+    if (
+        kwds.get("date_parser", None) is not None
+        and kwds.get("parse_dates", None) is None
+    ):
+        kwds["parse_dates"] = True
+    elif kwds.get("parse_dates", None) is None:
+        kwds["parse_dates"] = False
 
     # Extract some of the arguments (pass chunksize on).
     iterator = kwds.get("iterator", False)
@@ -585,7 +591,7 @@ def read_csv(
     verbose=False,
     skip_blank_lines=True,
     # Datetime Handling
-    parse_dates=False,
+    parse_dates=None,
     infer_datetime_format=False,
     keep_date_col=False,
     date_parser=None,
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
@@ -97,6 +97,39 @@ def __custom_date_parser(time):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
+def test_read_csv_with_custom_date_parser_parse_dates_false(all_parsers):
+    # GH44366
+    def __custom_date_parser(time):
+        time = time.astype(np.float_)
+        time = time.astype(np.int_)  # convert float seconds to int type
+        return pd.to_timedelta(time, unit="s")
+
+    testdata = StringIO(
+        """time e
+        41047.00 -93.77
+        41048.00 -95.79
+        41049.00 -98.73
+        41050.00 -93.99
+        41051.00 -97.72
+        """
+    )
+    result = all_parsers.read_csv(
+        testdata,
+        delim_whitespace=True,
+        parse_dates=False,
+        date_parser=__custom_date_parser,
+        index_col="time",
+    )
+    time = Series([41047.00, 41048.00, 41049.00, 41050.00, 41051.00], name="time")
+    expected = DataFrame(
+        {"e": [-93.77, -95.79, -98.73, -93.99, -97.72]},
+        index=time,
+    )
+
+    tm.assert_frame_equal(result, expected)
+
+
 @xfail_pyarrow
 def test_separator_date_conflict(all_parsers):
     # Regression test for gh-4678