diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 51bb21404e7b5..9bd6eb91af8ef 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -976,7 +976,6 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: (('hour',), '%H', 2), (('minute',), '%M', 2), (('second',), '%S', 2), - (('microsecond',), '%f', 6), (('second', 'microsecond'), '%S.%f', 0), (('tzinfo',), '%z', 0), (('tzinfo',), '%Z', 0), @@ -1048,7 +1047,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: parsed_formatted = parsed_datetime.strftime(attr_format) for i, token_format in enumerate(format_guess): - token_filled = tokens[i].zfill(padding) + token_filled = _fill_token(tokens[i], padding) if token_format is None and token_filled == parsed_formatted: format_guess[i] = attr_format tokens[i] = token_filled @@ -1090,6 +1089,19 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: else: return None +cdef str _fill_token(token: str, padding: int): + cdef str token_filled + if '.' not in token: + token_filled = token.zfill(padding) + else: + seconds, nanoseconds = token.split('.') + seconds = f'{int(seconds):02d}' + # right-pad so we get nanoseconds, then only take + # first 6 digits (microseconds) as stdlib datetime + # doesn't support nanoseconds + nanoseconds = nanoseconds.ljust(9, '0')[:6] + token_filled = f'{seconds}.{nanoseconds}' + return token_filled @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index fe14f8e9907d6..18e926c7039b0 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -820,7 +820,7 @@ def to_datetime( to the day starting at noon on January 1, 4713 BC. - If Timestamp convertible (Timestamp, dt.datetime, np.datetimt64 or date string), origin is set to Timestamp identified by origin. - - If a float or integer, origin is the mullisecond difference + - If a float or integer, origin is the millisecond difference relative to 1970-01-01. cache : bool, default True If :const:`True`, use a cache of unique, converted dates to apply the diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index e0166c876cdf5..9588f54388d1e 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -295,3 +295,18 @@ def test_is_iso_format(fmt, expected): # see gh-41047 result = parsing.format_is_iso(fmt) assert result == expected + + +@pytest.mark.parametrize( + "input", + [ + "2018-01-01T00:00:00.123456789", + "2018-01-01T00:00:00.123456", + "2018-01-01T00:00:00.123", + ], +) +def test_guess_datetime_format_f(input): + # https://github.com/pandas-dev/pandas/issues/49043 + result = parsing.guess_datetime_format(input) + expected = "%Y-%m-%dT%H:%M:%S.%f" + assert result == expected