diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 3c9c861afd989..6a2c8e49cecb8 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1100,6 +1100,8 @@ Period - Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising ``UnicodeDecodeError`` when a locale-specific directive was passed (:issue:`46319`) - Bug in adding a :class:`Period` object to an array of :class:`DateOffset` objects incorrectly raising ``TypeError`` (:issue:`50162`) - Bug in :class:`Period` where passing a string with finer resolution than nanosecond would result in a ``KeyError`` instead of dropping the extra precision (:issue:`50417`) +- Bug in parsing strings representing Week-periods e.g. "2017-01-23/2017-01-29" as minute-frequency instead of week-frequency (:issue:`50803`) +- Plotting ^^^^^^^^ diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 3cd6272988339..e84b5114df074 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -316,21 +316,6 @@ def parse_datetime_string( dt = dateutil_parse(date_string, default=_DEFAULT_DATETIME, dayfirst=dayfirst, yearfirst=yearfirst, ignoretz=False, out_bestunit=&out_bestunit) - - if dt.tzinfo is not None: - # dateutil can return a datetime with a tzoffset outside of (-24H, 24H) - # bounds, which is invalid (can be constructed, but raises if we call - # str(dt)). Check that and raise here if necessary. 
- try: - dt.utcoffset() - except ValueError as err: - # offset must be a timedelta strictly between -timedelta(hours=24) - # and timedelta(hours=24) - raise ValueError( - f'Parsed string "{date_string}" gives an invalid tzoffset, ' - "which must be between -timedelta(hours=24) and timedelta(hours=24)" - ) - return dt @@ -696,6 +681,19 @@ cdef datetime dateutil_parse( elif res.tzoffset: ret = ret.replace(tzinfo=tzoffset(res.tzname, res.tzoffset)) + # dateutil can return a datetime with a tzoffset outside of (-24H, 24H) + # bounds, which is invalid (can be constructed, but raises if we call + # str(ret)). Check that and raise here if necessary. + try: + ret.utcoffset() + except ValueError as err: + # offset must be a timedelta strictly between -timedelta(hours=24) + # and timedelta(hours=24) + raise ValueError( + f'Parsed string "{timestr}" gives an invalid tzoffset, ' + "which must be between -timedelta(hours=24) and timedelta(hours=24)" + ) from err + out_bestunit[0] = attrname_to_npy_unit[reso] return ret diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 333728ad1198d..e36198da03d04 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1,3 +1,5 @@ +import re + cimport numpy as cnp from cpython.object cimport ( Py_EQ, @@ -2591,20 +2593,31 @@ class Period(_Period): value = value.upper() freqstr = freq.rule_code if freq is not None else None - dt, reso = parse_datetime_string_with_reso(value, freqstr) - if reso == "nanosecond": - nanosecond = dt.nanosecond - if dt is NaT: - ordinal = NPY_NAT + try: + dt, reso = parse_datetime_string_with_reso(value, freqstr) + except ValueError: + match = re.search(r"^\d{4}-\d{2}-\d{2}/\d{4}-\d{2}-\d{2}", value) + if match: + # Case that cannot be parsed (correctly) by our datetime + # parsing logic + dt, freq = _parse_weekly_str(value, freq) + else: + raise - if freq is None and ordinal != NPY_NAT: - # Skip NaT, since it doesn't have a resolution - try: - freq = 
attrname_to_abbrevs[reso] - except KeyError: - raise ValueError(f"Invalid frequency or could not " - f"infer: {reso}") - freq = to_offset(freq) + else: + if reso == "nanosecond": + nanosecond = dt.nanosecond + if dt is NaT: + ordinal = NPY_NAT + + if freq is None and ordinal != NPY_NAT: + # Skip NaT, since it doesn't have a resolution + try: + freq = attrname_to_abbrevs[reso] + except KeyError: + raise ValueError(f"Invalid frequency or could not " + f"infer: {reso}") + freq = to_offset(freq) elif PyDateTime_Check(value): dt = value @@ -2664,3 +2677,28 @@ def validate_end_alias(how: str) -> str: # Literal["E", "S"] if how not in {"S", "E"}: raise ValueError("How must be one of S or E") return how + + +cdef _parse_weekly_str(value, BaseOffset freq): + """ + Parse e.g. "2017-01-23/2017-01-29", which cannot be parsed by the general + datetime-parsing logic. This ensures that we can round-trip with + Period.__str__ with weekly freq. + """ + # GH#50803 + start, end = value.split("/") + start = Timestamp(start) + end = Timestamp(end) + + if (end - start).days != 6: + # We are interested in cases where this is str(period) + # of a Week-freq period + raise ValueError("Could not parse as weekly-freq Period") + + if freq is None: + day_name = end.day_name()[:3].upper() + freqstr = f"W-{day_name}" + freq = to_offset(freqstr) + # We _should_ have freq.is_on_offset(end) + + return end, freq diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 710a98ba6f005..bb9a7dd9374b5 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -399,6 +399,19 @@ def test_period_cons_weekly(self, num, day): assert result == expected assert isinstance(result, Period) + def test_parse_week_str_roundtrip(self): + # GH#50803 + per = Period("2017-01-23/2017-01-29") + assert per.freq.freqstr == "W-SUN" + + per = Period("2017-01-24/2017-01-30") + assert per.freq.freqstr == "W-MON" + + msg = "Could not 
parse as weekly-freq Period" + with pytest.raises(ValueError, match=msg): + # not 6 days apart + Period("2016-01-23/2017-01-29") + def test_period_from_ordinal(self): p = Period("2011-01", freq="M") res = Period._from_ordinal(p.ordinal, freq="M")