diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 26e701d008b3f..67835b859255a 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -557,6 +557,50 @@ We are stopping on the included end-point as it is part of the index dft2 = dft2.swaplevel(0, 1).sort_index() dft2.loc[idx[:, '2013-01-05'], :] +.. versionadded:: 0.21.1 + +``DatetimeIndex`` partial string indexing can be used with naive datetime-like labels when the ``DatetimeIndex`` has no timezone set. +If a timezone is provided by the label, the datetime index is assumed to be UTC and a ``UserWarning`` is emitted. + +.. note:: + + This works with both ``pd.Timestamp`` and strings. + +.. ipython:: python + :okwarning: + + first_january_implicit_utc = pd.date_range('2016-01-01T00:00', '2016-01-01T23:59', + freq='T') + + df = pd.DataFrame(index=first_january_implicit_utc, + data=np.arange(len(first_january_implicit_utc))) + + df + + four_minute_slice = df["2016-01-01T00:00-02:00":"2016-01-01T02:03"] + + four_minute_slice + + +``DatetimeIndex`` partial string indexing is always well-defined on a ``DatetimeIndex`` with timezone information. +If a timezone is provided by the label, that timezone is respected. +If no timezone is provided, then the same timezone as used in the ``DatetimeIndex`` is assumed. + +.. ipython:: python + + first_january_cet = pd.date_range('2016-01-01T00:00', '2016-01-01T23:59', + freq='T', tz="CET") + + df = pd.DataFrame(index=first_january_cet, + data=np.arange(len(first_january_cet))) + + df + + four_minute_slice = df["2016-01-01T00:00-01:00":"2016-01-01T02:03"] + + four_minute_slice + + .. _timeseries.slice_vs_exact_match: Slice vs. 
Exact Match diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 4c6cdb9846305..0ec2bc933495a 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -65,6 +65,8 @@ Conversion - Bug in :meth:`IntervalIndex.copy` when copying and ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`) - Bug in :func:`DataFrame.to_dict` where columns of datetime that are tz-aware were not converted to required arrays when used with ``orient='records'``, raising``TypeError` (:issue:`18372`) - Bug in :class:`DateTimeIndex` and :meth:`date_range` where mismatching tz-aware ``start`` and ``end`` timezones would not raise an err if ``end.tzinfo`` is None (:issue:`18431`) +- Bug in :class:`DatetimeIndex` where the timezone of a timezone-aware partial string label was ignored when indexing (:issue:`16785`) + - Indexing @@ -148,3 +150,4 @@ Other - - +- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 196c881f97526..cbf188239796a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1311,7 +1311,7 @@ def _parsed_string_to_bounds(self, reso, parsed): ---------- reso : Resolution Resolution provided by parsed string. - parsed : datetime + parsed : datetime or object Datetime from parsed string. 
Returns @@ -1319,6 +1319,20 @@ def _parsed_string_to_bounds(self, reso, parsed): lower, upper: pd.Timestamp """ + parsed = Timestamp(parsed) + if self.tz is None: + if parsed.tz is None: # both are naive, nothing to do + pass + else: # naive datetime index but label provides timezone + warnings.warn("Access naive datetime index with a label " + "containing a timezone, assume UTC") + parsed = parsed.tz_convert(utc) + else: + if parsed.tz is None: # treat like in same timezone + parsed = parsed.tz_localize(self.tz) + else: # actual timezone of the label should be considered + parsed = parsed.tz_convert(tz=self.tz) + if reso == 'year': return (Timestamp(datetime(parsed.year, 1, 1), tz=self.tz), Timestamp(datetime(parsed.year, 12, 31, 23, @@ -1364,7 +1378,7 @@ def _parsed_string_to_bounds(self, reso, parsed): st = datetime(parsed.year, parsed.month, parsed.day, parsed.hour, parsed.minute, parsed.second, parsed.microsecond) - return (Timestamp(st, tz=self.tz), Timestamp(st, tz=self.tz)) + return Timestamp(st, tz=self.tz), Timestamp(st, tz=self.tz) else: raise KeyError diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 076c3d6f25a89..801189135e405 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -230,14 +230,6 @@ def test_hash_error(self): type(index).__name__): hash(index) - def test_stringified_slice_with_tz(self): - # GH2658 - import datetime - start = datetime.datetime.now() - idx = DatetimeIndex(start=start, freq="1d", periods=10) - df = DataFrame(lrange(10), index=idx) - df["2013-01-14 23:44:34.437768-05:00":] # no exception here - def test_append_join_nondatetimeindex(self): rng = date_range('1/1/2000', periods=10) idx = Index(['a', 'b', 'c', 'd']) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index f95f493c66043..2ed1be276b34f 100644 --- a/pandas/tests/indexing/test_partial.py +++ 
b/pandas/tests/indexing/test_partial.py @@ -637,3 +637,66 @@ def test_partial_set_empty_frame_empty_consistencies(self): df.loc[0, 'x'] = 1 expected = DataFrame(dict(x=[1], y=[np.nan])) tm.assert_frame_equal(df, expected, check_dtype=False) + + def test_access_timezoned_datetimeindex_with_timezoned_label_utc(self): + + # GH 16785 + # timezone was ignored when string was provided as a label + + first_january = pd.date_range('2016-01-01T00:00', '2016-01-01T23:59', + freq='T', tz="UTC") + df = pd.DataFrame(index=first_january, data=np.arange(len( + first_january))) + + result = df[ + "2016-01-01T00:00-02:00":"2016-01-01T02:03" + ] + + expected = df[ + pd.Timestamp("2016-01-01T00:00-02:00"): + pd.Timestamp("2016-01-01T02:03") + ] + + tm.assert_frame_equal(result, expected) + + def test_access_timezoned_datetimeindex_with_timezoned_label_in_cet(self): + + # GH 16785 + # timezone was ignored when string was provided as a label + + first_january = pd.date_range('2016-01-01T00:00', '2016-01-01T23:59', + freq='T', tz="CET") + df = pd.DataFrame(index=first_january, data=np.arange(len( + first_january))) + + result = df[ + "2016-01-01T01:00-02:00":"2016-01-01T04:03" + ] + + expected = df[ + pd.Timestamp("2016-01-01T01:00-02:00"): + pd.Timestamp("2016-01-01T04:03") + ] + + tm.assert_frame_equal(result, expected) + + def test_access_naive_datetimeindex_with_timezoned_label(self): + + # GH 16785 + # timezone was ignored when string was provided as a label + # this test is for completeness + + first_january = pd.date_range('2016-01-01T00:00', '2016-01-01T23:59', + freq='T') + df = pd.DataFrame(index=first_january, data=np.arange(len( + first_january))) + + with tm.assert_produces_warning(UserWarning): + result = df["2016-01-01T00:00-02:00":"2016-01-01T02:03"] + + expected = df[ + pd.Timestamp("2016-01-01T00:00-02:00"): + pd.Timestamp("2016-01-01T02:03") + ] + + tm.assert_frame_equal(expected, result)