Skip to content

Commit fd9e620

Browse files
aulemahalspencerkclarkmathause
authored
xr.infer_freq (#4033)
* xr.infer_freq and related code * Formatting and comments * Rewrite _CFTimeFrequencyInferer independently of pandas * Syntax and add frequency.py file * Fix tests and month_deltas * Require cftime 1.1.0 for the test * Apply suggestions from code review Co-authored-by: Spencer Clark <[email protected]> * Changes following code review * Docs * Docs * Black * Fix tests for requiring cftime 1.1.0 * Update whats-new * Apply suggestions from code review Co-authored-by: Spencer Clark <[email protected]> Co-authored-by: Mathias Hauser <[email protected]> * Add invalid input tests for better coverage * Fix link in whats-new.rst Co-authored-by: Spencer Clark <[email protected]> Co-authored-by: Mathias Hauser <[email protected]>
1 parent 73b013f commit fd9e620

File tree

7 files changed

+358
-2
lines changed

7 files changed

+358
-2
lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Top-level functions
2626
combine_nested
2727
where
2828
set_options
29+
infer_freq
2930
full_like
3031
zeros_like
3132
ones_like

doc/weather-climate.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@ instance, we can create the same dates and DataArray we created above using:
7474
dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap")
7575
da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo")
7676
77+
Mirroring pandas' method with the same name, :py:meth:`~xarray.infer_freq` allows one to
78+
infer the sampling frequency of a :py:class:`~xarray.CFTimeIndex` or a 1-D
79+
:py:class:`~xarray.DataArray` containing cftime objects. It also works transparently with
80+
``np.datetime64[ns]`` and ``np.timedelta64[ns]`` data.
81+
82+
.. ipython:: python
83+
84+
xr.infer_freq(dates)
85+
7786
With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from
7887
the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the
7988
:py:meth:`~xarray.DataArray.dt` accessor for a :py:class:`~xarray.DataArray`

doc/whats-new.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ Enhancements
4343

4444
New Features
4545
~~~~~~~~~~~~
46-
46+
- Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`).
47+
By `Pascal Bourgault <https://github.com/aulemahal>`_.
4748
- ``chunks='auto'`` is now supported in the ``chunks`` argument of
4849
:py:meth:`Dataset.chunk`. (:issue:`4055`)
4950
By `Andrew Williams <https://github.com/AndrewWilliams3142>`_

xarray/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .backends.zarr import open_zarr
1414
from .coding.cftime_offsets import cftime_range
1515
from .coding.cftimeindex import CFTimeIndex
16+
from .coding.frequencies import infer_freq
1617
from .conventions import SerializationWarning, decode_cf
1718
from .core.alignment import align, broadcast
1819
from .core.combine import auto_combine, combine_by_coords, combine_nested
@@ -57,6 +58,7 @@
5758
"cov",
5859
"corr",
5960
"full_like",
61+
"infer_freq",
6062
"load_dataarray",
6163
"load_dataset",
6264
"map_blocks",

xarray/coding/cftimeindex.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,8 @@ def asi8(self):
578578
[
579579
_total_microseconds(exact_cftime_datetime_difference(epoch, date))
580580
for date in self.values
581-
]
581+
],
582+
dtype=np.int64,
582583
)
583584

584585
def _round_via_method(self, freq, method):

xarray/coding/frequencies.py

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
"""FrequencyInferer analog for cftime.datetime objects"""
2+
# The infer_freq method and the _CFTimeFrequencyInferer
3+
# class defined here were copied and adapted for
4+
# use with cftime.datetime objects based on the source code in
5+
# pandas.tseries.frequencies._FrequencyInferer
6+
7+
# For reference, here is a copy of the pandas copyright notice:
8+
9+
# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
10+
# All rights reserved.
11+
12+
# Copyright (c) 2008-2011 AQR Capital Management, LLC
13+
# All rights reserved.
14+
15+
# Redistribution and use in source and binary forms, with or without
16+
# modification, are permitted provided that the following conditions are
17+
# met:
18+
19+
# * Redistributions of source code must retain the above copyright
20+
# notice, this list of conditions and the following disclaimer.
21+
22+
# * Redistributions in binary form must reproduce the above
23+
# copyright notice, this list of conditions and the following
24+
# disclaimer in the documentation and/or other materials provided
25+
# with the distribution.
26+
27+
# * Neither the name of the copyright holder nor the names of any
28+
# contributors may be used to endorse or promote products derived
29+
# from this software without specific prior written permission.
30+
31+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
32+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
34+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
35+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
37+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
41+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42+
43+
import numpy as np
44+
import pandas as pd
45+
46+
from ..core.common import _contains_datetime_like_objects
47+
from .cftime_offsets import _MONTH_ABBREVIATIONS
48+
from .cftimeindex import CFTimeIndex
49+
50+
# Durations expressed in microseconds, the unit in which the inferer's
# ``deltas`` are stored. Each constant is built from the previous one.
_ONE_MICRO = 1
_ONE_MILLI = _ONE_MICRO * 1_000
_ONE_SECOND = _ONE_MILLI * 1_000
_ONE_MINUTE = 60 * _ONE_SECOND
_ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR
56+
57+
58+
def infer_freq(index):
    """
    Infer the most likely frequency given the input index.

    Parameters
    ----------
    index : CFTimeIndex, DataArray, pd.DatetimeIndex, pd.TimedeltaIndex, pd.Series
        A CFTimeIndex is handled by the cftime-aware inferer defined in this
        module. A Series or DataArray is converted from its *values* (NOT its
        index/coordinates): ``datetime64[ns]`` data becomes a
        ``pd.DatetimeIndex``, ``timedelta64[ns]`` data becomes a
        ``pd.TimedeltaIndex``, and anything else is wrapped in a CFTimeIndex.
        Every input that is not (or does not become) a CFTimeIndex is passed
        to `pandas.infer_freq`.

    Returns
    -------
    str or None
        None if no discernible frequency.

    Raises
    ------
    ValueError
        If a DataArray or Series is not 1D or does not contain datetime-like
        objects, or if a CFTimeIndex has fewer than three values.

    Notes
    -----
    Exceptions raised by `pandas.infer_freq` for the inputs delegated to it
    propagate unchanged.
    """
    # Imported here rather than at module level, as in the original code.
    # NOTE(review): presumably to avoid a circular import — confirm.
    from xarray.core.dataarray import DataArray

    if isinstance(index, (DataArray, pd.Series)):
        if index.ndim != 1:
            raise ValueError("'index' must be 1D")
        if not _contains_datetime_like_objects(DataArray(index)):
            raise ValueError("'index' must contain datetime-like objects")
        dtype = np.asarray(index).dtype
        if dtype == "datetime64[ns]":
            index = pd.DatetimeIndex(index.values)
        elif dtype == "timedelta64[ns]":
            index = pd.TimedeltaIndex(index.values)
        else:
            index = CFTimeIndex(index.values)

    if isinstance(index, CFTimeIndex):
        inferer = _CFTimeFrequencyInferer(index)
        return inferer.get_freq()

    return pd.infer_freq(index)
100+
101+
102+
class _CFTimeFrequencyInferer:
    """Infer the frequency string that describes a CFTimeIndex.

    Analog of ``pandas.tseries.frequencies._FrequencyInferer``, written as a
    standalone class (it does not inherit from the pandas one). It works on
    the integer values exposed by ``index.asi8`` and on the ``year`` and
    ``month`` fields of the index.
    """

    def __init__(self, index):
        """Store the index and its integer representation.

        Raises
        ------
        ValueError
            If ``index`` holds fewer than three dates.
        """
        self.index = index
        self.values = index.asi8

        if len(index) < 3:
            raise ValueError("Need at least 3 dates to infer frequency")

        self.is_monotonic = (
            self.index.is_monotonic_decreasing or self.index.is_monotonic_increasing
        )

        # Caches filled on first access by the corresponding properties.
        self._deltas = None
        self._year_deltas = None
        self._month_deltas = None

    def get_freq(self):
        """Find the appropriate frequency string to describe the inferred frequency of self.index

        Adapted from `pandas.tseries.frequencies._FrequencyInferer.get_freq` for CFTimeIndexes.

        Returns
        -------
        str or None
            None when the index is not monotonic, not unique, or has no
            single discernible frequency.
        """
        if not self.is_monotonic or not self.index.is_unique:
            return None

        delta = self.deltas[0]  # Smallest delta
        if _is_multiple(delta, _ONE_DAY):
            return self._infer_daily_rule()
        # There is no possible intraday frequency with a non-unique delta
        # Different from pandas: we don't need to manage DST and business offsets in cftime
        elif len(self.deltas) != 1:
            return None

        # Pick the coarsest sub-daily unit that divides the delta exactly.
        if _is_multiple(delta, _ONE_HOUR):
            return _maybe_add_count("H", delta / _ONE_HOUR)
        elif _is_multiple(delta, _ONE_MINUTE):
            return _maybe_add_count("T", delta / _ONE_MINUTE)
        elif _is_multiple(delta, _ONE_SECOND):
            return _maybe_add_count("S", delta / _ONE_SECOND)
        elif _is_multiple(delta, _ONE_MILLI):
            return _maybe_add_count("L", delta / _ONE_MILLI)
        else:
            return _maybe_add_count("U", delta / _ONE_MICRO)

    def _infer_daily_rule(self):
        """Infer an annual, quarterly, monthly or daily frequency string.

        The rules are tried from coarsest to finest; None is returned when
        none of them applies.
        """
        annual_rule = self._get_annual_rule()
        if annual_rule:
            nyears = self.year_deltas[0]
            month = _MONTH_ABBREVIATIONS[self.index[0].month]
            alias = f"{annual_rule}-{month}"
            return _maybe_add_count(alias, nyears)

        quarterly_rule = self._get_quartely_rule()
        if quarterly_rule:
            nquarters = self.month_deltas[0] / 3
            # Maps (month of the first date) % 3 to the month number used as
            # the anchor in the alias.
            mod_dict = {0: 12, 2: 11, 1: 10}
            month = _MONTH_ABBREVIATIONS[mod_dict[self.index[0].month % 3]]
            alias = f"{quarterly_rule}-{month}"
            return _maybe_add_count(alias, nquarters)

        monthly_rule = self._get_monthly_rule()
        if monthly_rule:
            return _maybe_add_count(monthly_rule, self.month_deltas[0])

        if len(self.deltas) == 1:
            # Daily as there is no "Weekly" offsets with CFTime
            days = self.deltas[0] / _ONE_DAY
            return _maybe_add_count("D", days)

        # CFTime has no business freq and no "week of month" (WOM)
        return None

    def _get_annual_rule(self):
        """Return "AS" or "A" for year-start/year-end spaced dates, else None.

        Requires a single unique year delta and a single month across the
        whole index.
        """
        if len(self.year_deltas) > 1:
            return None

        if len(np.unique(self.index.month)) > 1:
            return None

        return {"cs": "AS", "ce": "A"}.get(month_anchor_check(self.index))

    def _get_quartely_rule(self):
        """Return "QS" or "Q" for quarterly spaced dates, else None.

        Requires a single unique month delta that is a multiple of 3.
        """
        if len(self.month_deltas) > 1:
            return None

        if self.month_deltas[0] % 3 != 0:
            return None

        return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index))

    def _get_monthly_rule(self):
        """Return "MS" or "M" for monthly spaced dates, else None.

        Requires a single unique month delta.
        """
        if len(self.month_deltas) > 1:
            return None

        return {"cs": "MS", "ce": "M"}.get(month_anchor_check(self.index))

    @property
    def deltas(self):
        """Sorted unique timedeltas as microseconds."""
        if self._deltas is None:
            self._deltas = _unique_deltas(self.values)
        return self._deltas

    @property
    def year_deltas(self):
        """Sorted unique year deltas."""
        if self._year_deltas is None:
            self._year_deltas = _unique_deltas(self.index.year)
        return self._year_deltas

    @property
    def month_deltas(self):
        """Sorted unique month deltas."""
        if self._month_deltas is None:
            # year * 12 + month gives a running month count, so its
            # differences are month deltas that also span year boundaries.
            self._month_deltas = _unique_deltas(self.index.year * 12 + self.index.month)
        return self._month_deltas
221+
222+
223+
def _unique_deltas(arr):
    """Return the sorted unique consecutive differences of a numpy array.

    Parameters
    ----------
    arr : array-like
        1-D sequence of numbers.

    Returns
    -------
    numpy.ndarray
        The distinct values of ``np.diff(arr)`` in ascending order.
    """
    # np.unique already returns its result sorted, so no extra sort is needed.
    return np.unique(np.diff(arr))
226+
227+
228+
def _is_multiple(us, mult: int):
    """Return whether ``us`` is an exact multiple of ``mult``.

    Parameters
    ----------
    us : int
        Value to test (a number of microseconds in this module's usage).
    mult : int
        Candidate divisor.
    """
    return us % mult == 0
231+
232+
233+
def _maybe_add_count(base: str, count: float):
    """Prefix the offset string with its count unless the count is exactly 1.

    Parameters
    ----------
    base : str
        Offset alias, e.g. ``"D"`` or ``"MS"``.
    count : float
        Multiple of the offset; must be a whole number.

    Returns
    -------
    str
        ``base`` when ``count == 1``, otherwise ``f"{int(count)}{base}"``.
    """
    if count == 1:
        return base
    # Guards the invariant that the count is a whole number before it is
    # truncated to an int for formatting.
    assert count == int(count)
    return f"{int(count)}{base}"
241+
242+
243+
def month_anchor_check(dates):
    """Return the monthly offset string.

    Return "cs" if all dates are the first days of the month,
    "ce" if all dates are the last day of the month,
    None otherwise.

    Replicated pandas._libs.tslibs.resolution.month_position_check
    but without business offset handling.

    Parameters
    ----------
    dates : iterable
        Objects exposing ``day`` and ``daysinmonth`` attributes.

    Notes
    -----
    Both anchors start out as satisfied and "ce" is tested first, so an
    empty ``dates`` yields "ce".
    """
    calendar_start = True
    calendar_end = True

    for date in dates:
        # Only keep testing an anchor while it has not been ruled out.
        if calendar_start:
            calendar_start = date.day == 1
        if calendar_end:
            calendar_end = date.day == date.daysinmonth
        if not (calendar_start or calendar_end):
            # Neither anchor can hold any more; the answer is None.
            break

    if calendar_end:
        return "ce"
    if calendar_start:
        return "cs"
    return None

0 commit comments

Comments
 (0)