
Commit 4719ce6

Merge branch 'master' into file-manager-del
2 parents fd12b18 + a15587d commit 4719ce6

9 files changed: +173 −7 lines

doc/whats-new.rst

Lines changed: 11 additions & 0 deletions
@@ -33,6 +33,14 @@ v0.11.1 (unreleased)
 Breaking changes
 ~~~~~~~~~~~~~~~~
 
+- Time bounds variables are now also decoded according to CF conventions
+  (:issue:`2565`). The previous behavior was to decode them only if they
+  had specific time attributes; now these attributes are copied
+  automatically from the corresponding time coordinate. This might break
+  downstream code that relied on these variables not being decoded.
+  By `Fabien Maussion <https://github.com/fmaussion>`_.
+
 Enhancements
 ~~~~~~~~~~~~
 
@@ -50,6 +58,9 @@ Enhancements
   recommend enabling it in your test suites if you use xarray for IO.
   By `Stephan Hoyer <https://github.com/shoyer>`_
 - Support Dask ``HighLevelGraphs`` by `Matthew Rocklin <https://matthewrocklin.com>`_.
+- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now support the
+  ``loffset`` kwarg just like pandas.
+  By `Deepak Cherian <https://github.com/dcherian>`_
 
 Bug fixes
 ~~~~~~~~~
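For illustration, here is a minimal end-to-end sketch of the new decoding behavior described in the changelog entry above. It is not part of the commit; the dataset layout and attribute values are borrowed from the test added in xarray/tests/test_coding_times.py below.

import numpy as np
import xarray as xr

# A 'time' coordinate with CF time attributes and a 'bounds' attribute
# pointing at 'time_bnds', which itself carries no time attributes.
ds = xr.Dataset(
    {'time_bnds': (('time', 'nbnd'),
                   np.arange(6, dtype='int64').reshape(3, 2))},
    coords={'time': ('time', [1, 2, 3],
                     {'units': 'days since 2001-01',
                      'calendar': 'standard',
                      'bounds': 'time_bnds'})})

decoded = xr.decode_cf(ds)
# With this change, 'time_bnds' is decoded to datetime64 alongside 'time'.
print(decoded['time_bnds'].dtype)  # datetime64[ns]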

xarray/conventions.py

Lines changed: 33 additions & 1 deletion
@@ -320,11 +320,39 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
     return Variable(dimensions, data, attributes, encoding=encoding)
 
 
+def _update_bounds_attributes(variables):
+    """Adds time attributes to time bounds variables.
+
+    Variables handling time bounds ("Cell boundaries" in the CF
+    conventions) do not necessarily carry the necessary attributes to be
+    decoded. This copies the attributes from the time variable to the
+    associated boundaries.
+
+    See Also:
+
+    http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
+    cf-conventions.html#cell-boundaries
+
+    https://github.com/pydata/xarray/issues/2565
+    """
+
+    # For all time variables with bounds
+    for v in variables.values():
+        attrs = v.attrs
+        has_date_units = 'units' in attrs and 'since' in attrs['units']
+        if has_date_units and 'bounds' in attrs:
+            if attrs['bounds'] in variables:
+                bounds_attrs = variables[attrs['bounds']].attrs
+                bounds_attrs.setdefault('units', attrs['units'])
+                if 'calendar' in attrs:
+                    bounds_attrs.setdefault('calendar', attrs['calendar'])
+
+
 def decode_cf_variables(variables, attributes, concat_characters=True,
                         mask_and_scale=True, decode_times=True,
                         decode_coords=True, drop_variables=None):
     """
-    Decode a several CF encoded variables.
+    Decode several CF encoded variables.
 
     See: decode_cf_variable
     """
@@ -350,6 +378,10 @@ def stackable(dim):
         drop_variables = []
     drop_variables = set(drop_variables)
 
+    # Time bounds coordinates might miss the decoding attributes
+    if decode_times:
+        _update_bounds_attributes(variables)
+
     new_vars = OrderedDict()
     for k, v in iteritems(variables):
         if k in drop_variables:
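A usage note on the helper above, sketched rather than taken from the commit: because it only calls setdefault, it fills in a missing 'units'/'calendar' on the bounds variable but never overwrites attributes already present, which is also what the new test in xarray/tests/test_coding_times.py checks.

import numpy as np
import xarray as xr
from xarray.conventions import _update_bounds_attributes  # private helper

ds = xr.Dataset(
    {'time_bnds': (('time', 'nbnd'),
                   np.arange(6, dtype='int64').reshape(3, 2))},
    coords={'time': ('time', [1, 2, 3],
                     {'units': 'days since 2001-01',
                      'calendar': 'standard',
                      'bounds': 'time_bnds'})})
# Give the bounds variable its own units; setdefault() must not overwrite them.
ds['time_bnds'].attrs['units'] = 'hours since 2001-01'

_update_bounds_attributes(ds.variables)
print(ds.variables['time_bnds'].attrs)
# {'units': 'hours since 2001-01', 'calendar': 'standard'}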

xarray/core/common.py

Lines changed: 7 additions & 2 deletions
@@ -592,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
                            center=center)
 
     def resample(self, indexer=None, skipna=None, closed=None, label=None,
-                 base=0, keep_attrs=None, **indexer_kwargs):
+                 base=0, keep_attrs=None, loffset=None, **indexer_kwargs):
         """Returns a Resample object for performing resampling operations.
 
         Handles both downsampling and upsampling. If any intervals contain no
@@ -612,6 +612,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
             For frequencies that evenly subdivide 1 day, the "origin" of the
             aggregated intervals. For example, for '24H' frequency, base could
             range from 0 through 23.
+        loffset : timedelta or str, optional
+            Offset used to adjust the resampled time labels. Some pandas date
+            offset strings are supported.
         keep_attrs : bool, optional
             If True, the object's attributes (`attrs`) will be copied from
             the original object to the new one. If False (default), the new
@@ -700,7 +703,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
 
         group = DataArray(dim_coord, coords=dim_coord.coords,
                           dims=dim_coord.dims, name=RESAMPLE_DIM)
-        grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base)
+        # TODO: to_offset() call required for pandas==0.19.2
+        grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base,
+                             loffset=pd.tseries.frequencies.to_offset(loffset))
         resampler = self._resample_cls(self, group=group, dim=dim_name,
                                        grouper=grouper,
                                        resample_dim=RESAMPLE_DIM)
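For reference, a minimal usage sketch of the new keyword (not part of the commit; the frequency and offset values follow the tests added below): with loffset, the resampled time labels are shifted by the given offset, matching pandas semantics.

import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range('2000-01-01', freq='6H', periods=10)
da = xr.DataArray(np.arange(10), coords=[('time', times)])

# Without loffset the daily bins are labelled 2000-01-01, 2000-01-02, ...
# With loffset='-12H' every label is shifted back by twelve hours.
shifted = da.resample(time='24H', loffset='-12H').mean()
print(shifted['time'].values[0])  # 1999-12-31T12:00:00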

xarray/core/groupby.py

Lines changed: 28 additions & 0 deletions
@@ -3,6 +3,7 @@
 import functools
 import warnings
 
+import datetime
 import numpy as np
 import pandas as pd
 
@@ -154,6 +155,32 @@ def _unique_and_monotonic(group):
     return index.is_unique and index.is_monotonic
 
 
+def _apply_loffset(grouper, result):
+    """
+    (copied from pandas)
+    if loffset is set, offset the result index
+
+    This is NOT an idempotent routine, it will be applied
+    exactly once to the result.
+
+    Parameters
+    ----------
+    result : Series or DataFrame
+        the result of resample
+    """
+
+    needs_offset = (
+        isinstance(grouper.loffset, (pd.DateOffset, datetime.timedelta))
+        and isinstance(result.index, pd.DatetimeIndex)
+        and len(result.index) > 0
+    )
+
+    if needs_offset:
+        result.index = result.index + grouper.loffset
+
+    grouper.loffset = None
+
+
 class GroupBy(SupportsArithmetic):
     """A object that implements the split-apply-combine pattern.
 
@@ -235,6 +262,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
                 raise ValueError('index must be monotonic for resampling')
             s = pd.Series(np.arange(index.size), index)
             first_items = s.groupby(grouper).first()
+            _apply_loffset(grouper, first_items)
             full_index = first_items.index
             if first_items.isnull().any():
                 first_items = first_items.dropna()
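The helper above mirrors what pandas itself does with loffset. A pandas-only sketch of the behavior being copied (not part of the commit; assumes a pandas version that still accepts loffset, as supported at the time of this change):

import numpy as np
import pandas as pd

s = pd.Series(np.arange(10),
              index=pd.date_range('2000-01-01', freq='6H', periods=10))

# Plain daily resampling labels the first bin 2000-01-01 00:00.
print(s.resample('24H').mean().index[0])
# With loffset the label is shifted by the offset: 1999-12-31 12:00.
print(s.resample('24H', loffset='-12H').mean().index[0])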

xarray/tests/test_coding_times.py

Lines changed: 36 additions & 0 deletions
@@ -10,6 +10,7 @@
 from xarray import DataArray, Variable, coding, decode_cf
 from xarray.coding.times import (_import_cftime, cftime_to_nptime,
                                  decode_cf_datetime, encode_cf_datetime)
+from xarray.conventions import _update_bounds_attributes
 from xarray.core.common import contains_cftime_datetimes
 
 from . import (
@@ -624,6 +625,41 @@ def test_decode_cf(calendar):
     assert ds.test.dtype == np.dtype('M8[ns]')
 
 
+def test_decode_cf_time_bounds():
+
+    da = DataArray(np.arange(6, dtype='int64').reshape((3, 2)),
+                   coords={'time': [1, 2, 3]},
+                   dims=('time', 'nbnd'), name='time_bnds')
+
+    attrs = {'units': 'days since 2001-01',
+             'calendar': 'standard',
+             'bounds': 'time_bnds'}
+
+    ds = da.to_dataset()
+    ds['time'].attrs.update(attrs)
+    _update_bounds_attributes(ds.variables)
+    assert ds.variables['time_bnds'].attrs == {'units': 'days since 2001-01',
+                                               'calendar': 'standard'}
+    dsc = decode_cf(ds)
+    assert dsc.time_bnds.dtype == np.dtype('M8[ns]')
+    dsc = decode_cf(ds, decode_times=False)
+    assert dsc.time_bnds.dtype == np.dtype('int64')
+
+    # Do not overwrite existing attrs
+    ds = da.to_dataset()
+    ds['time'].attrs.update(attrs)
+    bnd_attr = {'units': 'hours since 2001-01', 'calendar': 'noleap'}
+    ds['time_bnds'].attrs.update(bnd_attr)
+    _update_bounds_attributes(ds.variables)
+    assert ds.variables['time_bnds'].attrs == bnd_attr
+
+    # If bounds variable not available do not complain
+    ds = da.to_dataset()
+    ds['time'].attrs.update(attrs)
+    ds['time'].attrs['bounds'] = 'fake_var'
+    _update_bounds_attributes(ds.variables)
+
+
 @pytest.fixture(params=_ALL_CALENDARS)
 def calendar(request):
     return request.param

xarray/tests/test_dataarray.py

Lines changed: 5 additions & 0 deletions
@@ -2273,6 +2273,11 @@ def test_resample(self):
         actual = array.resample(time='24H').reduce(np.mean)
         assert_identical(expected, actual)
 
+        actual = array.resample(time='24H', loffset='-12H').mean()
+        expected = DataArray(array.to_series().resample('24H', loffset='-12H')
+                             .mean())
+        assert_identical(expected, actual)
+
         with raises_regex(ValueError, 'index must be monotonic'):
             array[[2, 0, 1]].resample(time='1D')

xarray/tests/test_dataset.py

Lines changed: 12 additions & 0 deletions
@@ -2804,6 +2804,18 @@ def test_resample_by_mean_with_keep_attrs(self):
         expected = ds.attrs
         assert expected == actual
 
+    def test_resample_loffset(self):
+        times = pd.date_range('2000-01-01', freq='6H', periods=10)
+        ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),
+                      'bar': ('time', np.random.randn(10), {'meta': 'data'}),
+                      'time': times})
+        ds.attrs['dsmeta'] = 'dsdata'
+
+        actual = ds.resample(time='24H', loffset='-12H').mean('time').time
+        expected = xr.DataArray(ds.bar.to_series()
+                                .resample('24H', loffset='-12H').mean()).time
+        assert_identical(expected, actual)
+
     def test_resample_by_mean_discarding_attrs(self):
         times = pd.date_range('2000-01-01', freq='6H', periods=10)
         ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),

xarray/tests/test_ufuncs.py

Lines changed: 35 additions & 0 deletions
@@ -172,6 +172,41 @@ def test_xarray_ufuncs_deprecation():
     with pytest.warns(PendingDeprecationWarning, match='xarray.ufuncs'):
         xu.cos(xr.DataArray([0, 1]))
 
+    with pytest.warns(None) as record:
+        xu.angle(xr.DataArray([0, 1]))
+    record = [el.message for el in record
+              if el.category == PendingDeprecationWarning]
+    assert len(record) == 0
+
+
+@requires_np113
+@pytest.mark.filterwarnings('ignore::RuntimeWarning')
+@pytest.mark.parametrize(
+    'name',
+    [name for name in dir(xu)
+     if (not name.startswith('_') and hasattr(np, name)
+         and name not in ['print_function', 'absolute_import', 'division'])]
+)
+def test_numpy_ufuncs(name, request):
+    x = xr.DataArray([1, 1])
+
+    np_func = getattr(np, name)
+    if hasattr(np_func, 'nin') and np_func.nin == 2:
+        args = (x, x)
+    else:
+        args = (x,)
+
+    y = np_func(*args)
+
+    if name in ['angle', 'iscomplex']:
+        # these functions need to be handled with __array_function__ protocol
+        assert isinstance(y, np.ndarray)
+    elif name in ['frexp']:
+        # np.frexp returns a tuple
+        assert not isinstance(y, xr.DataArray)
+    else:
+        assert isinstance(y, xr.DataArray)
+
 
 def test_xarray_ufuncs_pickle():
     a = 1.0

xarray/ufuncs.py

Lines changed: 6 additions & 4 deletions
@@ -44,10 +44,12 @@ def __init__(self, name):
         self._name = name
 
     def __call__(self, *args, **kwargs):
-        _warnings.warn(
-            'xarray.ufuncs will be deprecated when xarray no longer supports '
-            'versions of numpy older than v1.13. Instead, use numpy ufuncs '
-            'directly.', PendingDeprecationWarning, stacklevel=2)
+        if self._name not in ['angle', 'iscomplex']:
+            _warnings.warn(
+                'xarray.ufuncs will be deprecated when xarray no longer '
+                'supports versions of numpy older than v1.17. Instead, use '
+                'numpy ufuncs directly.',
+                PendingDeprecationWarning, stacklevel=2)
 
         new_args = args
         f = _dask_or_eager_func(self._name, array_args=slice(len(args)))
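In short, after this change xu.angle and xu.iscomplex are exempt from the PendingDeprecationWarning, since (per the new test above) they still need the __array_function__ protocol before plain numpy calls can return DataArray objects. A quick sketch of the resulting behavior, not part of the commit:

import warnings
import xarray as xr
import xarray.ufuncs as xu

da = xr.DataArray([0.0, 1.0])

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    xu.cos(da)    # still warns: prefer np.cos(da)
    xu.angle(da)  # exempt from the warning after this change
print([w.category.__name__ for w in caught])  # ['PendingDeprecationWarning']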
