forked from pydata/xarray

Commit e1a3823

Merge remote-tracking branch 'upstream/master' into fix/user-coordinates
* upstream/master:
  Allow appending datetime & boolean variables to zarr stores (pydata#3504)
  warn if dim is passed to rolling operations. (pydata#3513)
  Deprecate allow_lazy (pydata#3435)
  Recursive tokenization (pydata#3515)
  format indexing.rst code with black (pydata#3511)
2 parents 1a42c6c + 40588dc commit e1a3823

14 files changed: +208 −82 lines

doc/indexing.rst

Lines changed: 82 additions & 56 deletions
@@ -209,20 +209,23 @@ simultaneously, returning a new dataset:

 .. ipython:: python

-    da = xr.DataArray(np.random.rand(4, 3),
-                      [('time', pd.date_range('2000-01-01', periods=4)),
-                       ('space', ['IA', 'IL', 'IN'])])
-    ds = da.to_dataset(name='foo')
+    da = xr.DataArray(
+        np.random.rand(4, 3),
+        [
+            ("time", pd.date_range("2000-01-01", periods=4)),
+            ("space", ["IA", "IL", "IN"]),
+        ],
+    )
+    ds = da.to_dataset(name="foo")
     ds.isel(space=[0], time=[0])
-    ds.sel(time='2000-01-01')
+    ds.sel(time="2000-01-01")

 Positional indexing on a dataset is not supported because the ordering of
 dimensions in a dataset is somewhat ambiguous (it can vary between different
 arrays). However, you can do normal indexing with dimension names:

 .. ipython:: python

-
     ds[dict(space=[0], time=[0])]
     ds.loc[dict(time='2000-01-01')]

@@ -248,7 +251,6 @@ Any variables with these dimensions are also dropped:

     ds.drop_dims('time')

-
 .. _masking with where:

 Masking with ``where``

@@ -326,8 +328,12 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper:

 .. ipython:: python

-    da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'],
-                      coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']})
+
+    da = xr.DataArray(
+        np.arange(12).reshape((3, 4)),
+        dims=["x", "y"],
+        coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]},
+    )
     da
     da[[0, 1], [1, 1]]

@@ -410,43 +416,56 @@ can use indexing with ``.loc`` :

 .. ipython:: python

-    ds = xr.tutorial.open_dataset('air_temperature')
+    ds = xr.tutorial.open_dataset("air_temperature")

-    #add an empty 2D dataarray
-    ds['empty']= xr.full_like(ds.air.mean('time'),fill_value=0)
+    # add an empty 2D dataarray
+    ds["empty"] = xr.full_like(ds.air.mean("time"), fill_value=0)

-    #modify one grid point using loc()
-    ds['empty'].loc[dict(lon=260, lat=30)] = 100
+    # modify one grid point using loc()
+    ds["empty"].loc[dict(lon=260, lat=30)] = 100

-    #modify a 2D region using loc()
-    lc = ds.coords['lon']
-    la = ds.coords['lat']
-    ds['empty'].loc[dict(lon=lc[(lc>220)&(lc<260)], lat=la[(la>20)&(la<60)])] = 100
+    # modify a 2D region using loc()
+    lc = ds.coords["lon"]
+    la = ds.coords["lat"]
+    ds["empty"].loc[
+        dict(lon=lc[(lc > 220) & (lc < 260)], lat=la[(la > 20) & (la < 60)])
+    ] = 100

 or :py:meth:`~xarray.where`:

 .. ipython:: python

-    #modify one grid point using xr.where()
-    ds['empty'] = xr.where((ds.coords['lat']==20)&(ds.coords['lon']==260), 100, ds['empty'])
+    # modify one grid point using xr.where()
+    ds["empty"] = xr.where(
+        (ds.coords["lat"] == 20) & (ds.coords["lon"] == 260), 100, ds["empty"]
+    )
+
+    # or modify a 2D region using xr.where()
+    mask = (
+        (ds.coords["lat"] > 20)
+        & (ds.coords["lat"] < 60)
+        & (ds.coords["lon"] > 220)
+        & (ds.coords["lon"] < 260)
+    )
+    ds["empty"] = xr.where(mask, 100, ds["empty"])

-    #or modify a 2D region using xr.where()
-    mask = (ds.coords['lat']>20)&(ds.coords['lat']<60)&(ds.coords['lon']>220)&(ds.coords['lon']<260)
-    ds['empty'] = xr.where(mask, 100, ds['empty'])


 Vectorized indexing can also be used to assign values to xarray object.

 .. ipython:: python

-    da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'],
-                      coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']})
+    da = xr.DataArray(
+        np.arange(12).reshape((3, 4)),
+        dims=["x", "y"],
+        coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]},
+    )
     da
     da[0] = -1  # assignment with broadcasting
     da

-    ind_x = xr.DataArray([0, 1], dims=['x'])
-    ind_y = xr.DataArray([0, 1], dims=['y'])
+    ind_x = xr.DataArray([0, 1], dims=["x"])
+    ind_y = xr.DataArray([0, 1], dims=["y"])
     da[ind_x, ind_y] = -2  # assign -2 to (ix, iy) = (0, 0) and (1, 1)
     da

@@ -508,10 +527,10 @@ flexible indexing. The following is an example of the pointwise indexing:

 .. ipython:: python

-    da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y'])
+    da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=["x", "y"])
     da
-    da.isel(x=xr.DataArray([0, 1, 6], dims='z'),
-            y=xr.DataArray([0, 1, 0], dims='z'))
+    da.isel(x=xr.DataArray([0, 1, 6], dims="z"), y=xr.DataArray([0, 1, 0], dims="z"))
+

 where three elements at ``(ix, iy) = ((0, 0), (1, 1), (6, 0))`` are selected
 and mapped along a new dimension ``z``.

@@ -521,23 +540,27 @@ you can supply a :py:class:`~xarray.DataArray` with a coordinate,

 .. ipython:: python

-    da.isel(x=xr.DataArray([0, 1, 6], dims='z',
-                           coords={'z': ['a', 'b', 'c']}),
-            y=xr.DataArray([0, 1, 0], dims='z'))
-
+    da.isel(
+        x=xr.DataArray([0, 1, 6], dims="z", coords={"z": ["a", "b", "c"]}),
+        y=xr.DataArray([0, 1, 0], dims="z"),
+    )
+
 Analogously, label-based pointwise-indexing is also possible by the ``.sel``
 method:

 .. ipython:: python

-    da = xr.DataArray(np.random.rand(4, 3),
-                      [('time', pd.date_range('2000-01-01', periods=4)),
-                       ('space', ['IA', 'IL', 'IN'])])
-    times = xr.DataArray(pd.to_datetime(['2000-01-03', '2000-01-02', '2000-01-01']),
-                         dims='new_time')
-    da.sel(space=xr.DataArray(['IA', 'IL', 'IN'], dims=['new_time']),
-           time=times)
-
+    da = xr.DataArray(
+        np.random.rand(4, 3),
+        [
+            ("time", pd.date_range("2000-01-01", periods=4)),
+            ("space", ["IA", "IL", "IN"]),
+        ],
+    )
+    times = xr.DataArray(
+        pd.to_datetime(["2000-01-03", "2000-01-02", "2000-01-01"]), dims="new_time"
+    )
+    da.sel(space=xr.DataArray(["IA", "IL", "IN"], dims=["new_time"]), time=times)

 .. _align and reindex:

@@ -635,12 +658,16 @@ through the :py:attr:`~xarray.DataArray.indexes` attribute.

 .. ipython:: python

-    da = xr.DataArray(np.random.rand(4, 3),
-                      [('time', pd.date_range('2000-01-01', periods=4)),
-                       ('space', ['IA', 'IL', 'IN'])])
+    da = xr.DataArray(
+        np.random.rand(4, 3),
+        [
+            ("time", pd.date_range("2000-01-01", periods=4)),
+            ("space", ["IA", "IL", "IN"]),
+        ],
+    )
     da
     da.indexes
-    da.indexes['time']
+    da.indexes["time"]

 Use :py:meth:`~xarray.DataArray.get_index` to get an index for a dimension,
 falling back to a default :py:class:`pandas.RangeIndex` if it has no coordinate

@@ -694,32 +721,31 @@ pandas:

 .. ipython:: python

-    midx = pd.MultiIndex.from_product([list('abc'), [0, 1]],
-                                      names=('one', 'two'))
-    mda = xr.DataArray(np.random.rand(6, 3),
-                       [('x', midx), ('y', range(3))])
-    mda
-    mda.sel(x=(list('ab'), [0]))
+
+    midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two"))
+    mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))])
+    mda
+    mda.sel(x=(list("ab"), [0]))

 You can also select multiple elements by providing a list of labels or tuples or
 a slice of tuples:

 .. ipython:: python

-    mda.sel(x=[('a', 0), ('b', 1)])
+    mda.sel(x=[('a', 0), ('b', 1)])

 Additionally, xarray supports dictionaries:

 .. ipython:: python

-    mda.sel(x={'one': 'a', 'two': 0})
+    mda.sel(x={'one': 'a', 'two': 0})

 For convenience, ``sel`` also accepts multi-index levels directly
 as keyword arguments:

 .. ipython:: python

-    mda.sel(one='a', two=0)
+    mda.sel(one='a', two=0)

 Note that using ``sel`` it is not possible to mix a dimension
 indexer with level indexers for that dimension

@@ -731,7 +757,7 @@ multi-index is reduced to a single index.

 .. ipython:: python

-    mda.loc[{'one': 'a'}, ...]
+    mda.loc[{'one': 'a'}, ...]

 Unlike pandas, xarray does not guess whether you provide index levels or
 dimensions when using ``loc`` in some ambiguous cases. For example, for

doc/whats-new.rst

Lines changed: 10 additions & 0 deletions
@@ -74,6 +74,8 @@ New Features
   deterministic hashing in previous releases; this change implements it when whole
   xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is
   invoked. (:issue:`3378`, :pull:`3446`)
+  xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is
+  invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`)
   By `Deepak Cherian <https://github.com/dcherian>`_ and
   `Guido Imperiale <https://github.com/crusaderky>`_.
 - xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing to disk.

@@ -91,9 +93,14 @@ Bug fixes
   By `Deepak Cherian <https://github.com/dcherian>`_.
 - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4.
   By `Anderson Banihirwe <https://github.com/andersy005>`_.
+- Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`).
+  In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated.
+  By `Deepak Cherian <https://github.com/dcherian>`_.
 - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and
   :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions.
   (:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian/>`_
+- Allow appending datetime and bool data variables to zarr stores.
+  (:issue:`3480`). By `Akihiro Matsukawa <https://github.com/amatsukawa/>`_.

 Documentation
 ~~~~~~~~~~~~~

@@ -220,6 +227,9 @@ Bug fixes
   By `Deepak Cherian <https://github.com/dcherian>`_.
 - Fix error in concatenating unlabeled dimensions (:pull:`3362`).
   By `Deepak Cherian <https://github.com/dcherian/>`_.
+- Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is
+  specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created.
+  (:pull:`3362`). By `Deepak Cherian <https://github.com/dcherian/>`_.

 Documentation
 ~~~~~~~~~~~~~
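To illustrate the rolling entry above, here is a minimal sketch of the documented behaviour. It assumes only what the changelog states; the exact warning text is not part of this commit:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(10.0), dims="time")
    r = da.rolling(time=3)  # the rolling dimension is fixed here

    r.mean()            # preferred: no ``dim`` argument
    r.mean(dim="time")  # per the entry above, this now warns about the redundant ``dim`` kwarg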

xarray/backends/api.py

Lines changed: 5 additions & 2 deletions
@@ -1234,15 +1234,18 @@ def _validate_datatypes_for_zarr_append(dataset):
     def check_dtype(var):
         if (
             not np.issubdtype(var.dtype, np.number)
+            and not np.issubdtype(var.dtype, np.datetime64)
+            and not np.issubdtype(var.dtype, np.bool)
             and not coding.strings.is_unicode_dtype(var.dtype)
             and not var.dtype == object
         ):
             # and not re.match('^bytes[1-9]+$', var.dtype.name)):
             raise ValueError(
                 "Invalid dtype for data variable: {} "
                 "dtype must be a subtype of number, "
-                "a fixed sized string, a fixed size "
-                "unicode string or an object".format(var)
+                "datetime, bool, a fixed sized string, "
+                "a fixed size unicode string or an "
+                "object".format(var)
             )

     for k in dataset.data_vars.values():
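The relaxed dtype check above is what lets datetime and bool data variables be appended to an existing zarr store. A hedged sketch of the intended use (requires the ``zarr`` package; the store path is made up for illustration):

    import numpy as np
    import pandas as pd
    import xarray as xr

    store = "example_append.zarr"  # hypothetical path

    ds1 = xr.Dataset(
        {
            "flag": ("time", np.array([True, False, True])),            # bool data variable
            "stamp": ("time", pd.date_range("2000-01-01", periods=3)),  # datetime64 data variable
        },
        coords={"time": [0, 1, 2]},
    )
    ds2 = xr.Dataset(
        {
            "flag": ("time", np.array([False, True, False])),
            "stamp": ("time", pd.date_range("2000-01-04", periods=3)),
        },
        coords={"time": [3, 4, 5]},
    )

    ds1.to_zarr(store, mode="w")
    ds2.to_zarr(store, mode="a", append_dim="time")  # no longer rejected by the dtype check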

xarray/core/common.py

Lines changed: 4 additions & 13 deletions
@@ -43,14 +43,12 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool
         if include_skipna:

             def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs):
-                return self.reduce(
-                    func, dim, axis, skipna=skipna, allow_lazy=True, **kwargs
-                )
+                return self.reduce(func, dim, axis, skipna=skipna, **kwargs)

         else:

             def wrapped_func(self, dim=None, axis=None, **kwargs):  # type: ignore
-                return self.reduce(func, dim, axis, allow_lazy=True, **kwargs)
+                return self.reduce(func, dim, axis, **kwargs)

         return wrapped_func

@@ -83,20 +81,13 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool

             def wrapped_func(self, dim=None, skipna=None, **kwargs):
                 return self.reduce(
-                    func,
-                    dim,
-                    skipna=skipna,
-                    numeric_only=numeric_only,
-                    allow_lazy=True,
-                    **kwargs,
+                    func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs
                 )

         else:

             def wrapped_func(self, dim=None, **kwargs):  # type: ignore
-                return self.reduce(
-                    func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs
-                )
+                return self.reduce(func, dim, numeric_only=numeric_only, **kwargs)

         return wrapped_func
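The wrapper changes above drop the explicit ``allow_lazy=True``; per the whats-new entry, laziness is now the default for reductions. A minimal sketch of the expected effect, assuming dask is installed:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.random.rand(4, 3), dims=("x", "y")).chunk({"x": 2})

    # The generated wrappers (``mean``, ``sum``, ...) no longer pass ``allow_lazy=True``;
    # the reduction is expected to stay lazy on dask-backed data by default.
    result = da.mean("x")
    print(type(result.data))  # expected: a dask array, not an eagerly computed numpy array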

xarray/core/dataarray.py

Lines changed: 3 additions & 1 deletion
@@ -755,7 +755,9 @@ def reset_coords(
         return dataset

     def __dask_tokenize__(self):
-        return (type(self), self._variable, self._coords, self._name)
+        from dask.base import normalize_token
+
+        return normalize_token((type(self), self._variable, self._coords, self._name))

     def __dask_graph__(self):
         return self._to_temp_dataset().__dask_graph__()
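Delegating ``__dask_tokenize__`` to ``normalize_token`` is what makes tokenization recursive and deterministic (the "Recursive tokenization" commit above). A hedged sketch of the intended behaviour:

    import numpy as np
    import xarray as xr
    from dask.base import tokenize

    da = xr.DataArray(np.arange(6).reshape(2, 3), dims=("x", "y"), coords={"x": [0, 1]})

    # Equal objects are expected to produce the same token, and different data a different one.
    print(tokenize(da) == tokenize(da.copy(deep=True)))  # expected: True
    print(tokenize(da) == tokenize(da + 1))              # expected: False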

xarray/core/dataset.py

Lines changed: 6 additions & 2 deletions
@@ -652,7 +652,11 @@ def load(self, **kwargs) -> "Dataset":
         return self

     def __dask_tokenize__(self):
-        return (type(self), self._variables, self._coord_names, self._attrs)
+        from dask.base import normalize_token
+
+        return normalize_token(
+            (type(self), self._variables, self._coord_names, self._attrs)
+        )

     def __dask_graph__(self):
         graphs = {k: v.__dask_graph__() for k, v in self.variables.items()}

@@ -4027,7 +4031,7 @@ def reduce(
         keep_attrs: bool = None,
         keepdims: bool = False,
         numeric_only: bool = False,
-        allow_lazy: bool = False,
+        allow_lazy: bool = None,
         **kwargs: Any,
     ) -> "Dataset":
         """Reduce this dataset by applying `func` along some dimension(s).

xarray/core/groupby.py

Lines changed: 1 addition & 3 deletions
@@ -585,9 +585,7 @@ def _first_or_last(self, op, skipna, keep_attrs):
             return self._obj
         if keep_attrs is None:
             keep_attrs = _get_keep_attrs(default=True)
-        return self.reduce(
-            op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True
-        )
+        return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs)

     def first(self, skipna=None, keep_attrs=None):
         """Return the first element of each group along the group dimension
