Skip to content

Commit 308bb37

Browse files
authored
make coarsen reductions consistent with reductions on other classes (#3500)
* Coarsen now has the same reduction methods as groupby & rolling. This brings in support for coarsen.count as well as passing skipna down to the other reduction functions.
* test for count
* Test that dims passed to coarsen are present in dataset.
* Add whats-new
* fix tests.
* review comments.
* Update doc/whats-new.rst
  Co-Authored-By: keewis <[email protected]>
* fix whats-new
1 parent ed05f98 commit 308bb37

File tree

7 files changed, with 74 additions and 35 deletions.

doc/whats-new.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ New Features
2828
- :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile``
2929
now work with dask Variables.
3030
By `Deepak Cherian <https://github.com/dcherian>`_.
31-
31+
- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen`
32+
and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`)
33+
By `Deepak Cherian <https://github.com/dcherian/>`_
3234

3335
Bug fixes
3436
~~~~~~~~~

xarray/core/nanops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):
2525
"""
2626
if hasattr(axis, "__len__"): # if tuple or list
2727
raise ValueError(
28-
"min_count is not available for reduction " "with more than one dimensions."
28+
"min_count is not available for reduction with more than one dimensions."
2929
)
3030

3131
if axis is not None and getattr(result, "ndim", False):

xarray/core/ops.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
347347

348348
inject_reduce_methods(cls)
349349
inject_cum_methods(cls)
350-
351-
352-
def inject_coarsen_methods(cls):
353-
# standard numpy reduce methods
354-
methods = [(name, getattr(duck_array_ops, name)) for name in NAN_REDUCE_METHODS]
355-
for name, f in methods:
356-
func = cls._reduce_method(f)
357-
func.__name__ = name
358-
func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(name=func.__name__)
359-
setattr(cls, name, func)

xarray/core/rolling.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import functools
22
import warnings
3-
from typing import Callable
3+
from typing import Any, Callable, Dict
44

55
import numpy as np
66

77
from . import dtypes, duck_array_ops, utils
88
from .dask_array_ops import dask_rolling_wrapper
9-
from .ops import inject_coarsen_methods
9+
from .ops import inject_reduce_methods
1010
from .pycompat import dask_array_type
1111

1212
try:
@@ -542,6 +542,11 @@ def __init__(self, obj, windows, boundary, side, coord_func):
542542
self.side = side
543543
self.boundary = boundary
544544

545+
absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
546+
if absent_dims:
547+
raise ValueError(
548+
f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
549+
)
545550
if not utils.is_dict_like(coord_func):
546551
coord_func = {d: coord_func for d in self.obj.dims}
547552
for c in self.obj.coords:
@@ -565,18 +570,23 @@ def __repr__(self):
565570
class DataArrayCoarsen(Coarsen):
566571
__slots__ = ()
567572

573+
_reduce_extra_args_docstring = """"""
574+
568575
@classmethod
569-
def _reduce_method(cls, func):
576+
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
570577
"""
571-
Return a wrapped function for injecting numpy methods.
572-
see ops.inject_coarsen_methods
578+
Return a wrapped function for injecting reduction methods.
579+
see ops.inject_reduce_methods
573580
"""
581+
kwargs: Dict[str, Any] = {}
582+
if include_skipna:
583+
kwargs["skipna"] = None
574584

575585
def wrapped_func(self, **kwargs):
576586
from .dataarray import DataArray
577587

578588
reduced = self.obj.variable.coarsen(
579-
self.windows, func, self.boundary, self.side
589+
self.windows, func, self.boundary, self.side, **kwargs
580590
)
581591
coords = {}
582592
for c, v in self.obj.coords.items():
@@ -585,7 +595,11 @@ def wrapped_func(self, **kwargs):
585595
else:
586596
if any(d in self.windows for d in v.dims):
587597
coords[c] = v.variable.coarsen(
588-
self.windows, self.coord_func[c], self.boundary, self.side
598+
self.windows,
599+
self.coord_func[c],
600+
self.boundary,
601+
self.side,
602+
**kwargs,
589603
)
590604
else:
591605
coords[c] = v
@@ -597,27 +611,36 @@ def wrapped_func(self, **kwargs):
597611
class DatasetCoarsen(Coarsen):
598612
__slots__ = ()
599613

614+
_reduce_extra_args_docstring = """"""
615+
600616
@classmethod
601-
def _reduce_method(cls, func):
617+
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
602618
"""
603-
Return a wrapped function for injecting numpy methods.
604-
see ops.inject_coarsen_methods
619+
Return a wrapped function for injecting reduction methods.
620+
see ops.inject_reduce_methods
605621
"""
622+
kwargs: Dict[str, Any] = {}
623+
if include_skipna:
624+
kwargs["skipna"] = None
606625

607626
def wrapped_func(self, **kwargs):
608627
from .dataset import Dataset
609628

610629
reduced = {}
611630
for key, da in self.obj.data_vars.items():
612631
reduced[key] = da.variable.coarsen(
613-
self.windows, func, self.boundary, self.side
632+
self.windows, func, self.boundary, self.side, **kwargs
614633
)
615634

616635
coords = {}
617636
for c, v in self.obj.coords.items():
618637
if any(d in self.windows for d in v.dims):
619638
coords[c] = v.variable.coarsen(
620-
self.windows, self.coord_func[c], self.boundary, self.side
639+
self.windows,
640+
self.coord_func[c],
641+
self.boundary,
642+
self.side,
643+
**kwargs,
621644
)
622645
else:
623646
coords[c] = v.variable
@@ -626,5 +649,5 @@ def wrapped_func(self, **kwargs):
626649
return wrapped_func
627650

628651

629-
inject_coarsen_methods(DataArrayCoarsen)
630-
inject_coarsen_methods(DatasetCoarsen)
652+
inject_reduce_methods(DataArrayCoarsen)
653+
inject_reduce_methods(DatasetCoarsen)

xarray/core/variable.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1864,9 +1864,9 @@ def rolling_window(
18641864
),
18651865
)
18661866

1867-
def coarsen(self, windows, func, boundary="exact", side="left"):
1867+
def coarsen(self, windows, func, boundary="exact", side="left", **kwargs):
18681868
"""
1869-
Apply
1869+
Apply reduction function.
18701870
"""
18711871
windows = {k: v for k, v in windows.items() if k in self.dims}
18721872
if not windows:
@@ -1878,11 +1878,11 @@ def coarsen(self, windows, func, boundary="exact", side="left"):
18781878
func = getattr(duck_array_ops, name, None)
18791879
if func is None:
18801880
raise NameError(f"{name} is not a valid method.")
1881-
return type(self)(self.dims, func(reshaped, axis=axes), self._attrs)
1881+
return self._replace(data=func(reshaped, axis=axes, **kwargs))
18821882

18831883
def _coarsen_reshape(self, windows, boundary, side):
18841884
"""
1885-
Construct a reshaped-array for corsen
1885+
Construct a reshaped-array for coarsen
18861886
"""
18871887
if not utils.is_dict_like(boundary):
18881888
boundary = {d: boundary for d in windows.keys()}

xarray/tests/test_dataset.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5497,6 +5497,11 @@ def ds(request):
54975497
)
54985498

54995499

5500+
def test_coarsen_absent_dims_error(ds):
5501+
with raises_regex(ValueError, "not found in Dataset."):
5502+
ds.coarsen(foo=2)
5503+
5504+
55005505
@pytest.mark.parametrize("dask", [True, False])
55015506
@pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")])
55025507
def test_coarsen(ds, dask, boundary, side):
@@ -5505,12 +5510,11 @@ def test_coarsen(ds, dask, boundary, side):
55055510

55065511
actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max()
55075512
assert_equal(
5508-
actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary=boundary, side=side).max()
5513+
actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max()
55095514
)
55105515
# coordinate should be mean by default
55115516
assert_equal(
5512-
actual["time"],
5513-
ds["time"].coarsen(time=2, x=3, boundary=boundary, side=side).mean(),
5517+
actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean()
55145518
)
55155519

55165520

@@ -5521,8 +5525,8 @@ def test_coarsen_coords(ds, dask):
55215525

55225526
# check if coord_func works
55235527
actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max()
5524-
assert_equal(actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary="trim").max())
5525-
assert_equal(actual["time"], ds["time"].coarsen(time=2, x=3, boundary="trim").max())
5528+
assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max())
5529+
assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max())
55265530

55275531
# raise if exact
55285532
with pytest.raises(ValueError):

xarray/tests/test_variable.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1833,6 +1833,26 @@ def test_coarsen_2d(self):
18331833
expected[1, 1] *= 12 / 11
18341834
assert_allclose(actual, expected)
18351835

1836+
v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4))
1837+
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
1838+
expected = self.cls(("x", "y"), 4 * np.ones((2, 2)))
1839+
assert_equal(actual, expected)
1840+
1841+
v[0, 0] = np.nan
1842+
v[-1, -1] = np.nan
1843+
expected[0, 0] = 3
1844+
expected[-1, -1] = 3
1845+
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
1846+
assert_equal(actual, expected)
1847+
1848+
actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False)
1849+
expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]])
1850+
assert_equal(actual, expected)
1851+
1852+
actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True)
1853+
expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
1854+
assert_equal(actual, expected)
1855+
18361856

18371857
@requires_dask
18381858
class TestVariableWithDask(VariableSubclassobjects):

0 commit comments

Comments
 (0)