Skip to content

Commit b054c31

Browse files
huard authored and shoyer committed
Add quantile method to GroupBy (#2828)
* implement groupby.quantile + tests
* added quantile method in whats-new
* mark additional test as xfail
* lint fix
* simpler version of groupby.quantile
* added quantile methods to api.rst
* included DEFAULT_DIMS handling in quantile method
* clarified groupby tests
* added test with more typical use case
* pep8
* removed failing test
1 parent cfd8210 commit b054c31

File tree

4 files changed

+123
-3
lines changed

4 files changed

+123
-3
lines changed

doc/api.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ Computation
190190
:py:attr:`~core.groupby.DatasetGroupBy.last`
191191
:py:attr:`~core.groupby.DatasetGroupBy.fillna`
192192
:py:attr:`~core.groupby.DatasetGroupBy.where`
193+
:py:attr:`~core.groupby.DatasetGroupBy.quantile`
193194

194195
Reshaping and reorganizing
195196
--------------------------
@@ -362,7 +363,7 @@ Computation
362363
:py:attr:`~core.groupby.DataArrayGroupBy.last`
363364
:py:attr:`~core.groupby.DataArrayGroupBy.fillna`
364365
:py:attr:`~core.groupby.DataArrayGroupBy.where`
365-
366+
:py:attr:`~core.groupby.DataArrayGroupBy.quantile`
366367

367368
Reshaping and reorganizing
368369
--------------------------

doc/whats-new.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ v0.12.2 (unreleased)
2121
Enhancements
2222
~~~~~~~~~~~~
2323

24-
24+
- New :py:meth:`~xarray.GroupBy.quantile` method. (:issue:`3018`)
25+
By `David Huard <https://github.com/huard>`_.
2526
- Add ``keepdims`` argument for reduce operations (:issue:`2170`)
2627
By `Scott Wales <https://github.com/ScottWales>`_.
2728
- netCDF chunksizes are now only dropped when original_shape is different,
@@ -90,7 +91,7 @@ Bug fixes
9091
By `Maximilian Roos <https://github.com/max-sixty>`_.
9192
- Fixed performance issues with cftime installed (:issue:`3000`)
9293
By `0x0L <https://github.com/0x0L>`_.
93-
- Replace incorrect usages of `message` in pytest assertions
94+
- Replace incorrect usages of `message` in pytest assertions
9495
with `match` (:issue:`3011`)
9596
By `Maximilian Roos <https://github.com/max-sixty>`_.
9697
- Add explicit pytest markers, now required by pytest

xarray/core/groupby.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,64 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False):
595595
combined = self._maybe_unstack(combined)
596596
return combined
597597

598+
def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
    """Compute the qth quantile over each array in the groups and
    concatenate them together into a new array.

    Parameters
    ----------
    q : float in range of [0,1] (or sequence of floats)
        Quantile to compute, which must be between 0 and 1
        inclusive.
    dim : str or sequence of str, optional
        Dimension(s) over which to apply quantile. The value is
        forwarded unchanged to the ``quantile`` method of the grouped
        object's class for every group, so ``None`` (the default) lets
        that method apply its own default. Passing the ``DEFAULT_DIMS``
        sentinel is treated as ``ALL_DIMS`` and, for grouped objects
        with more than one dimension, emits a ``FutureWarning``.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        This optional parameter specifies the interpolation method to
        use when the desired quantile lies between two data points
        ``i < j``:

            * linear: ``i + (j - i) * fraction``, where ``fraction`` is
              the fractional part of the index surrounded by ``i`` and
              ``j``.
            * lower: ``i``.
            * higher: ``j``.
            * nearest: ``i`` or ``j``, whichever is nearest.
            * midpoint: ``(i + j) / 2``.
    keep_attrs : bool, optional
        Forwarded unchanged to the underlying ``quantile`` method.

    Returns
    -------
    quantiles : combined result of the per-group ``quantile`` calls
        If `q` is a single quantile, the ``quantile`` coordinate is
        dropped from the result. If multiple quantiles are given, the
        result keeps a ``quantile`` dimension indexed by the requested
        values. The other dimensions are the dimensions that remain
        after the reduction of each group.

    See Also
    --------
    numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile,
    DataArray.quantile
    """
    # DEFAULT_DIMS is a sentinel object: test identity, not equality, so
    # that array-like ``dim`` values never trigger element-wise comparison.
    if dim is DEFAULT_DIMS:
        dim = ALL_DIMS
        # TODO change this to dim = self._group_dim after
        # the deprecation process
        if self._obj.ndim > 1:
            warnings.warn(
                "Default reduction dimension will be changed to the "
                "grouped dimension in a future version of xarray. To "
                "silence this warning, pass dim=xarray.ALL_DIMS "
                "explicitly.",
                FutureWarning, stacklevel=2)

    # Call the unbound ``quantile`` of the grouped object's class on each
    # group and let ``apply`` combine the pieces.
    out = self.apply(self._obj.__class__.quantile, shortcut=False,
                     q=q, dim=dim, interpolation=interpolation,
                     keep_attrs=keep_attrs)

    # A scalar ``q`` still leaves a 'quantile' coordinate behind; remove it
    # so only sequence-valued ``q`` produces a 'quantile' entry.
    if np.asarray(q, dtype=np.float64).ndim == 0:
        out = out.drop('quantile')
    return out
655+
598656
def reduce(self, func, dim=None, axis=None, keep_attrs=None,
599657
shortcut=True, **kwargs):
600658
"""Reduce the items in this group by applying `func` along some

xarray/tests/test_groupby.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,64 @@ def func(arg1, arg2, arg3=0):
105105
assert_identical(expected, actual)
106106

107107

108+
def test_da_groupby_quantile():
    """Exercise GroupBy.quantile on 1-D, 2-D and time-indexed arrays."""
    # --- 1-D array: two groups of three values along 'x' ---------------
    da_1d = xr.DataArray([1, 2, 3, 4, 5, 6],
                         [('x', [1, 1, 1, 2, 2, 2])])

    # Scalar quantile
    median_actual = da_1d.groupby('x').quantile(0.5)
    median_expected = xr.DataArray([2, 5], [('x', [1, 2])])
    assert_identical(median_expected, median_actual)

    # Vector quantile
    minmax_actual = da_1d.groupby('x').quantile([0, 1])
    minmax_expected = xr.DataArray(
        [[1, 3], [4, 6]],
        [('x', [1, 2]), ('quantile', [0, 1])])
    assert_identical(minmax_expected, minmax_actual)

    # --- Multiple dimensions -------------------------------------------
    da_2d = xr.DataArray(
        [[1, 11, 26],
         [2, 12, 22],
         [3, 13, 23],
         [4, 16, 24],
         [5, 15, 25]],
        [('x', [1, 1, 1, 2, 2]),
         ('y', [0, 0, 1])])

    # No ``dim`` given: each group collapses to a single value.
    actual_x = da_2d.groupby('x').quantile(0)
    expected_x = xr.DataArray([1, 4], [('x', [1, 2])])
    assert_identical(expected_x, actual_x)

    actual_y = da_2d.groupby('y').quantile(0)
    expected_y = xr.DataArray([1, 22], [('y', [0, 1])])
    assert_identical(expected_y, actual_y)

    # Explicit ``dim``: only the grouped dimension is reduced.
    actual_xx = da_2d.groupby('x').quantile(0, dim='x')
    expected_xx = xr.DataArray(
        [[1, 11, 22], [4, 15, 24]],
        [('x', [1, 2]), ('y', [0, 0, 1])])
    assert_identical(expected_xx, actual_xx)

    actual_yy = da_2d.groupby('y').quantile(0, dim='y')
    expected_yy = xr.DataArray(
        [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]],
        [('x', [1, 1, 1, 2, 2]), ('y', [0, 1])])
    assert_identical(expected_yy, actual_yy)

    # --- More typical use: daily data grouped by calendar month --------
    times = pd.date_range('2000-01-01', periods=365)
    x = [0, 1]
    daily = xr.DataArray(np.reshape(np.arange(365 * 2), (365, 2)),
                         coords=dict(time=times, x=x), dims=('time', 'x'))
    by_month = daily.groupby(daily.time.dt.month)

    monthly_min = by_month.quantile(0)
    monthly_min_expected = xr.DataArray(
        [0., 62., 120., 182., 242., 304.,
         364., 426., 488., 548., 610., 670.],
        [('month', np.arange(1, 13))])
    assert_identical(monthly_min_expected, monthly_min)

    first_two_months = by_month.quantile(0, dim='time')[:2]
    first_two_expected = xr.DataArray(
        [[0., 1], [62., 63]],
        [('month', [1, 2]), ('x', [0, 1])])
    assert_identical(first_two_expected, first_two_months)
166+
167+
108168
# TODO: move other groupby tests from test_dataset and test_dataarray over here

0 commit comments

Comments
 (0)