From 59355d68122d3f1b1f5a8186b320f46c08d9bfd5 Mon Sep 17 00:00:00 2001 From: Pablo Ambrosio Date: Thu, 15 Mar 2018 20:13:15 -0300 Subject: [PATCH 1/7] DOC: update pandas.core.groupby.DataFrameGroupBy.resample docstring. --- pandas/core/groupby/groupby.py | 89 +++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7c89cab6b1428..28fabd8918455 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1460,8 +1460,93 @@ def describe(self, **kwargs): @Appender(_doc_template) def resample(self, rule, *args, **kwargs): """ - Provide resampling when using a TimeGrouper - Return a new grouper with our resampler appended + Provide resampling when using a TimeGrouper. + + Given a grouper the function resamples it according to a string and an + optional list and dictionary of parameters. Returns a new grouper with + our resampler appended. + + Parameters + ---------- + rule : str + The offset string or object representing target grouper conversion. + *args + These parameters will be passed to the get_resampler_for_grouping + function which builds the approriate resampler and checks for + deprecated parameters. + **kwargs + These parameters will be passed to the get_resampler_for_grouping + function which builds the approriate resampler and checks for + deprecated parameters. + + Returns + ------- + Grouper + Return a new grouper with our resampler appended. + + Examples + -------- + Start by creating a DataFrame with 9 one minute timestamps. + + >>> idx = pd.date_range('1/1/2000', periods=9, freq='T') + >>> df = pd.DataFrame(data=9*[range(4)], + ... index=idx, + ... 
columns=['a', 'b', 'c', 'd']) + >>> df.iloc[[6], [0]] = 5 # change a value for grouping + >>> df + a b c d + 2000-01-01 00:00:00 0 1 2 3 + 2000-01-01 00:01:00 0 1 2 3 + 2000-01-01 00:02:00 0 1 2 3 + 2000-01-01 00:03:00 0 1 2 3 + 2000-01-01 00:04:00 0 1 2 3 + 2000-01-01 00:05:00 0 1 2 3 + 2000-01-01 00:06:00 5 1 2 3 + 2000-01-01 00:07:00 0 1 2 3 + 2000-01-01 00:08:00 0 1 2 3 + + Downsample the DataFrame into 3 minute bins and sum the values of + the timestamps falling into a bin. + + >>> df.groupby('a').resample('3T').sum() + a b c d + a + 0 2000-01-01 00:00:00 0 3 6 9 + 2000-01-01 00:03:00 0 3 6 9 + 2000-01-01 00:06:00 0 2 4 6 + 5 2000-01-01 00:06:00 5 1 2 3 + + Upsample the series into 30 second bins. + + >>> df.groupby('a').resample('30S').sum() + a b c d + a + 0 2000-01-01 00:00:00 0 1 2 3 + 2000-01-01 00:00:30 0 0 0 0 + 2000-01-01 00:01:00 0 1 2 3 + 2000-01-01 00:01:30 0 0 0 0 + 2000-01-01 00:02:00 0 1 2 3 + 2000-01-01 00:02:30 0 0 0 0 + 2000-01-01 00:03:00 0 1 2 3 + 2000-01-01 00:03:30 0 0 0 0 + 2000-01-01 00:04:00 0 1 2 3 + 2000-01-01 00:04:30 0 0 0 0 + 2000-01-01 00:05:00 0 1 2 3 + 2000-01-01 00:05:30 0 0 0 0 + 2000-01-01 00:06:00 0 0 0 0 + 2000-01-01 00:06:30 0 0 0 0 + 2000-01-01 00:07:00 0 1 2 3 + 2000-01-01 00:07:30 0 0 0 0 + 2000-01-01 00:08:00 0 1 2 3 + 5 2000-01-01 00:06:00 5 1 2 3 + + Resample by month. Values are assigned to the month of the period. + + >>> df.groupby('a').resample('M').sum() + a b c d + a + 0 2000-01-31 0 8 16 24 + 5 2000-01-31 5 1 2 3 """ from pandas.core.resample import get_resampler_for_grouping return get_resampler_for_grouping(self, rule, *args, **kwargs) From deea3c7e882d58eb182b01e863dbbf32e6575b5c Mon Sep 17 00:00:00 2001 From: Pablo Ambrosio Date: Mon, 19 Mar 2018 22:59:03 -0300 Subject: [PATCH 2/7] DOC: update pandas.core.groupby.DataFrameGroupBy.resample docstring. Better summary and parameters description. 
--- pandas/core/groupby/groupby.py | 75 +++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 28fabd8918455..be416f83ffdf8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1462,22 +1462,25 @@ def resample(self, rule, *args, **kwargs): """ Provide resampling when using a TimeGrouper. - Given a grouper the function resamples it according to a string and an - optional list and dictionary of parameters. Returns a new grouper with - our resampler appended. + Given a grouper the function resamples it according to a string + "string" -> "frequency". + + See the :ref:`frequency aliases <timeseries.offset_aliases>` + documentation for more details. Parameters ---------- - rule : str + rule : str or Offset The offset string or object representing target grouper conversion. - *args - These parameters will be passed to the get_resampler_for_grouping - function which builds the approriate resampler and checks for - deprecated parameters. - **kwargs - These parameters will be passed to the get_resampler_for_grouping - function which builds the approriate resampler and checks for - deprecated parameters. + *args, **kwargs + For compatibility with other groupby methods. See below for some + example parameters. + closed : {‘right’, ‘left’} + Which side of bin interval is closed. + label : {‘right’, ‘left’} + Which bin edge label to label bucket with. + loffset : timedelta + Adjust the resampled time labels. Returns ------- @@ -1486,13 +1489,13 @@ def resample(self, rule, *args, **kwargs): Examples -------- - Start by creating a DataFrame with 9 one minute timestamps. + Start by creating a length-9 DataFrame with minute frequency. >>> idx = pd.date_range('1/1/2000', periods=9, freq='T') - >>> df = pd.DataFrame(data=9*[range(4)], + >>> df = pd.DataFrame(data=9 * [range(4)], ... index=idx, ... 
columns=['a', 'b', 'c', 'd']) - >>> df.iloc[[6], [0]] = 5 # change a value for grouping + >>> df.iloc[6, 0] = 5 >>> df a b c d 2000-01-01 00:00:00 0 1 2 3 @@ -1547,6 +1550,48 @@ def resample(self, rule, *args, **kwargs): a 0 2000-01-31 0 8 16 24 5 2000-01-31 5 1 2 3 + + Downsample the series into 3 minute bins as above, but close the right + side of the bin interval. + + >>> df.groupby('a').resample('3T', closed='right').sum() + a b c d + a + 0 1999-12-31 23:57:00 0 1 2 3 + 2000-01-01 00:00:00 0 3 6 9 + 2000-01-01 00:03:00 0 2 4 6 + 2000-01-01 00:06:00 0 2 4 6 + 5 2000-01-01 00:03:00 5 1 2 3 + + Downsample the series into 3 minute bins and close the right side of + the bin interval, but label each bin using the right edge instead of + the left. + + >>> df.groupby('a').resample('3T', closed='right', label='right').sum() + a b c d + a + 0 2000-01-01 00:00:00 0 1 2 3 + 2000-01-01 00:03:00 0 3 6 9 + 2000-01-01 00:06:00 0 2 4 6 + 2000-01-01 00:09:00 0 2 4 6 + 5 2000-01-01 00:06:00 5 1 2 3 + + Add an offset of twenty seconds. + + >>> df.groupby('a').resample('3T', loffset='20s').sum() + a b c d + a + 0 2000-01-01 00:00:20 0 3 6 9 + 2000-01-01 00:03:20 0 3 6 9 + 2000-01-01 00:06:20 0 2 4 6 + 5 2000-01-01 00:06:20 5 1 2 3 + + See Also + -------- + pandas.Grouper : specify a frequency to resample with when + grouping by a key. + DatetimeIndex.resample : Frequency conversion and resampling of + time series. """ from pandas.core.resample import get_resampler_for_grouping return get_resampler_for_grouping(self, rule, *args, **kwargs) From 2fcabf7a0b6763a80dce7503f01855cf83dd5c5d Mon Sep 17 00:00:00 2001 From: Pablo Ambrosio Date: Wed, 22 Aug 2018 15:55:18 -0300 Subject: [PATCH 3/7] DOC: update pandas.core.groupby.DataFrameGroupBy.resample docstring. Smaller sample. Kwargs review. See also review. 
--- pandas/core/groupby/groupby.py | 113 ++++++++++++++------------------- 1 file changed, 46 insertions(+), 67 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index be416f83ffdf8..a3d3e70e75ceb 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1472,7 +1472,7 @@ def resample(self, rule, *args, **kwargs): ---------- rule : str or Offset The offset string or object representing target grouper conversion. - *args, **kwargs + *args, **kwargs : [closed, label, loffset] For compatibility with other groupby methods. See below for some example parameters. closed : {‘right’, ‘left’} @@ -1487,111 +1487,90 @@ def resample(self, rule, *args, **kwargs): Grouper Return a new grouper with our resampler appended. + See Also + -------- + pandas.Grouper : specify a frequency to resample with when + grouping by a key. + DatetimeIndex.resample : Frequency conversion and resampling of + time series. + Examples -------- - Start by creating a length-9 DataFrame with minute frequency. + Start by creating a length-4 DataFrame with minute frequency. - >>> idx = pd.date_range('1/1/2000', periods=9, freq='T') - >>> df = pd.DataFrame(data=9 * [range(4)], + >>> idx = pd.date_range('1/1/2000', periods=4, freq='T') + >>> df = pd.DataFrame(data=4 * [range(2)], ... index=idx, - ... columns=['a', 'b', 'c', 'd']) - >>> df.iloc[6, 0] = 5 + ... columns=['a', 'b']) + >>> df.iloc[2, 0] = 5 >>> df - a b c d - 2000-01-01 00:00:00 0 1 2 3 - 2000-01-01 00:01:00 0 1 2 3 - 2000-01-01 00:02:00 0 1 2 3 - 2000-01-01 00:03:00 0 1 2 3 - 2000-01-01 00:04:00 0 1 2 3 - 2000-01-01 00:05:00 0 1 2 3 - 2000-01-01 00:06:00 5 1 2 3 - 2000-01-01 00:07:00 0 1 2 3 - 2000-01-01 00:08:00 0 1 2 3 + a b + 2000-01-01 00:00:00 0 1 + 2000-01-01 00:01:00 0 1 + 2000-01-01 00:02:00 5 1 + 2000-01-01 00:03:00 0 1 Downsample the DataFrame into 3 minute bins and sum the values of the timestamps falling into a bin. 
>>> df.groupby('a').resample('3T').sum() - a b c d + a b a - 0 2000-01-01 00:00:00 0 3 6 9 - 2000-01-01 00:03:00 0 3 6 9 - 2000-01-01 00:06:00 0 2 4 6 - 5 2000-01-01 00:06:00 5 1 2 3 + 0 2000-01-01 00:00:00 0 2 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:00:00 5 1 Upsample the series into 30 second bins. >>> df.groupby('a').resample('30S').sum() - a b c d + a b a - 0 2000-01-01 00:00:00 0 1 2 3 - 2000-01-01 00:00:30 0 0 0 0 - 2000-01-01 00:01:00 0 1 2 3 - 2000-01-01 00:01:30 0 0 0 0 - 2000-01-01 00:02:00 0 1 2 3 - 2000-01-01 00:02:30 0 0 0 0 - 2000-01-01 00:03:00 0 1 2 3 - 2000-01-01 00:03:30 0 0 0 0 - 2000-01-01 00:04:00 0 1 2 3 - 2000-01-01 00:04:30 0 0 0 0 - 2000-01-01 00:05:00 0 1 2 3 - 2000-01-01 00:05:30 0 0 0 0 - 2000-01-01 00:06:00 0 0 0 0 - 2000-01-01 00:06:30 0 0 0 0 - 2000-01-01 00:07:00 0 1 2 3 - 2000-01-01 00:07:30 0 0 0 0 - 2000-01-01 00:08:00 0 1 2 3 - 5 2000-01-01 00:06:00 5 1 2 3 + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:00:30 0 0 + 2000-01-01 00:01:00 0 1 + 2000-01-01 00:01:30 0 0 + 2000-01-01 00:02:00 0 0 + 2000-01-01 00:02:30 0 0 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:02:00 5 1 Resample by month. Values are assigned to the month of the period. >>> df.groupby('a').resample('M').sum() - a b c d + a b a - 0 2000-01-31 0 8 16 24 - 5 2000-01-31 5 1 2 3 + 0 2000-01-31 0 3 + 5 2000-01-31 5 1 Downsample the series into 3 minute bins as above, but close the right side of the bin interval. >>> df.groupby('a').resample('3T', closed='right').sum() - a b c d + a b a - 0 1999-12-31 23:57:00 0 1 2 3 - 2000-01-01 00:00:00 0 3 6 9 - 2000-01-01 00:03:00 0 2 4 6 - 2000-01-01 00:06:00 0 2 4 6 - 5 2000-01-01 00:03:00 5 1 2 3 + 0 1999-12-31 23:57:00 0 1 + 2000-01-01 00:00:00 0 2 + 5 2000-01-01 00:00:00 5 1 Downsample the series into 3 minute bins and close the right side of the bin interval, but label each bin using the right edge instead of the left. 
>>> df.groupby('a').resample('3T', closed='right', label='right').sum() - a b c d + a b a - 0 2000-01-01 00:00:00 0 1 2 3 - 2000-01-01 00:03:00 0 3 6 9 - 2000-01-01 00:06:00 0 2 4 6 - 2000-01-01 00:09:00 0 2 4 6 - 5 2000-01-01 00:06:00 5 1 2 3 + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:03:00 0 2 + 5 2000-01-01 00:03:00 5 1 Add an offset of twenty seconds. >>> df.groupby('a').resample('3T', loffset='20s').sum() - a b c d + a b a - 0 2000-01-01 00:00:20 0 3 6 9 - 2000-01-01 00:03:20 0 3 6 9 - 2000-01-01 00:06:20 0 2 4 6 - 5 2000-01-01 00:06:20 5 1 2 3 - - See Also - -------- - pandas.Grouper : specify a frequency to resample with when - grouping by a key. - DatetimeIndex.resample : Frequency conversion and resampling of - time series. + 0 2000-01-01 00:00:20 0 2 + 2000-01-01 00:03:20 0 1 + 5 2000-01-01 00:00:20 5 1 """ from pandas.core.resample import get_resampler_for_grouping return get_resampler_for_grouping(self, rule, *args, **kwargs) From 4b55b5fd51447c43ab380cd7027860994c77bec9 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 3 Nov 2018 06:02:39 +0000 Subject: [PATCH 4/7] Minor fixes --- pandas/core/groupby/groupby.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5245aa3faaacd..2c1b45d40b4b6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1294,13 +1294,11 @@ def describe(self, **kwargs): return result.T return result.unstack() - @Substitution(name='groupby') - @Appender(_doc_template) def resample(self, rule, *args, **kwargs): """ Provide resampling when using a TimeGrouper. - Given a grouper the function resamples it according to a string + Given a grouper, the function resamples it according to a string "string" -> "frequency". 
See the :ref:`frequency aliases <timeseries.offset_aliases>` @@ -1310,15 +1308,16 @@ def resample(self, rule, *args, **kwargs): ---------- rule : str or Offset The offset string or object representing target grouper conversion. - *args, **kwargs : [closed, label, loffset] - For compatibility with other groupby methods. See below for some - example parameters. - closed : {‘right’, ‘left’} - Which side of bin interval is closed. - label : {‘right’, ‘left’} - Which bin edge label to label bucket with. - loffset : timedelta - Adjust the resampled time labels. + *args, **kwargs + For compatibility with other groupby methods. Available keyword + arguments are: + + * closed : {'right', 'left'} + Which side of bin interval is closed. + * label : {'right', 'left'} + Which bin edge label to label bucket with. + * loffset : timedelta + Adjust the resampled time labels. Returns ------- @@ -1327,15 +1326,13 @@ def resample(self, rule, *args, **kwargs): See Also -------- - pandas.Grouper : specify a frequency to resample with when + pandas.Grouper : Specify a frequency to resample with when grouping by a key. DatetimeIndex.resample : Frequency conversion and resampling of time series. Examples -------- - Start by creating a length-4 DataFrame with minute frequency. - >>> idx = pd.date_range('1/1/2000', periods=4, freq='T') >>> df = pd.DataFrame(data=4 * [range(2)], ... index=idx, From e7905b240b72801817474a2fdb404e578f7e9533 Mon Sep 17 00:00:00 2001 From: Pablo Ambrosio Date: Sat, 3 Nov 2018 23:27:44 -0300 Subject: [PATCH 5/7] Replace Offset with DateOffset. 
--- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a3d3e70e75ceb..d92bf2261caed 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1470,7 +1470,7 @@ def resample(self, rule, *args, **kwargs): Parameters ---------- - rule : str or Offset + rule : str or DateOffset The offset string or object representing target grouper conversion. *args, **kwargs : [closed, label, loffset] For compatibility with other groupby methods. See below for some From 4880d3b448325dc31887ceba764ebe09055d0ea1 Mon Sep 17 00:00:00 2001 From: Pablo Ambrosio Date: Sun, 11 Nov 2018 19:35:05 -0300 Subject: [PATCH 6/7] Change args and kwargs description. --- pandas/core/groupby/groupby.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f50a9eea87d10..00243510ffc76 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1309,15 +1309,9 @@ def resample(self, rule, *args, **kwargs): rule : str or DateOffset The offset string or object representing target grouper conversion. *args, **kwargs - For compatibility with other groupby methods. Available keyword - arguments are: - - * closed : {'right', 'left'} - Which side of bin interval is closed. - * label : {'right', 'left'} - Which bin edge label to label bucket with. - * loffset : timedelta - Adjust the resampled time labels. + For compatibility with other groupby methods. Possible arguments + are `how`, `fill_method`, `limit`, `kind` and `on`, and other + arguments of `TimeGrouper`. Returns ------- From 084ac398581535544e33858d0eae3f8f51519383 Mon Sep 17 00:00:00 2001 From: Pablo Ambrosio Date: Sun, 11 Nov 2018 19:38:25 -0300 Subject: [PATCH 7/7] Change args and kwargs description. 
--- pandas/core/groupby/groupby.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 00243510ffc76..9ca3486df3050 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1309,9 +1309,8 @@ def resample(self, rule, *args, **kwargs): rule : str or DateOffset The offset string or object representing target grouper conversion. *args, **kwargs - For compatibility with other groupby methods. Possible arguments - are `how`, `fill_method`, `limit`, `kind` and `on`, and other - arguments of `TimeGrouper`. + Possible arguments are `how`, `fill_method`, `limit`, `kind` and + `on`, and other arguments of `TimeGrouper`. Returns -------