diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f42825a11933b..7a5770d3968ec 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1793,13 +1793,13 @@ def indices(self): @cache_readonly def group_info(self): ngroups = self.ngroups - obs_group_ids = np.arange(ngroups) + obs_group_ids = np.arange(ngroups, dtype='int64') rep = np.diff(np.r_[0, self.bins]) if ngroups == len(self.bins): - comp_ids = np.repeat(np.arange(ngroups), rep) + comp_ids = np.repeat(np.arange(ngroups, dtype='int64'), rep) else: - comp_ids = np.repeat(np.r_[-1, np.arange(ngroups)], rep) + comp_ids = np.repeat(np.r_[-1, np.arange(ngroups, dtype='int64')], rep) return comp_ids, obs_group_ids, ngroups @@ -2552,8 +2552,8 @@ def nunique(self, dropna=True): # group boundries are where group ids change # unique observations are where sorted values change - idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] - inc = np.r_[1, val[1:] != val[:-1]] + idx = com._ensure_int64(np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]) + inc = com._ensure_int64(np.r_[1, val[1:] != val[:-1]]) # 1st item of each group is a new unique observation mask = isnull(val) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 49d344631e4b9..ec03d558e45b8 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -919,7 +919,7 @@ def test_resample_timegrouper(self): def test_resample_group_info(self): # GH10914 for n, k in product((10000, 100000), (10, 100, 1000)): dr = date_range(start='2015-08-27', periods=n // 10, freq='T') - ts = Series(np.random.randint(0, n // k, n), + ts = Series(np.random.randint(0, n // k, n).astype('int64'), index=np.random.choice(dr, n)) left = ts.resample('30T', how='nunique') @@ -1585,7 +1585,7 @@ def test_aggregate_with_nat(self): # check TimeGrouper's aggregation is identical as normal groupby n = 20 - data = np.random.randn(n, 4) + data = np.random.randn(n, 4).astype('int64') normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) normal_df['key'] = [1, 2, np.nan, 4, 5] * 4