Skip to content

Commit 2b1b3da

Browse files
fujiaxiang authored and TomAugspurger committed
BUG: pct_change wrong result when there are duplicated indices (#30526)
* BUG: pct_change wrong result when there are duplicated indices (GH30463)
1 parent 8806ed7 commit 2b1b3da

File tree

4 files changed: 33 additions (+33) and 5 deletions (-5)

doc/source/whatsnew/v1.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -830,6 +830,7 @@ Numeric
830830
- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be cast to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`)
831831
- Bug in :meth:`Series.interpolate` where using ``method='index'`` with an unsorted index would return incorrect results (:issue:`21037`)
832832
- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`)
833+
- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` returning incorrect results when the index contains duplicated labels (:issue:`30463`)
833834
- Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`)
834835

835836
Conversion

pandas/core/generic.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9857,11 +9857,11 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar
98579857
data = self.fillna(method=fill_method, limit=limit, axis=axis)
98589858

98599859
rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1
9860-
rs = rs.loc[~rs.index.duplicated()]
9861-
rs = rs.reindex_like(data)
9862-
if freq is None:
9863-
mask = isna(com.values_from_object(data))
9864-
np.putmask(rs.values, mask, np.nan)
9860+
if freq is not None:
9861+
# Shift method is implemented differently when freq is not None
9862+
# We want to restore the original index
9863+
rs = rs.loc[~rs.index.duplicated()]
9864+
rs = rs.reindex_like(data)
98659865
return rs
98669866

98679867
def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):

pandas/tests/frame/methods/test_pct_change.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,3 +76,21 @@ def test_pct_change_periods_freq(
7676
rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit)
7777
rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit)
7878
tm.assert_frame_equal(rs_freq, rs_periods)
79+
80+
81+
@pytest.mark.parametrize("fill_method", ["pad", "ffill", None])
82+
def test_pct_change_with_duplicated_indices(fill_method):
83+
# GH30463
84+
data = DataFrame(
85+
{0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3
86+
)
87+
result = data.pct_change(fill_method=fill_method)
88+
if fill_method is None:
89+
second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0]
90+
else:
91+
second_column = [np.nan, np.inf, 0.0, 2.0, 2.0, 1.0]
92+
expected = DataFrame(
93+
{0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], 1: second_column},
94+
index=["a", "b"] * 3,
95+
)
96+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_pct_change.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,3 +68,12 @@ def test_pct_change_periods_freq(
6868
rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit)
6969
rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit)
7070
tm.assert_series_equal(rs_freq, rs_periods)
71+
72+
73+
@pytest.mark.parametrize("fill_method", ["pad", "ffill", None])
74+
def test_pct_change_with_duplicated_indices(fill_method):
75+
# GH30463
76+
s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3)
77+
result = s.pct_change(fill_method=fill_method)
78+
expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3)
79+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)