Skip to content

Commit bca5699

Browse files
committed
Merge pull request #8462 from immerrr/preserve-index-type-when-reindexing-to-empty
BUG: fix Index.reindex to preserve type when target is empty list/ndarray
2 parents d1c0271 + 8980295 commit bca5699

File tree

5 files changed

+108
-16
lines changed

5 files changed

+108
-16
lines changed

doc/source/v0.15.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,3 +1025,4 @@ Bug Fixes
10251025
- Bug in ``NDFrame.equals`` giving false negatives with ``dtype=object`` (:issue:`8437`)
10261026
- Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`)
10271027
- Bug in ``NDFrame.loc`` indexing where row/column names were lost when the target was a list/ndarray (:issue:`6552`)
1028+
- Regression in ``NDFrame.loc`` indexing where rows/columns were converted to ``Float64Index`` when the target was an empty list/ndarray (:issue:`7774`)

pandas/core/generic.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1707,15 +1707,11 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy):
17071707
if labels is None:
17081708
continue
17091709

1710-
# convert to an index if we are not a multi-selection
17111710
ax = self._get_axis(a)
1712-
if level is None:
1713-
labels = _ensure_index(labels)
1714-
1715-
axis = self._get_axis_number(a)
17161711
new_index, indexer = ax.reindex(
17171712
labels, level=level, limit=limit, method=method)
17181713

1714+
axis = self._get_axis_number(a)
17191715
obj = obj._reindex_with_indexers(
17201716
{axis: [new_index, indexer]}, method=method,
17211717
fill_value=fill_value, limit=limit, copy=copy,

pandas/core/index.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,7 +1594,14 @@ def reindex(self, target, method=None, level=None, limit=None):
15941594
# (i.e. neither Index nor Series).
15951595
preserve_names = not hasattr(target, 'name')
15961596

1597-
target = _ensure_index(target)
1597+
# GH7774: preserve dtype/tz if target is empty and not an Index.
1598+
target = _ensure_has_len(target) # target may be an iterator
1599+
if not isinstance(target, Index) and len(target) == 0:
1600+
attrs = self._get_attributes_dict()
1601+
attrs.pop('freq', None) # don't preserve freq
1602+
target = self._simple_new(np.empty(0, dtype=self.dtype), **attrs)
1603+
else:
1604+
target = _ensure_index(target)
15981605
if level is not None:
15991606
if method is not None:
16001607
raise TypeError('Fill method not supported if level passed')
@@ -3706,7 +3713,17 @@ def reindex(self, target, method=None, level=None, limit=None):
37063713
if level is not None:
37073714
if method is not None:
37083715
raise TypeError('Fill method not supported if level passed')
3709-
target = _ensure_index(target)
3716+
3717+
# GH7774: preserve dtype/tz if target is empty and not an Index.
3718+
target = _ensure_has_len(target) # target may be an iterator
3719+
if len(target) == 0 and not isinstance(target, Index):
3720+
idx = self.levels[level]
3721+
attrs = idx._get_attributes_dict()
3722+
attrs.pop('freq', None) # don't preserve freq
3723+
target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype),
3724+
**attrs)
3725+
else:
3726+
target = _ensure_index(target)
37103727
target, indexer, _ = self._join_level(target, level, how='right',
37113728
return_indexers=True)
37123729
else:
@@ -4566,3 +4583,13 @@ def _get_na_rep(dtype):
45664583
def _get_na_value(dtype):
45674584
return {np.datetime64: tslib.NaT, np.timedelta64: tslib.NaT}.get(dtype,
45684585
np.nan)
4586+
4587+
4588+
def _ensure_has_len(seq):
4589+
"""If seq is an iterator, put its values into a list."""
4590+
try:
4591+
len(seq)
4592+
except TypeError:
4593+
return list(seq)
4594+
else:
4595+
return seq

pandas/tests/test_index.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,34 @@ def test_reindex_preserves_name_if_target_is_list_or_ndarray(self):
10491049
self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar')
10501050
self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar')
10511051

1052+
def test_reindex_preserves_type_if_target_is_empty_list_or_array(self):
1053+
# GH7774
1054+
idx = pd.Index(list('abc'))
1055+
def get_reindex_type(target):
1056+
return idx.reindex(target)[0].dtype.type
1057+
1058+
self.assertEqual(get_reindex_type([]), np.object_)
1059+
self.assertEqual(get_reindex_type(np.array([])), np.object_)
1060+
self.assertEqual(get_reindex_type(np.array([], dtype=np.int64)),
1061+
np.object_)
1062+
1063+
def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self):
1064+
# GH7774
1065+
idx = pd.Index(list('abc'))
1066+
def get_reindex_type(target):
1067+
return idx.reindex(target)[0].dtype.type
1068+
1069+
self.assertEqual(get_reindex_type(pd.Int64Index([])), np.int_)
1070+
self.assertEqual(get_reindex_type(pd.Float64Index([])), np.float_)
1071+
self.assertEqual(get_reindex_type(pd.DatetimeIndex([])), np.datetime64)
1072+
1073+
reindexed = idx.reindex(pd.MultiIndex([pd.Int64Index([]),
1074+
pd.Float64Index([])],
1075+
[[], []]))[0]
1076+
self.assertEqual(reindexed.levels[0].dtype.type, np.int64)
1077+
self.assertEqual(reindexed.levels[1].dtype.type, np.float64)
1078+
1079+
10521080

10531081
class Numeric(Base):
10541082

@@ -1699,6 +1727,13 @@ def test_roundtrip_pickle_with_tz(self):
16991727
unpickled = self.round_trip_pickle(index)
17001728
self.assertTrue(index.equals(unpickled))
17011729

1730+
def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
1731+
# GH7774
1732+
index = date_range('20130101', periods=3, tz='US/Eastern')
1733+
self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern')
1734+
self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern')
1735+
1736+
17021737
class TestPeriodIndex(Base, tm.TestCase):
17031738
_holder = PeriodIndex
17041739
_multiprocess_can_split_ = True
@@ -3321,6 +3356,21 @@ def test_reindex_preserves_names_when_target_is_list_or_ndarray(self):
33213356
self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar'])
33223357
self.assertEqual(idx.reindex(other_dtype.values)[0].names, ['foo', 'bar'])
33233358

3359+
def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self):
3360+
# GH7774
3361+
idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']],
3362+
names=['foo', 'bar'])
3363+
self.assertEqual(idx.reindex([], level=0)[0].names, ['foo', 'bar'])
3364+
self.assertEqual(idx.reindex([], level=1)[0].names, ['foo', 'bar'])
3365+
3366+
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self):
3367+
# GH7774
3368+
idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
3369+
self.assertEqual(idx.reindex([], level=0)[0].levels[0].dtype.type,
3370+
np.int_)
3371+
self.assertEqual(idx.reindex([], level=1)[0].levels[1].dtype.type,
3372+
np.object_)
3373+
33243374

33253375
def test_get_combined_index():
33263376
from pandas.core.index import _get_combined_index

pandas/tests/test_indexing.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3832,23 +3832,41 @@ def test_set_ix_out_of_bounds_axis_1(self):
38323832
def test_iloc_empty_list_indexer_is_ok(self):
38333833
from pandas.util.testing import makeCustomDataframe as mkdf
38343834
df = mkdf(5, 2)
3835-
assert_frame_equal(df.iloc[:,[]], df.iloc[:, :0]) # vertical empty
3836-
assert_frame_equal(df.iloc[[],:], df.iloc[:0, :]) # horizontal empty
3837-
assert_frame_equal(df.iloc[[]], df.iloc[:0, :]) # horizontal empty
3835+
# vertical empty
3836+
assert_frame_equal(df.iloc[:, []], df.iloc[:, :0],
3837+
check_index_type=True, check_column_type=True)
3838+
# horizontal empty
3839+
assert_frame_equal(df.iloc[[], :], df.iloc[:0, :],
3840+
check_index_type=True, check_column_type=True)
3841+
# horizontal empty
3842+
assert_frame_equal(df.iloc[[]], df.iloc[:0, :],
3843+
check_index_type=True, check_column_type=True)
38383844

38393845
def test_loc_empty_list_indexer_is_ok(self):
38403846
from pandas.util.testing import makeCustomDataframe as mkdf
38413847
df = mkdf(5, 2)
3842-
assert_frame_equal(df.loc[:,[]], df.iloc[:, :0]) # vertical empty
3843-
assert_frame_equal(df.loc[[],:], df.iloc[:0, :]) # horizontal empty
3844-
assert_frame_equal(df.loc[[]], df.iloc[:0, :]) # horizontal empty
3848+
# vertical empty
3849+
assert_frame_equal(df.loc[:, []], df.iloc[:, :0],
3850+
check_index_type=True, check_column_type=True)
3851+
# horizontal empty
3852+
assert_frame_equal(df.loc[[], :], df.iloc[:0, :],
3853+
check_index_type=True, check_column_type=True)
3854+
# horizontal empty
3855+
assert_frame_equal(df.loc[[]], df.iloc[:0, :],
3856+
check_index_type=True, check_column_type=True)
38453857

38463858
def test_ix_empty_list_indexer_is_ok(self):
38473859
from pandas.util.testing import makeCustomDataframe as mkdf
38483860
df = mkdf(5, 2)
3849-
assert_frame_equal(df.ix[:,[]], df.iloc[:, :0]) # vertical empty
3850-
assert_frame_equal(df.ix[[],:], df.iloc[:0, :]) # horizontal empty
3851-
assert_frame_equal(df.ix[[]], df.iloc[:0, :]) # horizontal empty
3861+
# vertical empty
3862+
assert_frame_equal(df.ix[:, []], df.iloc[:, :0],
3863+
check_index_type=True, check_column_type=True)
3864+
# horizontal empty
3865+
assert_frame_equal(df.ix[[], :], df.iloc[:0, :],
3866+
check_index_type=True, check_column_type=True)
3867+
# horizontal empty
3868+
assert_frame_equal(df.ix[[]], df.iloc[:0, :],
3869+
check_index_type=True, check_column_type=True)
38523870

38533871
def test_deprecate_float_indexers(self):
38543872

0 commit comments

Comments
 (0)