Skip to content

Commit 1a9bc97

Browse files
committed
Generalize NA Compat
1 parent a1387ad commit 1a9bc97

File tree

15 files changed

+109
-40
lines changed

15 files changed

+109
-40
lines changed

doc/source/whatsnew/v0.21.1.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ Bug Fixes
6464
- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`)
6565
- Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`)
6666
- Bug in :meth:`IntervalIndex.copy` when copying an ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`)
67-
- Bug in ``IntervalIndex.insert`` when attempting to insert ``NaN`` (:issue:`18295`)
6867

6968
Conversion
7069
^^^^^^^^^^

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ Other API Changes
4848
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
4949
- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
5050
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
51-
51+
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
5252

5353
.. _whatsnew_0220.deprecations:
5454

pandas/core/indexes/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3728,6 +3728,10 @@ def insert(self, loc, item):
37283728
-------
37293729
new_index : Index
37303730
"""
3731+
if lib.checknull(item):
3732+
# GH 18295
3733+
item = self._na_value
3734+
37313735
_self = np.asarray(self)
37323736
item = self._coerce_scalar_to_index(item)._values
37333737
idx = np.concatenate((_self[:loc], item, _self[loc:]))

pandas/core/indexes/category.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numpy as np
2-
from pandas._libs import index as libindex
2+
from pandas._libs import index as libindex, lib
33

44
from pandas import compat
55
from pandas.compat.numpy import function as nv
@@ -688,7 +688,7 @@ def insert(self, loc, item):
688688
689689
"""
690690
code = self.categories.get_indexer([item])
691-
if (code == -1):
691+
if (code == -1) and not lib.checknull(item):
692692
raise TypeError("cannot insert an item into a CategoricalIndex "
693693
"that is not already an existing category")
694694

pandas/core/indexes/datetimes.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1751,6 +1751,9 @@ def insert(self, loc, item):
17511751
-------
17521752
new_index : Index
17531753
"""
1754+
if lib.checknull(item):
1755+
# GH 18295
1756+
item = self._na_value
17541757

17551758
freq = None
17561759

@@ -1767,14 +1770,14 @@ def insert(self, loc, item):
17671770
elif (loc == len(self)) and item - self.freq == self[-1]:
17681771
freq = self.freq
17691772
item = _to_m8(item, tz=self.tz)
1773+
17701774
try:
17711775
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
17721776
self[loc:].asi8))
17731777
if self.tz is not None:
17741778
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
17751779
return DatetimeIndex(new_dates, name=self.name, freq=freq,
17761780
tz=self.tz)
1777-
17781781
except (AttributeError, TypeError):
17791782

17801783
# fall back to object index

pandas/core/indexes/interval.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
Index, _ensure_index,
2323
default_pprint, _index_shared_docs)
2424

25-
from pandas._libs import Timestamp, Timedelta
25+
from pandas._libs import lib, Timestamp, Timedelta
2626
from pandas._libs.interval import (
2727
Interval, IntervalMixin, IntervalTree,
2828
intervals_to_interval_bounds)
@@ -986,12 +986,8 @@ def insert(self, loc, item):
986986
'side as the index')
987987
left_insert = item.left
988988
right_insert = item.right
989-
elif is_scalar(item) and isna(item):
989+
elif lib.checknull(item):
990990
# GH 18295
991-
if item is not self.left._na_value:
992-
raise TypeError('cannot insert with incompatible NA value: '
993-
'got {item}, expected {na}'
994-
.format(item=item, na=self.left._na_value))
995991
left_insert = right_insert = item
996992
else:
997993
raise ValueError('can only insert Interval objects and NA into '

pandas/core/indexes/timedeltas.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -849,16 +849,18 @@ def insert(self, loc, item):
849849
-------
850850
new_index : Index
851851
"""
852-
853852
# try to convert if possible
854853
if _is_convertible_to_td(item):
855854
try:
856855
item = Timedelta(item)
857856
except Exception:
858857
pass
858+
elif lib.checknull(item):
859+
# GH 18295
860+
item = self._na_value
859861

860862
freq = None
861-
if isinstance(item, Timedelta) or item is NaT:
863+
if isinstance(item, Timedelta) or (item is self._na_value):
862864

863865
# check freq can be preserved on edge cases
864866
if self.freq is not None:

pandas/tests/indexes/datetimes/test_indexing.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,13 @@ def test_insert(self):
145145
assert result.tz == expected.tz
146146
assert result.freq is None
147147

148+
# GH 18295 (test missing)
149+
expected = DatetimeIndex(
150+
['20170101', pd.NaT, '20170102', '20170103', '20170104'])
151+
for na in (np.nan, pd.NaT, None):
152+
result = date_range('20170101', periods=4).insert(1, na)
153+
tm.assert_index_equal(result, expected)
154+
148155
def test_delete(self):
149156
idx = date_range(start='2000-01-01', periods=5, freq='M', name='idx')
150157

pandas/tests/indexes/period/test_period.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,3 +706,11 @@ def test_join_self(self, how):
706706
index = period_range('1/1/2000', periods=10)
707707
joined = index.join(index, how=how)
708708
assert index is joined
709+
710+
def test_insert(self):
711+
# GH 18295 (test missing)
712+
expected = PeriodIndex(
713+
['2017Q1', pd.NaT, '2017Q2', '2017Q3', '2017Q4'], freq='Q')
714+
for na in (np.nan, pd.NaT, None):
715+
result = period_range('2017Q1', periods=4, freq='Q').insert(1, na)
716+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,12 @@ def test_insert(self):
442442
null_index = Index([])
443443
tm.assert_index_equal(Index(['a']), null_index.insert(0, 'a'))
444444

445+
# GH 18295 (test missing)
446+
expected = Index(['a', np.nan, 'b', 'c'])
447+
for na in (np.nan, pd.NaT, None):
448+
result = Index(list('abc')).insert(1, na)
449+
tm.assert_index_equal(result, expected)
450+
445451
def test_delete(self):
446452
idx = Index(['a', 'b', 'c', 'd'], name='idx')
447453

pandas/tests/indexes/test_category.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,12 @@ def test_insert(self):
353353
# invalid
354354
pytest.raises(TypeError, lambda: ci.insert(0, 'd'))
355355

356+
# GH 18295 (test missing)
357+
expected = CategoricalIndex(['a', np.nan, 'a', 'b', 'c', 'b'])
358+
for na in (np.nan, pd.NaT, None):
359+
result = CategoricalIndex(list('aabcb')).insert(1, na)
360+
tm.assert_index_equal(result, expected)
361+
356362
def test_delete(self):
357363

358364
ci = self.create_index()

pandas/tests/indexes/test_interval.py

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -309,38 +309,50 @@ def test_delete(self, closed):
309309
result = self.create_index(closed=closed).delete(0)
310310
tm.assert_index_equal(result, expected)
311311

312-
def test_insert(self):
313-
expected = IntervalIndex.from_breaks(range(4))
314-
actual = self.index.insert(2, Interval(2, 3))
315-
assert expected.equals(actual)
316-
317-
pytest.raises(ValueError, self.index.insert, 0, 1)
318-
pytest.raises(ValueError, self.index.insert, 0,
319-
Interval(2, 3, closed='left'))
320-
321312
@pytest.mark.parametrize('data', [
322-
interval_range(0, periods=10),
323-
interval_range(1.7, periods=8, freq=2.5),
324-
interval_range(Timestamp('20170101'), periods=12),
325-
interval_range(Timedelta('1 day'), periods=6),
313+
interval_range(0, periods=10, closed='neither'),
314+
interval_range(1.7, periods=8, freq=2.5, closed='both'),
315+
interval_range(Timestamp('20170101'), periods=12, closed='left'),
316+
interval_range(Timedelta('1 day'), periods=6, closed='right'),
326317
IntervalIndex.from_tuples([('a', 'd'), ('e', 'j'), ('w', 'z')]),
327318
IntervalIndex.from_tuples([(1, 2), ('a', 'z'), (3.14, 6.28)])])
328-
def test_insert_na(self, data):
329-
# GH 18295
330-
valid_na, invalid_na = np.nan, pd.NaT
331-
if data.left._na_value is pd.NaT:
332-
valid_na, invalid_na = invalid_na, valid_na
333-
334-
# valid insertion
335-
expected = IntervalIndex([data[0], np.nan]).append(data[1:])
336-
result = data.insert(1, valid_na)
319+
def test_insert(self, data):
320+
item = data[0]
321+
idx_item = IntervalIndex([item], closed=data.closed)
322+
323+
# start
324+
expected = idx_item.append(data)
325+
result = data.insert(0, item)
337326
tm.assert_index_equal(result, expected)
338327

339-
# invalid insertion
340-
msg = ('cannot insert with incompatible NA value: got {invalid}, '
341-
'expected {valid}').format(invalid=invalid_na, valid=valid_na)
342-
with tm.assert_raises_regex(TypeError, msg):
343-
data.insert(1, invalid_na)
328+
# end
329+
expected = data.append(idx_item)
330+
result = data.insert(len(data), item)
331+
tm.assert_index_equal(result, expected)
332+
333+
# mid
334+
expected = data[:3].append(idx_item).append(data[3:])
335+
result = data.insert(3, item)
336+
tm.assert_index_equal(result, expected)
337+
338+
# invalid type
339+
msg = 'can only insert Interval objects and NA into an IntervalIndex'
340+
with tm.assert_raises_regex(ValueError, msg):
341+
data.insert(1, 'foo')
342+
343+
# invalid closed
344+
msg = 'inserted item must be closed on the same side as the index'
345+
for closed in {'left', 'right', 'both', 'neither'} - {item.closed}:
346+
with tm.assert_raises_regex(ValueError, msg):
347+
bad_item = Interval(item.left, item.right, closed=closed)
348+
data.insert(1, bad_item)
349+
350+
# GH 18295 (test missing)
351+
na_idx = IntervalIndex([np.nan], closed=data.closed)
352+
for na in (np.nan, pd.NaT, None):
353+
expected = data[:1].append(na_idx).append(data[1:])
354+
result = data.insert(1, na)
355+
tm.assert_index_equal(result, expected)
344356

345357
def test_take(self, closed):
346358
index = self.create_index(closed=closed)

pandas/tests/indexes/test_numeric.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,13 @@ def test_modulo(self):
175175
expected = Index(index.values % 2)
176176
tm.assert_index_equal(index % 2, expected)
177177

178+
def test_insert(self):
179+
# GH 18295 (test missing)
180+
expected = Float64Index([0, np.nan, 1, 2, 3, 4])
181+
for na in (np.nan, pd.NaT, None):
182+
result = self.create_index().insert(1, na)
183+
tm.assert_index_equal(result, expected)
184+
178185

179186
class TestFloat64Index(Numeric):
180187
_holder = Float64Index
@@ -1206,3 +1213,10 @@ def test_join_outer(self):
12061213
tm.assert_index_equal(res, eres)
12071214
tm.assert_numpy_array_equal(lidx, elidx)
12081215
tm.assert_numpy_array_equal(ridx, eridx)
1216+
1217+
def test_insert(self):
1218+
# GH 18295 (test missing)
1219+
expected = UInt64Index([0, 0, 1, 2, 3, 4])
1220+
for na in (np.nan, pd.NaT, None):
1221+
result = self.create_index().insert(1, na)
1222+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_range.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,12 @@ def test_insert(self):
295295
# test 0th element
296296
tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]))
297297

298+
# GH 18295 (test missing)
299+
expected = Float64Index([0, np.nan, 1, 2, 3, 4])
300+
for na in (np.nan, pd.NaT, None):
301+
result = RangeIndex(5).insert(1, na)
302+
tm.assert_index_equal(result, expected)
303+
298304
def test_delete(self):
299305

300306
idx = RangeIndex(5, name='Foo')

pandas/tests/indexes/timedeltas/test_indexing.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ def test_insert(self):
5757
assert result.name == expected.name
5858
assert result.freq == expected.freq
5959

60+
# GH 18295 (test missing)
61+
expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day'])
62+
for na in (np.nan, pd.NaT, None):
63+
result = timedelta_range('1day', '3day').insert(1, na)
64+
tm.assert_index_equal(result, expected)
65+
6066
def test_delete(self):
6167
idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx')
6268

0 commit comments

Comments
 (0)