Skip to content

Change UInt64Index._na_value from 0 to np.nan #18401

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 24, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ Backwards incompatible API changes

- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`)
- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`)
- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`)
-


Expand Down Expand Up @@ -127,7 +128,7 @@ Bug Fixes
Conversion
^^^^^^^^^^

-
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

18398 as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a separate entry for 18398 under "Backwards incompatible API changes"

-
-

Expand Down
25 changes: 14 additions & 11 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
# then coerce to integer.
try:
return cls._try_convert_to_int_index(
data, copy, name)
data, copy, name, dtype)
except ValueError:
pass

Expand Down Expand Up @@ -307,7 +307,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
if inferred == 'integer':
try:
return cls._try_convert_to_int_index(
subarr, copy, name)
subarr, copy, name, dtype)
except ValueError:
pass

Expand Down Expand Up @@ -664,7 +664,7 @@ def ravel(self, order='C'):

# construction helpers
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
def _try_convert_to_int_index(cls, data, copy, name, dtype):
"""
Attempt to convert an array of data into an integer index.

Expand All @@ -685,15 +685,18 @@ def _try_convert_to_int_index(cls, data, copy, name):
"""

from .numeric import Int64Index, UInt64Index
try:
res = data.astype('i8', copy=False)
if (res == data).all():
return Int64Index(res, copy=copy, name=name)
except (OverflowError, TypeError, ValueError):
pass
if not is_unsigned_integer_dtype(dtype):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment here (eg. about why we don't convert for uint)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

# skip int64 conversion attempt if uint-like dtype is passed, as
# this could return Int64Index when UInt64Index is what's desired
try:
res = data.astype('i8', copy=False)
if (res == data).all():
return Int64Index(res, copy=copy, name=name)
except (OverflowError, TypeError, ValueError):
pass

# Conversion to int64 failed (possibly due to
# overflow), so let's try now with uint64.
# Conversion to int64 failed (possibly due to overflow) or was skipped,
# so let's try now with uint64.
try:
res = data.astype('u8', copy=False)
if (res == data).all():
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,6 @@ class UInt64Index(NumericIndex):
_inner_indexer = libjoin.inner_join_indexer_uint64
_outer_indexer = libjoin.outer_join_indexer_uint64
_can_hold_na = False
_na_value = 0
_engine_type = libindex.UInt64Engine
_default_dtype = np.uint64

Expand Down
28 changes: 8 additions & 20 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@

from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index,
RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex,
TimedeltaIndex, PeriodIndex, IntervalIndex,
notna, isna)
TimedeltaIndex, PeriodIndex, IntervalIndex, isna)
from pandas.core.indexes.base import InvalidIndexError
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
from pandas.core.dtypes.common import needs_i8_conversion
Expand Down Expand Up @@ -529,31 +528,20 @@ def test_numpy_repeat(self):
tm.assert_raises_regex(ValueError, msg, np.repeat,
i, rep, axis=0)

def test_where(self):
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where(self, klass):
i = self.create_index()
result = i.where(notna(i))

cond = [True] * len(i)
result = i.where(klass(cond))
expected = i
tm.assert_index_equal(result, expected)

_nan = i._na_value
cond = [False] + [True] * len(i[1:])
expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype)

result = i.where(cond)
expected = pd.Index([i._na_value] + i[1:].tolist(), dtype=i.dtype)
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()

_nan = i._na_value
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, pd.Series]
expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype)

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_setops_errorcases(self):
for name, idx in compat.iteritems(self.indices):
# # non-iterable input
Expand Down
23 changes: 7 additions & 16 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,27 +61,18 @@ def test_pickle_round_trip(self):
result = tm.round_trip_pickle(idx)
tm.assert_index_equal(result, idx)

def test_where(self):
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where(self, klass):
i = self.create_index()
result = i.where(notna(i))
cond = [True] * len(i)
expected = i
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(),
freq='D')
result = i.where(notna(i2))
expected = i2
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, Series]
expected = pd.PeriodIndex([pd.NaT] + i[1:].tolist(), freq='D')

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)
expected = PeriodIndex([NaT] + i[1:].tolist(), freq='D')
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_where_other(self):

Expand Down
17 changes: 16 additions & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime, timedelta

import pandas.util.testing as tm
from pandas.core.dtypes.common import is_unsigned_integer_dtype
from pandas.core.indexes.api import Index, MultiIndex
from pandas.tests.indexes.common import Base

Expand All @@ -14,7 +15,7 @@
import numpy as np

from pandas import (period_range, date_range, Series,
DataFrame, Float64Index, Int64Index,
DataFrame, Float64Index, Int64Index, UInt64Index,
CategoricalIndex, DatetimeIndex, TimedeltaIndex,
PeriodIndex, isna)
from pandas.core.index import _get_combined_index, _ensure_index_from_sequences
Expand Down Expand Up @@ -201,6 +202,20 @@ def __array__(self, dtype=None):
result = pd.Index(ArrayLike(array))
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('dtype', [
    int, 'int64', 'int32', 'int16', 'int8', 'uint64', 'uint32',
    'uint16', 'uint8'])
def test_constructor_int_dtype_float(self, dtype):
    """Int-like floats passed with an integer dtype yield an integer index.

    Builds ``Index([0., 1., 2., 3.], dtype=dtype)`` and compares it with
    ``UInt64Index([0, 1, 2, 3])`` when ``dtype`` is an unsigned integer
    dtype, or with ``Int64Index([0, 1, 2, 3])`` otherwise.
    """
    # GH 18400
    built = Index([0., 1., 2., 3.], dtype=dtype)

    # Unsigned dtypes map to UInt64Index; every other integer dtype in the
    # parametrization maps to Int64Index.
    wanted_cls = (UInt64Index if is_unsigned_integer_dtype(dtype)
                  else Int64Index)
    wanted = wanted_cls([0, 1, 2, 3])

    tm.assert_index_equal(built, wanted)

def test_constructor_int_dtype_nan(self):
# see gh-15187
data = [np.nan]
Expand Down
27 changes: 9 additions & 18 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import numpy as np

from pandas import Categorical, IntervalIndex, compat, notna
from pandas import Categorical, IntervalIndex, compat
from pandas.util.testing import assert_almost_equal
import pandas.core.config as cf
import pandas as pd
Expand Down Expand Up @@ -269,28 +269,19 @@ def f(x):
ordered=False)
tm.assert_index_equal(result, exp)

def test_where(self):
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
def test_where(self, klass):
Copy link
Contributor

@jreback jreback Nov 24, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should prob move all of the test_where tests to test_base and use the indices fixture to avoid the code repetition (new issue though)

i = self.create_index()
result = i.where(notna(i))
cond = [True] * len(i)
expected = i
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(),
categories=i.categories)
result = i.where(notna(i2))
expected = i2
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, pd.Series]
expected = pd.CategoricalIndex([np.nan] + i[1:].tolist(),
categories=i.categories)

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)
expected = CategoricalIndex([np.nan] + i[1:].tolist(),
categories=i.categories)
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_append(self):

Expand Down
19 changes: 9 additions & 10 deletions pandas/tests/indexes/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,20 +290,19 @@ def test_astype(self, closed):
expected = pd.Categorical(idx, ordered=True)
tm.assert_categorical_equal(result, expected)

def test_where(self, closed):
expected = self.create_index(closed=closed)
result = expected.where(expected.notna())
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
def test_where(self, closed, klass):
idx = self.create_index(closed=closed)
cond = [True] * len(idx)
expected = idx
result = expected.where(klass(cond))
tm.assert_index_equal(result, expected)

idx = IntervalIndex.from_breaks([1, 2], closed=closed)
result = idx.where([True, False])
expected = IntervalIndex.from_intervals(
[Interval(1.0, 2.0, closed=closed), np.nan])
cond = [False] + [True] * len(idx[1:])
expected = IntervalIndex([np.nan] + idx[1:].tolist())
result = idx.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
pass

def test_delete(self, closed):
expected = IntervalIndex.from_breaks([1, 2], closed=closed)
result = self.create_index(closed=closed).delete(0)
Expand Down
39 changes: 13 additions & 26 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from pandas import (date_range, notna, Series, Index, Float64Index,
from pandas import (date_range, Series, Index, Float64Index,
Int64Index, UInt64Index, RangeIndex)

import pandas.util.testing as tm
Expand Down Expand Up @@ -175,6 +175,18 @@ def test_modulo(self):
expected = Index(index.values % 2)
tm.assert_index_equal(index % 2, expected)

@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where(self, klass):
    """Check ``where`` on the index under test for each mask container.

    ``klass`` is the container type the boolean mask is wrapped in
    (list, tuple, ``np.array`` or ``Series``). Two masks are exercised:
    an all-True mask, and a mask that is False only in the first position.
    """
    i = self.create_index()

    # All-True mask: nothing is replaced, so the result must equal the
    # original index. This comparison was previously missing, so the
    # first case was computed but never checked.
    cond = [True] * len(i)
    expected = i
    result = i.where(klass(cond))
    tm.assert_index_equal(result, expected)

    # First position masked out: it is replaced by the index's NA value.
    # The expected result is built as a Float64Index -- presumably because
    # the NA value is NaN for the numeric indexes tested here; confirm
    # against the subclasses that inherit this test.
    cond = [False] + [True] * (len(i) - 1)
    expected = Float64Index([i._na_value] + i[1:].tolist())
    result = i.where(klass(cond))
    tm.assert_index_equal(result, expected)


class TestFloat64Index(Numeric):
_holder = Float64Index
Expand Down Expand Up @@ -726,31 +738,6 @@ def test_coerce_list(self):
arr = Index([1, 2, 3, 4], dtype=object)
assert isinstance(arr, Index)

def test_where(self):
i = self.create_index()
result = i.where(notna(i))
expected = i
tm.assert_index_equal(result, expected)

_nan = i._na_value
cond = [False] + [True] * len(i[1:])
expected = pd.Index([_nan] + i[1:].tolist())

result = i.where(cond)
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()

_nan = i._na_value
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, pd.Series]
expected = pd.Index([_nan] + i[1:].tolist())

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_get_indexer(self):
target = Int64Index(np.arange(10))
indexer = self.index.get_indexer(target)
Expand Down
27 changes: 1 addition & 26 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import numpy as np

from pandas import (isna, notna, Series, Index, Float64Index,
from pandas import (isna, Series, Index, Float64Index,
Int64Index, RangeIndex)

import pandas.util.testing as tm
Expand Down Expand Up @@ -934,31 +934,6 @@ def test_len_specialised(self):
i = RangeIndex(0, 5, step)
assert len(i) == 0

def test_where(self):
i = self.create_index()
result = i.where(notna(i))
expected = i
tm.assert_index_equal(result, expected)

_nan = i._na_value
cond = [False] + [True] * len(i[1:])
expected = pd.Index([_nan] + i[1:].tolist())

result = i.where(cond)
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()

_nan = i._na_value
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, pd.Series]
expected = pd.Index([_nan] + i[1:].tolist())

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_append(self):
# GH16212
RI = RangeIndex
Expand Down