diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 90032a692fd15..ff710cc899ac7 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -52,6 +52,7 @@ Backwards incompatible API changes - :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) - :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) +- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) - @@ -127,7 +128,7 @@ Bug Fixes Conversion ^^^^^^^^^^ -- +- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) - - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7a34e64724245..c1423fab91a0d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -251,7 +251,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # then coerce to integer. try: return cls._try_convert_to_int_index( - data, copy, name) + data, copy, name, dtype) except ValueError: pass @@ -307,7 +307,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if inferred == 'integer': try: return cls._try_convert_to_int_index( - subarr, copy, name) + subarr, copy, name, dtype) except ValueError: pass @@ -664,7 +664,7 @@ def ravel(self, order='C'): # construction helpers @classmethod - def _try_convert_to_int_index(cls, data, copy, name): + def _try_convert_to_int_index(cls, data, copy, name, dtype): """ Attempt to convert an array of data into an integer index. 
@@ -685,15 +685,18 @@ def _try_convert_to_int_index(cls, data, copy, name): """ from .numeric import Int64Index, UInt64Index - try: - res = data.astype('i8', copy=False) - if (res == data).all(): - return Int64Index(res, copy=copy, name=name) - except (OverflowError, TypeError, ValueError): - pass + if not is_unsigned_integer_dtype(dtype): + # skip int64 conversion attempt if uint-like dtype is passed, as + # this could return Int64Index when UInt64Index is what's desired + try: + res = data.astype('i8', copy=False) + if (res == data).all(): + return Int64Index(res, copy=copy, name=name) + except (OverflowError, TypeError, ValueError): + pass - # Conversion to int64 failed (possibly due to - # overflow), so let's try now with uint64. + # Conversion to int64 failed (possibly due to overflow) or was skipped, + # so let's try now with uint64. try: res = data.astype('u8', copy=False) if (res == data).all(): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index ae6a810ece510..fddbb2de83dca 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -216,7 +216,6 @@ class UInt64Index(NumericIndex): _inner_indexer = libjoin.inner_join_indexer_uint64 _outer_indexer = libjoin.outer_join_indexer_uint64 _can_hold_na = False - _na_value = 0 _engine_type = libindex.UInt64Engine _default_dtype = np.uint64 diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 81360bc0c13f9..43b20f420eb48 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -9,8 +9,7 @@ from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, - TimedeltaIndex, PeriodIndex, IntervalIndex, - notna, isna) + TimedeltaIndex, PeriodIndex, IntervalIndex, isna) from pandas.core.indexes.base import InvalidIndexError from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.dtypes.common import needs_i8_conversion 
@@ -529,31 +528,20 @@ def test_numpy_repeat(self): tm.assert_raises_regex(ValueError, msg, np.repeat, i, rep, axis=0) - def test_where(self): + @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) + def test_where(self, klass): i = self.create_index() - result = i.where(notna(i)) + + cond = [True] * len(i) + result = i.where(klass(cond)) expected = i tm.assert_index_equal(result, expected) - _nan = i._na_value cond = [False] + [True] * len(i[1:]) - expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype) - - result = i.where(cond) + expected = pd.Index([i._na_value] + i[1:].tolist(), dtype=i.dtype) + result = i.where(klass(cond)) tm.assert_index_equal(result, expected) - def test_where_array_like(self): - i = self.create_index() - - _nan = i._na_value - cond = [False] + [True] * (len(i) - 1) - klasses = [list, tuple, np.array, pd.Series] - expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype) - - for klass in klasses: - result = i.where(klass(cond)) - tm.assert_index_equal(result, expected) - def test_setops_errorcases(self): for name, idx in compat.iteritems(self.indices): # # non-iterable input diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 7fefcc859d447..52558c27ce707 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -61,27 +61,18 @@ def test_pickle_round_trip(self): result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx) - def test_where(self): + @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) + def test_where(self, klass): i = self.create_index() - result = i.where(notna(i)) + cond = [True] * len(i) expected = i + result = i.where(klass(cond)) tm.assert_index_equal(result, expected) - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notna(i2)) - expected = i2 - tm.assert_index_equal(result, expected) - - def test_where_array_like(self): - i = 
self.create_index() cond = [False] + [True] * (len(i) - 1) - klasses = [list, tuple, np.array, Series] - expected = pd.PeriodIndex([pd.NaT] + i[1:].tolist(), freq='D') - - for klass in klasses: - result = i.where(klass(cond)) - tm.assert_index_equal(result, expected) + expected = PeriodIndex([NaT] + i[1:].tolist(), freq='D') + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) def test_where_other(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c55f53601848c..99a99cc5cc3eb 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta import pandas.util.testing as tm +from pandas.core.dtypes.common import is_unsigned_integer_dtype from pandas.core.indexes.api import Index, MultiIndex from pandas.tests.indexes.common import Base @@ -14,7 +15,7 @@ import numpy as np from pandas import (period_range, date_range, Series, - DataFrame, Float64Index, Int64Index, + DataFrame, Float64Index, Int64Index, UInt64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, isna) from pandas.core.index import _get_combined_index, _ensure_index_from_sequences @@ -201,6 +202,20 @@ def __array__(self, dtype=None): result = pd.Index(ArrayLike(array)) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('dtype', [ + int, 'int64', 'int32', 'int16', 'int8', 'uint64', 'uint32', + 'uint16', 'uint8']) + def test_constructor_int_dtype_float(self, dtype): + # GH 18400 + if is_unsigned_integer_dtype(dtype): + index_type = UInt64Index + else: + index_type = Int64Index + + expected = index_type([0, 1, 2, 3]) + result = Index([0., 1., 2., 3.], dtype=dtype) + tm.assert_index_equal(result, expected) + def test_constructor_int_dtype_nan(self): # see gh-15187 data = [np.nan] diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 5e40e06d57413..5e6898f9c8711 100644 --- 
a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -11,7 +11,7 @@ import numpy as np -from pandas import Categorical, IntervalIndex, compat, notna +from pandas import Categorical, IntervalIndex, compat from pandas.util.testing import assert_almost_equal import pandas.core.config as cf import pandas as pd @@ -269,28 +269,19 @@ def f(x): ordered=False) tm.assert_index_equal(result, exp) - def test_where(self): + @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series]) + def test_where(self, klass): i = self.create_index() - result = i.where(notna(i)) + cond = [True] * len(i) expected = i + result = i.where(klass(cond)) tm.assert_index_equal(result, expected) - i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(), - categories=i.categories) - result = i.where(notna(i2)) - expected = i2 - tm.assert_index_equal(result, expected) - - def test_where_array_like(self): - i = self.create_index() cond = [False] + [True] * (len(i) - 1) - klasses = [list, tuple, np.array, pd.Series] - expected = pd.CategoricalIndex([np.nan] + i[1:].tolist(), - categories=i.categories) - - for klass in klasses: - result = i.where(klass(cond)) - tm.assert_index_equal(result, expected) + expected = CategoricalIndex([np.nan] + i[1:].tolist(), + categories=i.categories) + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) def test_append(self): diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 399d88309072e..3876fa4f9939d 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -290,20 +290,19 @@ def test_astype(self, closed): expected = pd.Categorical(idx, ordered=True) tm.assert_categorical_equal(result, expected) - def test_where(self, closed): - expected = self.create_index(closed=closed) - result = expected.where(expected.notna()) + @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series]) + def test_where(self, closed, 
klass): + idx = self.create_index(closed=closed) + cond = [True] * len(idx) + expected = idx + result = expected.where(klass(cond)) tm.assert_index_equal(result, expected) - idx = IntervalIndex.from_breaks([1, 2], closed=closed) - result = idx.where([True, False]) - expected = IntervalIndex.from_intervals( - [Interval(1.0, 2.0, closed=closed), np.nan]) + cond = [False] + [True] * len(idx[1:]) + expected = IntervalIndex([np.nan] + idx[1:].tolist()) + result = idx.where(klass(cond)) tm.assert_index_equal(result, expected) - def test_where_array_like(self): - pass - def test_delete(self, closed): expected = IntervalIndex.from_breaks([1, 2], closed=closed) result = self.create_index(closed=closed).delete(0) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index a96c677852339..030d688f510b0 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -7,7 +7,7 @@ import numpy as np -from pandas import (date_range, notna, Series, Index, Float64Index, +from pandas import (date_range, Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex) import pandas.util.testing as tm @@ -175,6 +175,19 @@ def test_modulo(self): expected = Index(index.values % 2) tm.assert_index_equal(index % 2, expected) + @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) + def test_where(self, klass): + i = self.create_index() + cond = [True] * len(i) + expected = i + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * (len(i) - 1) + expected = Float64Index([i._na_value] + i[1:].tolist()) + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + class TestFloat64Index(Numeric): _holder = Float64Index @@ -726,31 +739,6 @@ def test_coerce_list(self): arr = Index([1, 2, 3, 4], dtype=object) assert isinstance(arr, Index) - def test_where(self): - i = self.create_index() - result = i.where(notna(i)) - expected = i - tm.assert_index_equal(result, expected) - - _nan = i._na_value - 
cond = [False] + [True] * len(i[1:]) - expected = pd.Index([_nan] + i[1:].tolist()) - - result = i.where(cond) - tm.assert_index_equal(result, expected) - - def test_where_array_like(self): - i = self.create_index() - - _nan = i._na_value - cond = [False] + [True] * (len(i) - 1) - klasses = [list, tuple, np.array, pd.Series] - expected = pd.Index([_nan] + i[1:].tolist()) - - for klass in klasses: - result = i.where(klass(cond)) - tm.assert_index_equal(result, expected) - def test_get_indexer(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 7d88b547746f6..b4d1c3760f25a 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -10,7 +10,7 @@ import numpy as np -from pandas import (isna, notna, Series, Index, Float64Index, +from pandas import (isna, Series, Index, Float64Index, Int64Index, RangeIndex) import pandas.util.testing as tm @@ -934,31 +934,6 @@ def test_len_specialised(self): i = RangeIndex(0, 5, step) assert len(i) == 0 - def test_where(self): - i = self.create_index() - result = i.where(notna(i)) - expected = i - tm.assert_index_equal(result, expected) - - _nan = i._na_value - cond = [False] + [True] * len(i[1:]) - expected = pd.Index([_nan] + i[1:].tolist()) - - result = i.where(cond) - tm.assert_index_equal(result, expected) - - def test_where_array_like(self): - i = self.create_index() - - _nan = i._na_value - cond = [False] + [True] * (len(i) - 1) - klasses = [list, tuple, np.array, pd.Series] - expected = pd.Index([_nan] + i[1:].tolist()) - - for klass in klasses: - result = i.where(klass(cond)) - tm.assert_index_equal(result, expected) - def test_append(self): # GH16212 RI = RangeIndex