Skip to content

BUG: DatetimeIndex._data should return an ndarray #20912

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jul 10, 2018
4 changes: 4 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,10 @@ def _shallow_copy(self, values=None, **kwargs):
attributes.update(kwargs)
if not len(values) and 'dtype' not in kwargs:
attributes['dtype'] = self.dtype

# _simple_new expects an ndarray
values = getattr(values, 'values', values)

return self._simple_new(values, **attributes)

def _shallow_copy_with_infer(self, values=None, **kwargs):
Expand Down
28 changes: 16 additions & 12 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from pandas.core.dtypes.common import (
_INT64_DTYPE,
_NS_DTYPE,
is_object_dtype,
is_datetime64_dtype,
is_datetimetz,
is_dtype_equal,
Expand Down Expand Up @@ -551,10 +550,11 @@ def _generate(cls, start, end, periods, name, freq,
index = _generate_regular_range(start, end, periods, freq)

if tz is not None and getattr(index, 'tz', None) is None:
index = conversion.tz_localize_to_utc(_ensure_int64(index),
tz,
ambiguous=ambiguous)
index = index.view(_NS_DTYPE)
arr = conversion.tz_localize_to_utc(_ensure_int64(index),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tz_localize_to_utc returns an array and not a DatetimeIndex so I then convert this array to a DatetimeIndex called index so I can pass index.values to _simple_new below

tz,
ambiguous=ambiguous)

index = DatetimeIndex(arr)

# index is localized datetime64 array -> have to convert
# start/end as well to compare
Expand All @@ -575,7 +575,9 @@ def _generate(cls, start, end, periods, name, freq,
index = index[1:]
if not right_closed and len(index) and index[-1] == end:
index = index[:-1]
index = cls._simple_new(index, name=name, freq=freq, tz=tz)

index = cls._simple_new(index.values, name=name, freq=freq, tz=tz)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don’t like having 2 different construction paths generally: eg we always need to be an ndarray or already converted to a DTI by the time _simple_new gets called

what i would do is run all of the index tests and see what the current state is
then probably settle on an ndarray input to _simple_new and put an assertion to validate this

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which 2 different construction paths do you mean?
With the update above, index is always an index, and it's always the values that are passed to _simple_new. So it is ndarray input to _simple_new.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my point is that we need an assertion to validate this. it may be that everything is fixed, but we should actually test this.


return index

def _convert_for_op(self, value):
Expand Down Expand Up @@ -606,12 +608,14 @@ def _simple_new(cls, values, name=None, freq=None, tz=None,
dtype=dtype, **kwargs)
values = np.array(values, copy=False)

if is_object_dtype(values):
return cls(values, name=name, freq=freq, tz=tz,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was this just never hit?

Copy link
Contributor Author

@reidy-p reidy-p Jul 9, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I think it's never hit

dtype=dtype, **kwargs).values
elif not is_datetime64_dtype(values):
if not is_datetime64_dtype(values):
values = _ensure_int64(values).view(_NS_DTYPE)

values = getattr(values, 'values', values)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this one still needed? I thought the idea was now to ensure ndarrays are passed to _simple_new and not DatetimeIndexes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well-spotted! I inserted this when I was trying to investigate why some tests were failing and I meant to move it before pushing the commit but forgot. I think we can move this line to just before the call to _simple_new in this file:

def _shallow_copy(self, values=None, **kwargs):
if values is None:
# Note: slightly different from Index implementation which defaults
# to self.values
values = self._ndarray_values
attributes = self._get_attributes_dict()
attributes.update(kwargs)
if not len(values) and 'dtype' not in kwargs:
attributes['dtype'] = self.dtype
return self._simple_new(values, **attributes)

Does this make sense? I did the same thing here:

@Appender(_index_shared_docs['_shallow_copy'])
def _shallow_copy(self, values=None, **kwargs):
if values is None:
values = self.values
attributes = self._get_attributes_dict()
attributes.update(kwargs)
if not len(values) and 'dtype' not in kwargs:
attributes['dtype'] = self.dtype
# _simple_new expects an ndarray
values = getattr(values, 'values', values)
return self._simple_new(values, **attributes)


assert isinstance(values, np.ndarray), "values is not an np.ndarray"
assert is_datetime64_dtype(values)

result = super(DatetimeIndex, cls)._simple_new(values, freq, tz,
**kwargs)
result.name = name
Expand Down Expand Up @@ -1000,7 +1004,7 @@ def unique(self, level=None):
else:
naive = self
result = super(DatetimeIndex, naive).unique(level=level)
return self._simple_new(result, name=self.name, tz=self.tz,
return self._simple_new(result.values, name=self.name, tz=self.tz,
freq=self.freq)

def union(self, other):
Expand Down Expand Up @@ -1855,7 +1859,7 @@ def _generate_regular_range(start, end, periods, freq):
"if a 'period' is given.")

data = np.arange(b, e, stride, dtype=np.int64)
data = DatetimeIndex._simple_new(data, None, tz=tz)
data = DatetimeIndex._simple_new(data.view(_NS_DTYPE), None, tz=tz)
else:
if isinstance(start, Timestamp):
start = start.to_pydatetime()
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2472,7 +2472,8 @@ def _get_index_factory(self, klass):
if klass == DatetimeIndex:
def f(values, freq=None, tz=None):
# data are already in UTC, localize and convert if tz present
result = DatetimeIndex._simple_new(values, None, freq=freq)
result = DatetimeIndex._simple_new(values.values, None,
freq=freq)
if tz is not None:
result = result.tz_localize('UTC').tz_convert(tz)
return result
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def test_index_ctor_infer_periodindex(self):
])
def test_constructor_simple_new(self, vals, dtype):
index = Index(vals, name=dtype)
result = index._simple_new(index, dtype)
result = index._simple_new(index.values, dtype)
tm.assert_index_equal(result, index)

@pytest.mark.parametrize("vals", [
Expand Down