Skip to content

BUG/CLN: datetime64/timedelta64 #3516

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 8, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ pandas 0.11.1
Note: The default value will change in 0.12 to the "no mangle" behaviour.
If your code relies on the current mangling behaviour, explicitly specify
``mangle_dupe_cols=True`` in your calls.
- Do not allow ``astype`` on ``datetime64[ns]`` except to ``object``, or on
``timedelta64[ns]`` except to ``object``/``int`` (GH3425_)
- Do not allow datetimelike/timedeltalike creation except with valid types
(e.g. cannot pass ``datetime64[ms]``) (GH3423_)

**Bug Fixes**

Expand Down Expand Up @@ -87,11 +91,15 @@ pandas 0.11.1
- Fixed bug in mixed-frame assignment with aligned series (GH3492_)
- Fixed bug where selecting month/quarter/year from a series would not select the time element
on the last day (GH3546_)
- Properly convert np.datetime64 objects in a Series (GH3416_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH2786: https://github.com/pydata/pandas/issues/2786
.. _GH2194: https://github.com/pydata/pandas/issues/2194
.. _GH3230: https://github.com/pydata/pandas/issues/3230
.. _GH3425: https://github.com/pydata/pandas/issues/3425
.. _GH3416: https://github.com/pydata/pandas/issues/3416
.. _GH3423: https://github.com/pydata/pandas/issues/3423
.. _GH3251: https://github.com/pydata/pandas/issues/3251
.. _GH3379: https://github.com/pydata/pandas/issues/3379
.. _GH3480: https://github.com/pydata/pandas/issues/3480
Expand Down
37 changes: 29 additions & 8 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class AmbiguousIndexError(PandasError, KeyError):


# Set of numpy dtypes: datetime64[ns], timedelta64[ns], object, and the
# signed/unsigned integer widths from 8 to 64 bits.
# NOTE(review): the name suggests these are the dtypes for which a
# datetime/timedelta cast is attempted -- the consumer is not visible in this
# hunk; confirm against the caller.
_POSSIBLY_CAST_DTYPES = set([ np.dtype(t) for t in ['M8[ns]','m8[ns]','O','int8','uint8','int16','uint16','int32','uint32','int64','uint64'] ])
# Nanosecond-resolution datetime64 dtype. _possibly_cast_to_datetime and
# _astype_nansafe compare the requested dtype against it before raising
# TypeError.
_NS_DTYPE = np.dtype('M8[ns]')
# Nanosecond-resolution timedelta64 dtype; the timedelta counterpart of
# _NS_DTYPE in the same two functions.
_TD_DTYPE = np.dtype('m8[ns]')
# int64 dtype; _astype_nansafe accepts it as a timedelta astype target on
# Python 3.
_INT64_DTYPE = np.dtype(np.int64)

def isnull(obj):
'''
Expand Down Expand Up @@ -1084,6 +1087,12 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):

if is_datetime64 or is_timedelta64:

# force the dtype if needed
if is_datetime64 and dtype != _NS_DTYPE:
raise TypeError("cannot convert datetimelike to dtype [%s]" % dtype)
elif is_timedelta64 and dtype != _TD_DTYPE:
raise TypeError("cannot convert timedeltalike to dtype [%s]" % dtype)

if np.isscalar(value):
if value == tslib.iNaT or isnull(value):
value = tslib.iNaT
Expand All @@ -1098,7 +1107,8 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
elif np.prod(value.shape) and value.dtype != dtype:
try:
if is_datetime64:
value = tslib.array_to_datetime(value, coerce = coerce)
from pandas.tseries.tools import to_datetime
value = to_datetime(value, coerce=coerce).values
elif is_timedelta64:
value = _possibly_cast_to_timedelta(value)
except:
Expand All @@ -1119,12 +1129,12 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
v = [ v ]
if len(v):
inferred_type = lib.infer_dtype(v)
if inferred_type == 'datetime':
if inferred_type in ['datetime','datetime64']:
try:
value = tslib.array_to_datetime(np.array(v))
except:
pass
elif inferred_type == 'timedelta':
elif inferred_type in ['timedelta','timedelta64']:
value = _possibly_cast_to_timedelta(value)

return value
Expand Down Expand Up @@ -1515,9 +1525,24 @@ def _astype_nansafe(arr, dtype, copy = True):
if not isinstance(dtype, np.dtype):
dtype = np.dtype(dtype)

if issubclass(arr.dtype.type, np.datetime64):
if is_datetime64_dtype(arr):
if dtype == object:
return tslib.ints_to_pydatetime(arr.view(np.int64))
elif issubclass(dtype.type, np.int):
return arr.view(dtype)
elif dtype != _NS_DTYPE:
raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % (arr.dtype,dtype))
return arr.astype(_NS_DTYPE)
elif is_timedelta64_dtype(arr):
if issubclass(dtype.type, np.int):
return arr.view(dtype)
elif dtype == object:
return arr.astype(object)

# in py3, timedelta64[ns] are int64
elif (py3compat.PY3 and dtype not in [_INT64_DTYPE,_TD_DTYPE]) or (not py3compat.PY3 and dtype != _TD_DTYPE):
raise TypeError("cannot astype a timedelta from [%s] to [%s]" % (arr.dtype,dtype))
return arr.astype(_TD_DTYPE)
elif (np.issubdtype(arr.dtype, np.floating) and
np.issubdtype(dtype, np.integer)):

Expand Down Expand Up @@ -1721,9 +1746,6 @@ def _check_as_is(x):
self.queue.truncate(0)


_NS_DTYPE = np.dtype('M8[ns]')


def _concat_compat(to_concat, axis=0):
# filter empty arrays
to_concat = [x for x in to_concat if x.shape[axis] > 0]
Expand Down Expand Up @@ -1751,7 +1773,6 @@ def _to_pydatetime(x):

return x


def _where_compat(mask, arr1, arr2):
if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE:
new_vals = np.where(mask, arr1.view(np.int64), arr2.view(np.int64))
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from numpy import nan
import numpy as np

from pandas.core.common import _possibly_downcast_to_dtype, isnull
from pandas.core.common import _possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE
from pandas.core.index import Index, MultiIndex, _ensure_index, _handle_legacy_indexes
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
import pandas.core.common as com
Expand Down Expand Up @@ -740,10 +740,6 @@ def should_store(self, value):
(np.integer, np.floating, np.complexfloating,
np.datetime64, np.bool_))

_NS_DTYPE = np.dtype('M8[ns]')
_TD_DTYPE = np.dtype('m8[ns]')


class DatetimeBlock(Block):
_can_hold_na = True

Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,27 @@ def test_constructor_dtype_datetime64(self):
self.assert_(isnull(s[1]) == True)
self.assert_(s.dtype == 'M8[ns]')

# GH3416
dates = [
np.datetime64(datetime(2013, 1, 1)),
np.datetime64(datetime(2013, 1, 2)),
np.datetime64(datetime(2013, 1, 3)),
]

s = Series(dates)
self.assert_(s.dtype == 'M8[ns]')

s.ix[0] = np.nan
self.assert_(s.dtype == 'M8[ns]')

# invalid astypes
for t in ['s','D','us','ms']:
self.assertRaises(TypeError, s.astype, 'M8[%s]' % t)

# GH3414 related
self.assertRaises(TypeError, lambda x: Series(Series(dates).astype('int')/1000000,dtype='M8[ms]'))
self.assertRaises(TypeError, lambda x: Series(dates, dtype='datetime64'))

def test_constructor_dict(self):
d = {'a': 0., 'b': 1., 'c': 2.}
result = Series(d, index=['b', 'c', 'd', 'a'])
Expand Down Expand Up @@ -1809,6 +1830,13 @@ def test_constructor_dtype_timedelta64(self):
td = Series([ timedelta(days=i) for i in range(3) ] + [ np.nan ], dtype='m8[ns]' )
self.assert_(td.dtype=='timedelta64[ns]')

# invalid astypes
for t in ['s','D','us','ms']:
self.assertRaises(TypeError, td.astype, 'm8[%s]' % t)

# valid astype
td.astype('int')

# this is an invalid casting
self.assertRaises(Exception, Series, [ timedelta(days=i) for i in range(3) ] + [ 'foo' ], dtype='m8[ns]' )

Expand Down
5 changes: 1 addition & 4 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import numpy as np

from pandas.core.common import isnull
from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE
from pandas.core.index import Index, Int64Index
from pandas.tseries.frequencies import (
infer_freq, to_offset, get_period_alias,
Expand Down Expand Up @@ -92,9 +92,6 @@ class TimeSeriesError(Exception):


_midnight = time(0, 0)
_NS_DTYPE = np.dtype('M8[ns]')
_INT64_DTYPE = np.dtype(np.int64)


class DatetimeIndex(Int64Index):
"""
Expand Down
6 changes: 1 addition & 5 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import pandas.tseries.frequencies as _freq_mod

import pandas.core.common as com
from pandas.core.common import isnull
from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE
from pandas.util import py3compat

from pandas.lib import Timestamp
Expand Down Expand Up @@ -516,10 +516,6 @@ def wrapper(self, other):
return result
return wrapper

_INT64_DTYPE = np.dtype(np.int64)
_NS_DTYPE = np.dtype('M8[ns]')


class PeriodIndex(Int64Index):
"""
Immutable ndarray holding ordinal values indicating regular periods in
Expand Down
2 changes: 1 addition & 1 deletion pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1470,7 +1470,7 @@ def test_frame_datetime64_handling_groupby(self):
(3, np.datetime64('2012-07-04'))],
columns=['a', 'date'])
result = df.groupby('a').first()
self.assertEqual(result['date'][3], np.datetime64('2012-07-03'))
self.assertEqual(result['date'][3], datetime(2012,7,3))

def test_series_interpolate_intraday(self):
# #1698
Expand Down
6 changes: 4 additions & 2 deletions pandas/tseries/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _maybe_get_tz(tz):


def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
format=None):
format=None, coerce=False):
"""
Convert argument to datetime

Expand All @@ -68,6 +68,7 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
If True returns a DatetimeIndex, if False returns ndarray of values
format : string, default None
strftime to parse time, eg "%d/%m/%Y"
coerce : boolean, default False
If True, force errors to NaT

Returns
-------
Expand All @@ -84,7 +85,8 @@ def _convert_f(arg):
result = tslib.array_strptime(arg, format)
else:
result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
utc=utc, dayfirst=dayfirst)
utc=utc, dayfirst=dayfirst,
coerce=coerce)
if com.is_datetime64_dtype(result) and box:
result = DatetimeIndex(result, tz='utc' if utc else None)
return result
Expand Down