diff --git a/RELEASE.rst b/RELEASE.rst index f3d9c72db8bc5..7a2848003783f 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -56,6 +56,10 @@ pandas 0.11.1 Note: The default value will change in 0.12 to the "no mangle" behaviour, If your code relies on this behaviour, explicitly specify mangle_dupe_cols=True in your calls. + - Do not allow astypes on ``datetime64[ns]`` except to ``object``, and + ``timedelta64[ns]`` to ``object/int`` (GH3425_) + - Do not allow datetimelike/timedeltalike creation except with valid types + (e.g. cannot pass ``datetime64[ms]``) (GH3423_) **Bug Fixes** @@ -87,11 +91,15 @@ pandas 0.11.1 - Fixed bug in mixed-frame assignment with aligned series (GH3492_) - Fixed bug in selecting month/quarter/year from a series would not select the time element on the last day (GH3546_) + - Properly convert np.datetime64 objects in a Series (GH3416_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 .. _GH2194: https://github.com/pydata/pandas/issues/2194 .. _GH3230: https://github.com/pydata/pandas/issues/3230 +.. _GH3425: https://github.com/pydata/pandas/issues/3425 +.. _GH3416: https://github.com/pydata/pandas/issues/3416 +.. _GH3423: https://github.com/pydata/pandas/issues/3423 .. _GH3251: https://github.com/pydata/pandas/issues/3251 .. _GH3379: https://github.com/pydata/pandas/issues/3379 .. _GH3480: https://github.com/pydata/pandas/issues/3480 diff --git a/pandas/core/common.py b/pandas/core/common.py index 490f269c8c104..893d912dcece8 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -43,6 +43,9 @@ class AmbiguousIndexError(PandasError, KeyError): _POSSIBLY_CAST_DTYPES = set([ np.dtype(t) for t in ['M8[ns]','m8[ns]','O','int8','uint8','int16','uint16','int32','uint32','int64','uint64'] ]) +_NS_DTYPE = np.dtype('M8[ns]') +_TD_DTYPE = np.dtype('m8[ns]') +_INT64_DTYPE = np.dtype(np.int64) def isnull(obj): ''' @@ -1084,6 +1087,12 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False): if is_datetime64 or is_timedelta64: + # force the dtype if needed + if is_datetime64 and dtype != _NS_DTYPE: + raise TypeError("cannot convert datetimelike to dtype [%s]" % dtype) + elif is_timedelta64 and dtype != _TD_DTYPE: + raise TypeError("cannot convert timedeltalike to dtype [%s]" % dtype) + if np.isscalar(value): if value == tslib.iNaT or isnull(value): value = tslib.iNaT @@ -1098,7 +1107,8 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False): elif np.prod(value.shape) and value.dtype != dtype: try: if is_datetime64: - value = tslib.array_to_datetime(value, coerce = coerce) + from pandas.tseries.tools import to_datetime + value = to_datetime(value, coerce=coerce).values elif is_timedelta64: value = _possibly_cast_to_timedelta(value) except: @@ -1119,12 +1129,12 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False): v = [ v ] if len(v): inferred_type = lib.infer_dtype(v) - if inferred_type == 'datetime': + if inferred_type in ['datetime','datetime64']: try: value = tslib.array_to_datetime(np.array(v)) except: pass - elif inferred_type == 'timedelta': + elif inferred_type in ['timedelta','timedelta64']: value = _possibly_cast_to_timedelta(value) return value @@ -1515,9 +1525,24 @@ def _astype_nansafe(arr, dtype, copy = True): if not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) - if issubclass(arr.dtype.type, np.datetime64): + if is_datetime64_dtype(arr): if dtype == object: return tslib.ints_to_pydatetime(arr.view(np.int64)) + elif issubclass(dtype.type, np.int): + return arr.view(dtype) + elif dtype != _NS_DTYPE: + raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % (arr.dtype,dtype)) + return arr.astype(_NS_DTYPE) + elif is_timedelta64_dtype(arr): + if issubclass(dtype.type, np.int): + return arr.view(dtype) + elif dtype == object: + return arr.astype(object) + + # in py3, timedelta64[ns] are int64 + elif (py3compat.PY3 and dtype not in [_INT64_DTYPE,_TD_DTYPE]) or (not py3compat.PY3 and dtype != _TD_DTYPE): + raise TypeError("cannot astype a timedelta from [%s] to [%s]" % (arr.dtype,dtype)) + return arr.astype(_TD_DTYPE) elif (np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer)): @@ -1721,9 +1746,6 @@ def _check_as_is(x): self.queue.truncate(0) -_NS_DTYPE = np.dtype('M8[ns]') - - def _concat_compat(to_concat, axis=0): # filter empty arrays to_concat = [x for x in to_concat if x.shape[axis] > 0] @@ -1751,7 +1773,6 @@ def _to_pydatetime(x): return x - def _where_compat(mask, arr1, arr2): if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE: new_vals = np.where(mask, arr1.view(np.int64), arr2.view(np.int64)) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 5c0f9253beb62..13e1654963844 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4,7 +4,7 @@ from numpy import nan import numpy as np -from pandas.core.common import _possibly_downcast_to_dtype, isnull +from pandas.core.common import _possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE from pandas.core.index import Index, MultiIndex, _ensure_index, _handle_legacy_indexes from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices import pandas.core.common as com @@ -740,10 +740,6 @@ def should_store(self, value): (np.integer, np.floating, np.complexfloating, np.datetime64, np.bool_)) -_NS_DTYPE = np.dtype('M8[ns]') -_TD_DTYPE = np.dtype('m8[ns]') - - class DatetimeBlock(Block): _can_hold_na = True diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 4845ae5258892..78e2cef230e24 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -469,6 +469,27 @@ def test_constructor_dtype_datetime64(self): self.assert_(isnull(s[1]) == True) self.assert_(s.dtype == 'M8[ns]') + # GH3416 + dates = [ + np.datetime64(datetime(2013, 1, 1)), + np.datetime64(datetime(2013, 1, 2)), + np.datetime64(datetime(2013, 1, 3)), + ] + + s = Series(dates) + self.assert_(s.dtype == 'M8[ns]') + + s.ix[0] = np.nan + self.assert_(s.dtype == 'M8[ns]') + + # invalid astypes + for t in ['s','D','us','ms']: + self.assertRaises(TypeError, s.astype, 'M8[%s]' % t) + + # GH3414 related + self.assertRaises(TypeError, lambda x: Series(Series(dates).astype('int')/1000000,dtype='M8[ms]')) + self.assertRaises(TypeError, lambda x: Series(dates, dtype='datetime64')) + def test_constructor_dict(self): d = {'a': 0., 'b': 1., 'c': 2.} result = Series(d, index=['b', 'c', 'd', 'a']) @@ -1809,6 +1830,13 @@ def test_constructor_dtype_timedelta64(self): td = Series([ timedelta(days=i) for i in range(3) ] + [ np.nan ], dtype='m8[ns]' ) self.assert_(td.dtype=='timedelta64[ns]') + # invalid astypes + for t in ['s','D','us','ms']: + self.assertRaises(TypeError, td.astype, 'm8[%s]' % t) + + # valid astype + td.astype('int') + # this is an invalid casting self.assertRaises(Exception, Series, [ timedelta(days=i) for i in range(3) ] + [ 'foo' ], dtype='m8[ns]' ) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 6bccf323f8654..46e2488fb70e6 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -6,7 +6,7 @@ import numpy as np -from pandas.core.common import isnull +from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE from pandas.core.index import Index, Int64Index from pandas.tseries.frequencies import ( infer_freq, to_offset, get_period_alias, @@ -92,9 +92,6 @@ class TimeSeriesError(Exception): _midnight = time(0, 0) -_NS_DTYPE = np.dtype('M8[ns]') -_INT64_DTYPE = np.dtype(np.int64) - class DatetimeIndex(Int64Index): """ diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index c1af7ba5cccc2..d9dfa51bc0bff 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -12,7 +12,7 @@ import pandas.tseries.frequencies as _freq_mod import pandas.core.common as com -from pandas.core.common import isnull +from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE from pandas.util import py3compat from pandas.lib import Timestamp @@ -516,10 +516,6 @@ def wrapper(self, other): return result return wrapper -_INT64_DTYPE = np.dtype(np.int64) -_NS_DTYPE = np.dtype('M8[ns]') - - class PeriodIndex(Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index e52d9c9c8b777..9b20ac1e3f055 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1470,7 +1470,7 @@ def test_frame_datetime64_handling_groupby(self): (3, np.datetime64('2012-07-04'))], columns=['a', 'date']) result = df.groupby('a').first() - self.assertEqual(result['date'][3], np.datetime64('2012-07-03')) + self.assertEqual(result['date'][3], datetime(2012,7,3)) def test_series_interpolate_intraday(self): # #1698 diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index f9608be013b3c..62ee19da6b845 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -50,7 +50,7 @@ def _maybe_get_tz(tz): def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, - format=None): + format=None, coerce=False): """ Convert argument to datetime @@ -68,6 +68,7 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, If True returns a DatetimeIndex, if False returns ndarray of values format : string, default None strftime to parse time, eg "%d/%m/%Y" + coerce : force errors to NaT (False by default) Returns ------- @@ -84,7 +85,8 @@ def _convert_f(arg): result = tslib.array_strptime(arg, format) else: result = tslib.array_to_datetime(arg, raise_=errors == 'raise', - utc=utc, dayfirst=dayfirst) + utc=utc, dayfirst=dayfirst, + coerce=coerce) if com.is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz='utc' if utc else None) return result