ENH/CLN: support enhanced timedelta64 operations/conversions #4822

Merged

merged 4 commits on Sep 12, 2013

20 changes: 20 additions & 0 deletions doc/source/io.rst
@@ -2009,6 +2009,26 @@ space. These are in terms of the total number of rows in a table.
Term('minor_axis', '=', ['A','B']) ],
start=0, stop=10)

**Using timedelta64[ns]**

.. versionadded:: 0.13

Beginning in 0.13.0, you can store and query using the ``timedelta64[ns]`` type. Terms can be
specified in the format ``<float>(<unit>)``, where the float may be signed (and fractional) and the
unit is one of ``D, s, ms, us, ns``. Here's an example:

.. warning::

This requires ``numpy >= 1.7``

.. ipython:: python

from datetime import timedelta
dftd = DataFrame(dict(A = Timestamp('20130101'), B = [ Timestamp('20130101') + timedelta(days=i,seconds=10) for i in range(10) ]))
dftd['C'] = dftd['A']-dftd['B']
dftd
store.append('dftd',dftd,data_columns=True)
store.select('dftd',Term("C","<","-3.5D"))

Indexing
~~~~~~~~
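A minimal sketch of the HDFStore usage documented in the hunk above, written against the 0.13-era string-based ``Term`` API; it assumes PyTables is installed and numpy >= 1.7, and the file name ``td_demo.h5`` is illustrative only.

    from datetime import timedelta
    from pandas import DataFrame, HDFStore, Term, Timestamp

    df = DataFrame({
        'A': Timestamp('20130101'),
        'B': [Timestamp('20130101') + timedelta(days=i, seconds=10)
              for i in range(10)],
    })
    df['C'] = df['A'] - df['B']                    # timedelta64[ns] column

    store = HDFStore('td_demo.h5')                 # illustrative file name
    store.append('dftd', df, data_columns=True)    # data_columns makes 'C' queryable
    # '-3.5D' means -3.5 days; the 's', 'ms', 'us' and 'ns' units work the same way
    result = store.select('dftd', Term('C', '<', '-3.5D'))
    store.close()
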
3 changes: 3 additions & 0 deletions doc/source/release.rst
@@ -156,6 +156,7 @@ API Changes
- a column multi-index will be recreated properly (:issue:`4710`); raise on trying to use a multi-index
with data_columns on the same axis
- ``select_as_coordinates`` will now return an ``Int64Index`` of the resultant selection set
- support ``timedelta64[ns]`` as a serialization type (:issue:`3577`)
- ``JSON``

- added ``date_unit`` parameter to specify resolution of timestamps. Options
@@ -190,6 +191,8 @@ API Changes
- provide automatic dtype conversions on _reduce operations (:issue:`3371`)
- exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`)
- default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`)
- moved timedelta support to ``pandas.tseries.timedeltas.py``; add timedelta string parsing and
a top-level ``to_timedelta`` function

Internal Refactoring
~~~~~~~~~~~~~~~~~~~~
20 changes: 20 additions & 0 deletions doc/source/timeseries.rst
@@ -1211,6 +1211,26 @@ Time Deltas & Conversions

.. versionadded:: 0.13

**string/integer conversion**

Using the top-level ``to_timedelta``, you can convert a scalar or array from the standard
timedelta format (produced by ``to_csv``) into a timedelta type (``np.timedelta64`` with
nanosecond resolution). Passing a list or array produces a ``Series`` of dtype ``timedelta64[ns]``.

.. warning::

This requires ``numpy >= 1.7``

.. ipython:: python

to_timedelta('1 days 06:05:01.00003')
to_timedelta('15.5us')
to_timedelta(['1 days 06:05:01.00003','15.5us','nan'])
to_timedelta(np.arange(5),unit='s')
to_timedelta(np.arange(5),unit='d')

**frequency conversion**

Timedeltas can be converted to other 'frequencies' by dividing by another timedelta.
These operations yield ``float64`` dtyped Series.

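The frequency conversion described just above (dividing one timedelta by another to get a ``float64`` result) can be sketched as follows; 0.13-era semantics and numpy >= 1.7 are assumed.

    import numpy as np
    from pandas import Series, to_timedelta

    td = Series(to_timedelta(['1 days 06:05:01.00003', '15.5us', '2 days']))
    td / np.timedelta64(1, 'D')    # fractional days, float64 dtype
    td / np.timedelta64(1, 's')    # seconds, float64 dtype
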
17 changes: 16 additions & 1 deletion doc/source/v0.13.0.txt
@@ -80,7 +80,7 @@ API changes
See :ref:`here<io.hdf5-selecting_coordinates>` for an example.
- allow a passed locations array or mask as a ``where`` condition (:issue:`4467`).
See :ref:`here<io.hdf5-where_mask>` for an example.

- support ``timedelta64[ns]`` as a serialization type (:issue:`3577`)
- the ``format`` keyword now replaces the ``table`` keyword; allowed values are ``fixed(f)`` or ``table(t)``;
the same defaults as prior to 0.13.0 remain, e.g. ``put`` implies ``fixed`` or ``'f'`` (Fixed) format
and ``append`` implies ``table`` or ``'t'`` (Table) format
@@ -208,6 +208,21 @@ Enhancements

- ``timedelta64[ns]`` operations

- Using the new top-level ``to_timedelta``, you can convert a scalar or array from the standard
timedelta format (produced by ``to_csv``) into a timedelta type (``np.timedelta64`` with nanosecond resolution).

.. warning::

This requires ``numpy >= 1.7``

.. ipython:: python

to_timedelta('1 days 06:05:01.00003')
to_timedelta('15.5us')
to_timedelta(['1 days 06:05:01.00003','15.5us','nan'])
to_timedelta(np.arange(5),unit='s')
to_timedelta(np.arange(5),unit='d')

- A Series of dtype ``timedelta64[ns]`` can now be divided by another
``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This
is frequency conversion. See :ref:`here<timeseries.timedeltas_convert>` for the docs.
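Since the notes above say ``to_timedelta`` understands the timedelta string format that ``to_csv`` writes, one way to picture it is a round trip through CSV text. A minimal sketch; the column name ``td`` is arbitrary and the exact string form written by ``to_csv`` may vary by version.

    from io import StringIO
    import numpy as np
    from pandas import DataFrame, read_csv, to_timedelta

    df = DataFrame({'td': to_timedelta(np.arange(3), unit='d')})
    text = df.to_csv(index=False)            # timedeltas are written in their standard string form
    back = read_csv(StringIO(text))          # comes back as plain strings (object dtype)
    back['td'] = to_timedelta(back['td'])    # restore the timedelta64[ns] dtype
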
13 changes: 13 additions & 0 deletions pandas/__init__.py
@@ -18,6 +18,19 @@
from datetime import datetime
import numpy as np

# XXX: HACK for NumPy 1.5.1 to suppress warnings
try:
np.seterr(all='ignore')
# np.set_printoptions(suppress=True)
except Exception: # pragma: no cover
pass

# numpy versioning
from distutils.version import LooseVersion
_np_version = np.version.short_version
_np_version_under1p6 = LooseVersion(_np_version) < '1.6'
_np_version_under1p7 = LooseVersion(_np_version) < '1.7'

from pandas.version import version as __version__
from pandas.info import __doc__

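The version flags added to ``pandas/__init__.py`` above are plain module-level booleans; a sketch of the kind of gate they enable (the helper name here is hypothetical, but it mirrors the check in ``generic.abs`` further down).

    from distutils.version import LooseVersion
    import numpy as np

    _np_version_under1p7 = LooseVersion(np.version.short_version) < '1.7'

    def _upcast_td_result(values):
        # hypothetical helper: on numpy < 1.7 some timedelta operations come
        # back as m8[us], so upcast to the canonical m8[ns] pandas uses
        if _np_version_under1p7 and values.dtype == np.dtype('m8[us]'):
            return values.astype('m8[ns]')
        return values
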
104 changes: 4 additions & 100 deletions pandas/core/common.py
@@ -11,35 +11,20 @@
import pandas.algos as algos
import pandas.lib as lib
import pandas.tslib as tslib
from distutils.version import LooseVersion
from pandas import compat
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map
from datetime import timedelta

from pandas.core.config import get_option
from pandas.core import array as pa


# XXX: HACK for NumPy 1.5.1 to suppress warnings
try:
np.seterr(all='ignore')
# np.set_printoptions(suppress=True)
except Exception: # pragma: no cover
pass


class PandasError(Exception):
pass


class AmbiguousIndexError(PandasError, KeyError):
pass

# versioning
_np_version = np.version.short_version
_np_version_under1p6 = LooseVersion(_np_version) < '1.6'
_np_version_under1p7 = LooseVersion(_np_version) < '1.7'

_POSSIBLY_CAST_DTYPES = set([np.dtype(t)
for t in ['M8[ns]', 'm8[ns]', 'O', 'int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64']])

@@ -704,34 +689,13 @@ def diff(arr, n, axis=0):

return out_arr


def _coerce_scalar_to_timedelta_type(r):
# kludgy here until we have a timedelta scalar
# handle the numpy < 1.7 case

if is_integer(r):
r = timedelta(microseconds=r/1000)

if _np_version_under1p7:
if not isinstance(r, timedelta):
raise AssertionError("Invalid type for timedelta scalar: %s" % type(r))
if compat.PY3:
# convert to microseconds in timedelta64
r = np.timedelta64(int(r.total_seconds()*1e9 + r.microseconds*1000))
else:
return r

if isinstance(r, timedelta):
r = np.timedelta64(r)
elif not isinstance(r, np.timedelta64):
raise AssertionError("Invalid type for timedelta scalar: %s" % type(r))
return r.astype('timedelta64[ns]')

def _coerce_to_dtypes(result, dtypes):
""" given a dtypes and a result set, coerce the result elements to the dtypes """
if len(result) != len(dtypes):
raise AssertionError("_coerce_to_dtypes requires equal len arrays")

from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type

def conv(r,dtype):
try:
if isnull(r):
@@ -1324,68 +1288,6 @@ def _possibly_convert_platform(values):

return values


def _possibly_cast_to_timedelta(value, coerce=True):
""" try to cast to timedelta64, if already a timedeltalike, then make
sure that we are [ns] (as numpy 1.6.2 is very buggy in this regards,
don't force the conversion unless coerce is True

if coerce='compat' force a compatibilty coercerion (to timedeltas) if needeed
"""

# coercion compatability
if coerce == 'compat' and _np_version_under1p7:

def convert(td, dtype):

# we have an array with a non-object dtype
if hasattr(td,'item'):
td = td.astype(np.int64).item()
if td == tslib.iNaT:
return td
if dtype == 'm8[us]':
td *= 1000
return td

if td == tslib.compat_NaT:
return tslib.iNaT

# convert td value to a nanosecond value
d = td.days
s = td.seconds
us = td.microseconds

if dtype == 'object' or dtype == 'm8[ns]':
td = 1000*us + (s + d * 24 * 3600) * 10 ** 9
else:
raise ValueError("invalid conversion of dtype in np < 1.7 [%s]" % dtype)

return td

# < 1.7 coercion
if not is_list_like(value):
value = np.array([ value ])

dtype = value.dtype
return np.array([ convert(v,dtype) for v in value ], dtype='m8[ns]')

# deal with numpy not being able to handle certain timedelta operations
if isinstance(value, (ABCSeries, np.ndarray)) and value.dtype.kind == 'm':
if value.dtype != 'timedelta64[ns]':
value = value.astype('timedelta64[ns]')
return value

# we don't have a timedelta, but we want to try to convert to one (but
# don't force it)
if coerce:
new_value = tslib.array_to_timedelta64(
_values_from_object(value).astype(object), coerce=False)
if new_value.dtype == 'i8':
value = np.array(new_value, dtype='timedelta64[ns]')

return value


def _possibly_cast_to_datetime(value, dtype, coerce=False):
""" try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """

@@ -1423,6 +1325,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
from pandas.tseries.tools import to_datetime
value = to_datetime(value, coerce=coerce).values
elif is_timedelta64:
from pandas.tseries.timedeltas import _possibly_cast_to_timedelta
value = _possibly_cast_to_timedelta(value)
except:
pass
@@ -1448,6 +1351,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
except:
pass
elif inferred_type in ['timedelta', 'timedelta64']:
from pandas.tseries.timedeltas import _possibly_cast_to_timedelta
value = _possibly_cast_to_timedelta(value)

return value
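The ``from pandas.tseries.timedeltas import ...`` statements above are placed inside function bodies rather than at module top. A sketch of why that deferred-import pattern helps, assuming (as seems likely) that the new module itself imports from ``pandas.core.common`` at load time; the function name is hypothetical.

    def _cast_if_timedelta_sketch(value, inferred_type):
        # A module-level import of pandas.tseries.timedeltas would run while
        # pandas.core.common is still initialising and create a circular import;
        # importing at call time sidesteps the cycle, and after the first call
        # the module is cached in sys.modules, so the cost is negligible.
        if inferred_type in ('timedelta', 'timedelta64'):
            from pandas.tseries.timedeltas import _possibly_cast_to_timedelta
            return _possibly_cast_to_timedelta(value)
        return value
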
4 changes: 2 additions & 2 deletions pandas/core/generic.py
@@ -13,7 +13,7 @@
from pandas.tseries.index import DatetimeIndex
from pandas.core.internals import BlockManager
import pandas.core.common as com
from pandas import compat
from pandas import compat, _np_version_under1p7
from pandas.compat import map, zip, lrange
from pandas.core.common import (isnull, notnull, is_list_like,
_values_from_object,
@@ -1908,7 +1908,7 @@ def abs(self):
obj = np.abs(self)

# numpy 1.6 compatibility hack
if com._np_version_under1p7:
if _np_version_under1p7:
if self.ndim == 1:
if obj.dtype == 'm8[us]':
obj = obj.astype('m8[ns]')
16 changes: 9 additions & 7 deletions pandas/core/series.py
@@ -19,6 +19,7 @@
_asarray_tuplesafe, is_integer_dtype,
_NS_DTYPE, _TD_DTYPE,
_infer_dtype_from_scalar, is_list_like, _values_from_object,
_possibly_cast_to_datetime, _possibly_castable, _possibly_convert_platform,
ABCSparseArray)
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
_ensure_index, _handle_legacy_indexes)
@@ -32,6 +33,7 @@
from pandas.tseries.index import DatetimeIndex
from pandas.tseries.period import PeriodIndex, Period
from pandas.tseries.offsets import DateOffset
from pandas.tseries.timedeltas import _possibly_cast_to_timedelta
from pandas import compat
from pandas.util.terminal import get_terminal_size
from pandas.compat import zip, lzip, u, OrderedDict
@@ -142,7 +144,7 @@ def _convert_to_array(self, values, name=None):
values = values.to_series()
elif inferred_type in ('timedelta', 'timedelta64'):
# have a timedelta, convert to ns here
values = com._possibly_cast_to_timedelta(values, coerce=coerce)
values = _possibly_cast_to_timedelta(values, coerce=coerce)
elif inferred_type == 'integer':
# py3 compat where dtype is 'm' but is an integer
if values.dtype.kind == 'm':
@@ -160,7 +162,7 @@ def _convert_to_array(self, values, name=None):
raise TypeError("cannot use a non-absolute DateOffset in "
"datetime/timedelta operations [{0}]".format(
','.join([ com.pprint_thing(v) for v in values[mask] ])))
values = com._possibly_cast_to_timedelta(os, coerce=coerce)
values = _possibly_cast_to_timedelta(os, coerce=coerce)
else:
raise TypeError("incompatible type [{0}] for a datetime/timedelta operation".format(pa.array(values).dtype))

@@ -3215,11 +3217,11 @@ def _try_cast(arr, take_fast_path):

# perf shortcut as this is the most common case
if take_fast_path:
if com._possibly_castable(arr) and not copy and dtype is None:
if _possibly_castable(arr) and not copy and dtype is None:
return arr

try:
arr = com._possibly_cast_to_datetime(arr, dtype)
arr = _possibly_cast_to_datetime(arr, dtype)
subarr = pa.array(arr, dtype=dtype, copy=copy)
except (ValueError, TypeError):
if dtype is not None and raise_cast_failure:
@@ -3266,9 +3268,9 @@ def _try_cast(arr, take_fast_path):
subarr = lib.maybe_convert_objects(subarr)

else:
subarr = com._possibly_convert_platform(data)
subarr = _possibly_convert_platform(data)

subarr = com._possibly_cast_to_datetime(subarr, dtype)
subarr = _possibly_cast_to_datetime(subarr, dtype)

else:
subarr = _try_cast(data, False)
@@ -3285,7 +3287,7 @@ def _try_cast(arr, take_fast_path):
dtype, value = _infer_dtype_from_scalar(value)
else:
# need to possibly convert the value here
value = com._possibly_cast_to_datetime(value, dtype)
value = _possibly_cast_to_datetime(value, dtype)

subarr = pa.empty(len(index), dtype=dtype)
subarr.fill(value)
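A small sketch of the user-visible arithmetic that the ``_convert_to_array`` paths above serve; 0.13-era semantics assumed, though later versions behave the same way.

    from datetime import timedelta
    from pandas import Series, Timestamp, date_range

    s = Series(date_range('2012-01-01', periods=3))
    td = s - Timestamp('2012-01-01')         # datetime - datetime -> timedelta64[ns] Series
    td + timedelta(minutes=5, seconds=3)     # datetime.timedelta scalars are coerced as well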