From 3f509ac5a062616cb97fe2477a4e53545b559022 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 25 Sep 2013 12:53:32 -0400 Subject: [PATCH 1/4] BUG: allow Timestamp comparisons on the left Now tested with Series and DataFrame --- doc/source/release.rst | 2 + pandas/tests/test_frame.py | 25 ++++++++++++ pandas/tests/test_series.py | 69 ++++++++++++++++++++++++++++++++ pandas/tslib.pyx | 80 +++++++++++++++++++++++++++---------- 4 files changed, 156 insertions(+), 20 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 3b5bb04344d25..74e54526cfe9a 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -487,6 +487,8 @@ Bug Fixes - Fix repr for DateOffset. No longer show duplicate entries in kwds. Removed unused offset fields. (:issue:`4638`) - Fixed wrong index name during read_csv if using usecols. Applies to c parser only. (:issue:`4201`) + - ``Timestamp`` objects can now appear on the left hand side of a comparison + operation with a ``Series`` or ``DataFrame`` object (:issue:`4982`). 
pandas 0.12.0 ------------- diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 82be82ea57dae..a6f806d5ce097 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4335,6 +4335,31 @@ def check(df,df2): df2 = DataFrame({'a': date_range('20010101', periods=len(df)), 'b': date_range('20100101', periods=len(df))}) check(df,df2) + def test_timestamp_compare(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH4982 + df = DataFrame({'dates1': date_range('20010101', periods=10), + 'dates2': date_range('20010102', periods=10), + 'intcol': np.random.randint(1000000000, size=10), + 'floatcol': np.random.randn(10), + 'stringcol': list(tm.rands(10))}) + df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT + ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', + 'ne': 'ne'} + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + expected = left_f(df, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), df) + tm.assert_frame_equal(result, expected) + + # nats + expected = left_f(df, Timestamp('nat')) + result = right_f(Timestamp('nat'), df) + tm.assert_frame_equal(result, expected) + def test_modulo(self): # GH3590, modulo as ints diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 98fa5c0a56ccd..045a3da709812 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2059,6 +2059,42 @@ def check_comparators(series, other): check_comparators(self.ts, 5) check_comparators(self.ts, self.ts + 1) + def test_timestamp_compare(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH4982 + df = DataFrame({'dates': date_range('20010101', periods=10)}) + s = df.dates.copy() + + s[0] = pd.Timestamp('nat') + s[3] = pd.Timestamp('nat') + + ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + + for left, right in ops.items(): + left_f = 
getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + expected = left_f(df.dates, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), df.dates) + tm.assert_series_equal(result, expected) + + # nats + expected = left_f(df.dates, Timestamp('nat')) + result = right_f(Timestamp('nat'), df.dates) + tm.assert_series_equal(result, expected) + + # compare to timestamp with series containing nats + expected = left_f(s, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), s) + tm.assert_series_equal(result, expected) + + # compare to nat with series containing nats + expected = left_f(s, Timestamp('nat')) + result = right_f(Timestamp('nat'), s) + tm.assert_series_equal(result, expected) + + def test_operators_empty_int_corner(self): s1 = Series([], [], dtype=np.int32) s2 = Series({'x': 0.}) @@ -4990,6 +5026,39 @@ def test_numpy_unique(self): result = np.unique(self.ts) +def test_timestamp_compare_scalars(): + # case where ndim == 0 + lhs = np.datetime64(datetime(2013, 12, 6)) + rhs = Timestamp('now') + nat = Timestamp('nat') + + ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', + 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + if pd._np_version_under1p7: + # you have to convert to timestamp for this to work with numpy + # scalars + expected = left_f(Timestamp(lhs), rhs) + + # otherwise a TypeError is thrown + if left not in ('eq', 'ne'): + with tm.assertRaises(TypeError): + left_f(lhs, rhs) + else: + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + tm.assert_equal(result, expected) + + expected = left_f(rhs, nat) + result = right_f(nat, rhs) + tm.assert_equal(result, expected) + + class TestSeriesNonUnique(unittest.TestCase): _multiprocess_can_split_ = True diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 075102dd63100..381ae3eebdb3d 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -457,6 +457,7 @@ cdef class 
_Timestamp(datetime): def __richcmp__(_Timestamp self, object other, int op): cdef _Timestamp ots + cdef int ndim = getattr(other, 'ndim', -1) if isinstance(other, _Timestamp): ots = other @@ -470,13 +471,33 @@ cdef class _Timestamp(datetime): except ValueError: return self._compare_outside_nanorange(other, op) else: - if op == 2: - return False - elif op == 3: - return True + if ndim != -1: + if ndim == 0: + if isinstance(other, np.datetime64): + other = Timestamp(other) + else: + raise TypeError("Cannot compare Timestamp with type" + " %r" % type(other).__name__) + if op == 2: # == + return other == self + elif op == 3: # != + return other != self + elif op == 0: # < + return other > self + elif op == 1: # <= + return other >= self + elif op == 4: # > + return other < self + elif op == 5: # >= + return other <= self else: - raise TypeError('Cannot compare Timestamp with ' - '{0!r}'.format(other.__class__.__name__)) + if op == 2: + return False + elif op == 3: + return True + else: + raise TypeError('Cannot compare Timestamp with ' + '{0!r}'.format(other.__class__.__name__)) self._assert_tzawareness_compat(other) @@ -589,21 +610,40 @@ cdef class _NaT(_Timestamp): def __richcmp__(_NaT self, object other, int op): # if not isinstance(other, (_NaT, _Timestamp)): # raise TypeError('Cannot compare %s with NaT' % type(other)) + cdef int ndim = getattr(other, 'ndim', -1) - if op == 2: # == - return False - elif op == 3: # != - return True - elif op == 0: # < - return False - elif op == 1: # <= - return False - elif op == 4: # > - return False - elif op == 5: # >= - return False - - + if ndim != -1: + if ndim == 0: + if isinstance(other, np.datetime64): + other = Timestamp(other) + else: + raise TypeError("Cannot compare NaT with type " + "%r" % type(other).__name__) + if op == 2: # == + return other == self + elif op == 3: # != + return other != self + elif op == 0: # < + return other > self + elif op == 1: # <= + return other >= self + elif op == 4: # > + return other < 
self + elif op == 5: # >= + return other <= self + else: + if op == 2: # == + return False + elif op == 3: # != + return True + elif op == 0: # < + return False + elif op == 1: # <= + return False + elif op == 4: # > + return False + elif op == 5: # >= + return False def _delta_to_nanoseconds(delta): From a8f68f90736124ac5246c4b525a406cb081219ce Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 25 Sep 2013 14:32:33 -0400 Subject: [PATCH 2/4] BUG: Fix Timestamp compares to NaT For example Timestamp('now') > pd.NaT should return the same result as pd.NaT < Timestamp('now') but before this commit it didn't because when Timestamp was on the left, the values were compared whereas if NaT is on the left then the truth values are hard-coded based on rules like NaN. --- pandas/tslib.pyx | 166 +++++++++++++++++++++-------------------------- 1 file changed, 73 insertions(+), 93 deletions(-) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 381ae3eebdb3d..18d2c59ae2778 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -9,12 +9,15 @@ from cpython cimport ( PyTypeObject, PyFloat_Check, PyObject_RichCompareBool, - PyString_Check + PyObject_RichCompare, + PyString_Check, + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE ) # Cython < 0.17 doesn't have this in cpython cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) + int PySlice_Check(object) from libc.stdlib cimport free @@ -30,9 +33,6 @@ from datetime import timedelta, datetime from datetime import time as datetime_time from pandas.compat import parse_date -cdef extern from "Python.h": - int PySlice_Check(object) - # initialize numpy import_array() #import_ufunc() @@ -437,9 +437,35 @@ def apply_offset(ndarray[object] values, object offset): result = np.empty(n, dtype='M8[ns]') new_values = result.view('i8') - pass +cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: + if op == Py_EQ: + return lhs == rhs + elif op == Py_NE: + return lhs != rhs + elif op == Py_LT: + return lhs < rhs + 
elif op == Py_LE: + return lhs <= rhs + elif op == Py_GT: + return lhs > rhs + elif op == Py_GE: + return lhs >= rhs + + +cdef int _reverse_ops[6] + +_reverse_ops[Py_LT] = Py_GT +_reverse_ops[Py_LE] = Py_GE +_reverse_ops[Py_EQ] = Py_EQ +_reverse_ops[Py_NE] = Py_NE +_reverse_ops[Py_GT] = Py_LT +_reverse_ops[Py_GE] = Py_LE + + +cdef char* _NDIM_STRING = "ndim" + # This is PITA. Because we inherit from datetime, which has very specific # construction requirements, we need to do object instantiation in python # (see Timestamp class above). This will serve as a C extension type that @@ -456,12 +482,15 @@ cdef class _Timestamp(datetime): return datetime.__hash__(self) def __richcmp__(_Timestamp self, object other, int op): - cdef _Timestamp ots - cdef int ndim = getattr(other, 'ndim', -1) + cdef: + _Timestamp ots + int ndim = getattr(other, _NDIM_STRING, -1) if isinstance(other, _Timestamp): + if isinstance(other, NaTType): + return PyObject_RichCompare(other, self, _reverse_ops[op]) ots = other - elif type(other) is datetime: + elif isinstance(other, datetime): if self.nanosecond == 0: val = self.to_datetime() return PyObject_RichCompareBool(val, other, op) @@ -476,74 +505,40 @@ cdef class _Timestamp(datetime): if isinstance(other, np.datetime64): other = Timestamp(other) else: - raise TypeError("Cannot compare Timestamp with type" - " %r" % type(other).__name__) - if op == 2: # == - return other == self - elif op == 3: # != - return other != self - elif op == 0: # < - return other > self - elif op == 1: # <= - return other >= self - elif op == 4: # > - return other < self - elif op == 5: # >= - return other <= self + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, + type(other).__name__)) + return PyObject_RichCompare(other, self, _reverse_ops[op]) else: - if op == 2: + if op == Py_EQ: return False - elif op == 3: + elif op == Py_NE: return True - else: - raise TypeError('Cannot compare Timestamp with ' - 
'{0!r}'.format(other.__class__.__name__)) + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, type(other).__name__)) self._assert_tzawareness_compat(other) - - if op == 2: # == - return self.value == ots.value - elif op == 3: # != - return self.value != ots.value - elif op == 0: # < - return self.value < ots.value - elif op == 1: # <= - return self.value <= ots.value - elif op == 4: # > - return self.value > ots.value - elif op == 5: # >= - return self.value >= ots.value + return _cmp_scalar(self.value, ots.value, op) cdef _compare_outside_nanorange(self, object other, int op): - dtval = self.to_datetime() + cdef datetime dtval = self.to_datetime() self._assert_tzawareness_compat(other) if self.nanosecond == 0: - if op == 2: # == - return dtval == other - elif op == 3: # != - return dtval != other - elif op == 0: # < - return dtval < other - elif op == 1: # <= - return dtval <= other - elif op == 4: # > - return dtval > other - elif op == 5: # >= - return dtval >= other + return PyObject_RichCompare(dtval, other, op) else: - if op == 2: # == + if op == Py_EQ: return False - elif op == 3: # != + elif op == Py_NE: return True - elif op == 0: # < + elif op == Py_LT: return dtval < other - elif op == 1: # <= + elif op == Py_LE: return dtval < other - elif op == 4: # > + elif op == Py_GT: return dtval >= other - elif op == 5: # >= + elif op == Py_GE: return dtval >= other cdef _assert_tzawareness_compat(self, object other): @@ -601,6 +596,16 @@ cdef inline bint is_timestamp(object o): return Py_TYPE(o) == ts_type # isinstance(o, Timestamp) +cdef bint _nat_scalar_rules[6] + +_nat_scalar_rules[Py_EQ] = False +_nat_scalar_rules[Py_NE] = True +_nat_scalar_rules[Py_LT] = False +_nat_scalar_rules[Py_LE] = False +_nat_scalar_rules[Py_GT] = False +_nat_scalar_rules[Py_GE] = False + + cdef class _NaT(_Timestamp): def __hash__(_NaT self): @@ -608,42 +613,17 @@ cdef class _NaT(_Timestamp): return hash(self.value) def __richcmp__(_NaT self, object 
other, int op): - # if not isinstance(other, (_NaT, _Timestamp)): - # raise TypeError('Cannot compare %s with NaT' % type(other)) cdef int ndim = getattr(other, 'ndim', -1) - if ndim != -1: - if ndim == 0: - if isinstance(other, np.datetime64): - other = Timestamp(other) - else: - raise TypeError("Cannot compare NaT with type " - "%r" % type(other).__name__) - if op == 2: # == - return other == self - elif op == 3: # != - return other != self - elif op == 0: # < - return other > self - elif op == 1: # <= - return other >= self - elif op == 4: # > - return other < self - elif op == 5: # >= - return other <= self - else: - if op == 2: # == - return False - elif op == 3: # != - return True - elif op == 0: # < - return False - elif op == 1: # <= - return False - elif op == 4: # > - return False - elif op == 5: # >= - return False + if ndim == -1: + return _nat_scalar_rules[op] + + if ndim == 0: + if isinstance(other, np.datetime64): + other = Timestamp(other) + else: + raise TypeError("asdf") + return PyObject_RichCompare(other, self, _reverse_ops[op]) def _delta_to_nanoseconds(delta): From ebbe7bc2f2cbbdb3fd3a1387911c8c3759ba52aa Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 27 Sep 2013 09:01:21 -0400 Subject: [PATCH 3/4] PERF: add vbench for DatetimeIndex unions/intersections --- pandas/tslib.pyx | 20 ++++++++++---------- vb_suite/index_object.py | 10 ++++++++++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 18d2c59ae2778..5d09d9badb6eb 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -475,11 +475,10 @@ cdef class _Timestamp(datetime): int64_t value, nanosecond object offset # frequency reference - def __hash__(self): + def __hash__(_Timestamp self): if self.nanosecond: return hash(self.value) - else: - return datetime.__hash__(self) + return datetime.__hash__(self) def __richcmp__(_Timestamp self, object other, int op): cdef: @@ -520,13 +519,14 @@ cdef class _Timestamp(datetime): 
self._assert_tzawareness_compat(other) return _cmp_scalar(self.value, ots.value, op) - cdef _compare_outside_nanorange(self, object other, int op): + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1: cdef datetime dtval = self.to_datetime() self._assert_tzawareness_compat(other) if self.nanosecond == 0: - return PyObject_RichCompare(dtval, other, op) + return PyObject_RichCompareBool(dtval, other, op) else: if op == Py_EQ: return False @@ -541,15 +541,15 @@ cdef class _Timestamp(datetime): elif op == Py_GE: return dtval >= other - cdef _assert_tzawareness_compat(self, object other): + cdef void _assert_tzawareness_compat(_Timestamp self, object other): if self.tzinfo is None: if other.tzinfo is not None: - raise Exception('Cannot compare tz-naive and ' - 'tz-aware timestamps') + raise ValueError('Cannot compare tz-naive and tz-aware ' + 'timestamps') elif other.tzinfo is None: - raise Exception('Cannot compare tz-naive and tz-aware timestamps') + raise ValueError('Cannot compare tz-naive and tz-aware timestamps') - cpdef to_datetime(self): + cpdef datetime to_datetime(_Timestamp self): cdef: pandas_datetimestruct dts _TSObject ts diff --git a/vb_suite/index_object.py b/vb_suite/index_object.py index cf87a9af500fb..8b348ddc6e6cc 100644 --- a/vb_suite/index_object.py +++ b/vb_suite/index_object.py @@ -22,6 +22,16 @@ index_datetime_intersection = Benchmark("rng.intersection(rng2)", setup) index_datetime_union = Benchmark("rng.union(rng2)", setup) +setup = common_setup + """ +rng = date_range('1/1/2000', periods=10000, freq='T') +rng2 = rng[:-1] +""" + +datetime_index_intersection = Benchmark("rng.intersection(rng2)", setup, + start_date=datetime(2013, 9, 27)) +datetime_index_union = Benchmark("rng.union(rng2)", setup, + start_date=datetime(2013, 9, 27)) + # integers setup = common_setup + """ N = 1000000 From d591a80984da6c1fd71020165560bf48c0b05fbf Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 27 Sep 2013 09:18:01 
-0400 Subject: [PATCH 4/4] PERF: add Timestamp <-> Series comparison vbench --- pandas/tests/test_series.py | 69 ------------------------ pandas/tseries/tests/test_timeseries.py | 71 +++++++++++++++++++++++++ pandas/tslib.pyx | 21 +++++--- vb_suite/binary_ops.py | 12 +++++ 4 files changed, 98 insertions(+), 75 deletions(-) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 045a3da709812..98fa5c0a56ccd 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2059,42 +2059,6 @@ def check_comparators(series, other): check_comparators(self.ts, 5) check_comparators(self.ts, self.ts + 1) - def test_timestamp_compare(self): - # make sure we can compare Timestamps on the right AND left hand side - # GH4982 - df = DataFrame({'dates': date_range('20010101', periods=10)}) - s = df.dates.copy() - - s[0] = pd.Timestamp('nat') - s[3] = pd.Timestamp('nat') - - ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - - # no nats - expected = left_f(df.dates, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), df.dates) - tm.assert_series_equal(result, expected) - - # nats - expected = left_f(df.dates, Timestamp('nat')) - result = right_f(Timestamp('nat'), df.dates) - tm.assert_series_equal(result, expected) - - # compare to timestamp with series containing nats - expected = left_f(s, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), s) - tm.assert_series_equal(result, expected) - - # compare to nat with series containing nats - expected = left_f(s, Timestamp('nat')) - result = right_f(Timestamp('nat'), s) - tm.assert_series_equal(result, expected) - - def test_operators_empty_int_corner(self): s1 = Series([], [], dtype=np.int32) s2 = Series({'x': 0.}) @@ -5026,39 +4990,6 @@ def test_numpy_unique(self): result = np.unique(self.ts) -def test_timestamp_compare_scalars(): - # case where ndim == 0 - lhs = 
np.datetime64(datetime(2013, 12, 6)) - rhs = Timestamp('now') - nat = Timestamp('nat') - - ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', - 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - - if pd._np_version_under1p7: - # you have to convert to timestamp for this to work with numpy - # scalars - expected = left_f(Timestamp(lhs), rhs) - - # otherwise a TypeError is thrown - if left not in ('eq', 'ne'): - with tm.assertRaises(TypeError): - left_f(lhs, rhs) - else: - expected = left_f(lhs, rhs) - - result = right_f(rhs, lhs) - tm.assert_equal(result, expected) - - expected = left_f(rhs, nat) - result = right_f(nat, rhs) - tm.assert_equal(result, expected) - - class TestSeriesNonUnique(unittest.TestCase): _multiprocess_can_split_ = True diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 51a010f9d4ead..0e5e3d1922ec4 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3,6 +3,7 @@ import sys import os import unittest +import operator import nose @@ -2010,6 +2011,7 @@ def test_join_self(self): joined = index.join(index, how=kind) self.assert_(index is joined) + class TestDatetime64(unittest.TestCase): """ Also test supoprt for datetime64[ns] in Series / DataFrame @@ -2507,6 +2509,74 @@ def test_hash_equivalent(self): stamp = Timestamp(datetime(2011, 1, 1)) self.assertEquals(d[stamp], 5) + def test_timestamp_compare_scalars(self): + # case where ndim == 0 + lhs = np.datetime64(datetime(2013, 12, 6)) + rhs = Timestamp('now') + nat = Timestamp('nat') + + ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', + 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + if pd._np_version_under1p7: + # you have to convert to timestamp for this to work with numpy + # scalars + expected = left_f(Timestamp(lhs), 
rhs) + + # otherwise a TypeError is thrown + if left not in ('eq', 'ne'): + with tm.assertRaises(TypeError): + left_f(lhs, rhs) + else: + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + self.assertEqual(result, expected) + + expected = left_f(rhs, nat) + result = right_f(nat, rhs) + self.assertEqual(result, expected) + + def test_timestamp_compare_series(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH4982 + s = Series(date_range('20010101', periods=10), name='dates') + s_nat = s.copy(deep=True) + + s[0] = pd.Timestamp('nat') + s[3] = pd.Timestamp('nat') + + ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + expected = left_f(s, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), s) + tm.assert_series_equal(result, expected) + + # nats + expected = left_f(s, Timestamp('nat')) + result = right_f(Timestamp('nat'), s) + tm.assert_series_equal(result, expected) + + # compare to timestamp with series containing nats + expected = left_f(s_nat, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), s_nat) + tm.assert_series_equal(result, expected) + + # compare to nat with series containing nats + expected = left_f(s_nat, Timestamp('nat')) + result = right_f(Timestamp('nat'), s_nat) + tm.assert_series_equal(result, expected) + + class TestSlicing(unittest.TestCase): def test_slice_year(self): @@ -2775,6 +2845,7 @@ def test_frame_apply_dont_convert_datetime64(self): self.assertTrue(df.x1.dtype == 'M8[ns]') + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 5d09d9badb6eb..99b09446be232 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -350,6 +350,11 @@ NaT = NaTType() iNaT = util.get_nat() + +cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) 
except -1: + return _nat_scalar_rules[op] + + cdef _tz_format(object obj, object zone): try: return obj.strftime(' %%Z, tz=%s' % zone) @@ -464,7 +469,7 @@ _reverse_ops[Py_GT] = Py_LT _reverse_ops[Py_GE] = Py_LE -cdef char* _NDIM_STRING = "ndim" +cdef str _NDIM_STRING = "ndim" # This is PITA. Because we inherit from datetime, which has very specific # construction requirements, we need to do object instantiation in python @@ -483,11 +488,11 @@ cdef class _Timestamp(datetime): def __richcmp__(_Timestamp self, object other, int op): cdef: _Timestamp ots - int ndim = getattr(other, _NDIM_STRING, -1) + int ndim if isinstance(other, _Timestamp): - if isinstance(other, NaTType): - return PyObject_RichCompare(other, self, _reverse_ops[op]) + if isinstance(other, _NaT): + return _cmp_nat_dt(other, self, _reverse_ops[op]) ots = other elif isinstance(other, datetime): if self.nanosecond == 0: @@ -499,6 +504,8 @@ cdef class _Timestamp(datetime): except ValueError: return self._compare_outside_nanorange(other, op) else: + ndim = getattr(other, _NDIM_STRING, -1) + if ndim != -1: if ndim == 0: if isinstance(other, np.datetime64): @@ -541,7 +548,8 @@ cdef class _Timestamp(datetime): elif op == Py_GE: return dtval >= other - cdef void _assert_tzawareness_compat(_Timestamp self, object other): + cdef int _assert_tzawareness_compat(_Timestamp self, + object other) except -1: if self.tzinfo is None: if other.tzinfo is not None: raise ValueError('Cannot compare tz-naive and tz-aware ' @@ -622,7 +630,8 @@ cdef class _NaT(_Timestamp): if isinstance(other, np.datetime64): other = Timestamp(other) else: - raise TypeError("asdf") + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, type(other).__name__)) return PyObject_RichCompare(other, self, _reverse_ops[op]) diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py index 3f076f9f922a3..8293f650425e3 100644 --- a/vb_suite/binary_ops.py +++ b/vb_suite/binary_ops.py @@ -102,3 +102,15 @@ frame_multi_and_no_ne 
= \ Benchmark("df[(df>0) & (df2>0)]", setup, name='frame_multi_and_no_ne',cleanup="expr.set_use_numexpr(True)", start_date=datetime(2013, 2, 26)) + +setup = common_setup + """ +N = 1000000 +halfway = N // 2 - 1 +s = Series(date_range('20010101', periods=N, freq='D')) +ts = s[halfway] +""" + +timestamp_series_compare = Benchmark("ts >= s", setup, + start_date=datetime(2013, 9, 27)) +series_timestamp_compare = Benchmark("s <= ts", setup, + start_date=datetime(2012, 2, 21))