Skip to content

Commit a866db4

Browse files
committed
PERF: Improve performance of CustomBusinessDay
1 parent c8efd6b commit a866db4

File tree

4 files changed

+146
-78
lines changed

4 files changed

+146
-78
lines changed

doc/source/v0.15.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,7 @@ Performance
563563
- Performance improvements in ``StataWriter`` when writing large files (:issue:`8079`)
564564
- Performance and memory usage improvements in multi-key ``groupby`` (:issue:`8128`)
565565
- Performance improvements in groupby ``.agg`` and ``.apply`` where builtins max/min were not mapped to numpy/cythonized versions (:issue:`7722`)
566-
566+
- Performance improvement in ``CustomBusinessDay``, ``CustomBusinessMonth`` (:issue:`8236`)
567567

568568

569569

pandas/tseries/offsets.py

+96-61
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,66 @@ def _is_normalized(dt):
9494
return False
9595
return True
9696

97+
def _to_dt64(dt, dtype='datetime64'):
98+
# Currently
99+
# > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]')
100+
# numpy.datetime64('2013-05-01T02:00:00.000000+0200')
101+
# Thus astype is needed to cast datetime to datetime64[D]
102+
103+
if getattr(dt, 'tzinfo', None) is not None:
104+
i8 = tslib.pydt_to_i8(dt)
105+
dt = tslib.tz_convert_single(i8, 'UTC', dt.tzinfo)
106+
dt = Timestamp(dt)
107+
dt = np.datetime64(dt)
108+
if dt.dtype.name != dtype:
109+
dt = dt.astype(dtype)
110+
return dt
111+
112+
def from_various_func(cls, **kwds):
    """
    Converts various holiday inputs to format readable for np.busdaycalendar
    and creates an instance of CustomBusinessDay, CustomBusinessMonth...

    Parameters
    ----------
    cls : class
        Class to instantiate; it is called as
        ``cls(busdaycalendar=<np.busdaycalendar>, **remaining_kwds)``.
    weekmask : str, Default 'Mon Tue Wed Thu Fri'
        weekmask of valid business days, passed to ``numpy.busdaycalendar``
    holidays : list
        list/array of dates to exclude from the set of valid business days,
        passed to ``numpy.busdaycalendar``
    calendar : HolidayCalendar instance
        instance of AbstractHolidayCalendar that provide the list of holidays.
        Takes precedence over ``holidays`` when both are given.
    **kwds
        Any other keyword is forwarded unchanged to ``cls``.

    Returns
    -------
    Instance of ``cls``.

    Raises
    ------
    NotImplementedError
        If the installed numpy does not provide ``busdaycalendar``
        (i.e. numpy < 1.7.0).
    """
    # Feature-detect instead of wrapping the call in a bare ``except:`` and
    # parsing version strings: ``np.busdaycalendar`` exists from numpy 1.7.0.
    if not hasattr(np, 'busdaycalendar'):
        raise NotImplementedError("CustomBusinessDay requires numpy >= "
                                  "1.7.0. Current version: " +
                                  np.__version__)

    # Consume both holiday sources so that neither is forwarded to ``cls``;
    # only the derived busdaycalendar is passed on.
    calendar = kwds.pop('calendar', None)
    holidays = kwds.pop('holidays', None)
    if calendar is not None:
        holidays = calendar.holidays()

    bdaycal_kwds = {}
    if holidays is not None:
        # numpy wants day-resolution datetime64 values
        bdaycal_kwds['holidays'] = tuple(sorted(
            _to_dt64(dt, dtype='datetime64[D]') for dt in holidays))
    if 'weekmask' in kwds:
        # simply pass on to numpy
        bdaycal_kwds['weekmask'] = kwds.pop('weekmask')

    busdaycalendar = np.busdaycalendar(**bdaycal_kwds)
    return cls(busdaycalendar=busdaycalendar, **kwds)
154+
155+
156+
97157
#----------------------------------------------------------------------
98158
# DateOffset
99159

@@ -344,17 +404,23 @@ def rollforward(self, dt):
344404
return dt
345405

346406
def onOffset(self, dt):
    """
    Return whether ``dt`` is a member of the dates generated by this offset.

    Parameters
    ----------
    dt : datetime-like
        Value to test; it must support ``dt + self`` and ``- self``.

    Returns
    -------
    bool
        False when ``self.normalize`` is set and ``_is_normalized(dt)`` is
        false; True for a plain ``DateOffset`` or any ``Tick``; otherwise
        whether ``dt`` survives the round trip ``(dt + self) - self``.
    """
    if self.normalize and not _is_normalized(dt):
        return False
    # XXX, see #1395
    # Exact type comparison: only DateOffset itself short-circuits here,
    # its subclasses fall through to the generic check below.
    if type(self) == DateOffset or isinstance(self, Tick):
        return True

    # Default (slow) method for determining if some date is a member of the
    # date range generated by this offset. Subclasses may have this
    # re-implemented in a nicer way.
    a = dt
    b = ((dt + self) - self)
    return a == b
359425

360426
# way to get around weirdness with rule_code
@@ -542,13 +608,9 @@ class CustomBusinessDay(BusinessDay):
542608
offset : timedelta, default timedelta(0)
543609
normalize : bool, default False
544610
Normalize start/end dates to midnight before generating date range
545-
weekmask : str, Default 'Mon Tue Wed Thu Fri'
546-
weekmask of valid business days, passed to ``numpy.busdaycalendar``
547-
holidays : list
548-
list/array of dates to exclude from the set of valid business days,
549-
passed to ``numpy.busdaycalendar``
550-
calendar : HolidayCalendar instance
551-
instance of AbstractHolidayCalendar that provide the list of holidays
611+
busdaycalendar: np.busdaycalendar instance
612+
Use `CustomBusinessDay.from_various` to instantiate from weekmask, list of holidays
613+
or `HolidayCalendar` instance.
552614
"""
553615

554616
_cacheable = False
@@ -559,36 +621,9 @@ def __init__(self, n=1, normalize=False, **kwds):
559621
self.normalize = normalize
560622
self.kwds = kwds
561623
self.offset = kwds.get('offset', timedelta(0))
562-
self.weekmask = kwds.get('weekmask', 'Mon Tue Wed Thu Fri')
563-
564-
if 'calendar' in kwds:
565-
holidays = kwds['calendar'].holidays()
566-
else:
567-
holidays = kwds.get('holidays', [])
568-
holidays = [self._to_dt64(dt, dtype='datetime64[D]') for dt in
569-
holidays]
570-
self.holidays = tuple(sorted(holidays))
571-
self.kwds['holidays'] = self.holidays
624+
self.busdaycalendar = kwds.get('busdaycalendar', np.busdaycalendar())
572625

573-
self._set_busdaycalendar()
574-
575-
def _set_busdaycalendar(self):
576-
if self.holidays:
577-
kwargs = {'weekmask':self.weekmask,'holidays':self.holidays}
578-
else:
579-
kwargs = {'weekmask':self.weekmask}
580-
try:
581-
self.busdaycalendar = np.busdaycalendar(**kwargs)
582-
except:
583-
# Check we have the required numpy version
584-
from distutils.version import LooseVersion
585-
586-
if LooseVersion(np.__version__) < '1.7.0':
587-
raise NotImplementedError("CustomBusinessDay requires numpy >= "
588-
"1.7.0. Current version: " +
589-
np.__version__)
590-
else:
591-
raise
626+
from_various = classmethod(from_various_func)
592627

593628
def __getstate__(self):
594629
"""Return a pickleable state"""
@@ -783,26 +818,26 @@ class CustomBusinessMonthEnd(BusinessMixin, MonthOffset):
783818
offset : timedelta, default timedelta(0)
784819
normalize : bool, default False
785820
Normalize start/end dates to midnight before generating date range
786-
weekmask : str, Default 'Mon Tue Wed Thu Fri'
787-
weekmask of valid business days, passed to ``numpy.busdaycalendar``
788-
holidays : list
789-
list/array of dates to exclude from the set of valid business days,
790-
passed to ``numpy.busdaycalendar``
821+
busdaycalendar: np.busdaycalendar instance
822+
Use `CustomBusinessDay.from_various` to instantiate from weekmask, list of holidays
823+
or `HolidayCalendar` instance.
791824
"""
792825

793826
_cacheable = False
794827
_prefix = 'CBM'
795828
def __init__(self, n=1, normalize=False, **kwds):
796829
self.n = int(n)
797830
self.normalize = normalize
798-
self.kwds = kwds
831+
self.kwds = kwds.copy()
799832
self.offset = kwds.get('offset', timedelta(0))
800-
self.weekmask = kwds.get('weekmask', 'Mon Tue Wed Thu Fri')
801-
self.cbday = CustomBusinessDay(n=self.n, **kwds)
802-
self.m_offset = MonthEnd()
833+
self.cbday = CustomBusinessDay(n=1, normalize=normalize, **kwds)
834+
self.busdaycalendar = kwds.pop('busdaycalendar', np.busdaycalendar())
835+
self.m_offset = MonthEnd(n=1, normalize=normalize, **kwds)
836+
837+
from_various = classmethod(from_various_func)
803838

804839
@apply_wraps
805-
def apply(self,other):
840+
def apply(self, other):
806841
n = self.n
807842
# First move to month offset
808843
cur_mend = self.m_offset.rollforward(other)
@@ -817,11 +852,11 @@ def apply(self,other):
817852
n -= 1
818853
elif other > cur_cmend and n <= -1:
819854
n += 1
820-
821-
new = cur_mend + n * MonthEnd()
855+
856+
new = cur_mend + n * self.m_offset
822857
result = self.cbday.rollback(new)
823858
return result
824-
859+
825860
class CustomBusinessMonthBegin(BusinessMixin, MonthOffset):
826861
"""
827862
**EXPERIMENTAL** DateOffset of one custom business month
@@ -837,26 +872,26 @@ class CustomBusinessMonthBegin(BusinessMixin, MonthOffset):
837872
offset : timedelta, default timedelta(0)
838873
normalize : bool, default False
839874
Normalize start/end dates to midnight before generating date range
840-
weekmask : str, Default 'Mon Tue Wed Thu Fri'
841-
weekmask of valid business days, passed to ``numpy.busdaycalendar``
842-
holidays : list
843-
list/array of dates to exclude from the set of valid business days,
844-
passed to ``numpy.busdaycalendar``
875+
busdaycalendar: np.busdaycalendar instance
876+
Use `CustomBusinessDay.from_various` to instantiate from weekmask, list of holidays
877+
or `HolidayCalendar` instance.
845878
"""
846879

847880
_cacheable = False
848881
_prefix = 'CBMS'
849882
def __init__(self, n=1, normalize=False, **kwds):
850883
self.n = int(n)
851884
self.normalize = normalize
852-
self.kwds = kwds
885+
self.kwds = kwds.copy()
853886
self.offset = kwds.get('offset', timedelta(0))
854-
self.weekmask = kwds.get('weekmask', 'Mon Tue Wed Thu Fri')
855-
self.cbday = CustomBusinessDay(n=self.n, normalize=normalize, **kwds)
856-
self.m_offset = MonthBegin(normalize=normalize)
887+
self.cbday = CustomBusinessDay(n=1, normalize=normalize, **kwds)
888+
self.busdaycalendar = kwds.pop('busdaycalendar', np.busdaycalendar())
889+
self.m_offset = MonthBegin(n=1, normalize=normalize, **kwds)
890+
891+
from_various = classmethod(from_various_func)
857892

858893
@apply_wraps
859-
def apply(self,other):
894+
def apply(self, other):
860895
n = self.n
861896
dt_in = other
862897
# First move to month offset
@@ -872,8 +907,8 @@ def apply(self,other):
872907
n += 1
873908
elif dt_in < cur_cmbegin and n >= 1:
874909
n -= 1
875-
876-
new = cur_mbegin + n * MonthBegin()
910+
911+
new = cur_mbegin + n * self.m_offset
877912
result = self.cbday.rollforward(new)
878913
return result
879914

pandas/tseries/tests/test_offsets.py

+21-12
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ def _get_offset(self, klass, value=1, normalize=False):
116116
klass = klass(n=value, weekday=5, normalize=normalize)
117117
elif klass is DateOffset:
118118
klass = klass(days=value, normalize=normalize)
119+
# elif klass in [CDay, CBMonthEnd, CBMonthBegin]:
120+
# klass = klass.from_various(n=value, normalize=normalize)
119121
else:
120122
try:
121123
klass = klass(value, normalize=normalize)
@@ -808,7 +810,7 @@ def test_holidays(self):
808810
# Define a TradingDay offset
809811
holidays = ['2012-05-01', datetime(2013, 5, 1),
810812
np.datetime64('2014-05-01')]
811-
tday = CDay(holidays=holidays)
813+
tday = CDay.from_various(holidays=holidays)
812814
for year in range(2012, 2015):
813815
dt = datetime(year, 4, 30)
814816
xp = datetime(year, 5, 2)
@@ -819,9 +821,9 @@ def test_weekmask(self):
819821
weekmask_saudi = 'Sat Sun Mon Tue Wed' # Thu-Fri Weekend
820822
weekmask_uae = '1111001' # Fri-Sat Weekend
821823
weekmask_egypt = [1,1,1,1,0,0,1] # Fri-Sat Weekend
822-
bday_saudi = CDay(weekmask=weekmask_saudi)
823-
bday_uae = CDay(weekmask=weekmask_uae)
824-
bday_egypt = CDay(weekmask=weekmask_egypt)
824+
bday_saudi = CDay.from_various(weekmask=weekmask_saudi)
825+
bday_uae = CDay.from_various(weekmask=weekmask_uae)
826+
bday_egypt = CDay.from_various(weekmask=weekmask_egypt)
825827
dt = datetime(2013, 5, 1)
826828
xp_saudi = datetime(2013, 5, 4)
827829
xp_uae = datetime(2013, 5, 2)
@@ -838,15 +840,15 @@ def test_weekmask_and_holidays(self):
838840
weekmask_egypt = 'Sun Mon Tue Wed Thu' # Fri-Sat Weekend
839841
holidays = ['2012-05-01', datetime(2013, 5, 1),
840842
np.datetime64('2014-05-01')]
841-
bday_egypt = CDay(holidays=holidays, weekmask=weekmask_egypt)
843+
bday_egypt = CDay.from_various(holidays=holidays, weekmask=weekmask_egypt)
842844
dt = datetime(2013, 4, 30)
843845
xp_egypt = datetime(2013, 5, 5)
844846
self.assertEqual(xp_egypt, dt + 2 * bday_egypt)
845847

846848
def test_calendar(self):
847849
calendar = USFederalHolidayCalendar()
848850
dt = datetime(2014, 1, 17)
849-
assertEq(CDay(calendar=calendar), dt, datetime(2014, 1, 21))
851+
assertEq(CDay.from_various(calendar=calendar), dt, datetime(2014, 1, 21))
850852

851853
class CustomBusinessMonthBase(object):
852854
_multiprocess_can_split_ = True
@@ -999,15 +1001,19 @@ def test_holidays(self):
9991001
# Define a TradingDay offset
10001002
holidays = ['2012-01-31', datetime(2012, 2, 28),
10011003
np.datetime64('2012-02-29')]
1002-
bm_offset = CBMonthEnd(holidays=holidays)
1004+
bm_offset = CBMonthEnd.from_various(holidays=holidays)
10031005
dt = datetime(2012,1,1)
10041006
self.assertEqual(dt + bm_offset,datetime(2012,1,30))
10051007
self.assertEqual(dt + 2*bm_offset,datetime(2012,2,27))
10061008

10071009
def test_datetimeindex(self):
10081010
from pandas.tseries.holiday import USFederalHolidayCalendar
1009-
self.assertEqual(DatetimeIndex(start='20120101',end='20130101',freq=CBMonthEnd(calendar=USFederalHolidayCalendar())).tolist()[0],
1010-
datetime(2012,1,31))
1011+
hcal = USFederalHolidayCalendar()
1012+
freq = CBMonthEnd.from_various(calendar=hcal)
1013+
1014+
self.assertEqual(DatetimeIndex(start='20120101',end='20130101',
1015+
freq=freq).tolist()[0],
1016+
datetime(2012,1,31))
10111017

10121018
class TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base):
10131019
_object = CBMonthBegin
@@ -1114,14 +1120,17 @@ def test_holidays(self):
11141120
# Define a TradingDay offset
11151121
holidays = ['2012-02-01', datetime(2012, 2, 2),
11161122
np.datetime64('2012-03-01')]
1117-
bm_offset = CBMonthBegin(holidays=holidays)
1123+
bm_offset = CBMonthBegin.from_various(holidays=holidays)
11181124
dt = datetime(2012,1,1)
11191125
self.assertEqual(dt + bm_offset,datetime(2012,1,2))
11201126
self.assertEqual(dt + 2*bm_offset,datetime(2012,2,3))
11211127

11221128
def test_datetimeindex(self):
1123-
self.assertEqual(DatetimeIndex(start='20120101',end='20130101',freq=CBMonthBegin(calendar=USFederalHolidayCalendar())).tolist()[0],
1124-
datetime(2012,1,3))
1129+
hcal = USFederalHolidayCalendar()
1130+
cbmb = CBMonthBegin.from_various(calendar=hcal)
1131+
self.assertEqual(DatetimeIndex(start='20120101', end='20130101',
1132+
freq=cbmb).tolist()[0],
1133+
datetime(2012,1,3))
11251134

11261135

11271136
def assertOnOffset(offset, date, expected):

0 commit comments

Comments
 (0)