From 63b0a2b573cc70375bf36064613e37a048df4be7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 2 Sep 2017 09:53:32 -0700 Subject: [PATCH 1/6] Implement get_freq_code cython frequencies --- pandas/_libs/tslibs/__init__.py | 0 pandas/_libs/tslibs/frequencies.pyx | 203 ++++++++++++++++++++++++++++ pandas/tseries/frequencies.py | 78 +---------- setup.py | 3 + 4 files changed, 207 insertions(+), 77 deletions(-) create mode 100644 pandas/_libs/tslibs/__init__.py create mode 100644 pandas/_libs/tslibs/frequencies.pyx diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx new file mode 100644 index 0000000000000..3ba2758c3c464 --- /dev/null +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +import re + +cimport cython + +import numpy as np +cimport numpy as np +np.import_array() # Without this, `is_integer_object` causes segfaults + +from util cimport is_integer_object + + +@cython.ccall +def get_freq_code(freqstr): + """ + Return freq str or tuple to freq code and stride (mult) + + Parameters + ---------- + freqstr : str or tuple + + Returns + ------- + return : tuple of base frequency code and stride (mult) + + Example + ------- + >>> get_freq_code('3D') + (6000, 3) + + >>> get_freq_code('D') + (6000, 1) + + >>> get_freq_code(('D', 3)) + (6000, 3) + """ + if getattr(freqstr, '_typ', None) == 'dateoffset': + freqstr = (freqstr.rule_code, freqstr.n) + + if isinstance(freqstr, tuple): + if (is_integer_object(freqstr[0]) and + is_integer_object(freqstr[1])): + # e.g., freqstr = (2000, 1) + return freqstr + else: + # e.g., freqstr = ('T', 5) + try: + code = _period_str_to_code(freqstr[0]) + stride = freqstr[1] + except: + if is_integer_object(freqstr[1]): + raise + code = _period_str_to_code(freqstr[1]) + stride = freqstr[0] + return code, stride 
+ + if is_integer_object(freqstr): + return (freqstr, 1) + + base, stride = _base_and_stride(freqstr) + code = _period_str_to_code(base) + + return code, stride + + +# hack to handle WOM-1MON +opattern = re.compile( + r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' +) + + +@cython.ccall +def _base_and_stride(freqstr): + """ + Return base freq and stride info from string representation + + Examples + -------- + _freq_and_stride('5Min') -> 'Min', 5 + """ + groups = opattern.match(freqstr) + + if not groups: + raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) + + stride = groups.group(1) + + if len(stride): + stride = int(stride) + else: + stride = 1 + + base = groups.group(2) + + return (base, stride) + + +# --------------------------------------------------------------------- +# Period codes + +# period frequency constants corresponding to scikits timeseries +# originals +_period_code_map = { + # Annual freqs with various fiscal year ends. + # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 + "A-DEC": 1000, # Annual - December year end + "A-JAN": 1001, # Annual - January year end + "A-FEB": 1002, # Annual - February year end + "A-MAR": 1003, # Annual - March year end + "A-APR": 1004, # Annual - April year end + "A-MAY": 1005, # Annual - May year end + "A-JUN": 1006, # Annual - June year end + "A-JUL": 1007, # Annual - July year end + "A-AUG": 1008, # Annual - August year end + "A-SEP": 1009, # Annual - September year end + "A-OCT": 1010, # Annual - October year end + "A-NOV": 1011, # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. 
+ # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 + "Q-DEC": 2000, # Quarterly - December year end + "Q-JAN": 2001, # Quarterly - January year end + "Q-FEB": 2002, # Quarterly - February year end + "Q-MAR": 2003, # Quarterly - March year end + "Q-APR": 2004, # Quarterly - April year end + "Q-MAY": 2005, # Quarterly - May year end + "Q-JUN": 2006, # Quarterly - June year end + "Q-JUL": 2007, # Quarterly - July year end + "Q-AUG": 2008, # Quarterly - August year end + "Q-SEP": 2009, # Quarterly - September year end + "Q-OCT": 2010, # Quarterly - October year end + "Q-NOV": 2011, # Quarterly - November year end + + "M": 3000, # Monthly + + "W-SUN": 4000, # Weekly - Sunday end of week + "W-MON": 4001, # Weekly - Monday end of week + "W-TUE": 4002, # Weekly - Tuesday end of week + "W-WED": 4003, # Weekly - Wednesday end of week + "W-THU": 4004, # Weekly - Thursday end of week + "W-FRI": 4005, # Weekly - Friday end of week + "W-SAT": 4006, # Weekly - Saturday end of week + + "B": 5000, # Business days + "D": 6000, # Daily + "H": 7000, # Hourly + "T": 8000, # Minutely + "S": 9000, # Secondly + "L": 10000, # Millisecondly + "U": 11000, # Microsecondly + "N": 12000, # Nanosecondly +} + +# Yearly aliases; careful not to put these in _reverse_period_code_map +_period_code_map.update({'Y'+key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith('A-')}) + +_period_code_map.update({ + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000, # Custom Business Day + }) + +_dont_uppercase = set(('MS', 'ms')) + +_lite_rule_alias = { + 'W': 'W-SUN', + 'Q': 'Q-DEC', + + 'A': 'A-DEC', # YearEnd(month=12), + 'Y': 'A-DEC', + 'AS': 'AS-JAN', # YearBegin(month=1), + 'YS': 'AS-JAN', + 'BA': 'BA-DEC', # BYearEnd(month=12), + 'BY': 'BA-DEC', + 'BAS': 'BAS-JAN', # BYearBegin(month=1), + 'BYS': 'BAS-JAN', + + 'Min': 'T', + 'min': 'T', + 'ms': 'L', + 'us': 'U', + 'ns': 'N'} + +_INVALID_FREQ_ERROR = 
"Invalid frequency: {0}" + +@cython.ccall +def _period_str_to_code(freqstr): + freqstr = _lite_rule_alias.get(freqstr, freqstr) + + if freqstr not in _dont_uppercase: + lower = freqstr.lower() + freqstr = _lite_rule_alias.get(lower, freqstr) + + if freqstr not in _dont_uppercase: + freqstr = freqstr.upper() + try: + return _period_code_map[freqstr] + except KeyError: + raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 7f34bcaf52926..3516b0415720f 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -21,6 +21,7 @@ from pandas._libs import lib, tslib from pandas._libs.tslib import Timedelta +from pandas._libs.tslibs.frequencies import get_freq_code, _base_and_stride from pytz import AmbiguousTimeError @@ -298,58 +299,6 @@ def get_freq(freq): return freq -def get_freq_code(freqstr): - """ - Return freq str or tuple to freq code and stride (mult) - - Parameters - ---------- - freqstr : str or tuple - - Returns - ------- - return : tuple of base frequency code and stride (mult) - - Example - ------- - >>> get_freq_code('3D') - (6000, 3) - - >>> get_freq_code('D') - (6000, 1) - - >>> get_freq_code(('D', 3)) - (6000, 3) - """ - if isinstance(freqstr, DateOffset): - freqstr = (freqstr.rule_code, freqstr.n) - - if isinstance(freqstr, tuple): - if (is_integer(freqstr[0]) and - is_integer(freqstr[1])): - # e.g., freqstr = (2000, 1) - return freqstr - else: - # e.g., freqstr = ('T', 5) - try: - code = _period_str_to_code(freqstr[0]) - stride = freqstr[1] - except: - if is_integer(freqstr[1]): - raise - code = _period_str_to_code(freqstr[1]) - stride = freqstr[0] - return code, stride - - if is_integer(freqstr): - return (freqstr, 1) - - base, stride = _base_and_stride(freqstr) - code = _period_str_to_code(base) - - return code, stride - - def _get_freq_str(base, mult=1): code = _reverse_period_code_map.get(base) if mult == 1: @@ -577,31 +526,6 @@ def 
to_offset(freq): ) -def _base_and_stride(freqstr): - """ - Return base freq and stride info from string representation - - Examples - -------- - _freq_and_stride('5Min') -> 'Min', 5 - """ - groups = opattern.match(freqstr) - - if not groups: - raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) - - stride = groups.group(1) - - if len(stride): - stride = int(stride) - else: - stride = 1 - - base = groups.group(2) - - return (base, stride) - - def get_base_alias(freqstr): """ Returns the base frequency alias, e.g., '5D' -> 'D' diff --git a/setup.py b/setup.py index 444db5bc4d275..37509ebcdd1b0 100755 --- a/setup.py +++ b/setup.py @@ -341,6 +341,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', + 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -492,6 +493,8 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, + '_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies', + 'pxdfiles': ['_libs/src/util']}, '_libs.index': {'pyxfile': '_libs/index', 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'], From f48c5c3e84c4b07ef354036f852c903790ca74b7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 2 Sep 2017 16:10:17 -0700 Subject: [PATCH 2/6] flake8 fixup --- pandas/_libs/tslibs/frequencies.pyx | 9 +++++---- pandas/tseries/frequencies.py | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 3ba2758c3c464..fce295cfd059a 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -6,7 +6,7 @@ cimport cython import numpy as np cimport numpy as np -np.import_array() # Without this, `is_integer_object` causes segfaults +np.import_array() # Without 
this, `is_integer_object` causes segfaults from util cimport is_integer_object @@ -153,9 +153,9 @@ _period_code_map = { } # Yearly aliases; careful not to put these in _reverse_period_code_map -_period_code_map.update({'Y'+key[1:]: _period_code_map[key] - for key in _period_code_map - if key.startswith('A-')}) +_period_code_map.update({'Y' + key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith('A-')}) _period_code_map.update({ "Q": 2000, # Quarterly - December year end (default quarterly) @@ -187,6 +187,7 @@ _lite_rule_alias = { _INVALID_FREQ_ERROR = "Invalid frequency: {0}" + @cython.ccall def _period_str_to_code(freqstr): freqstr = _lite_rule_alias.get(freqstr, freqstr) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 3516b0415720f..6644a33245a84 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -8,7 +8,6 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( - is_integer, is_period_arraylike, is_timedelta64_dtype, is_datetime64_dtype) From 83d6781985e93234b56c2b49298ba7787c8a001c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 2 Sep 2017 18:01:11 -0700 Subject: [PATCH 3/6] asv benchmark for Period property lookups --- asv_bench/benchmarks/period.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index f9837191a7bae..78d66295f28cc 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -2,6 +2,35 @@ from pandas import Series, Period, PeriodIndex, date_range +class PeriodProperties(object): + def setup(self): + self.per = Period('2012-06-01', freq='M') + + def time_year(self): + self.per.year + + def time_month(self): + self.per.month + + def time_quarter(self): + self.per.quarter + + def time_day(self): + self.per.day + + def time_hour(self): + self.per.hour + + def time_minute(self): + self.per.minute + + 
def time_second(self): + self.per.second + + def time_leap_year(self): + self.per.is_leap_year + + class Constructor(object): goal_time = 0.2 From 3ab53ff1f474666e2a8635369d90bfa7039e386f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 5 Sep 2017 07:19:28 -0700 Subject: [PATCH 4/6] Per reviewer request, change @cython.ccall-->cpdef --- pandas/_libs/tslibs/frequencies.pyx | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index fce295cfd059a..7cdce2d31cdf3 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -11,8 +11,7 @@ np.import_array() # Without this, `is_integer_object` causes segfaults from util cimport is_integer_object -@cython.ccall -def get_freq_code(freqstr): +cpdef get_freq_code(freqstr): """ Return freq str or tuple to freq code and stride (mult) @@ -70,8 +69,7 @@ opattern = re.compile( ) -@cython.ccall -def _base_and_stride(freqstr): +cpdef _base_and_stride(freqstr): """ Return base freq and stride info from string representation @@ -188,8 +186,7 @@ _lite_rule_alias = { _INVALID_FREQ_ERROR = "Invalid frequency: {0}" -@cython.ccall -def _period_str_to_code(freqstr): +cpdef _period_str_to_code(freqstr): freqstr = _lite_rule_alias.get(freqstr, freqstr) if freqstr not in _dont_uppercase: From 150db3cc869d3ebb7cf14ec1053ecff135a5796e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 6 Sep 2017 18:18:39 -0700 Subject: [PATCH 5/6] Remove comment per reviewer request --- pandas/_libs/tslibs/frequencies.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 7cdce2d31cdf3..35429e8ae87f0 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -6,7 +6,7 @@ cimport cython import numpy as np cimport numpy as np -np.import_array() # Without this, `is_integer_object` causes segfaults 
+np.import_array() from util cimport is_integer_object From 77e2e758e0c28810f0d78e1c47a0b545771a9fe2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 7 Sep 2017 07:37:55 -0700 Subject: [PATCH 6/6] Add tslibs to setup.py packages --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 37509ebcdd1b0..4e326beefa908 100755 --- a/setup.py +++ b/setup.py @@ -656,6 +656,7 @@ def pxd(name): 'pandas.io.formats', 'pandas.io.clipboard', 'pandas._libs', + 'pandas._libs.tslibs', 'pandas.plotting', 'pandas.stats', 'pandas.types',