From 09dcb5401613797111140735fc96f2ff0983d168 Mon Sep 17 00:00:00 2001 From: Brandon Bradley Date: Wed, 4 Feb 2015 18:59:36 -0600 Subject: [PATCH 1/4] move Period to Cython extension --- pandas/period.pyx | 519 +++++++++++++++++++++++++++++++++++++++ pandas/tseries/period.py | 508 +------------------------------------- setup.py | 1 + 3 files changed, 526 insertions(+), 502 deletions(-) create mode 100644 pandas/period.pyx diff --git a/pandas/period.pyx b/pandas/period.pyx new file mode 100644 index 0000000000000..acbdb793d22fb --- /dev/null +++ b/pandas/period.pyx @@ -0,0 +1,519 @@ +from datetime import datetime, date, timedelta +import operator +import numpy as np + +from pandas import compat +from pandas.core import common as com +from pandas.core.base import PandasObject + +from pandas.tseries import frequencies +from pandas.tseries.frequencies import get_freq_code as _gfc +from pandas.tseries import offsets +from pandas.tseries.tools import parse_time_string + +from pandas import tslib +from tslib import Timedelta, Timestamp + + +#--------------- +# Period logic + +def _period_field_accessor(name, alias): + def f(self): + base, mult = _gfc(self.freq) + return tslib.get_period_field(alias, self.ordinal, base) + f.__name__ = name + return property(f) + + +class Period(PandasObject): + """ + Represents an period of time + + Parameters + ---------- + value : Period or compat.string_types, default None + The time period represented (e.g., '4Q2005') + freq : str, default None + e.g., 'B' for businessday. Must be a singular rule-code (e.g. 5T is not + allowed). + year : int, default None + month : int, default 1 + quarter : int, default None + day : int, default 1 + hour : int, default 0 + minute : int, default 0 + second : int, default 0 + """ + __slots__ = ['freq', 'ordinal'] + _comparables = ['name','freqstr'] + _typ = 'period' + + @classmethod + def _from_ordinal(cls, ordinal, freq): + """ fast creation from an ordinal and freq that are already validated! 
""" + self = object.__new__(cls) + self.ordinal = ordinal + self.freq = freq + return self + + def __init__(self, value=None, freq=None, ordinal=None, + year=None, month=1, quarter=None, day=1, + hour=0, minute=0, second=0): + + # freq points to a tuple (base, mult); base is one of the defined + # periods such as A, Q, etc. Every five minutes would be, e.g., + # ('T', 5) but may be passed in as a string like '5T' + + self.freq = None + + # ordinal is the period offset from the gregorian proleptic epoch + self.ordinal = None + + if ordinal is not None and value is not None: + raise ValueError(("Only value or ordinal but not both should be " + "given but not both")) + elif ordinal is not None: + if not com.is_integer(ordinal): + raise ValueError("Ordinal must be an integer") + if freq is None: + raise ValueError('Must supply freq for ordinal value') + self.ordinal = ordinal + + elif value is None: + if freq is None: + raise ValueError("If value is None, freq cannot be None") + + self.ordinal = _ordinal_from_fields(year, month, quarter, day, + hour, minute, second, freq) + + elif isinstance(value, Period): + other = value + if freq is None or _gfc(freq) == _gfc(other.freq): + self.ordinal = other.ordinal + freq = other.freq + else: + converted = other.asfreq(freq) + self.ordinal = converted.ordinal + + elif com.is_null_datelike_scalar(value) or value in tslib._nat_strings: + self.ordinal = tslib.iNaT + if freq is None: + raise ValueError("If value is NaT, freq cannot be None " + "because it cannot be inferred") + + elif isinstance(value, compat.string_types) or com.is_integer(value): + if com.is_integer(value): + value = str(value) + value = value.upper() + + dt, _, reso = parse_time_string(value, freq) + if freq is None: + try: + freq = frequencies.Resolution.get_freq(reso) + except KeyError: + raise ValueError("Invalid frequency or could not infer: %s" % reso) + + elif isinstance(value, datetime): + dt = value + if freq is None: + raise ValueError('Must supply freq 
for datetime value') + elif isinstance(value, date): + dt = datetime(year=value.year, month=value.month, day=value.day) + if freq is None: + raise ValueError('Must supply freq for datetime value') + else: + msg = "Value must be Period, string, integer, or datetime" + raise ValueError(msg) + + base, mult = _gfc(freq) + if mult != 1: + # TODO: Better error message - this is slightly confusing + raise ValueError('Only mult == 1 supported') + + if self.ordinal is None: + self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, dt.microsecond, 0, + base) + + self.freq = frequencies._get_freq_str(base) + + def __eq__(self, other): + if isinstance(other, Period): + if other.freq != self.freq: + raise ValueError("Cannot compare non-conforming periods") + if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: + return False + return (self.ordinal == other.ordinal + and _gfc(self.freq) == _gfc(other.freq)) + return NotImplemented + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash((self.ordinal, self.freq)) + + def _add_delta(self, other): + if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)): + offset = frequencies.to_offset(self.freq) + if isinstance(offset, offsets.Tick): + nanos = tslib._delta_to_nanoseconds(other) + offset_nanos = tslib._delta_to_nanoseconds(offset) + + if nanos % offset_nanos == 0: + if self.ordinal == tslib.iNaT: + ordinal = self.ordinal + else: + ordinal = self.ordinal + (nanos // offset_nanos) + return Period(ordinal=ordinal, freq=self.freq) + elif isinstance(other, offsets.DateOffset): + freqstr = frequencies.get_standard_freq(other) + base = frequencies.get_base_alias(freqstr) + + if base == self.freq: + if self.ordinal == tslib.iNaT: + ordinal = self.ordinal + else: + ordinal = self.ordinal + other.n + return Period(ordinal=ordinal, freq=self.freq) + + raise ValueError("Input has different freq from Period(freq={0})".format(self.freq)) + 
+ def __add__(self, other): + if isinstance(other, (timedelta, np.timedelta64, + offsets.Tick, offsets.DateOffset, Timedelta)): + return self._add_delta(other) + elif com.is_integer(other): + if self.ordinal == tslib.iNaT: + ordinal = self.ordinal + else: + ordinal = self.ordinal + other + return Period(ordinal=ordinal, freq=self.freq) + else: # pragma: no cover + return NotImplemented + + def __sub__(self, other): + if isinstance(other, (timedelta, np.timedelta64, + offsets.Tick, offsets.DateOffset, Timedelta)): + neg_other = -other + return self + neg_other + elif com.is_integer(other): + if self.ordinal == tslib.iNaT: + ordinal = self.ordinal + else: + ordinal = self.ordinal - other + return Period(ordinal=ordinal, freq=self.freq) + elif isinstance(other, Period): + if other.freq != self.freq: + raise ValueError("Cannot do arithmetic with " + "non-conforming periods") + if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: + return Period(ordinal=tslib.iNaT, freq=self.freq) + return self.ordinal - other.ordinal + else: # pragma: no cover + return NotImplemented + + def _comp_method(func, name): + def f(self, other): + if isinstance(other, Period): + if other.freq != self.freq: + raise ValueError("Cannot compare non-conforming periods") + if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: + return False + return func(self.ordinal, other.ordinal) + else: + raise TypeError(other) + + f.__name__ = name + return f + + __lt__ = _comp_method(operator.lt, '__lt__') + __le__ = _comp_method(operator.le, '__le__') + __gt__ = _comp_method(operator.gt, '__gt__') + __ge__ = _comp_method(operator.ge, '__ge__') + + def asfreq(self, freq, how='E'): + """ + Convert Period to desired frequency, either at the start or end of the + interval + + Parameters + ---------- + freq : string + how : {'E', 'S', 'end', 'start'}, default 'end' + Start or end of the timespan + + Returns + ------- + resampled : Period + """ + how = _validate_end_alias(how) + base1, mult1 = 
_gfc(self.freq) + base2, mult2 = _gfc(freq) + + if mult2 != 1: + raise ValueError('Only mult == 1 supported') + + end = how == 'E' + new_ordinal = tslib.period_asfreq(self.ordinal, base1, base2, end) + + return Period(ordinal=new_ordinal, freq=base2) + + @property + def start_time(self): + return self.to_timestamp(how='S') + + @property + def end_time(self): + if self.ordinal == tslib.iNaT: + ordinal = self.ordinal + else: + ordinal = (self + 1).start_time.value - 1 + return Timestamp(ordinal) + + def to_timestamp(self, freq=None, how='start', tz=None): + """ + Return the Timestamp representation of the Period at the target + frequency at the specified end (how) of the Period + + Parameters + ---------- + freq : string or DateOffset, default is 'D' if self.freq is week or + longer and 'S' otherwise + Target frequency + how: str, default 'S' (start) + 'S', 'E'. Can be aliased as case insensitive + 'Start', 'Finish', 'Begin', 'End' + + Returns + ------- + Timestamp + """ + how = _validate_end_alias(how) + + if freq is None: + base, mult = _gfc(self.freq) + freq = frequencies.get_to_timestamp_base(base) + + base, mult = _gfc(freq) + val = self.asfreq(freq, how) + + dt64 = tslib.period_ordinal_to_dt64(val.ordinal, base) + return Timestamp(dt64, tz=tz) + + year = _period_field_accessor('year', 0) + month = _period_field_accessor('month', 3) + day = _period_field_accessor('day', 4) + hour = _period_field_accessor('hour', 5) + minute = _period_field_accessor('minute', 6) + second = _period_field_accessor('second', 7) + weekofyear = _period_field_accessor('week', 8) + week = weekofyear + dayofweek = _period_field_accessor('dayofweek', 10) + weekday = dayofweek + dayofyear = _period_field_accessor('dayofyear', 9) + quarter = _period_field_accessor('quarter', 2) + qyear = _period_field_accessor('qyear', 1) + + @classmethod + def now(cls, freq=None): + return Period(datetime.now(), freq=freq) + + def __repr__(self): + base, mult = _gfc(self.freq) + formatted = 
tslib.period_format(self.ordinal, base) + freqstr = frequencies._reverse_period_code_map[base] + + if not compat.PY3: + encoding = com.get_option("display.encoding") + formatted = formatted.encode(encoding) + + return "Period('%s', '%s')" % (formatted, freqstr) + + def __unicode__(self): + """ + Return a string representation for a particular Period + + Invoked by unicode(period) in py2 only. Yields a Unicode String in both + py2/py3. + """ + base, mult = _gfc(self.freq) + formatted = tslib.period_format(self.ordinal, base) + value = ("%s" % formatted) + return value + + def strftime(self, fmt): + """ + Returns the string representation of the :class:`Period`, depending + on the selected :keyword:`format`. :keyword:`format` must be a string + containing one or several directives. The method recognizes the same + directives as the :func:`time.strftime` function of the standard Python + distribution, as well as the specific additional directives ``%f``, + ``%F``, ``%q``. (formatting & docs originally from scikits.timeseries) + + +-----------+--------------------------------+-------+ + | Directive | Meaning | Notes | + +===========+================================+=======+ + | ``%a`` | Locale's abbreviated weekday | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%A`` | Locale's full weekday name. | | + +-----------+--------------------------------+-------+ + | ``%b`` | Locale's abbreviated month | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%B`` | Locale's full month name. | | + +-----------+--------------------------------+-------+ + | ``%c`` | Locale's appropriate date and | | + | | time representation. | | + +-----------+--------------------------------+-------+ + | ``%d`` | Day of the month as a decimal | | + | | number [01,31]. 
| | + +-----------+--------------------------------+-------+ + | ``%f`` | 'Fiscal' year without a | \(1) | + | | century as a decimal number | | + | | [00,99] | | + +-----------+--------------------------------+-------+ + | ``%F`` | 'Fiscal' year with a century | \(2) | + | | as a decimal number | | + +-----------+--------------------------------+-------+ + | ``%H`` | Hour (24-hour clock) as a | | + | | decimal number [00,23]. | | + +-----------+--------------------------------+-------+ + | ``%I`` | Hour (12-hour clock) as a | | + | | decimal number [01,12]. | | + +-----------+--------------------------------+-------+ + | ``%j`` | Day of the year as a decimal | | + | | number [001,366]. | | + +-----------+--------------------------------+-------+ + | ``%m`` | Month as a decimal number | | + | | [01,12]. | | + +-----------+--------------------------------+-------+ + | ``%M`` | Minute as a decimal number | | + | | [00,59]. | | + +-----------+--------------------------------+-------+ + | ``%p`` | Locale's equivalent of either | \(3) | + | | AM or PM. | | + +-----------+--------------------------------+-------+ + | ``%q`` | Quarter as a decimal number | | + | | [01,04] | | + +-----------+--------------------------------+-------+ + | ``%S`` | Second as a decimal number | \(4) | + | | [00,61]. | | + +-----------+--------------------------------+-------+ + | ``%U`` | Week number of the year | \(5) | + | | (Sunday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. All days in a new | | + | | year preceding the first | | + | | Sunday are considered to be in | | + | | week 0. | | + +-----------+--------------------------------+-------+ + | ``%w`` | Weekday as a decimal number | | + | | [0(Sunday),6]. | | + +-----------+--------------------------------+-------+ + | ``%W`` | Week number of the year | \(5) | + | | (Monday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. 
All days in a new | | + | | year preceding the first | | + | | Monday are considered to be in | | + | | week 0. | | + +-----------+--------------------------------+-------+ + | ``%x`` | Locale's appropriate date | | + | | representation. | | + +-----------+--------------------------------+-------+ + | ``%X`` | Locale's appropriate time | | + | | representation. | | + +-----------+--------------------------------+-------+ + | ``%y`` | Year without century as a | | + | | decimal number [00,99]. | | + +-----------+--------------------------------+-------+ + | ``%Y`` | Year with century as a decimal | | + | | number. | | + +-----------+--------------------------------+-------+ + | ``%Z`` | Time zone name (no characters | | + | | if no time zone exists). | | + +-----------+--------------------------------+-------+ + | ``%%`` | A literal ``'%'`` character. | | + +-----------+--------------------------------+-------+ + + .. note:: + + (1) + The ``%f`` directive is the same as ``%y`` if the frequency is + not quarterly. + Otherwise, it corresponds to the 'fiscal' year, as defined by + the :attr:`qyear` attribute. + + (2) + The ``%F`` directive is the same as ``%Y`` if the frequency is + not quarterly. + Otherwise, it corresponds to the 'fiscal' year, as defined by + the :attr:`qyear` attribute. + + (3) + The ``%p`` directive only affects the output hour field + if the ``%I`` directive is used to parse the hour. + + (4) + The range really is ``0`` to ``61``; this accounts for leap + seconds and the (very rare) double leap seconds. + + (5) + The ``%U`` and ``%W`` directives are only used in calculations + when the day of the week and the year are specified. + + .. 
rubric:: Examples + + >>> a = Period(freq='Q@JUL', year=2006, quarter=1) + >>> a.strftime('%F-Q%q') + '2006-Q1' + >>> # Output the last month in the quarter of this date + >>> a.strftime('%b-%Y') + 'Oct-2005' + >>> + >>> a = Period(freq='D', year=2001, month=1, day=1) + >>> a.strftime('%d-%b-%Y') + '01-Jan-2001' + >>> a.strftime('%b. %d, %Y was a %A') + 'Jan. 01, 2001 was a Monday' + """ + base, mult = _gfc(self.freq) + return tslib.period_format(self.ordinal, base, fmt) + + +def _ordinal_from_fields(year, month, quarter, day, hour, minute, + second, freq): + base, mult = _gfc(freq) + if mult != 1: + raise ValueError('Only mult == 1 supported') + + if quarter is not None: + year, month = _quarter_to_myear(year, quarter, freq) + + return tslib.period_ordinal(year, month, day, hour, minute, second, 0, 0, base) + + +def _quarter_to_myear(year, quarter, freq): + if quarter is not None: + if quarter <= 0 or quarter > 4: + raise ValueError('Quarter must be 1 <= q <= 4') + + mnum = frequencies._month_numbers[frequencies._get_rule_month(freq)] + 1 + month = (mnum + (quarter - 1) * 3) % 12 + 1 + if month > mnum: + year -= 1 + + return year, month + + +def _validate_end_alias(how): + how_dict = {'S': 'S', 'E': 'E', + 'START': 'S', 'FINISH': 'E', + 'BEGIN': 'S', 'END': 'E'} + how = how_dict.get(str(how).upper()) + if how not in set(['S', 'E']): + raise ValueError('How must be one of S or E') + return how diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 58d2606ec41f1..8a0ac0488ec44 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -12,6 +12,12 @@ from pandas.tseries.tools import parse_time_string import pandas.tseries.offsets as offsets +from pandas.period import Period +from pandas.period import ( + _validate_end_alias, + _quarter_to_myear, +) + import pandas.core.common as com from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box, _values_from_object, ABCSeries) @@ -23,17 +29,6 @@ from pandas.compat import zip, u 
-#--------------- -# Period logic - -def _period_field_accessor(name, alias): - def f(self): - base, mult = _gfc(self.freq) - return tslib.get_period_field(alias, self.ordinal, base) - f.__name__ = name - return property(f) - - def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) @@ -42,462 +37,6 @@ def f(self): f.__doc__ = docstring return property(f) -class Period(PandasObject): - """ - Represents an period of time - - Parameters - ---------- - value : Period or compat.string_types, default None - The time period represented (e.g., '4Q2005') - freq : str, default None - e.g., 'B' for businessday. Must be a singular rule-code (e.g. 5T is not - allowed). - year : int, default None - month : int, default 1 - quarter : int, default None - day : int, default 1 - hour : int, default 0 - minute : int, default 0 - second : int, default 0 - """ - __slots__ = ['freq', 'ordinal'] - _comparables = ['name','freqstr'] - _typ = 'period' - - @classmethod - def _from_ordinal(cls, ordinal, freq): - """ fast creation from an ordinal and freq that are already validated! """ - self = object.__new__(cls) - self.ordinal = ordinal - self.freq = freq - return self - - def __init__(self, value=None, freq=None, ordinal=None, - year=None, month=1, quarter=None, day=1, - hour=0, minute=0, second=0): - - # freq points to a tuple (base, mult); base is one of the defined - # periods such as A, Q, etc. 
Every five minutes would be, e.g., - # ('T', 5) but may be passed in as a string like '5T' - - self.freq = None - - # ordinal is the period offset from the gregorian proleptic epoch - self.ordinal = None - - if ordinal is not None and value is not None: - raise ValueError(("Only value or ordinal but not both should be " - "given but not both")) - elif ordinal is not None: - if not com.is_integer(ordinal): - raise ValueError("Ordinal must be an integer") - if freq is None: - raise ValueError('Must supply freq for ordinal value') - self.ordinal = ordinal - - elif value is None: - if freq is None: - raise ValueError("If value is None, freq cannot be None") - - self.ordinal = _ordinal_from_fields(year, month, quarter, day, - hour, minute, second, freq) - - elif isinstance(value, Period): - other = value - if freq is None or _gfc(freq) == _gfc(other.freq): - self.ordinal = other.ordinal - freq = other.freq - else: - converted = other.asfreq(freq) - self.ordinal = converted.ordinal - - elif com.is_null_datelike_scalar(value) or value in tslib._nat_strings: - self.ordinal = tslib.iNaT - if freq is None: - raise ValueError("If value is NaT, freq cannot be None " - "because it cannot be inferred") - - elif isinstance(value, compat.string_types) or com.is_integer(value): - if com.is_integer(value): - value = str(value) - value = value.upper() - - dt, _, reso = parse_time_string(value, freq) - if freq is None: - try: - freq = frequencies.Resolution.get_freq(reso) - except KeyError: - raise ValueError("Invalid frequency or could not infer: %s" % reso) - - elif isinstance(value, datetime): - dt = value - if freq is None: - raise ValueError('Must supply freq for datetime value') - elif isinstance(value, date): - dt = datetime(year=value.year, month=value.month, day=value.day) - if freq is None: - raise ValueError('Must supply freq for datetime value') - else: - msg = "Value must be Period, string, integer, or datetime" - raise ValueError(msg) - - base, mult = _gfc(freq) - if 
mult != 1: - # TODO: Better error message - this is slightly confusing - raise ValueError('Only mult == 1 supported') - - if self.ordinal is None: - self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day, - dt.hour, dt.minute, dt.second, dt.microsecond, 0, - base) - - self.freq = frequencies._get_freq_str(base) - - def __eq__(self, other): - if isinstance(other, Period): - if other.freq != self.freq: - raise ValueError("Cannot compare non-conforming periods") - if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: - return False - return (self.ordinal == other.ordinal - and _gfc(self.freq) == _gfc(other.freq)) - return NotImplemented - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((self.ordinal, self.freq)) - - def _add_delta(self, other): - if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)): - offset = frequencies.to_offset(self.freq) - if isinstance(offset, offsets.Tick): - nanos = tslib._delta_to_nanoseconds(other) - offset_nanos = tslib._delta_to_nanoseconds(offset) - - if nanos % offset_nanos == 0: - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal + (nanos // offset_nanos) - return Period(ordinal=ordinal, freq=self.freq) - elif isinstance(other, offsets.DateOffset): - freqstr = frequencies.get_standard_freq(other) - base = frequencies.get_base_alias(freqstr) - - if base == self.freq: - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal + other.n - return Period(ordinal=ordinal, freq=self.freq) - - raise ValueError("Input has different freq from Period(freq={0})".format(self.freq)) - - def __add__(self, other): - if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset, Timedelta)): - return self._add_delta(other) - elif com.is_integer(other): - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal + other - return Period(ordinal=ordinal, 
freq=self.freq) - else: # pragma: no cover - return NotImplemented - - def __sub__(self, other): - if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset, Timedelta)): - neg_other = -other - return self + neg_other - elif com.is_integer(other): - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal - other - return Period(ordinal=ordinal, freq=self.freq) - elif isinstance(other, Period): - if other.freq != self.freq: - raise ValueError("Cannot do arithmetic with " - "non-conforming periods") - if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: - return Period(ordinal=tslib.iNaT, freq=self.freq) - return self.ordinal - other.ordinal - else: # pragma: no cover - return NotImplemented - - def _comp_method(func, name): - def f(self, other): - if isinstance(other, Period): - if other.freq != self.freq: - raise ValueError("Cannot compare non-conforming periods") - if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: - return False - return func(self.ordinal, other.ordinal) - else: - raise TypeError(other) - - f.__name__ = name - return f - - __lt__ = _comp_method(operator.lt, '__lt__') - __le__ = _comp_method(operator.le, '__le__') - __gt__ = _comp_method(operator.gt, '__gt__') - __ge__ = _comp_method(operator.ge, '__ge__') - - def asfreq(self, freq, how='E'): - """ - Convert Period to desired frequency, either at the start or end of the - interval - - Parameters - ---------- - freq : string - how : {'E', 'S', 'end', 'start'}, default 'end' - Start or end of the timespan - - Returns - ------- - resampled : Period - """ - how = _validate_end_alias(how) - base1, mult1 = _gfc(self.freq) - base2, mult2 = _gfc(freq) - - if mult2 != 1: - raise ValueError('Only mult == 1 supported') - - end = how == 'E' - new_ordinal = tslib.period_asfreq(self.ordinal, base1, base2, end) - - return Period(ordinal=new_ordinal, freq=base2) - - @property - def start_time(self): - return self.to_timestamp(how='S') 
- - @property - def end_time(self): - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = (self + 1).start_time.value - 1 - return Timestamp(ordinal) - - def to_timestamp(self, freq=None, how='start', tz=None): - """ - Return the Timestamp representation of the Period at the target - frequency at the specified end (how) of the Period - - Parameters - ---------- - freq : string or DateOffset, default is 'D' if self.freq is week or - longer and 'S' otherwise - Target frequency - how: str, default 'S' (start) - 'S', 'E'. Can be aliased as case insensitive - 'Start', 'Finish', 'Begin', 'End' - - Returns - ------- - Timestamp - """ - how = _validate_end_alias(how) - - if freq is None: - base, mult = _gfc(self.freq) - freq = frequencies.get_to_timestamp_base(base) - - base, mult = _gfc(freq) - val = self.asfreq(freq, how) - - dt64 = tslib.period_ordinal_to_dt64(val.ordinal, base) - return Timestamp(dt64, tz=tz) - - year = _period_field_accessor('year', 0) - month = _period_field_accessor('month', 3) - day = _period_field_accessor('day', 4) - hour = _period_field_accessor('hour', 5) - minute = _period_field_accessor('minute', 6) - second = _period_field_accessor('second', 7) - weekofyear = _period_field_accessor('week', 8) - week = weekofyear - dayofweek = _period_field_accessor('dayofweek', 10) - weekday = dayofweek - dayofyear = _period_field_accessor('dayofyear', 9) - quarter = _period_field_accessor('quarter', 2) - qyear = _period_field_accessor('qyear', 1) - - @classmethod - def now(cls, freq=None): - return Period(datetime.now(), freq=freq) - - def __repr__(self): - base, mult = _gfc(self.freq) - formatted = tslib.period_format(self.ordinal, base) - freqstr = frequencies._reverse_period_code_map[base] - - if not compat.PY3: - encoding = com.get_option("display.encoding") - formatted = formatted.encode(encoding) - - return "Period('%s', '%s')" % (formatted, freqstr) - - def __unicode__(self): - """ - Return a string representation for a 
particular DataFrame - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. - """ - base, mult = _gfc(self.freq) - formatted = tslib.period_format(self.ordinal, base) - value = ("%s" % formatted) - return value - - def strftime(self, fmt): - """ - Returns the string representation of the :class:`Period`, depending - on the selected :keyword:`format`. :keyword:`format` must be a string - containing one or several directives. The method recognizes the same - directives as the :func:`time.strftime` function of the standard Python - distribution, as well as the specific additional directives ``%f``, - ``%F``, ``%q``. (formatting & docs originally from scikits.timeries) - - +-----------+--------------------------------+-------+ - | Directive | Meaning | Notes | - +===========+================================+=======+ - | ``%a`` | Locale's abbreviated weekday | | - | | name. | | - +-----------+--------------------------------+-------+ - | ``%A`` | Locale's full weekday name. | | - +-----------+--------------------------------+-------+ - | ``%b`` | Locale's abbreviated month | | - | | name. | | - +-----------+--------------------------------+-------+ - | ``%B`` | Locale's full month name. | | - +-----------+--------------------------------+-------+ - | ``%c`` | Locale's appropriate date and | | - | | time representation. | | - +-----------+--------------------------------+-------+ - | ``%d`` | Day of the month as a decimal | | - | | number [01,31]. | | - +-----------+--------------------------------+-------+ - | ``%f`` | 'Fiscal' year without a | \(1) | - | | century as a decimal number | | - | | [00,99] | | - +-----------+--------------------------------+-------+ - | ``%F`` | 'Fiscal' year with a century | \(2) | - | | as a decimal number | | - +-----------+--------------------------------+-------+ - | ``%H`` | Hour (24-hour clock) as a | | - | | decimal number [00,23]. 
| | - +-----------+--------------------------------+-------+ - | ``%I`` | Hour (12-hour clock) as a | | - | | decimal number [01,12]. | | - +-----------+--------------------------------+-------+ - | ``%j`` | Day of the year as a decimal | | - | | number [001,366]. | | - +-----------+--------------------------------+-------+ - | ``%m`` | Month as a decimal number | | - | | [01,12]. | | - +-----------+--------------------------------+-------+ - | ``%M`` | Minute as a decimal number | | - | | [00,59]. | | - +-----------+--------------------------------+-------+ - | ``%p`` | Locale's equivalent of either | \(3) | - | | AM or PM. | | - +-----------+--------------------------------+-------+ - | ``%q`` | Quarter as a decimal number | | - | | [01,04] | | - +-----------+--------------------------------+-------+ - | ``%S`` | Second as a decimal number | \(4) | - | | [00,61]. | | - +-----------+--------------------------------+-------+ - | ``%U`` | Week number of the year | \(5) | - | | (Sunday as the first day of | | - | | the week) as a decimal number | | - | | [00,53]. All days in a new | | - | | year preceding the first | | - | | Sunday are considered to be in | | - | | week 0. | | - +-----------+--------------------------------+-------+ - | ``%w`` | Weekday as a decimal number | | - | | [0(Sunday),6]. | | - +-----------+--------------------------------+-------+ - | ``%W`` | Week number of the year | \(5) | - | | (Monday as the first day of | | - | | the week) as a decimal number | | - | | [00,53]. All days in a new | | - | | year preceding the first | | - | | Monday are considered to be in | | - | | week 0. | | - +-----------+--------------------------------+-------+ - | ``%x`` | Locale's appropriate date | | - | | representation. | | - +-----------+--------------------------------+-------+ - | ``%X`` | Locale's appropriate time | | - | | representation. 
| | - +-----------+--------------------------------+-------+ - | ``%y`` | Year without century as a | | - | | decimal number [00,99]. | | - +-----------+--------------------------------+-------+ - | ``%Y`` | Year with century as a decimal | | - | | number. | | - +-----------+--------------------------------+-------+ - | ``%Z`` | Time zone name (no characters | | - | | if no time zone exists). | | - +-----------+--------------------------------+-------+ - | ``%%`` | A literal ``'%'`` character. | | - +-----------+--------------------------------+-------+ - - .. note:: - - (1) - The ``%f`` directive is the same as ``%y`` if the frequency is - not quarterly. - Otherwise, it corresponds to the 'fiscal' year, as defined by - the :attr:`qyear` attribute. - - (2) - The ``%F`` directive is the same as ``%Y`` if the frequency is - not quarterly. - Otherwise, it corresponds to the 'fiscal' year, as defined by - the :attr:`qyear` attribute. - - (3) - The ``%p`` directive only affects the output hour field - if the ``%I`` directive is used to parse the hour. - - (4) - The range really is ``0`` to ``61``; this accounts for leap - seconds and the (very rare) double leap seconds. - - (5) - The ``%U`` and ``%W`` directives are only used in calculations - when the day of the week and the year are specified. - - .. rubric:: Examples - - >>> a = Period(freq='Q@JUL', year=2006, quarter=1) - >>> a.strftime('%F-Q%q') - '2006-Q1' - >>> # Output the last month in the quarter of this date - >>> a.strftime('%b-%Y') - 'Oct-2005' - >>> - >>> a = Period(freq='D', year=2001, month=1, day=1) - >>> a.strftime('%d-%b-%Y') - '01-Jan-2006' - >>> a.strftime('%b. %d, %Y was a %A') - 'Jan. 
01, 2001 was a Monday' - """ - base, mult = _gfc(self.freq) - return tslib.period_format(self.ordinal, base, fmt) def _get_ordinals(data, freq): f = lambda x: Period(x, freq=freq).ordinal @@ -1371,41 +910,6 @@ def _make_field_arrays(*fields): return arrays -def _ordinal_from_fields(year, month, quarter, day, hour, minute, - second, freq): - base, mult = _gfc(freq) - if mult != 1: - raise ValueError('Only mult == 1 supported') - - if quarter is not None: - year, month = _quarter_to_myear(year, quarter, freq) - - return tslib.period_ordinal(year, month, day, hour, minute, second, 0, 0, base) - - -def _quarter_to_myear(year, quarter, freq): - if quarter is not None: - if quarter <= 0 or quarter > 4: - raise ValueError('Quarter must be 1 <= q <= 4') - - mnum = frequencies._month_numbers[frequencies._get_rule_month(freq)] + 1 - month = (mnum + (quarter - 1) * 3) % 12 + 1 - if month > mnum: - year -= 1 - - return year, month - - -def _validate_end_alias(how): - how_dict = {'S': 'S', 'E': 'E', - 'START': 'S', 'FINISH': 'E', - 'BEGIN': 'S', 'END': 'E'} - how = how_dict.get(str(how).upper()) - if how not in set(['S', 'E']): - raise ValueError('How must be one of S or E') - return how - - def pnow(freq=None): return Period(datetime.now(), freq=freq) diff --git a/setup.py b/setup.py index 42d666dcb8f5c..f90e238ad9dc1 100755 --- a/setup.py +++ b/setup.py @@ -462,6 +462,7 @@ def pxd(name): 'sources': ['pandas/src/datetime/np_datetime.c', 'pandas/src/datetime/np_datetime_strings.c', 'pandas/src/period.c']}, + period=dict(pyxfile='period'), index={'pyxfile': 'index', 'sources': ['pandas/src/datetime/np_datetime.c', 'pandas/src/datetime/np_datetime_strings.c']}, From a05115b6feee5342956dbef44605b557c9d7dd0c Mon Sep 17 00:00:00 2001 From: Brandon Bradley Date: Mon, 16 Feb 2015 10:53:31 -0600 Subject: [PATCH 2/4] remove some Python dependencies from Period class --- pandas/lib.pxd | 1 + pandas/period.pyx | 29 ++++++++++++++++------------- 2 files changed, 17 insertions(+), 13 
deletions(-) create mode 100644 pandas/lib.pxd diff --git a/pandas/lib.pxd b/pandas/lib.pxd new file mode 100644 index 0000000000000..ba52e4cc47c89 --- /dev/null +++ b/pandas/lib.pxd @@ -0,0 +1 @@ +cdef bint is_null_datetimelike(v) diff --git a/pandas/period.pyx b/pandas/period.pyx index acbdb793d22fb..2913235c32017 100644 --- a/pandas/period.pyx +++ b/pandas/period.pyx @@ -2,15 +2,18 @@ from datetime import datetime, date, timedelta import operator import numpy as np +cdef extern from "datetime_helper.h": + double total_seconds(object) + from pandas import compat -from pandas.core import common as com -from pandas.core.base import PandasObject from pandas.tseries import frequencies from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.tseries import offsets from pandas.tseries.tools import parse_time_string +cimport lib +import lib from pandas import tslib from tslib import Timedelta, Timestamp @@ -26,7 +29,7 @@ def _period_field_accessor(name, alias): return property(f) -class Period(PandasObject): +class Period(object): """ Represents an period of time @@ -74,7 +77,7 @@ class Period(PandasObject): raise ValueError(("Only value or ordinal but not both should be " "given but not both")) elif ordinal is not None: - if not com.is_integer(ordinal): + if not lib.is_integer(ordinal): raise ValueError("Ordinal must be an integer") if freq is None: raise ValueError('Must supply freq for ordinal value') @@ -96,14 +99,14 @@ class Period(PandasObject): converted = other.asfreq(freq) self.ordinal = converted.ordinal - elif com.is_null_datelike_scalar(value) or value in tslib._nat_strings: + elif lib.is_null_datetimelike(value) or value in tslib._nat_strings: self.ordinal = tslib.iNaT if freq is None: raise ValueError("If value is NaT, freq cannot be None " "because it cannot be inferred") - elif isinstance(value, compat.string_types) or com.is_integer(value): - if com.is_integer(value): + elif isinstance(value, compat.string_types) or 
lib.is_integer(value): + if lib.is_integer(value): value = str(value) value = value.upper() @@ -184,7 +187,7 @@ class Period(PandasObject): if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, offsets.DateOffset, Timedelta)): return self._add_delta(other) - elif com.is_integer(other): + elif lib.is_integer(other): if self.ordinal == tslib.iNaT: ordinal = self.ordinal else: @@ -198,7 +201,7 @@ class Period(PandasObject): offsets.Tick, offsets.DateOffset, Timedelta)): neg_other = -other return self + neg_other - elif com.is_integer(other): + elif lib.is_integer(other): if self.ordinal == tslib.iNaT: ordinal = self.ordinal else: @@ -320,15 +323,15 @@ class Period(PandasObject): def now(cls, freq=None): return Period(datetime.now(), freq=freq) + # HACK IT UP AND YOU BETTER FIX IT SOON + def __str__(self): + return self.__unicode__() + def __repr__(self): base, mult = _gfc(self.freq) formatted = tslib.period_format(self.ordinal, base) freqstr = frequencies._reverse_period_code_map[base] - if not compat.PY3: - encoding = com.get_option("display.encoding") - formatted = formatted.encode(encoding) - return "Period('%s', '%s')" % (formatted, freqstr) def __unicode__(self): From bfa80660c67076b0f5457056d1d857fc3204a620 Mon Sep 17 00:00:00 2001 From: Brandon Bradley Date: Fri, 6 Feb 2015 18:49:02 -0600 Subject: [PATCH 3/4] move period related functions from tslib to period module Moved many import statement to local scope in period module, similar to tslib. This fixes circular dependency problems. 
--- pandas/period.pyx | 625 ++++++++++++++++++++++++++++- pandas/tests/test_tseries.py | 7 +- pandas/tseries/frequencies.py | 13 +- pandas/tseries/index.py | 3 +- pandas/tseries/period.py | 18 +- pandas/tseries/tests/test_tslib.py | 10 +- pandas/tslib.pxd | 4 + pandas/tslib.pyx | 566 -------------------------- setup.py | 6 +- 9 files changed, 649 insertions(+), 603 deletions(-) diff --git a/pandas/period.pyx b/pandas/period.pyx index 2913235c32017..946736a3fe82c 100644 --- a/pandas/period.pyx +++ b/pandas/period.pyx @@ -1,30 +1,616 @@ from datetime import datetime, date, timedelta import operator + +from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, + NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) import numpy as np cdef extern from "datetime_helper.h": double total_seconds(object) +from libc.stdlib cimport free + from pandas import compat -from pandas.tseries import frequencies -from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.tseries import offsets from pandas.tseries.tools import parse_time_string +from datetime cimport * +cimport util cimport lib import lib from pandas import tslib -from tslib import Timedelta, Timestamp - +from tslib import Timedelta, Timestamp, iNaT, NaT +from tslib import have_pytz, _get_utcoffset +from tslib cimport ( + maybe_get_tz, + _is_utc, + _is_tzlocal, + _get_dst_info, +) + +from sys import version_info + +cdef bint PY2 = version_info[0] == 2 + +cdef int64_t NPY_NAT = util.get_nat() + + +cdef extern from "period.h": + ctypedef struct date_info: + int64_t absdate + double abstime + double second + int minute + int hour + int day + int month + int quarter + int year + int day_of_week + int day_of_year + int calendar + + ctypedef struct asfreq_info: + int from_week_end + int to_week_end + + int from_a_year_end + int to_a_year_end + + int from_q_year_end + int to_q_year_end + + ctypedef int64_t (*freq_conv_func)(int64_t, char, asfreq_info*) + + void initialize_daytime_conversion_factor_matrix() + 
int64_t asfreq(int64_t dtordinal, int freq1, int freq2, char relation) except INT32_MIN + freq_conv_func get_asfreq_func(int fromFreq, int toFreq) + void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) + + int64_t get_period_ordinal(int year, int month, int day, + int hour, int minute, int second, int microseconds, int picoseconds, + int freq) except INT32_MIN + + int64_t get_python_ordinal(int64_t period_ordinal, int freq) except INT32_MIN + + int get_date_info(int64_t ordinal, int freq, date_info *dinfo) except INT32_MIN + double getAbsTime(int, int64_t, int64_t) + + int pyear(int64_t ordinal, int freq) except INT32_MIN + int pqyear(int64_t ordinal, int freq) except INT32_MIN + int pquarter(int64_t ordinal, int freq) except INT32_MIN + int pmonth(int64_t ordinal, int freq) except INT32_MIN + int pday(int64_t ordinal, int freq) except INT32_MIN + int pweekday(int64_t ordinal, int freq) except INT32_MIN + int pday_of_week(int64_t ordinal, int freq) except INT32_MIN + int pday_of_year(int64_t ordinal, int freq) except INT32_MIN + int pweek(int64_t ordinal, int freq) except INT32_MIN + int phour(int64_t ordinal, int freq) except INT32_MIN + int pminute(int64_t ordinal, int freq) except INT32_MIN + int psecond(int64_t ordinal, int freq) except INT32_MIN + char *c_strftime(date_info *dinfo, char *fmt) + int get_yq(int64_t ordinal, int freq, int *quarter, int *year) + +initialize_daytime_conversion_factor_matrix() -#--------------- # Period logic +#---------------------------------------------------------------------- + +cdef inline int64_t apply_mult(int64_t period_ord, int64_t mult): + """ + Get freq+multiple ordinal value from corresponding freq-only ordinal value. + For example, 5min ordinal will be 1/5th the 1min ordinal (rounding down to + integer). 
+ """ + if mult == 1: + return period_ord + + return (period_ord - 1) // mult + +cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult): + """ + Get freq-only ordinal value from corresponding freq+multiple ordinal. + """ + if mult == 1: + return period_ord_w_mult + + return period_ord_w_mult * mult + 1; + +def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): + """ + Convert array of datetime64 values (passed in as 'i8' dtype) to a set of + periods corresponding to desired frequency, per period convention. + """ + cdef: + ndarray[int64_t] out + Py_ssize_t i, l + pandas_datetimestruct dts + + l = len(dtarr) + + out = np.empty(l, dtype='i8') + + if tz is None: + for i in range(l): + if dtarr[i] == iNaT: + out[i] = iNaT + continue + pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts) + out[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) + else: + out = localize_dt64arr_to_period(dtarr, freq, tz) + return out + +def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): + """ + Convert array to datetime64 values from a set of ordinals corresponding to + periods per period convention. + """ + cdef: + ndarray[int64_t] out + Py_ssize_t i, l + + l = len(periodarr) + + out = np.empty(l, dtype='i8') + + for i in range(l): + if periodarr[i] == iNaT: + out[i] = iNaT + continue + out[i] = period_ordinal_to_dt64(periodarr[i], freq) + + return out + +cdef char START = 'S' +cdef char END = 'E' + +cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, + bint end): + """ + Convert period ordinal from one frequency to another, and if upsampling, + choose to use start ('S') or end ('E') of period. 
+ """ + cdef: + int64_t retval + + if period_ordinal == iNaT: + return iNaT + + if end: + retval = asfreq(period_ordinal, freq1, freq2, END) + else: + retval = asfreq(period_ordinal, freq1, freq2, START) + + if retval == INT32_MIN: + raise ValueError('Frequency conversion failed') + + return retval + +def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): + """ + Convert int64-array of period ordinals from one frequency to another, and + if upsampling, choose to use start ('S') or end ('E') of period. + """ + cdef: + ndarray[int64_t] result + Py_ssize_t i, n + freq_conv_func func + asfreq_info finfo + int64_t val, ordinal + char relation + + n = len(arr) + result = np.empty(n, dtype=np.int64) + + func = get_asfreq_func(freq1, freq2) + get_asfreq_info(freq1, freq2, &finfo) + + if end: + relation = END + else: + relation = START + + mask = arr == iNaT + if mask.any(): # NaT process + for i in range(n): + val = arr[i] + if val != iNaT: + val = func(val, relation, &finfo) + if val == INT32_MIN: + raise ValueError("Unable to convert to desired frequency.") + result[i] = val + else: + for i in range(n): + val = func(arr[i], relation, &finfo) + if val == INT32_MIN: + raise ValueError("Unable to convert to desired frequency.") + result[i] = val + + return result + +def period_ordinal(int y, int m, int d, int h, int min, int s, int us, int ps, int freq): + cdef: + int64_t ordinal + + return get_period_ordinal(y, m, d, h, min, s, us, ps, freq) + + +cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): + cdef: + pandas_datetimestruct dts + date_info dinfo + float subsecond_fraction + + if ordinal == iNaT: + return NPY_NAT + + get_date_info(ordinal, freq, &dinfo) + + dts.year = dinfo.year + dts.month = dinfo.month + dts.day = dinfo.day + dts.hour = dinfo.hour + dts.min = dinfo.minute + dts.sec = int(dinfo.second) + subsecond_fraction = dinfo.second - dts.sec + dts.us = int((subsecond_fraction) * 1e6) + dts.ps = int(((subsecond_fraction) * 1e6 - 
dts.us) * 1e6) + + return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) + +def period_format(int64_t value, int freq, object fmt=None): + cdef: + int freq_group + + if value == iNaT: + return repr(NaT) + + if fmt is None: + freq_group = (freq // 1000) * 1000 + if freq_group == 1000: # FR_ANN + fmt = b'%Y' + elif freq_group == 2000: # FR_QTR + fmt = b'%FQ%q' + elif freq_group == 3000: # FR_MTH + fmt = b'%Y-%m' + elif freq_group == 4000: # WK + left = period_asfreq(value, freq, 6000, 0) + right = period_asfreq(value, freq, 6000, 1) + return '%s/%s' % (period_format(left, 6000), + period_format(right, 6000)) + elif (freq_group == 5000 # BUS + or freq_group == 6000): # DAY + fmt = b'%Y-%m-%d' + elif freq_group == 7000: # HR + fmt = b'%Y-%m-%d %H:00' + elif freq_group == 8000: # MIN + fmt = b'%Y-%m-%d %H:%M' + elif freq_group == 9000: # SEC + fmt = b'%Y-%m-%d %H:%M:%S' + elif freq_group == 10000: # MILLISEC + fmt = b'%Y-%m-%d %H:%M:%S.%l' + elif freq_group == 11000: # MICROSEC + fmt = b'%Y-%m-%d %H:%M:%S.%u' + elif freq_group == 12000: # NANOSEC + fmt = b'%Y-%m-%d %H:%M:%S.%n' + else: + raise ValueError('Unknown freq: %d' % freq) + + return _period_strftime(value, freq, fmt) + + +cdef list extra_fmts = [(b"%q", b"^`AB`^"), + (b"%f", b"^`CD`^"), + (b"%F", b"^`EF`^"), + (b"%l", b"^`GH`^"), + (b"%u", b"^`IJ`^"), + (b"%n", b"^`KL`^")] + +cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] + +cdef object _period_strftime(int64_t value, int freq, object fmt): + import sys + + cdef: + Py_ssize_t i + date_info dinfo + char *formatted + object pat, repl, result + list found_pat = [False] * len(extra_fmts) + int year, quarter + + if PyUnicode_Check(fmt): + fmt = fmt.encode('utf-8') + + get_date_info(value, freq, &dinfo) + for i in range(len(extra_fmts)): + pat = extra_fmts[i][0] + repl = extra_fmts[i][1] + if pat in fmt: + fmt = fmt.replace(pat, repl) + found_pat[i] = True + + formatted = c_strftime(&dinfo, fmt) + + result = 
util.char_to_string(formatted) + free(formatted) + + for i in range(len(extra_fmts)): + if found_pat[i]: + if get_yq(value, freq, &quarter, &year) < 0: + raise ValueError('Unable to get quarter and year') + + if i == 0: + repl = '%d' % quarter + elif i == 1: # %f, 2-digit year + repl = '%.2d' % (year % 100) + elif i == 2: + repl = '%d' % year + elif i == 3: + repl = '%03d' % (value % 1000) + elif i == 4: + repl = '%06d' % (value % 1000000) + elif i == 5: + repl = '%09d' % (value % 1000000000) + + result = result.replace(str_extra_fmts[i], repl) + + if PY2: + result = result.decode('utf-8', 'ignore') + + return result + +# period accessors + +ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN + +def get_period_field(int code, int64_t value, int freq): + cdef accessor f = _get_accessor_func(code) + if f is NULL: + raise ValueError('Unrecognized period code: %d' % code) + if value == iNaT: + return np.nan + return f(value, freq) + +def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): + cdef: + Py_ssize_t i, sz + ndarray[int64_t] out + accessor f + + f = _get_accessor_func(code) + if f is NULL: + raise ValueError('Unrecognized period code: %d' % code) + + sz = len(arr) + out = np.empty(sz, dtype=np.int64) + + for i in range(sz): + if arr[i] == iNaT: + out[i] = -1 + continue + out[i] = f(arr[i], freq) + + return out + + + +cdef accessor _get_accessor_func(int code): + if code == 0: + return &pyear + elif code == 1: + return &pqyear + elif code == 2: + return &pquarter + elif code == 3: + return &pmonth + elif code == 4: + return &pday + elif code == 5: + return &phour + elif code == 6: + return &pminute + elif code == 7: + return &psecond + elif code == 8: + return &pweek + elif code == 9: + return &pday_of_year + elif code == 10: + return &pweekday + return NULL + + +def extract_ordinals(ndarray[object] values, freq): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] ordinals = np.empty(n, dtype=np.int64) + object p + + for i 
in range(n): + p = values[i] + ordinals[i] = p.ordinal + if p.freq != freq: + raise ValueError("%s is wrong freq" % p) + + return ordinals + +cpdef resolution(ndarray[int64_t] stamps, tz=None): + cdef: + Py_ssize_t i, n = len(stamps) + pandas_datetimestruct dts + int reso = D_RESO, curr_reso + + if tz is not None: + tz = maybe_get_tz(tz) + return _reso_local(stamps, tz) + else: + for i in range(n): + if stamps[i] == NPY_NAT: + continue + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + return reso + +US_RESO = 0 +MS_RESO = 1 +S_RESO = 2 +T_RESO = 3 +H_RESO = 4 +D_RESO = 5 + +cdef inline int _reso_stamp(pandas_datetimestruct *dts): + if dts.us != 0: + if dts.us % 1000 == 0: + return MS_RESO + return US_RESO + elif dts.sec != 0: + return S_RESO + elif dts.min != 0: + return T_RESO + elif dts.hour != 0: + return H_RESO + return D_RESO + +cdef _reso_local(ndarray[int64_t] stamps, object tz): + cdef: + Py_ssize_t n = len(stamps) + int reso = D_RESO, curr_reso + ndarray[int64_t] trans, deltas, pos + pandas_datetimestruct dts + + if _is_utc(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + continue + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + elif _is_tzlocal(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + continue + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, + &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + pandas_datetime_to_datetimestruct(stamps[i] + delta, + PANDAS_FR_ns, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = _get_dst_info(tz) + + _pos = trans.searchsorted(stamps, side='right') - 1 + if _pos.dtype != np.int64: + 
_pos = _pos.astype(np.int64) + pos = _pos + + # statictzinfo + if typ not in ['pytz','dateutil']: + for i in range(n): + if stamps[i] == NPY_NAT: + continue + pandas_datetime_to_datetimestruct(stamps[i] + deltas[0], + PANDAS_FR_ns, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + else: + for i in range(n): + if stamps[i] == NPY_NAT: + continue + pandas_datetime_to_datetimestruct(stamps[i] + deltas[pos[i]], + PANDAS_FR_ns, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + + return reso + + +# period helpers + +cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, + int freq, object tz): + cdef: + Py_ssize_t n = len(stamps) + ndarray[int64_t] result = np.empty(n, dtype=np.int64) + ndarray[int64_t] trans, deltas, pos + pandas_datetimestruct dts + + if not have_pytz: + raise Exception('Could not find pytz module') + + if _is_utc(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) + result[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) + + elif _is_tzlocal(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, + &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + pandas_datetime_to_datetimestruct(stamps[i] + delta, + PANDAS_FR_ns, &dts) + result[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = _get_dst_info(tz) + + _pos = trans.searchsorted(stamps, side='right') - 1 + if _pos.dtype != np.int64: + _pos = _pos.astype(np.int64) + pos = _pos + + # statictzinfo + if typ not in 
['pytz','dateutil']: + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + pandas_datetime_to_datetimestruct(stamps[i] + deltas[0], + PANDAS_FR_ns, &dts) + result[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) + else: + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + pandas_datetime_to_datetimestruct(stamps[i] + deltas[pos[i]], + PANDAS_FR_ns, &dts) + result[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) + + return result + def _period_field_accessor(name, alias): def f(self): + from pandas.tseries.frequencies import get_freq_code as _gfc base, mult = _gfc(self.freq) - return tslib.get_period_field(alias, self.ordinal, base) + return get_period_field(alias, self.ordinal, base) f.__name__ = name return property(f) @@ -63,6 +649,8 @@ class Period(object): def __init__(self, value=None, freq=None, ordinal=None, year=None, month=1, quarter=None, day=1, hour=0, minute=0, second=0): + from pandas.tseries import frequencies + from pandas.tseries.frequencies import get_freq_code as _gfc # freq points to a tuple (base, mult); base is one of the defined # periods such as A, Q, etc. 
Every five minutes would be, e.g., @@ -135,7 +723,7 @@ class Period(object): raise ValueError('Only mult == 1 supported') if self.ordinal is None: - self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day, + self.ordinal = period_ordinal(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond, 0, base) @@ -143,6 +731,7 @@ class Period(object): def __eq__(self, other): if isinstance(other, Period): + from pandas.tseries.frequencies import get_freq_code as _gfc if other.freq != self.freq: raise ValueError("Cannot compare non-conforming periods") if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: @@ -158,6 +747,7 @@ class Period(object): return hash((self.ordinal, self.freq)) def _add_delta(self, other): + from pandas.tseries import frequencies if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)): offset = frequencies.to_offset(self.freq) if isinstance(offset, offsets.Tick): @@ -251,6 +841,7 @@ class Period(object): ------- resampled : Period """ + from pandas.tseries.frequencies import get_freq_code as _gfc how = _validate_end_alias(how) base1, mult1 = _gfc(self.freq) base2, mult2 = _gfc(freq) @@ -259,7 +850,7 @@ class Period(object): raise ValueError('Only mult == 1 supported') end = how == 'E' - new_ordinal = tslib.period_asfreq(self.ordinal, base1, base2, end) + new_ordinal = period_asfreq(self.ordinal, base1, base2, end) return Period(ordinal=new_ordinal, freq=base2) @@ -293,6 +884,8 @@ class Period(object): ------- Timestamp """ + from pandas.tseries import frequencies + from pandas.tseries.frequencies import get_freq_code as _gfc how = _validate_end_alias(how) if freq is None: @@ -302,7 +895,7 @@ class Period(object): base, mult = _gfc(freq) val = self.asfreq(freq, how) - dt64 = tslib.period_ordinal_to_dt64(val.ordinal, base) + dt64 = period_ordinal_to_dt64(val.ordinal, base) return Timestamp(dt64, tz=tz) year = _period_field_accessor('year', 0) @@ -328,8 +921,10 @@ class Period(object): return 
self.__unicode__() def __repr__(self): + from pandas.tseries import frequencies + from pandas.tseries.frequencies import get_freq_code as _gfc base, mult = _gfc(self.freq) - formatted = tslib.period_format(self.ordinal, base) + formatted = period_format(self.ordinal, base) freqstr = frequencies._reverse_period_code_map[base] return "Period('%s', '%s')" % (formatted, freqstr) @@ -341,8 +936,9 @@ class Period(object): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. """ + from pandas.tseries.frequencies import get_freq_code as _gfc base, mult = _gfc(self.freq) - formatted = tslib.period_format(self.ordinal, base) + formatted = period_format(self.ordinal, base) value = ("%s" % formatted) return value @@ -483,12 +1079,14 @@ class Period(object): >>> a.strftime('%b. %d, %Y was a %A') 'Jan. 01, 2001 was a Monday' """ + from pandas.tseries.frequencies import get_freq_code as _gfc base, mult = _gfc(self.freq) - return tslib.period_format(self.ordinal, base, fmt) + return period_format(self.ordinal, base, fmt) def _ordinal_from_fields(year, month, quarter, day, hour, minute, second, freq): + from pandas.tseries.frequencies import get_freq_code as _gfc base, mult = _gfc(freq) if mult != 1: raise ValueError('Only mult == 1 supported') @@ -496,7 +1094,7 @@ def _ordinal_from_fields(year, month, quarter, day, hour, minute, if quarter is not None: year, month = _quarter_to_myear(year, quarter, freq) - return tslib.period_ordinal(year, month, day, hour, minute, second, 0, 0, base) + return period_ordinal(year, month, day, hour, minute, second, 0, 0, base) def _quarter_to_myear(year, quarter, freq): @@ -504,6 +1102,7 @@ def _quarter_to_myear(year, quarter, freq): if quarter <= 0 or quarter > 4: raise ValueError('Quarter must be 1 <= q <= 4') + from pandas.tseries import frequencies mnum = frequencies._month_numbers[frequencies._get_rule_month(freq)] + 1 month = (mnum + (quarter - 1) * 3) % 12 + 1 if month > mnum: diff --git a/pandas/tests/test_tseries.py 
b/pandas/tests/test_tseries.py index af68e7059f8ac..b8e5f23d825f0 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -7,6 +7,7 @@ import pandas.util.testing as tm from pandas.compat import range, lrange, zip import pandas.lib as lib +import pandas.period as period import pandas.algos as algos @@ -731,12 +732,10 @@ def test_to_datetime_bijective(self): class TestPeriodField(tm.TestCase): def test_get_period_field_raises_on_out_of_range(self): - from pandas import tslib - self.assertRaises(ValueError, tslib.get_period_field, -1, 0, 0) + self.assertRaises(ValueError, period.get_period_field, -1, 0, 0) def test_get_period_field_array_raises_on_out_of_range(self): - from pandas import tslib - self.assertRaises(ValueError, tslib.get_period_field_arr, -1, np.empty(1), 0) + self.assertRaises(ValueError, period.get_period_field_arr, -1, np.empty(1), 0) if __name__ == '__main__': import nose diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 0ec225d77f5e2..26c0d26482001 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -12,6 +12,7 @@ import pandas.core.common as com import pandas.lib as lib import pandas.tslib as tslib +import pandas.period as period from pandas.tslib import Timedelta class FreqGroup(object): @@ -31,12 +32,12 @@ class FreqGroup(object): class Resolution(object): - RESO_US = tslib.US_RESO - RESO_MS = tslib.MS_RESO - RESO_SEC = tslib.S_RESO - RESO_MIN = tslib.T_RESO - RESO_HR = tslib.H_RESO - RESO_DAY = tslib.D_RESO + RESO_US = period.US_RESO + RESO_MS = period.MS_RESO + RESO_SEC = period.S_RESO + RESO_MIN = period.T_RESO + RESO_HR = period.H_RESO + RESO_DAY = period.D_RESO _reso_str_map = { RESO_US: 'microsecond', diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 34cbfe0a3abda..2de1a004879bc 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -28,6 +28,7 @@ from pandas.lib import Timestamp import pandas.lib as lib import pandas.tslib 
as tslib +import pandas.period as period import pandas.algos as _algos import pandas.index as _index @@ -1461,7 +1462,7 @@ def is_normalized(self): @cache_readonly def _resolution(self): - return tslib.resolution(self.asi8, self.tz) + return period.resolution(self.asi8, self.tz) def equals(self, other): """ diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 8a0ac0488ec44..71f97926f8b3f 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -13,7 +13,9 @@ import pandas.tseries.offsets as offsets from pandas.period import Period +import pandas.period as period from pandas.period import ( + get_period_field_arr, _validate_end_alias, _quarter_to_myear, ) @@ -32,7 +34,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - return tslib.get_period_field_arr(alias, self.values, base) + return get_period_field_arr(alias, self.values, base) f.__name__ = name f.__doc__ = docstring return property(f) @@ -41,7 +43,7 @@ def f(self): def _get_ordinals(data, freq): f = lambda x: Period(x, freq=freq).ordinal if isinstance(data[0], Period): - return tslib.extract_ordinals(data, freq) + return period.extract_ordinals(data, freq) else: return lib.map_infer(data, f) @@ -51,7 +53,7 @@ def dt64arr_to_periodarr(data, freq, tz): raise ValueError('Wrong dtype: %s' % data.dtype) base, mult = _gfc(freq) - return tslib.dt64arr_to_periodarr(data.view('i8'), base, tz) + return period.dt64arr_to_periodarr(data.view('i8'), base, tz) # --- Period index sketch @@ -236,7 +238,7 @@ def _from_arraylike(cls, data, freq, tz): else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) - data = tslib.period_asfreq_arr(data.values, base1, + data = period.period_asfreq_arr(data.values, base1, base2, 1) else: if freq is None and len(data) > 0: @@ -363,7 +365,7 @@ def asfreq(self, freq=None, how='E'): raise ValueError('Only mult == 1 supported') end = how == 'E' - new_data = tslib.period_asfreq_arr(self.values, base1, base2, end) + 
new_data = period.period_asfreq_arr(self.values, base1, base2, end) return self._simple_new(new_data, self.name, freq=freq) def to_datetime(self, dayfirst=False): @@ -431,7 +433,7 @@ def to_timestamp(self, freq=None, how='start'): base, mult = _gfc(freq) new_data = self.asfreq(freq, how) - new_data = tslib.periodarr_to_dt64arr(new_data.values, base) + new_data = period.periodarr_to_dt64arr(new_data.values, base) return DatetimeIndex(new_data, freq='infer', name=self.name) def _add_delta(self, other): @@ -881,7 +883,7 @@ def _range_from_fields(year=None, month=None, quarter=None, day=None, year, quarter = _make_field_arrays(year, quarter) for y, q in zip(year, quarter): y, m = _quarter_to_myear(y, q, freq) - val = tslib.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base) + val = period.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base) ordinals.append(val) else: base, mult = _gfc(freq) @@ -890,7 +892,7 @@ def _range_from_fields(year=None, month=None, quarter=None, day=None, arrays = _make_field_arrays(year, month, day, hour, minute, second) for y, mth, d, h, mn, s in zip(*arrays): - ordinals.append(tslib.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)) + ordinals.append(period.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)) return np.array(ordinals, dtype=np.int64), freq diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 8140d289f8c8a..0197dd7eaf74b 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -3,10 +3,12 @@ import numpy as np from pandas import tslib +from pandas import period import datetime from pandas.core.api import Timestamp, Series, Timedelta -from pandas.tslib import period_asfreq, period_ordinal, get_timezone +from pandas.tslib import get_timezone +from pandas.period import period_asfreq, period_ordinal from pandas.tseries.index import date_range from pandas.tseries.frequencies import get_freq import pandas.tseries.offsets as offsets @@ -764,11 +766,11 @@ def 
test_addition_subtraction_preserve_frequency(self): def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], - [tslib.D_RESO, tslib.D_RESO, tslib.D_RESO, tslib.D_RESO, - tslib.H_RESO, tslib.T_RESO,tslib.S_RESO, tslib.MS_RESO, tslib.US_RESO]): + [period.D_RESO, period.D_RESO, period.D_RESO, period.D_RESO, + period.H_RESO, period.T_RESO, period.S_RESO, period.MS_RESO, period.US_RESO]): for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Eastern']: idx = date_range(start='2013-04-01', periods=30, freq=freq, tz=tz) - result = tslib.resolution(idx.asi8, idx.tz) + result = period.resolution(idx.asi8, idx.tz) self.assertEqual(result, expected) diff --git a/pandas/tslib.pxd b/pandas/tslib.pxd index 1452dbdca03ee..d8fc57fe85bfd 100644 --- a/pandas/tslib.pxd +++ b/pandas/tslib.pxd @@ -2,3 +2,7 @@ from numpy cimport ndarray, int64_t cdef convert_to_tsobject(object, object, object) cdef convert_to_timedelta64(object, object, object) +cpdef object maybe_get_tz(object) +cdef bint _is_utc(object) +cdef bint _is_tzlocal(object) +cdef object _get_dst_info(object) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index c7c35564c1e5a..85cb50b8f18ae 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -3574,572 +3574,6 @@ cpdef normalize_date(object dt): else: raise TypeError('Unrecognized type: %s' % type(dt)) -cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, - int freq, object tz): - cdef: - Py_ssize_t n = len(stamps) - ndarray[int64_t] result = np.empty(n, dtype=np.int64) - ndarray[int64_t] trans, deltas, pos - pandas_datetimestruct dts - - if not have_pytz: - raise Exception('Could not find pytz module') - - if _is_utc(tz): - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) - result[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) - - elif 
_is_tzlocal(tz): - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, - &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 - pandas_datetime_to_datetimestruct(stamps[i] + delta, - PANDAS_FR_ns, &dts) - result[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) - else: - # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = _get_dst_info(tz) - - _pos = trans.searchsorted(stamps, side='right') - 1 - if _pos.dtype != np.int64: - _pos = _pos.astype(np.int64) - pos = _pos - - # statictzinfo - if typ not in ['pytz','dateutil']: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - pandas_datetime_to_datetimestruct(stamps[i] + deltas[0], - PANDAS_FR_ns, &dts) - result[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) - else: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - pandas_datetime_to_datetimestruct(stamps[i] + deltas[pos[i]], - PANDAS_FR_ns, &dts) - result[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) - - return result - - -cdef extern from "period.h": - ctypedef struct date_info: - int64_t absdate - double abstime - double second - int minute - int hour - int day - int month - int quarter - int year - int day_of_week - int day_of_year - int calendar - - ctypedef struct asfreq_info: - int from_week_end - int to_week_end - - int from_a_year_end - int to_a_year_end - - int from_q_year_end - int to_q_year_end - - ctypedef int64_t (*freq_conv_func)(int64_t, char, asfreq_info*) - - void initialize_daytime_conversion_factor_matrix() - int64_t asfreq(int64_t dtordinal, int freq1, int freq2, char relation) except INT32_MIN - 
freq_conv_func get_asfreq_func(int fromFreq, int toFreq) - void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) - - int64_t get_period_ordinal(int year, int month, int day, - int hour, int minute, int second, int microseconds, int picoseconds, - int freq) except INT32_MIN - - int64_t get_python_ordinal(int64_t period_ordinal, int freq) except INT32_MIN - - int get_date_info(int64_t ordinal, int freq, date_info *dinfo) except INT32_MIN - double getAbsTime(int, int64_t, int64_t) - - int pyear(int64_t ordinal, int freq) except INT32_MIN - int pqyear(int64_t ordinal, int freq) except INT32_MIN - int pquarter(int64_t ordinal, int freq) except INT32_MIN - int pmonth(int64_t ordinal, int freq) except INT32_MIN - int pday(int64_t ordinal, int freq) except INT32_MIN - int pweekday(int64_t ordinal, int freq) except INT32_MIN - int pday_of_week(int64_t ordinal, int freq) except INT32_MIN - int pday_of_year(int64_t ordinal, int freq) except INT32_MIN - int pweek(int64_t ordinal, int freq) except INT32_MIN - int phour(int64_t ordinal, int freq) except INT32_MIN - int pminute(int64_t ordinal, int freq) except INT32_MIN - int psecond(int64_t ordinal, int freq) except INT32_MIN - char *c_strftime(date_info *dinfo, char *fmt) - int get_yq(int64_t ordinal, int freq, int *quarter, int *year) - -initialize_daytime_conversion_factor_matrix() - -# Period logic -#---------------------------------------------------------------------- - -cdef inline int64_t apply_mult(int64_t period_ord, int64_t mult): - """ - Get freq+multiple ordinal value from corresponding freq-only ordinal value. - For example, 5min ordinal will be 1/5th the 1min ordinal (rounding down to - integer). - """ - if mult == 1: - return period_ord - - return (period_ord - 1) // mult - -cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult): - """ - Get freq-only ordinal value from corresponding freq+multiple ordinal. 
- """ - if mult == 1: - return period_ord_w_mult - - return period_ord_w_mult * mult + 1; - -def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): - """ - Convert array of datetime64 values (passed in as 'i8' dtype) to a set of - periods corresponding to desired frequency, per period convention. - """ - cdef: - ndarray[int64_t] out - Py_ssize_t i, l - pandas_datetimestruct dts - - l = len(dtarr) - - out = np.empty(l, dtype='i8') - - if tz is None: - for i in range(l): - if dtarr[i] == iNaT: - out[i] = iNaT - continue - pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts) - out[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) - else: - out = localize_dt64arr_to_period(dtarr, freq, tz) - return out - -def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): - """ - Convert array to datetime64 values from a set of ordinals corresponding to - periods per period convention. - """ - cdef: - ndarray[int64_t] out - Py_ssize_t i, l - - l = len(periodarr) - - out = np.empty(l, dtype='i8') - - for i in range(l): - if periodarr[i] == iNaT: - out[i] = iNaT - continue - out[i] = period_ordinal_to_dt64(periodarr[i], freq) - - return out - -cdef char START = 'S' -cdef char END = 'E' - -cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, - bint end): - """ - Convert period ordinal from one frequency to another, and if upsampling, - choose to use start ('S') or end ('E') of period. 
- """ - cdef: - int64_t retval - - if period_ordinal == iNaT: - return iNaT - - if end: - retval = asfreq(period_ordinal, freq1, freq2, END) - else: - retval = asfreq(period_ordinal, freq1, freq2, START) - - if retval == INT32_MIN: - raise ValueError('Frequency conversion failed') - - return retval - -def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): - """ - Convert int64-array of period ordinals from one frequency to another, and - if upsampling, choose to use start ('S') or end ('E') of period. - """ - cdef: - ndarray[int64_t] result - Py_ssize_t i, n - freq_conv_func func - asfreq_info finfo - int64_t val, ordinal - char relation - - n = len(arr) - result = np.empty(n, dtype=np.int64) - - func = get_asfreq_func(freq1, freq2) - get_asfreq_info(freq1, freq2, &finfo) - - if end: - relation = END - else: - relation = START - - mask = arr == iNaT - if mask.any(): # NaT process - for i in range(n): - val = arr[i] - if val != iNaT: - val = func(val, relation, &finfo) - if val == INT32_MIN: - raise ValueError("Unable to convert to desired frequency.") - result[i] = val - else: - for i in range(n): - val = func(arr[i], relation, &finfo) - if val == INT32_MIN: - raise ValueError("Unable to convert to desired frequency.") - result[i] = val - - return result - -def period_ordinal(int y, int m, int d, int h, int min, int s, int us, int ps, int freq): - cdef: - int64_t ordinal - - return get_period_ordinal(y, m, d, h, min, s, us, ps, freq) - - -cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): - cdef: - pandas_datetimestruct dts - date_info dinfo - float subsecond_fraction - - if ordinal == iNaT: - return NPY_NAT - - get_date_info(ordinal, freq, &dinfo) - - dts.year = dinfo.year - dts.month = dinfo.month - dts.day = dinfo.day - dts.hour = dinfo.hour - dts.min = dinfo.minute - dts.sec = int(dinfo.second) - subsecond_fraction = dinfo.second - dts.sec - dts.us = int((subsecond_fraction) * 1e6) - dts.ps = int(((subsecond_fraction) * 1e6 - 
dts.us) * 1e6) - - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - -def period_format(int64_t value, int freq, object fmt=None): - cdef: - int freq_group - - if value == iNaT: - return repr(NaT) - - if fmt is None: - freq_group = (freq // 1000) * 1000 - if freq_group == 1000: # FR_ANN - fmt = b'%Y' - elif freq_group == 2000: # FR_QTR - fmt = b'%FQ%q' - elif freq_group == 3000: # FR_MTH - fmt = b'%Y-%m' - elif freq_group == 4000: # WK - left = period_asfreq(value, freq, 6000, 0) - right = period_asfreq(value, freq, 6000, 1) - return '%s/%s' % (period_format(left, 6000), - period_format(right, 6000)) - elif (freq_group == 5000 # BUS - or freq_group == 6000): # DAY - fmt = b'%Y-%m-%d' - elif freq_group == 7000: # HR - fmt = b'%Y-%m-%d %H:00' - elif freq_group == 8000: # MIN - fmt = b'%Y-%m-%d %H:%M' - elif freq_group == 9000: # SEC - fmt = b'%Y-%m-%d %H:%M:%S' - elif freq_group == 10000: # MILLISEC - fmt = b'%Y-%m-%d %H:%M:%S.%l' - elif freq_group == 11000: # MICROSEC - fmt = b'%Y-%m-%d %H:%M:%S.%u' - elif freq_group == 12000: # NANOSEC - fmt = b'%Y-%m-%d %H:%M:%S.%n' - else: - raise ValueError('Unknown freq: %d' % freq) - - return _period_strftime(value, freq, fmt) - - -cdef list extra_fmts = [(b"%q", b"^`AB`^"), - (b"%f", b"^`CD`^"), - (b"%F", b"^`EF`^"), - (b"%l", b"^`GH`^"), - (b"%u", b"^`IJ`^"), - (b"%n", b"^`KL`^")] - -cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] - -cdef object _period_strftime(int64_t value, int freq, object fmt): - import sys - - cdef: - Py_ssize_t i - date_info dinfo - char *formatted - object pat, repl, result - list found_pat = [False] * len(extra_fmts) - int year, quarter - - if PyUnicode_Check(fmt): - fmt = fmt.encode('utf-8') - - get_date_info(value, freq, &dinfo) - for i in range(len(extra_fmts)): - pat = extra_fmts[i][0] - repl = extra_fmts[i][1] - if pat in fmt: - fmt = fmt.replace(pat, repl) - found_pat[i] = True - - formatted = c_strftime(&dinfo, fmt) - - result = 
util.char_to_string(formatted) - free(formatted) - - for i in range(len(extra_fmts)): - if found_pat[i]: - if get_yq(value, freq, &quarter, &year) < 0: - raise ValueError('Unable to get quarter and year') - - if i == 0: - repl = '%d' % quarter - elif i == 1: # %f, 2-digit year - repl = '%.2d' % (year % 100) - elif i == 2: - repl = '%d' % year - elif i == 3: - repl = '%03d' % (value % 1000) - elif i == 4: - repl = '%06d' % (value % 1000000) - elif i == 5: - repl = '%09d' % (value % 1000000000) - - result = result.replace(str_extra_fmts[i], repl) - - if PY2: - result = result.decode('utf-8', 'ignore') - - return result - -# period accessors - -ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN - -def get_period_field(int code, int64_t value, int freq): - cdef accessor f = _get_accessor_func(code) - if f is NULL: - raise ValueError('Unrecognized period code: %d' % code) - if value == iNaT: - return np.nan - return f(value, freq) - -def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): - cdef: - Py_ssize_t i, sz - ndarray[int64_t] out - accessor f - - f = _get_accessor_func(code) - if f is NULL: - raise ValueError('Unrecognized period code: %d' % code) - - sz = len(arr) - out = np.empty(sz, dtype=np.int64) - - for i in range(sz): - if arr[i] == iNaT: - out[i] = -1 - continue - out[i] = f(arr[i], freq) - - return out - - - -cdef accessor _get_accessor_func(int code): - if code == 0: - return &pyear - elif code == 1: - return &pqyear - elif code == 2: - return &pquarter - elif code == 3: - return &pmonth - elif code == 4: - return &pday - elif code == 5: - return &phour - elif code == 6: - return &pminute - elif code == 7: - return &psecond - elif code == 8: - return &pweek - elif code == 9: - return &pday_of_year - elif code == 10: - return &pweekday - return NULL - - -def extract_ordinals(ndarray[object] values, freq): - cdef: - Py_ssize_t i, n = len(values) - ndarray[int64_t] ordinals = np.empty(n, dtype=np.int64) - object p - - for i 
in range(n): - p = values[i] - ordinals[i] = p.ordinal - if p.freq != freq: - raise ValueError("%s is wrong freq" % p) - - return ordinals - -cpdef resolution(ndarray[int64_t] stamps, tz=None): - cdef: - Py_ssize_t i, n = len(stamps) - pandas_datetimestruct dts - int reso = D_RESO, curr_reso - - if tz is not None: - tz = maybe_get_tz(tz) - return _reso_local(stamps, tz) - else: - for i in range(n): - if stamps[i] == NPY_NAT: - continue - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - return reso - -US_RESO = 0 -MS_RESO = 1 -S_RESO = 2 -T_RESO = 3 -H_RESO = 4 -D_RESO = 5 - -cdef inline int _reso_stamp(pandas_datetimestruct *dts): - if dts.us != 0: - if dts.us % 1000 == 0: - return MS_RESO - return US_RESO - elif dts.sec != 0: - return S_RESO - elif dts.min != 0: - return T_RESO - elif dts.hour != 0: - return H_RESO - return D_RESO - -cdef _reso_local(ndarray[int64_t] stamps, object tz): - cdef: - Py_ssize_t n = len(stamps) - int reso = D_RESO, curr_reso - ndarray[int64_t] trans, deltas, pos - pandas_datetimestruct dts - - if _is_utc(tz): - for i in range(n): - if stamps[i] == NPY_NAT: - continue - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - elif _is_tzlocal(tz): - for i in range(n): - if stamps[i] == NPY_NAT: - continue - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, - &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 - pandas_datetime_to_datetimestruct(stamps[i] + delta, - PANDAS_FR_ns, &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - else: - # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = _get_dst_info(tz) - - _pos = trans.searchsorted(stamps, side='right') - 1 - if _pos.dtype != np.int64: - 
_pos = _pos.astype(np.int64) - pos = _pos - - # statictzinfo - if typ not in ['pytz','dateutil']: - for i in range(n): - if stamps[i] == NPY_NAT: - continue - pandas_datetime_to_datetimestruct(stamps[i] + deltas[0], - PANDAS_FR_ns, &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - else: - for i in range(n): - if stamps[i] == NPY_NAT: - continue - pandas_datetime_to_datetimestruct(stamps[i] + deltas[pos[i]], - PANDAS_FR_ns, &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - - return reso #---------------------------------------------------------------------- # Don't even ask diff --git a/setup.py b/setup.py index f90e238ad9dc1..321ffb36d76f2 100755 --- a/setup.py +++ b/setup.py @@ -462,7 +462,11 @@ def pxd(name): 'sources': ['pandas/src/datetime/np_datetime.c', 'pandas/src/datetime/np_datetime_strings.c', 'pandas/src/period.c']}, - period=dict(pyxfile='period'), + period=dict(pyxfile='period', + depends=tseries_depends, + sources=['pandas/src/datetime/np_datetime.c', + 'pandas/src/datetime/np_datetime_strings.c', + 'pandas/src/period.c']), index={'pyxfile': 'index', 'sources': ['pandas/src/datetime/np_datetime.c', 'pandas/src/datetime/np_datetime_strings.c']}, From 42b5416df22ec2114767ebf72c1e30725a6b4e9e Mon Sep 17 00:00:00 2001 From: Brandon Bradley Date: Mon, 16 Feb 2015 12:11:52 -0600 Subject: [PATCH 4/4] various Period related name changes, release note --- doc/source/whatsnew/v0.16.0.txt | 1 + pandas/{ => src}/period.pyx | 2 +- pandas/src/{period.c => period_helper.c} | 2 +- pandas/src/{period.h => period_helper.h} | 0 pandas/tests/test_tseries.py | 2 +- pandas/tseries/frequencies.py | 2 +- pandas/tseries/index.py | 2 +- pandas/tseries/period.py | 6 +++--- pandas/tseries/tests/test_tslib.py | 4 ++-- setup.py | 24 ++++++++++++------------ 10 files changed, 23 insertions(+), 22 deletions(-) rename pandas/{ => src}/period.pyx (99%) rename pandas/src/{period.c => period_helper.c} (99%) rename 
pandas/src/{period.h => period_helper.h} (100%) diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index c12513e087619..63606cb830cbe 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -208,6 +208,7 @@ Performance - Performance and memory usage improvements in ``merge`` when key space exceeds ``int64`` bounds (:issue:`9151`) - Performance improvements in multi-key ``groupby`` (:issue:`9429`) - Performance improvements in ``MultiIndex.sortlevel`` (:issue:`9445`) +- Cythonized ``Period`` (:issue:`9440`) Bug Fixes ~~~~~~~~~ diff --git a/pandas/period.pyx b/pandas/src/period.pyx similarity index 99% rename from pandas/period.pyx rename to pandas/src/period.pyx index 946736a3fe82c..e57bdc3b33c5e 100644 --- a/pandas/period.pyx +++ b/pandas/src/period.pyx @@ -36,7 +36,7 @@ cdef bint PY2 = version_info[0] == 2 cdef int64_t NPY_NAT = util.get_nat() -cdef extern from "period.h": +cdef extern from "period_helper.h": ctypedef struct date_info: int64_t absdate double abstime diff --git a/pandas/src/period.c b/pandas/src/period_helper.c similarity index 99% rename from pandas/src/period.c rename to pandas/src/period_helper.c index 5a744de4c3f7b..6641000544858 100644 --- a/pandas/src/period.c +++ b/pandas/src/period_helper.c @@ -1,4 +1,4 @@ -#include "period.h" +#include "period_helper.h" /* diff --git a/pandas/src/period.h b/pandas/src/period_helper.h similarity index 100% rename from pandas/src/period.h rename to pandas/src/period_helper.h diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index b8e5f23d825f0..1b796ed2d83d1 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -7,7 +7,7 @@ import pandas.util.testing as tm from pandas.compat import range, lrange, zip import pandas.lib as lib -import pandas.period as period +import pandas._period as period import pandas.algos as algos diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 
26c0d26482001..b220e03fdb327 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -12,7 +12,7 @@ import pandas.core.common as com import pandas.lib as lib import pandas.tslib as tslib -import pandas.period as period +import pandas._period as period from pandas.tslib import Timedelta class FreqGroup(object): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 2de1a004879bc..2205c6c4f4a64 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -28,7 +28,7 @@ from pandas.lib import Timestamp import pandas.lib as lib import pandas.tslib as tslib -import pandas.period as period +import pandas._period as period import pandas.algos as _algos import pandas.index as _index diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 71f97926f8b3f..106e8535ce15a 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -12,9 +12,9 @@ from pandas.tseries.tools import parse_time_string import pandas.tseries.offsets as offsets -from pandas.period import Period -import pandas.period as period -from pandas.period import ( +from pandas._period import Period +import pandas._period as period +from pandas._period import ( get_period_field_arr, _validate_end_alias, _quarter_to_myear, diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 0197dd7eaf74b..c1b9a3e2359d9 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -3,12 +3,12 @@ import numpy as np from pandas import tslib -from pandas import period +import pandas._period as period import datetime from pandas.core.api import Timestamp, Series, Timedelta from pandas.tslib import get_timezone -from pandas.period import period_asfreq, period_ordinal +from pandas._period import period_asfreq, period_ordinal from pandas.tseries.index import date_range from pandas.tseries.frequencies import get_freq import pandas.tseries.offsets as offsets diff --git a/setup.py b/setup.py index 
321ffb36d76f2..34bf4685d5dd5 100755 --- a/setup.py +++ b/setup.py @@ -445,7 +445,7 @@ def pxd(name): tseries_depends = ['pandas/src/datetime/np_datetime.h', 'pandas/src/datetime/np_datetime_strings.h', - 'pandas/src/period.h'] + 'pandas/src/period_helper.h'] # some linux distros require it @@ -461,24 +461,24 @@ def pxd(name): 'depends': tseries_depends, 'sources': ['pandas/src/datetime/np_datetime.c', 'pandas/src/datetime/np_datetime_strings.c', - 'pandas/src/period.c']}, - period=dict(pyxfile='period', - depends=tseries_depends, - sources=['pandas/src/datetime/np_datetime.c', + 'pandas/src/period_helper.c']}, + _period={'pyxfile': 'src/period', + 'depends': tseries_depends, + 'sources': ['pandas/src/datetime/np_datetime.c', 'pandas/src/datetime/np_datetime_strings.c', - 'pandas/src/period.c']), + 'pandas/src/period_helper.c']}, index={'pyxfile': 'index', 'sources': ['pandas/src/datetime/np_datetime.c', 'pandas/src/datetime/np_datetime_strings.c']}, algos={'pyxfile': 'algos', 'depends': [srcpath('generated', suffix='.pyx'), srcpath('join', suffix='.pyx')]}, - parser=dict(pyxfile='parser', - depends=['pandas/src/parser/tokenizer.h', - 'pandas/src/parser/io.h', - 'pandas/src/numpy_helper.h'], - sources=['pandas/src/parser/tokenizer.c', - 'pandas/src/parser/io.c']) + parser={'pyxfile': 'parser', + 'depends': ['pandas/src/parser/tokenizer.h', + 'pandas/src/parser/io.h', + 'pandas/src/numpy_helper.h'], + 'sources': ['pandas/src/parser/tokenizer.c', + 'pandas/src/parser/io.c']} ) extensions = []