diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index b46a05a0842c3..dff8d18c2de4e 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -12,7 +12,7 @@ cdef extern from "Python.h": import numpy as np from numpy cimport int64_t -cdef extern from "compat_helper.h": +cdef extern from "src/compat_helper.h": cdef int slice_get_indices(PyObject* s, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, diff --git a/pandas/_libs/khash.pxd b/pandas/_libs/khash.pxd index b1d965c3618cd..ba09721c6bd35 100644 --- a/pandas/_libs/khash.pxd +++ b/pandas/_libs/khash.pxd @@ -3,7 +3,7 @@ from cpython cimport PyObject from numpy cimport int64_t, uint64_t, int32_t, uint32_t, float64_t -cdef extern from "khash_python.h": +cdef extern from "src/klib/khash_python.h": ctypedef uint32_t khint_t ctypedef khint_t khiter_t diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e1d59f807a7fd..688c8fcf33983 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -914,4 +914,4 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, return result -include "inference.pyx" +include "src/inference.pyx" diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 89d2de6de213a..73800476da2c7 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -72,7 +72,7 @@ cdef double NEGINF = -INF cdef extern from "errno.h": int errno -cdef extern from "headers/portable.h": +cdef extern from "src/headers/portable.h": # I *think* this is here so that strcasecmp is defined on Windows # so we don't get # `parsers.obj : error LNK2001: unresolved external symbol strcasecmp` @@ -87,7 +87,7 @@ except NameError: basestring = str -cdef extern from "parser/tokenizer.h": +cdef extern from "src/parser/tokenizer.h": ctypedef enum ParserState: START_RECORD @@ -243,7 +243,7 @@ cdef extern from "parser/tokenizer.h": int to_boolean(const char *item, uint8_t *val) nogil -cdef extern from "parser/io.h": +cdef extern from "src/parser/io.h": void *new_mmap(char *fname) int del_mmap(void *src) void* buffer_mmap_bytes(void *source, size_t nbytes, diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 63baf91e3c136..90907978763f2 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -28,7 +28,7 @@ See LICENSE for the license #include "../headers/stdint.h" -#include "khash.h" +#include "../klib/khash.h" #define CHUNKSIZE 1024 * 256 #define KB 1024 diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd deleted file mode 100644 index cf23df1279f34..0000000000000 --- a/pandas/_libs/src/util.pxd +++ /dev/null @@ -1,185 +0,0 @@ -from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS -cimport numpy as cnp -cnp.import_array() - -cimport cpython -from cpython cimport PyTypeObject - -cdef extern from "Python.h": - # Note: importing extern-style allows us to declare these as nogil - # functions, whereas `from cpython cimport` does not. 
- bint PyUnicode_Check(object obj) nogil - bint PyString_Check(object obj) nogil - bint PyBool_Check(object obj) nogil - bint PyFloat_Check(object obj) nogil - bint PyComplex_Check(object obj) nogil - bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil - - -cdef extern from "numpy/arrayobject.h": - PyTypeObject PyFloatingArrType_Type - -cdef extern from "numpy/ndarrayobject.h": - PyTypeObject PyTimedeltaArrType_Type - PyTypeObject PyDatetimeArrType_Type - PyTypeObject PyComplexFloatingArrType_Type - PyTypeObject PyBoolArrType_Type - - bint PyArray_IsIntegerScalar(obj) nogil - bint PyArray_Check(obj) nogil - -# -------------------------------------------------------------------- -# Type Checking - -cdef inline bint is_string_object(object obj) nogil: - return PyString_Check(obj) or PyUnicode_Check(obj) - - -cdef inline bint is_integer_object(object obj) nogil: - return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) - - -cdef inline bint is_float_object(object obj) nogil: - return (PyFloat_Check(obj) or - (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) - - -cdef inline bint is_complex_object(object obj) nogil: - return (PyComplex_Check(obj) or - PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) - - -cdef inline bint is_bool_object(object obj) nogil: - return (PyBool_Check(obj) or - PyObject_TypeCheck(obj, &PyBoolArrType_Type)) - - -cdef inline bint is_timedelta64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - - -cdef inline bint is_datetime64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - -# -------------------------------------------------------------------- - -cdef extern from "numpy_helper.h": - void set_array_not_contiguous(ndarray ao) - - int assign_value_1d(ndarray, Py_ssize_t, object) except -1 - cnp.int64_t get_nat() - object get_value_1d(ndarray, Py_ssize_t) - char *get_c_string(object) except NULL - object char_to_string(char*) - -ctypedef fused numeric: - cnp.int8_t - cnp.int16_t - cnp.int32_t - cnp.int64_t - - cnp.uint8_t - cnp.uint16_t - cnp.uint32_t - cnp.uint64_t - - cnp.float32_t - cnp.float64_t - -cdef extern from "headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN - -cdef inline object get_value_at(ndarray arr, object loc): - cdef: - Py_ssize_t i, sz - int casted - - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - - if i < 0 and sz > 0: - i += sz - elif i >= sz or sz == 0: - raise IndexError('index out of bounds') - - return get_value_1d(arr, i) - -cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): - """Sets a value into the array without checking the writeable flag. - - This should be used when setting values in a loop, check the writeable - flag above the loop and then eschew the check on each iteration. - """ - cdef: - Py_ssize_t i, sz - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - - if i < 0: - i += sz - elif i >= sz: - raise IndexError('index out of bounds') - - assign_value_1d(arr, i, value) - -cdef inline set_value_at(ndarray arr, object loc, object value): - """Sets a value into the array after checking that the array is mutable. 
- """ - if not cnp.PyArray_ISWRITEABLE(arr): - raise ValueError('assignment destination is read-only') - - set_value_at_unsafe(arr, loc, value) - - -cdef inline is_array(object o): - return cnp.PyArray_Check(o) - -cdef inline bint _checknull(object val): - try: - return val is None or (cpython.PyFloat_Check(val) and val != val) - except ValueError: - return False - -cdef inline bint _checknan(object val): - return not cnp.PyArray_Check(val) and val != val - -cdef inline bint is_period_object(object val): - return getattr(val, '_typ', '_typ') == 'period' - - -cdef inline object unbox_if_zerodim(object arr): - """ - If arr is zerodim array, return a proper array scalar (e.g. np.int64). - Otherwise, return arr as is. - - Parameters - ---------- - arr : object - - Returns - ------- - result : object - """ - if cnp.PyArray_IsZeroDim(arr): - return cnp.PyArray_ToScalar(cnp.PyArray_DATA(arr), arr) - else: - return arr diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 81df7981096ba..0c4acca679dd1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -25,9 +25,7 @@ from tslibs.np_datetime cimport (check_dts_bounds, _string_to_dts, dt64_to_dtstruct, dtstruct_to_dt64, pydatetime_to_dt64, pydate_to_dt64, - get_datetime64_value, - days_per_month_table, - dayofweek, is_leapyear) + get_datetime64_value) from tslibs.np_datetime import OutOfBoundsDatetime from tslibs.parsing import parse_datetime_string @@ -40,6 +38,7 @@ import pytz UTC = pytz.utc +from tslibs.ccalendar cimport get_days_in_month, dayofweek from tslibs.timedeltas cimport cast_from_unit from tslibs.timedeltas import Timedelta from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset, @@ -749,7 +748,7 @@ def monthrange(int64_t year, int64_t month): if month < 1 or month > 12: raise ValueError("bad month number 0; must be 1-12") - days = days_per_month_table[is_leapyear(year)][month - 1] + days = get_days_in_month(year, month) return (dayofweek(year, month, 1), days) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 33b8b32bcf2dc..e30dcce1160de 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -54,7 +54,6 @@ cdef extern from "../src/datetime/np_datetime.h": PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *result) nogil - int days_per_month_table[2][12] int dayofweek(int y, int m, int d) nogil int is_leapyear(int64_t year) nogil diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index e02818dd818df..998225faf5888 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -18,12 +18,12 @@ cnp.import_array() from util cimport is_string_object, is_integer_object from ccalendar import MONTHS, DAYS +from ccalendar cimport get_days_in_month, dayofweek from conversion cimport tz_convert_single, pydt_to_i8 from frequencies cimport get_freq_code from nattype cimport NPY_NAT from np_datetime cimport (pandas_datetimestruct, - dtstruct_to_dt64, dt64_to_dtstruct, - is_leapyear, days_per_month_table, dayofweek) + dtstruct_to_dt64, dt64_to_dtstruct) # --------------------------------------------------------------------- # Constants @@ -390,12 +390,6 @@ class BaseOffset(_BaseOffset): # ---------------------------------------------------------------------- # RelativeDelta Arithmetic -@cython.wraparound(False) -@cython.boundscheck(False) -cdef inline int get_days_in_month(int year, int month) nogil: - return days_per_month_table[is_leapyear(year)][month - 1] - - cdef inline int 
year_add_months(pandas_datetimestruct dts, int months) nogil: """new year number after shifting pandas_datetimestruct number of months""" return dts.year + (dts.month + months - 1) / 12 diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e2caebe4c4afc..ee95fc503595b 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -22,10 +22,10 @@ from cpython.datetime cimport PyDateTime_Check, PyDateTime_IMPORT PyDateTime_IMPORT from np_datetime cimport (pandas_datetimestruct, dtstruct_to_dt64, - dt64_to_dtstruct, is_leapyear) + dt64_to_dtstruct) cimport util -from util cimport is_period_object, is_string_object, INT32_MIN +from util cimport is_period_object, is_string_object from pandas._libs.missing cimport is_null_datetimelike @@ -34,6 +34,7 @@ from timezones cimport is_utc, is_tzlocal, get_utcoffset, get_dst_info from timedeltas cimport delta_to_nanoseconds from ccalendar import MONTH_NUMBERS +from ccalendar cimport is_leapyear from frequencies cimport (get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str, get_rule_month) @@ -46,7 +47,13 @@ from pandas.tseries import offsets from pandas.tseries import frequencies -cdef extern from "period_helper.h": +cdef extern from "../src/numpy_helper.h": + object char_to_string(char*) + +cdef extern from "../src/headers/stdint.h": + enum: INT32_MIN + +cdef extern from "../src/period_helper.h": ctypedef struct date_info: int64_t absdate double abstime @@ -339,7 +346,7 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): formatted = c_strftime(&dinfo, fmt) - result = util.char_to_string(formatted) + result = char_to_string(formatted) free(formatted) for i in range(len(extra_fmts)): diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index b166babe5992c..ed5ba2d78cf77 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -10,11 +10,6 @@ cnp.import_array() from util cimport is_string_object, get_nat -from pandas._libs.khash cimport (khiter_t, - kh_destroy_int64, kh_put_int64, - kh_init_int64, kh_int64_t, - kh_resize_int64, kh_get_int64) - from cpython.datetime cimport datetime from np_datetime cimport pandas_datetimestruct, dt64_to_dtstruct @@ -346,34 +341,6 @@ class Resolution(object): # ---------------------------------------------------------------------- # Frequency Inference - -# TODO: this is non performant logic here (and duplicative) and this -# simply should call unique_1d directly -# plus no reason to depend on khash directly -cdef unique_deltas(ndarray[int64_t] arr): - cdef: - Py_ssize_t i, n = len(arr) - int64_t val - khiter_t k - kh_int64_t *table - int ret = 0 - list uniques = [] - - table = kh_init_int64() - kh_resize_int64(table, 10) - for i in range(n - 1): - val = arr[i + 1] - arr[i] - k = kh_get_int64(table, val) - if k == table.n_buckets: - kh_put_int64(table, val, &ret) - uniques.append(val) - kh_destroy_int64(table) - - result = np.array(uniques, dtype=np.int64) - result.sort() - return result - - def _is_multiple(us, mult): return us % mult == 0 @@ -410,11 +377,11 @@ class _FrequencyInferer(object): @cache_readonly def deltas(self): - return unique_deltas(self.values) + return np.sort(unique(np.diff(self.values))) @cache_readonly def deltas_asi8(self): - return unique_deltas(self.index.asi8) + return np.sort(unique(np.diff(self.index.asi8))) @cache_readonly def is_unique(self): @@ -520,11 +487,11 @@ class _FrequencyInferer(object): @cache_readonly def mdiffs(self): nmonths = 
self.fields['Y'] * 12 + self.fields['M'] - return unique_deltas(nmonths.astype('i8')) + return np.sort(unique(np.diff(nmonths.astype('i8')))) @cache_readonly def ydiffs(self): - return unique_deltas(self.fields['Y'].astype('i8')) + return np.sort(unique(np.diff(self.fields['Y'].astype('i8')))) def _infer_daily_rule(self): annual_rule = self._get_annual_rule() diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b9be9c16eb6c3..6f9cda047af18 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -17,8 +17,7 @@ from cpython.datetime cimport (datetime, PyDateTime_IMPORT from util cimport (is_datetime64_object, is_timedelta64_object, - is_integer_object, is_string_object, is_array, - INT64_MAX) + is_integer_object, is_string_object, is_array) cimport ccalendar from conversion import tz_localize_to_utc, date_normalize @@ -29,8 +28,7 @@ from nattype import NaT from nattype cimport NPY_NAT from np_datetime import OutOfBoundsDatetime from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds, - pandas_datetimestruct, dt64_to_dtstruct, - is_leapyear) + pandas_datetimestruct, dt64_to_dtstruct) from timedeltas import Timedelta from timedeltas cimport delta_to_nanoseconds from timezones cimport ( @@ -764,7 +762,7 @@ class Timestamp(_Timestamp): @property def is_leap_year(self): - return bool(is_leapyear(self.year)) + return bool(ccalendar.is_leapyear(self.year)) def tz_localize(self, tz, ambiguous='raise', errors='raise'): """ @@ -1006,7 +1004,7 @@ class Timestamp(_Timestamp): # Add the min and max fields at the class level -cdef int64_t _NS_UPPER_BOUND = INT64_MAX +cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max # the smallest value we could actually represent is # INT64_MIN + 1 == -9223372036854775807 # but to allow overflow free conversion with a microsecond resolution diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd new file mode 100644 index 0000000000000..34375bd18f73b --- /dev/null +++ b/pandas/_libs/tslibs/util.pxd @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +from cython cimport Py_ssize_t + +cimport numpy as cnp +from numpy cimport ndarray, int64_t + + +cdef bint is_string_object(object obj) nogil +cdef bint is_integer_object(object obj) nogil +cdef bint is_float_object(object obj) nogil +cdef bint is_complex_object(object obj) nogil +cdef bint is_bool_object(object obj) nogil +cdef bint is_timedelta64_object(object obj) nogil +cdef bint is_datetime64_object(object obj) nogil +cdef bint is_array(object o) +cdef bint is_period_object(object val) + +cdef bint _checknull(object val) +cdef bint _checknan(object val) + +cdef int64_t get_nat() diff --git a/pandas/_libs/tslibs/util.pyx b/pandas/_libs/tslibs/util.pyx new file mode 100644 index 0000000000000..83f679c6f573c --- /dev/null +++ b/pandas/_libs/tslibs/util.pyx @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +cimport cpython +from cpython cimport PyTypeObject + +cdef extern from "Python.h": + # Note: importing extern-style allows us to declare these as nogil + # functions, whereas `from cpython cimport` does not. 
+ bint PyUnicode_Check(object obj) nogil + bint PyString_Check(object obj) nogil + bint PyBool_Check(object obj) nogil + bint PyFloat_Check(object obj) nogil + bint PyComplex_Check(object obj) nogil + bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS +cnp.import_array() + +cdef extern from "numpy/arrayobject.h": + PyTypeObject PyFloatingArrType_Type + +cdef extern from "numpy/ndarrayobject.h": + PyTypeObject PyTimedeltaArrType_Type + PyTypeObject PyDatetimeArrType_Type + PyTypeObject PyComplexFloatingArrType_Type + PyTypeObject PyBoolArrType_Type + + bint PyArray_IsIntegerScalar(obj) nogil + bint PyArray_Check(obj) nogil + + +cdef int64_t get_nat(): + return np.datetime64('NaT').astype(np.int64) + +# -------------------------------------------------------------------- +# Type Checking + +cdef bint is_string_object(object obj) nogil: + return PyString_Check(obj) or PyUnicode_Check(obj) + + +cdef bint is_integer_object(object obj) nogil: + return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) + + +cdef bint is_float_object(object obj) nogil: + return (PyFloat_Check(obj) or + (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) + + +cdef bint is_complex_object(object obj) nogil: + return (PyComplex_Check(obj) or + PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) + + +cdef bint is_bool_object(object obj) nogil: + return (PyBool_Check(obj) or + PyObject_TypeCheck(obj, &PyBoolArrType_Type)) + + +cdef bint is_timedelta64_object(object obj) nogil: + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) + + +cdef bint is_datetime64_object(object obj) nogil: + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) + + +cdef bint is_array(object o): + return cnp.PyArray_Check(o) + + +cdef bint is_period_object(object val): + return getattr(val, '_typ', '_typ') == 'period' + + +# -------------------------------------------------------------------- + + +cdef inline bint _checknull(object val): + try: + return val is None or (cpython.PyFloat_Check(val) and val != val) + except ValueError: + return False + + +cdef inline bint _checknan(object val): + return not cnp.PyArray_Check(val) and val != val diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd new file mode 100644 index 0000000000000..f485bc1514a81 --- /dev/null +++ b/pandas/_libs/util.pxd @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- + +from cython cimport Py_ssize_t + +cimport numpy as cnp +from numpy cimport ndarray + +cdef extern from "src/headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: INT64_MIN + +ctypedef fused numeric: + cnp.int8_t + cnp.int16_t + cnp.int32_t + cnp.int64_t + + cnp.uint8_t + cnp.uint16_t + cnp.uint32_t + cnp.uint64_t + + cnp.float32_t + cnp.float64_t + + +cdef extern from "src/numpy_helper.h": + void set_array_not_contiguous(ndarray ao) + + int assign_value_1d(ndarray, Py_ssize_t, object) except -1 + cnp.int64_t get_nat() + object get_value_1d(ndarray, Py_ssize_t) + char *get_c_string(object) except NULL + object char_to_string(char*) + + +from tslibs.util cimport (is_string_object, + is_integer_object, is_float_object, + is_complex_object, is_bool_object, + is_timedelta64_object, is_datetime64_object, + is_array, + is_period_object, _checknull, _checknan) + + +cdef object unbox_if_zerodim(object arr) + +cdef 
set_value_at(ndarray arr, object loc, object value) +cdef set_value_at_unsafe(ndarray arr, object loc, object value) +cdef object get_value_at(ndarray arr, object loc) diff --git a/pandas/_libs/util.pyx b/pandas/_libs/util.pyx new file mode 100644 index 0000000000000..bc1bd92964567 --- /dev/null +++ b/pandas/_libs/util.pyx @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +cimport cpython +from cpython cimport PyTypeObject + +cimport numpy as cnp +from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS +cnp.import_array() + + +cdef object get_value_at(ndarray arr, object loc): + cdef: + Py_ssize_t i, sz + int casted + + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i = loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0 and sz > 0: + i += sz + elif i >= sz or sz == 0: + raise IndexError('index out of bounds') + + return get_value_1d(arr, i) + + +cdef set_value_at_unsafe(ndarray arr, object loc, object value): + """Sets a value into the array without checking the writeable flag. + + This should be used when setting values in a loop, check the writeable + flag above the loop and then eschew the check on each iteration. + """ + cdef: + Py_ssize_t i, sz + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i = loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0: + i += sz + elif i >= sz: + raise IndexError('index out of bounds') + + assign_value_1d(arr, i, value) + + +cdef set_value_at(ndarray arr, object loc, object value): + """Sets a value into the array after checking that the array is mutable. + """ + if not cnp.PyArray_ISWRITEABLE(arr): + raise ValueError('assignment destination is read-only') + + set_value_at_unsafe(arr, loc, value) + + +cdef inline object unbox_if_zerodim(object arr): + """ + If arr is zerodim array, return a proper array scalar (e.g. np.int64). + Otherwise, return arr as is. 
+ + Parameters + ---------- + arr : object + + Returns + ------- + result : object + """ + if cnp.PyArray_IsZeroDim(arr): + return cnp.PyArray_ToScalar(cnp.PyArray_DATA(arr), arr) + else: + return arr diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index cacb073da581c..c18b03c506a26 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -12,7 +12,7 @@ from numpy cimport ndarray, double_t, int64_t, float64_t cnp.import_array() -cdef extern from "../src/headers/math.h": +cdef extern from "src/headers/math.h": int signbit(double) nogil double sqrt(double x) nogil diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 26e39f0df8b29..42ac26e4b274e 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -21,7 +21,7 @@ import numpy as np import struct import pandas.io.sas.sas_constants as const -from pandas.io.sas._sas import Parser +from pandas.io.sas.sas import Parser class _subheader_pointer(object): diff --git a/setup.py b/setup.py index 5397a1b84dc4d..401ecfc75476f 100755 --- a/setup.py +++ b/setup.py @@ -36,9 +36,11 @@ def is_platform_mac(): try: import Cython ver = Cython.__version__ + from Cython.Build import cythonize _CYTHON_INSTALLED = ver >= LooseVersion(min_cython_ver) except ImportError: _CYTHON_INSTALLED = False + cythonize = lambda x: x # dummy function min_numpy_ver = '1.9.0' @@ -101,10 +103,12 @@ def is_platform_mac(): _pxifiles.extend(pxi_files) _pxi_dep[module] = pxi_files +numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') -class build_ext(_build_ext): - def build_extensions(self): +class build_ext(_build_ext): + @classmethod + def render_templates(cls): # if builing from c files, don't need to # generate template output if cython: @@ -125,7 +129,8 @@ def build_extensions(self): with open(outfile, "w") as f: f.write(pyxcontent) - numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') + def build_extensions(self): + self.render_templates() for ext in self.extensions: if (hasattr(ext, 'include_dirs') and @@ -134,6 +139,25 @@ def build_extensions(self): _build_ext.build_extensions(self) +def _cythonize(extensions, *args, **kwargs): + """ + Render tempita templates before calling cythonize + + Avoid running cythonize on `python setup.py clean` + See https://github.com/cython/cython/issues/1495 + """ + if len(sys.argv) > 1 and 'clean' in sys.argv: + return + + for ext in extensions: + if (hasattr(ext, 'include_dirs') and + numpy_incl not in ext.include_dirs): + ext.include_dirs.append(numpy_incl) + + build_ext.render_templates() + return cythonize(extensions, *args, **kwargs) + + DESCRIPTION = ("Powerful data structures for data analysis, time series," "and statistics") LONG_DESCRIPTION = """ @@ -328,6 +352,8 @@ class CheckSDist(sdist_class): 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/_libs/tslibs/resolution.pyx', 'pandas/_libs/tslibs/parsing.pyx', + 'pandas/_libs/tslibs/util.pyx', + 'pandas/_libs/util.pyx', 'pandas/_libs/writers.pyx', 'pandas/io/sas/sas.pyx'] @@ -453,183 +479,103 @@ def pxd(name): 'pandas/_libs/src/datetime/np_datetime_strings.h'] np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'] -tseries_depends = np_datetime_headers + ['pandas/_libs/tslibs/np_datetime.pxd'] # some linux distros require it libraries = ['m'] if not is_platform_windows() else [] ext_data = { '_libs.algos': { - 'pyxfile': '_libs/algos', - 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], - 'depends': 
_pxi_dep['algos']}, + 'pyxfile': '_libs/algos'}, '_libs.groupby': { - 'pyxfile': '_libs/groupby', - 'pxdfiles': ['_libs/src/util', '_libs/algos'], - 'depends': _pxi_dep['groupby']}, + 'pyxfile': '_libs/groupby'}, '_libs.hashing': { 'pyxfile': '_libs/hashing'}, '_libs.hashtable': { - 'pyxfile': '_libs/hashtable', - 'pxdfiles': ['_libs/hashtable', '_libs/missing', '_libs/khash'], - 'depends': (['pandas/_libs/src/klib/khash_python.h'] + - _pxi_dep['hashtable'])}, + 'pyxfile': '_libs/hashtable'}, '_libs.index': { - 'pyxfile': '_libs/index', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], - 'depends': _pxi_dep['index'], - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/index'}, '_libs.indexing': { 'pyxfile': '_libs/indexing'}, '_libs.internals': { 'pyxfile': '_libs/internals'}, '_libs.interval': { - 'pyxfile': '_libs/interval', - 'pxdfiles': ['_libs/hashtable'], - 'depends': _pxi_dep['interval']}, + 'pyxfile': '_libs/interval'}, '_libs.join': { - 'pyxfile': '_libs/join', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], - 'depends': _pxi_dep['join']}, + 'pyxfile': '_libs/join'}, '_libs.lib': { - 'pyxfile': '_libs/lib', - 'pxdfiles': ['_libs/src/util', - '_libs/missing', - '_libs/tslibs/conversion'], - 'depends': lib_depends + tseries_depends}, + 'pyxfile': '_libs/lib'}, '_libs.missing': { - 'pyxfile': '_libs/missing', - 'pxdfiles': ['_libs/src/util'], - 'depends': tseries_depends}, + 'pyxfile': '_libs/missing'}, '_libs.parsers': { 'pyxfile': '_libs/parsers', 'depends': ['pandas/_libs/src/parser/tokenizer.h', - 'pandas/_libs/src/parser/io.h', - 'pandas/_libs/src/numpy_helper.h'], + 'pandas/_libs/src/parser/io.h'], 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, '_libs.reduction': { - 'pyxfile': '_libs/reduction', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/reduction'}, '_libs.tslibs.period': { 'pyxfile': '_libs/tslibs/period', - 'pxdfiles': ['_libs/src/util', - '_libs/missing', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timezones', - '_libs/tslibs/nattype'], - 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], - 'sources': np_datetime_sources + ['pandas/_libs/src/period_helper.c']}, + 'include': ['pandas/_libs/src/period_helper.h'], + 'sources': ['pandas/_libs/src/period_helper.c']}, '_libs.properties': { 'pyxfile': '_libs/properties', 'include': []}, '_libs.reshape': { - 'pyxfile': '_libs/reshape', - 'depends': _pxi_dep['reshape']}, + 'pyxfile': '_libs/reshape'}, '_libs.skiplist': { - 'pyxfile': '_libs/skiplist', - 'depends': ['pandas/_libs/src/skiplist.h']}, + 'pyxfile': '_libs/skiplist'}, '_libs.sparse': { - 'pyxfile': '_libs/sparse', - 'depends': _pxi_dep['sparse']}, + 'pyxfile': '_libs/sparse'}, '_libs.tslib': { - 'pyxfile': '_libs/tslib', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/conversion', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timestamps', - '_libs/tslibs/timezones', - '_libs/tslibs/nattype'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslib'}, '_libs.tslibs.ccalendar': { 'pyxfile': '_libs/tslibs/ccalendar'}, '_libs.tslibs.conversion': { 'pyxfile': '_libs/tslibs/conversion', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype', - '_libs/tslibs/timezones', - '_libs/tslibs/timedeltas'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'sources': np_datetime_sources, + 'include': np_datetime_headers}, '_libs.tslibs.fields': { - 'pyxfile': '_libs/tslibs/fields', - 'pxdfiles': ['_libs/tslibs/ccalendar', - '_libs/tslibs/nattype'], - 
'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/fields'}, '_libs.tslibs.frequencies': { - 'pyxfile': '_libs/tslibs/frequencies', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/tslibs/frequencies'}, '_libs.tslibs.nattype': { - 'pyxfile': '_libs/tslibs/nattype', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/tslibs/nattype'}, '_libs.tslibs.np_datetime': { 'pyxfile': '_libs/tslibs/np_datetime', - 'depends': np_datetime_headers, - 'sources': np_datetime_sources}, + 'sources': np_datetime_sources, + 'include': np_datetime_headers}, '_libs.tslibs.offsets': { - 'pyxfile': '_libs/tslibs/offsets', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/conversion', - '_libs/tslibs/frequencies', - '_libs/tslibs/nattype'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/offsets'}, '_libs.tslibs.parsing': { - 'pyxfile': '_libs/tslibs/parsing', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/tslibs/parsing'}, '_libs.tslibs.resolution': { - 'pyxfile': '_libs/tslibs/resolution', - 'pxdfiles': ['_libs/src/util', - '_libs/khash', - '_libs/tslibs/frequencies', - '_libs/tslibs/timezones'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/resolution'}, '_libs.tslibs.strptime': { - 'pyxfile': '_libs/tslibs/strptime', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/strptime'}, '_libs.tslibs.timedeltas': { - 'pyxfile': '_libs/tslibs/timedeltas', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype'], - 'depends': np_datetime_headers, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/timedeltas'}, '_libs.tslibs.timestamps': { - 'pyxfile': '_libs/tslibs/timestamps', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/ccalendar', - '_libs/tslibs/conversion', - '_libs/tslibs/nattype', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timezones'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/timestamps'}, '_libs.tslibs.timezones': { - 'pyxfile': '_libs/tslibs/timezones', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/tslibs/timezones'}, + '_libs.tslibs.util': { + 'pyxfile': '_libs/tslibs/util'}, '_libs.testing': { 'pyxfile': '_libs/testing'}, + '_libs.util': { + 'pyxfile': '_libs/util'}, '_libs.window': { - 'pyxfile': '_libs/window', - 'pxdfiles': ['_libs/skiplist', '_libs/src/util']}, + 'pyxfile': '_libs/window'}, '_libs.writers': { - 'pyxfile': '_libs/writers', - 'pxdfiles': ['_libs/src/util']}, - 'io.sas._sas': { + 'pyxfile': '_libs/writers'}, + 'io.sas.sas': { 'pyxfile': 'io/sas/sas'}} extensions = [] for name, data in ext_data.items(): sources = [srcpath(data['pyxfile'], suffix=suffix, subdir='')] - pxds = [pxd(x) for x in data.get('pxdfiles', [])] - if suffix == '.pyx' and pxds: - sources.extend(pxds) sources.extend(data.get('sources', [])) @@ -718,6 +664,7 @@ def pxd(name): # The build cache system does string matching below this point. # if you change something, be careful. + setup(name=DISTNAME, maintainer=AUTHOR, version=versioneer.get_version(), @@ -727,7 +674,7 @@ def pxd(name): 'data/legacy_pickle/*/*.pickle', 'data/legacy_msgpack/*/*.msgpack', 'data/html_encoding/*.html']}, - ext_modules=extensions, + ext_modules=_cythonize(extensions), maintainer_email=EMAIL, description=DESCRIPTION, license=LICENSE,
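
Two of the hunks above benefit from a worked equivalent.

The resolution.pyx hunk drops the hand-rolled khash-based unique_deltas helper in favor of np.sort(unique(np.diff(values))). A minimal NumPy-only sketch of the same computation, assuming the `unique` referenced in the diff is pandas' hash-based, order-preserving unique (which is why the diff needs an explicit np.sort):

    import numpy as np

    def unique_deltas(values):
        # Sorted unique consecutive differences of a 1-D int64 array,
        # i.e. what the removed khash-based Cython helper produced.
        # np.unique already returns sorted output, so no extra sort is needed here.
        arr = np.asarray(values, dtype=np.int64)
        return np.unique(np.diff(arr))

    # e.g. hourly timestamps as int64 nanoseconds: a single delta implies a regular frequency
    i8 = np.arange(5, dtype=np.int64) * 3_600_000_000_000
    print(unique_deltas(i8))   # [3600000000000]

The offsets.pyx and tslib.pyx hunks replace the inlined days_per_month_table[is_leapyear(year)][month - 1] lookup with ccalendar's get_days_in_month. A short pure-Python sketch of that lookup, using the Gregorian leap-year rule the table indexing relies on (illustrative only, not the ccalendar implementation):

    DAYS_PER_MONTH = [
        [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],  # common year
        [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],  # leap year
    ]

    def is_leapyear(year):
        # Gregorian rule: divisible by 4, except century years not divisible by 400.
        return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)

    def get_days_in_month(year, month):
        # month is 1-based, matching the "month - 1" indexing in the removed code;
        # the boolean leap-year flag selects the row, as in the two-row C table.
        return DAYS_PER_MONTH[is_leapyear(year)][month - 1]

    print(get_days_in_month(2000, 2))  # 29 (2000 is a leap year)
    print(get_days_in_month(1900, 2))  # 28 (1900 is not)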