From 8491a6b941c649b1371f36a772439d1d5facb57a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 12 Jul 2018 10:59:50 -0700 Subject: [PATCH 1/8] enable cython coverage, use cythonize --- .coveragerc | 2 + pandas/_libs/lib.pyx | 2 +- pandas/_libs/tslibs/util.pxd | 182 +++++++++++++++++++++++++++++++++++ pandas/_libs/util.pxd | 1 + setup.py | 116 +++++++++++++++------- 5 files changed, 265 insertions(+), 38 deletions(-) create mode 100644 pandas/_libs/tslibs/util.pxd create mode 100644 pandas/_libs/util.pxd diff --git a/.coveragerc b/.coveragerc index 3f630aa6cf8f5..f5c8b701a79a8 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,6 +2,7 @@ [run] branch = False omit = */tests/* +plugins = Cython.Coverage [report] # Regexes for lines to exclude from consideration @@ -22,6 +23,7 @@ exclude_lines = if __name__ == .__main__.: ignore_errors = False +show_missing = True [html] directory = coverage_html_report diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 23aebc85e6300..172117f7d8059 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -753,4 +753,4 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, return result -include "inference.pyx" +include "src/inference.pyx" diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd new file mode 100644 index 0000000000000..ccc08a963eff4 --- /dev/null +++ b/pandas/_libs/tslibs/util.pxd @@ -0,0 +1,182 @@ +from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS +cimport numpy as cnp +cnp.import_array() + +cimport cpython +from cpython cimport PyTypeObject + +cdef extern from "Python.h": + # Note: importing extern-style allows us to declare these as nogil + # functions, whereas `from cpython cimport` does not. + bint PyUnicode_Check(object obj) nogil + bint PyString_Check(object obj) nogil + bint PyBool_Check(object obj) nogil + bint PyFloat_Check(object obj) nogil + bint PyComplex_Check(object obj) nogil + bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + + +cdef extern from "numpy/arrayobject.h": + PyTypeObject PyFloatingArrType_Type + +cdef extern from "numpy/ndarrayobject.h": + PyTypeObject PyTimedeltaArrType_Type + PyTypeObject PyDatetimeArrType_Type + PyTypeObject PyComplexFloatingArrType_Type + PyTypeObject PyBoolArrType_Type + + bint PyArray_IsIntegerScalar(obj) nogil + bint PyArray_Check(obj) nogil + +# -------------------------------------------------------------------- +# Type Checking + +cdef inline bint is_string_object(object obj) nogil: + return PyString_Check(obj) or PyUnicode_Check(obj) + + +cdef inline bint is_integer_object(object obj) nogil: + return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) + + +cdef inline bint is_float_object(object obj) nogil: + return (PyFloat_Check(obj) or + (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) + + +cdef inline bint is_complex_object(object obj) nogil: + return (PyComplex_Check(obj) or + PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) + + +cdef inline bint is_bool_object(object obj) nogil: + return (PyBool_Check(obj) or + PyObject_TypeCheck(obj, &PyBoolArrType_Type)) + + +cdef inline bint is_timedelta64_object(object obj) nogil: + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) + + +cdef inline bint is_datetime64_object(object obj) nogil: + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) + +# -------------------------------------------------------------------- + +cdef extern from "../src/numpy_helper.h": + void set_array_not_contiguous(ndarray ao) + + int assign_value_1d(ndarray, Py_ssize_t, object) except -1 + cnp.int64_t get_nat() + object get_value_1d(ndarray, Py_ssize_t) + char *get_c_string(object) except NULL + object char_to_string(char*) + +ctypedef fused numeric: + cnp.int8_t + cnp.int16_t + cnp.int32_t + cnp.int64_t + + cnp.uint8_t + cnp.uint16_t + cnp.uint32_t + cnp.uint64_t + + cnp.float32_t + cnp.float64_t + +cdef extern from "../src/headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: INT64_MIN + + +cdef inline object get_value_at(ndarray arr, object loc): + cdef: + Py_ssize_t i, sz + int casted + + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i = loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0 and sz > 0: + i += sz + elif i >= sz or sz == 0: + raise IndexError('index out of bounds') + + return get_value_1d(arr, i) + + +cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): + """Sets a value into the array without checking the writeable flag. + + This should be used when setting values in a loop, check the writeable + flag above the loop and then eschew the check on each iteration. + """ + cdef: + Py_ssize_t i, sz + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i = loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0: + i += sz + elif i >= sz: + raise IndexError('index out of bounds') + + assign_value_1d(arr, i, value) + +cdef inline set_value_at(ndarray arr, object loc, object value): + """Sets a value into the array after checking that the array is mutable. + """ + if not cnp.PyArray_ISWRITEABLE(arr): + raise ValueError('assignment destination is read-only') + + set_value_at_unsafe(arr, loc, value) + + +cdef inline is_array(object o): + return cnp.PyArray_Check(o) + + +cdef inline bint _checknull(object val): + try: + return val is None or (cpython.PyFloat_Check(val) and val != val) + except ValueError: + return False + + +cdef inline bint is_period_object(object val): + return getattr(val, '_typ', '_typ') == 'period' + + +cdef inline bint is_offset_object(object val): + """ + Check if an object is a DateOffset object. + + Parameters + ---------- + val : object + + Returns + ------- + is_date_offset : bool + """ + return getattr(val, '_typ', None) == "dateoffset" diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd new file mode 100644 index 0000000000000..0b7e66902cbb1 --- /dev/null +++ b/pandas/_libs/util.pxd @@ -0,0 +1 @@ +from tslibs.util cimport * diff --git a/setup.py b/setup.py index 4910fcf292ca6..814367c13278a 100755 --- a/setup.py +++ b/setup.py @@ -40,9 +40,11 @@ def is_platform_windows(): try: import Cython ver = Cython.__version__ + from Cython.Build import cythonize _CYTHON_INSTALLED = ver >= LooseVersion(min_cython_ver) except ImportError: _CYTHON_INSTALLED = False + cythonize = lambda x: x # dummy func # The import of Extension must be after the import of Cython, otherwise # we do not get the appropriately patched class. @@ -419,11 +421,57 @@ def get_tag(self): cmdclass['build_src'] = DummyBuildSrc cmdclass['build_ext'] = CheckingBuildExt +# ---------------------------------------------------------------------- +# Preparation of compiler arguments + if sys.byteorder == 'big': endian_macro = [('__BIG_ENDIAN__', '1')] else: endian_macro = [('__LITTLE_ENDIAN__', '1')] + +# enable coverage by building cython files by setting the environment variable +# "linetrace" (with a Truthy value) +linetrace = os.environ.get('linetrace', False) +CYTHON_TRACE = str(int(bool(linetrace))) + +# Note: if not using `cythonize`, coverage can be enabled by +# pinning `ext.cython_directives = directives` to each ext in extensions. +# github.com/cython/cython/wiki/enhancements-compilerdirectives#in-setuppy +directives = {'linetrace': False} +macros = [] +if linetrace: + # https://pypkg.com/pypi/pytest-cython/f/tests/example-project/setup.py + directives['linetrace'] = True + macros = [('CYTHON_TRACE', '1'), ('CYTHON_TRACE_NOGIL', '1')] + + +# ---------------------------------------------------------------------- +# Specification of Dependencies + +def maybe_cythonize(extensions, *args, **kwargs): + """ + Render tempita templates before calling cythonize + """ + if len(sys.argv) > 1 and 'clean' in sys.argv: + # Avoid running cythonize on `python setup.py clean` + # See https://github.com/cython/cython/issues/1495 + return extensions + + numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') + # TODO: Is this really necessary here? + for ext in extensions: + if (hasattr(ext, 'include_dirs') and + numpy_incl not in ext.include_dirs): + ext.include_dirs.append(numpy_incl) + + if cython: + build_ext.render_templates(_pxifiles) + return cythonize(extensions, *args, **kwargs) + else: + return extensions + + lib_depends = ['inference'] @@ -434,7 +482,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): if suffix == '.pyx': lib_depends = [srcpath(f, suffix='.pyx', subdir='_libs/src') for f in lib_depends] - lib_depends.append('pandas/_libs/src/util.pxd') + lib_depends.append('pandas/_libs/util.pxd') else: lib_depends = [] @@ -466,11 +514,11 @@ def pxd(name): ext_data = { '_libs.algos': { 'pyxfile': '_libs/algos', - 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], + 'pxdfiles': ['_libs/util', '_libs/algos', '_libs/hashtable'], 'depends': _pxi_dep['algos']}, '_libs.groupby': { 'pyxfile': '_libs/groupby', - 'pxdfiles': ['_libs/src/util', '_libs/algos'], + 'pxdfiles': ['_libs/util', '_libs/algos'], 'depends': _pxi_dep['groupby']}, '_libs.hashing': { 'pyxfile': '_libs/hashing'}, @@ -481,7 +529,7 @@ def pxd(name): _pxi_dep['hashtable'])}, '_libs.index': { 'pyxfile': '_libs/index', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'pxdfiles': ['_libs/util', '_libs/hashtable'], 'depends': _pxi_dep['index'], 'sources': np_datetime_sources}, '_libs.indexing': { @@ -494,17 +542,17 @@ def pxd(name): 'depends': _pxi_dep['interval']}, '_libs.join': { 'pyxfile': '_libs/join', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'pxdfiles': ['_libs/util', '_libs/hashtable'], 'depends': _pxi_dep['join']}, '_libs.lib': { 'pyxfile': '_libs/lib', - 'pxdfiles': ['_libs/src/util', + 'pxdfiles': ['_libs/util', '_libs/missing', '_libs/tslibs/conversion'], 'depends': lib_depends + tseries_depends}, '_libs.missing': { 'pyxfile': '_libs/missing', - 'pxdfiles': ['_libs/src/util'], + 'pxdfiles': ['_libs/util'], 'depends': tseries_depends}, '_libs.parsers': { 'pyxfile': '_libs/parsers', @@ -515,10 +563,10 @@ def pxd(name): 'pandas/_libs/src/parser/io.c']}, '_libs.reduction': { 'pyxfile': '_libs/reduction', - 'pxdfiles': ['_libs/src/util']}, + 'pxdfiles': ['_libs/util']}, '_libs.ops': { 'pyxfile': '_libs/ops', - 'pxdfiles': ['_libs/src/util', + 'pxdfiles': ['_libs/util', '_libs/missing']}, '_libs.properties': { 'pyxfile': '_libs/properties', @@ -534,8 +582,7 @@ def pxd(name): 'depends': _pxi_dep['sparse']}, '_libs.tslib': { 'pyxfile': '_libs/tslib', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/conversion', + 'pxdfiles': ['_libs/tslibs/conversion', '_libs/tslibs/timedeltas', '_libs/tslibs/timestamps', '_libs/tslibs/timezones', @@ -547,8 +594,7 @@ def pxd(name): 'pyxfile': '_libs/tslibs/ccalendar'}, '_libs.tslibs.conversion': { 'pyxfile': '_libs/tslibs/conversion', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype', + 'pxdfiles': ['_libs/tslibs/nattype', '_libs/tslibs/timezones', '_libs/tslibs/timedeltas'], 'depends': tseries_depends, @@ -561,18 +607,17 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.tslibs.frequencies': { 'pyxfile': '_libs/tslibs/frequencies', - 'pxdfiles': ['_libs/src/util']}, + 'pxdfiles': []}, '_libs.tslibs.nattype': { 'pyxfile': '_libs/tslibs/nattype', - 'pxdfiles': ['_libs/src/util']}, + 'pxdfiles': []}, '_libs.tslibs.np_datetime': { 'pyxfile': '_libs/tslibs/np_datetime', 'depends': np_datetime_headers, 'sources': np_datetime_sources}, '_libs.tslibs.offsets': { 'pyxfile': '_libs/tslibs/offsets', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/ccalendar', + 'pxdfiles': ['_libs/tslibs/ccalendar', '_libs/tslibs/conversion', '_libs/tslibs/frequencies', '_libs/tslibs/nattype'], @@ -580,11 +625,10 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.tslibs.parsing': { 'pyxfile': '_libs/tslibs/parsing', - 'pxdfiles': ['_libs/src/util']}, + 'pxdfiles': []}, '_libs.tslibs.period': { 'pyxfile': '_libs/tslibs/period', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/ccalendar', + 'pxdfiles': ['_libs/tslibs/ccalendar', '_libs/tslibs/timedeltas', '_libs/tslibs/timezones', '_libs/tslibs/nattype', @@ -593,8 +637,7 @@ def pxd(name): 'sources': np_datetime_sources + ['pandas/_libs/src/period_helper.c']}, '_libs.tslibs.resolution': { 'pyxfile': '_libs/tslibs/resolution', - 'pxdfiles': ['_libs/src/util', - '_libs/khash', + 'pxdfiles': ['_libs/khash', '_libs/tslibs/ccalendar', '_libs/tslibs/frequencies', '_libs/tslibs/timezones'], @@ -602,21 +645,18 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.tslibs.strptime': { 'pyxfile': '_libs/tslibs/strptime', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype'], + 'pxdfiles': ['_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.timedeltas': { 'pyxfile': '_libs/tslibs/timedeltas', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype', + 'pxdfiles': ['_libs/tslibs/nattype', '_libs/tslibs/offsets'], 'depends': np_datetime_headers, 'sources': np_datetime_sources}, '_libs.tslibs.timestamps': { 'pyxfile': '_libs/tslibs/timestamps', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/ccalendar', + 'pxdfiles': ['_libs/tslibs/ccalendar', '_libs/tslibs/conversion', '_libs/tslibs/nattype', '_libs/tslibs/offsets', @@ -626,21 +666,21 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.tslibs.timezones': { 'pyxfile': '_libs/tslibs/timezones', - 'pxdfiles': ['_libs/src/util']}, + 'pxdfiles': []}, '_libs.testing': { 'pyxfile': '_libs/testing'}, '_libs.window': { 'pyxfile': '_libs/window', - 'pxdfiles': ['_libs/skiplist', '_libs/src/util'], + 'pxdfiles': ['_libs/skiplist'], 'language': 'c++', 'suffix': '.cpp'}, '_libs.writers': { 'pyxfile': '_libs/writers', - 'pxdfiles': ['_libs/src/util']}, + 'pxdfiles': []}, 'io.sas._sas': { 'pyxfile': 'io/sas/sas'}, 'io.msgpack._packer': { - 'macros': endian_macro, + 'macros': endian_macro + macros, 'depends': ['pandas/_libs/src/msgpack/pack.h', 'pandas/_libs/src/msgpack/pack_template.h'], 'include': ['pandas/_libs/src/msgpack'] + common_include, @@ -652,7 +692,7 @@ def pxd(name): 'depends': ['pandas/_libs/src/msgpack/unpack.h', 'pandas/_libs/src/msgpack/unpack_define.h', 'pandas/_libs/src/msgpack/unpack_template.h'], - 'macros': endian_macro, + 'macros': endian_macro + macros, 'include': ['pandas/_libs/src/msgpack'] + common_include, 'language': 'c++', 'suffix': '.cpp', @@ -681,7 +721,7 @@ def pxd(name): depends=data.get('depends', []), include_dirs=include, language=data.get('language', 'c'), - define_macros=data.get('macros', []), + define_macros=data.get('macros', macros), extra_compile_args=extra_compile_args) extensions.append(obj) @@ -708,7 +748,8 @@ def pxd(name): 'pandas/_libs/src/ujson/lib', 'pandas/_libs/src/datetime'], extra_compile_args=(['-D_GNU_SOURCE'] + - extra_compile_args)) + extra_compile_args), + define_macros=macros) extensions.append(ujson_ext) @@ -718,7 +759,8 @@ def pxd(name): # extension for pseudo-safely moving bytes into mutable buffers _move_ext = Extension('pandas.util._move', depends=[], - sources=['pandas/util/move.c']) + sources=['pandas/util/move.c'], + define_macros=macros) extensions.append(_move_ext) # The build cache system does string matching below this point. @@ -729,7 +771,7 @@ def pxd(name): version=versioneer.get_version(), packages=find_packages(include=['pandas', 'pandas.*']), package_data={'': ['templates/*', '_libs/*.dll']}, - ext_modules=extensions, + ext_modules=maybe_cythonize(extensions, compiler_directives=directives), maintainer_email=EMAIL, description=DESCRIPTION, license=LICENSE, From 0371ce35420cb907f7304dcaad1560a483d63146 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 12 Jul 2018 14:19:16 -0700 Subject: [PATCH 2/8] coverage misreporting workaround --- .coveragerc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.coveragerc b/.coveragerc index f5c8b701a79a8..df88477a6bfe9 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,6 +22,11 @@ exclude_lines = if 0: if __name__ == .__main__.: + # Workaround for cython misreporting def/cpdef/cdef lines + # https://github.com/cython/cython/issues/1461 + ^cdef + ^cpdef + ignore_errors = False show_missing = True From 96c0d37e550d826b876bb54e386fab6b57ad150a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 13 Jul 2018 17:55:12 -0700 Subject: [PATCH 3/8] lint cleanups --- .coveragerc | 5 ----- pandas/_libs/tslibs/period.pyx | 4 ++-- pandas/_libs/tslibs/util.pxd | 2 +- setup.py | 3 +++ 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.coveragerc b/.coveragerc index df88477a6bfe9..f5c8b701a79a8 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,11 +22,6 @@ exclude_lines = if 0: if __name__ == .__main__.: - # Workaround for cython misreporting def/cpdef/cdef lines - # https://github.com/cython/cython/issues/1461 - ^cdef - ^cpdef - ignore_errors = False show_missing = True diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 0ec5d25beeeb9..c302f4b18e924 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1591,8 +1591,8 @@ cdef class _Period(object): return value def __setstate__(self, state): - self.freq=state[1] - self.ordinal=state[2] + self.freq = state[1] + self.ordinal = state[2] def __reduce__(self): object_state = None, self.freq, self.ordinal diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index ccc08a963eff4..0a9a85aea9cca 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -1,4 +1,4 @@ -from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS +from numpy cimport ndarray cimport numpy as cnp cnp.import_array() diff --git a/setup.py b/setup.py index 814367c13278a..525eb20f47ec7 100755 --- a/setup.py +++ b/setup.py @@ -434,6 +434,7 @@ def get_tag(self): # "linetrace" (with a Truthy value) linetrace = os.environ.get('linetrace', False) CYTHON_TRACE = str(int(bool(linetrace))) +# TODO: Maybe make this a CLI option for build_ext? # Note: if not using `cythonize`, coverage can be enabled by # pinning `ext.cython_directives = directives` to each ext in extensions. @@ -449,6 +450,8 @@ def get_tag(self): # ---------------------------------------------------------------------- # Specification of Dependencies +# TODO: Need to check to see if e.g. `linetrace` has changed and possibly +# re-compile. def maybe_cythonize(extensions, *args, **kwargs): """ Render tempita templates before calling cythonize From dd6a2ba5c6dd4e393de8a8efabe47a3260c8ea29 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 14 Jul 2018 08:58:52 -0700 Subject: [PATCH 4/8] post-merge cleanup --- pandas/_libs/src/util.pxd | 205 ----------------------------------- pandas/_libs/tslibs/util.pxd | 27 ++++- 2 files changed, 25 insertions(+), 207 deletions(-) delete mode 100644 pandas/_libs/src/util.pxd diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd deleted file mode 100644 index 728eb63dc836c..0000000000000 --- a/pandas/_libs/src/util.pxd +++ /dev/null @@ -1,205 +0,0 @@ -from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS -cimport numpy as cnp -cnp.import_array() - -cimport cpython -from cpython cimport PyTypeObject -from cpython.string cimport PyString_FromString, PyString_AsString - -DEF PY3 = bytes != str - -cdef extern from "Python.h": - # Note: importing extern-style allows us to declare these as nogil - # functions, whereas `from cpython cimport` does not. - bint PyUnicode_Check(object obj) nogil - bint PyString_Check(object obj) nogil - bint PyBool_Check(object obj) nogil - bint PyFloat_Check(object obj) nogil - bint PyComplex_Check(object obj) nogil - bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil - char* PyUnicode_AsUTF8(object unicode) - object PyUnicode_FromString(const char* u) nogil - - -cdef extern from "numpy/arrayobject.h": - PyTypeObject PyFloatingArrType_Type - -cdef extern from "numpy/ndarrayobject.h": - PyTypeObject PyTimedeltaArrType_Type - PyTypeObject PyDatetimeArrType_Type - PyTypeObject PyComplexFloatingArrType_Type - PyTypeObject PyBoolArrType_Type - - bint PyArray_IsIntegerScalar(obj) nogil - bint PyArray_Check(obj) nogil - -# -------------------------------------------------------------------- -# Type Checking - -cdef inline bint is_string_object(object obj) nogil: - return PyString_Check(obj) or PyUnicode_Check(obj) - - -cdef inline bint is_integer_object(object obj) nogil: - return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) - - -cdef inline bint is_float_object(object obj) nogil: - return (PyFloat_Check(obj) or - (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) - - -cdef inline bint is_complex_object(object obj) nogil: - return (PyComplex_Check(obj) or - PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) - - -cdef inline bint is_bool_object(object obj) nogil: - return (PyBool_Check(obj) or - PyObject_TypeCheck(obj, &PyBoolArrType_Type)) - - -cdef inline bint is_timedelta64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - - -cdef inline bint is_datetime64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - -# -------------------------------------------------------------------- - -cdef extern from "numpy_helper.h": - void set_array_not_contiguous(ndarray ao) - - int assign_value_1d(ndarray, Py_ssize_t, object) except -1 - cnp.int64_t get_nat() - object get_value_1d(ndarray, Py_ssize_t) - -ctypedef fused numeric: - cnp.int8_t - cnp.int16_t - cnp.int32_t - cnp.int64_t - - cnp.uint8_t - cnp.uint16_t - cnp.uint32_t - cnp.uint64_t - - cnp.float32_t - cnp.float64_t - -cdef extern from "headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN - - -cdef inline const char* get_c_string(object obj) except NULL: - """ - returns ASCII or UTF8 (py3) view on python str - python object owns memory, should not be freed - """ - # TODO: this docstring is copied verbatim from version that was - # directly in numpy_helper.C; is it still accurate? - IF PY3: - return PyUnicode_AsUTF8(obj) - ELSE: - return PyString_AsString(obj) - - -cdef inline object char_to_string(const char* data): - IF PY3: - return PyUnicode_FromString(data) - ELSE: - return PyString_FromString(data) - - -cdef inline object get_value_at(ndarray arr, object loc): - cdef: - Py_ssize_t i, sz - int casted - - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - - if i < 0 and sz > 0: - i += sz - elif i >= sz or sz == 0: - raise IndexError('index out of bounds') - - return get_value_1d(arr, i) - - -cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): - """Sets a value into the array without checking the writeable flag. - - This should be used when setting values in a loop, check the writeable - flag above the loop and then eschew the check on each iteration. - """ - cdef: - Py_ssize_t i, sz - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - - if i < 0: - i += sz - elif i >= sz: - raise IndexError('index out of bounds') - - assign_value_1d(arr, i, value) - -cdef inline set_value_at(ndarray arr, object loc, object value): - """Sets a value into the array after checking that the array is mutable. - """ - if not cnp.PyArray_ISWRITEABLE(arr): - raise ValueError('assignment destination is read-only') - - set_value_at_unsafe(arr, loc, value) - - -cdef inline is_array(object o): - return cnp.PyArray_Check(o) - - -cdef inline bint _checknull(object val): - try: - return val is None or (cpython.PyFloat_Check(val) and val != val) - except ValueError: - return False - - -cdef inline bint is_period_object(object val): - return getattr(val, '_typ', '_typ') == 'period' - - -cdef inline bint is_offset_object(object val): - """ - Check if an object is a DateOffset object. - - Parameters - ---------- - val : object - - Returns - ------- - is_date_offset : bool - """ - return getattr(val, '_typ', None) == "dateoffset" diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 0a9a85aea9cca..fdfef4b47aeb4 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -4,6 +4,9 @@ cnp.import_array() cimport cpython from cpython cimport PyTypeObject +from cpython.string cimport PyString_FromString, PyString_AsString + +DEF PY3 = bytes != str cdef extern from "Python.h": # Note: importing extern-style allows us to declare these as nogil @@ -14,6 +17,8 @@ cdef extern from "Python.h": bint PyFloat_Check(object obj) nogil bint PyComplex_Check(object obj) nogil bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + char* PyUnicode_AsUTF8(object unicode) + object PyUnicode_FromString(const char* u) nogil cdef extern from "numpy/arrayobject.h": @@ -69,8 +74,6 @@ cdef extern from "../src/numpy_helper.h": int assign_value_1d(ndarray, Py_ssize_t, object) except -1 cnp.int64_t get_nat() object get_value_1d(ndarray, Py_ssize_t) - char *get_c_string(object) except NULL - object char_to_string(char*) ctypedef fused numeric: cnp.int8_t @@ -101,6 +104,26 @@ cdef extern from "../src/headers/stdint.h": enum: INT64_MIN +cdef inline const char* get_c_string(object obj) except NULL: + """ + returns ASCII or UTF8 (py3) view on python str + python object owns memory, should not be freed + """ + # TODO: this docstring is copied verbatim from version that was + # directly in numpy_helper.C; is it still accurate? + IF PY3: + return PyUnicode_AsUTF8(obj) + ELSE: + return PyString_AsString(obj) + + +cdef inline object char_to_string(const char* data): + IF PY3: + return PyUnicode_FromString(data) + ELSE: + return PyString_FromString(data) + + cdef inline object get_value_at(ndarray arr, object loc): cdef: Py_ssize_t i, sz From 23c1e211a02dc1e795a28d1eda92e4022f3b58f2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 14 Jul 2018 09:52:01 -0700 Subject: [PATCH 5/8] remove unnecessary pxdfiles --- pandas/_libs/window.pyx | 2 +- setup.py | 87 +++++++---------------------------------- 2 files changed, 15 insertions(+), 74 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 9e704a9bd8d3f..78cda353d2605 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -13,7 +13,7 @@ from numpy cimport ndarray, double_t, int64_t, float64_t cnp.import_array() -cdef extern from "../src/headers/cmath" namespace "std": +cdef extern from "src/headers/cmath" namespace "std": int signbit(double) nogil double sqrt(double x) nogil diff --git a/setup.py b/setup.py index 525eb20f47ec7..49b44c7afbbd7 100755 --- a/setup.py +++ b/setup.py @@ -430,6 +430,13 @@ def get_tag(self): endian_macro = [('__LITTLE_ENDIAN__', '1')] +if is_platform_windows(): + extra_compile_args = [] +else: + # args to ignore warnings + extra_compile_args = ['-Wno-unused-function'] + + # enable coverage by building cython files by setting the environment variable # "linetrace" (with a Truthy value) linetrace = os.environ.get('linetrace', False) @@ -492,16 +499,6 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): common_include = ['pandas/_libs/src/klib', 'pandas/_libs/src'] -def pxd(name): - return pjoin('pandas', name + '.pxd') - - -if is_platform_windows(): - extra_compile_args = [] -else: - # args to ignore warnings - extra_compile_args = ['-Wno-unused-function'] - lib_depends = lib_depends + ['pandas/_libs/src/numpy_helper.h', 'pandas/_libs/src/parse_helper.h', 'pandas/_libs/src/compat_helper.h'] @@ -517,22 +514,18 @@ def pxd(name): ext_data = { '_libs.algos': { 'pyxfile': '_libs/algos', - 'pxdfiles': ['_libs/util', '_libs/algos', '_libs/hashtable'], 'depends': _pxi_dep['algos']}, '_libs.groupby': { 'pyxfile': '_libs/groupby', - 'pxdfiles': ['_libs/util', '_libs/algos'], 'depends': _pxi_dep['groupby']}, '_libs.hashing': { 'pyxfile': '_libs/hashing'}, '_libs.hashtable': { 'pyxfile': '_libs/hashtable', - 'pxdfiles': ['_libs/hashtable', '_libs/missing', '_libs/khash'], 'depends': (['pandas/_libs/src/klib/khash_python.h'] + _pxi_dep['hashtable'])}, '_libs.index': { 'pyxfile': '_libs/index', - 'pxdfiles': ['_libs/util', '_libs/hashtable'], 'depends': _pxi_dep['index'], 'sources': np_datetime_sources}, '_libs.indexing': { @@ -541,21 +534,15 @@ def pxd(name): 'pyxfile': '_libs/internals'}, '_libs.interval': { 'pyxfile': '_libs/interval', - 'pxdfiles': ['_libs/hashtable'], 'depends': _pxi_dep['interval']}, '_libs.join': { 'pyxfile': '_libs/join', - 'pxdfiles': ['_libs/util', '_libs/hashtable'], 'depends': _pxi_dep['join']}, '_libs.lib': { 'pyxfile': '_libs/lib', - 'pxdfiles': ['_libs/util', - '_libs/missing', - '_libs/tslibs/conversion'], 'depends': lib_depends + tseries_depends}, '_libs.missing': { 'pyxfile': '_libs/missing', - 'pxdfiles': ['_libs/util'], 'depends': tseries_depends}, '_libs.parsers': { 'pyxfile': '_libs/parsers', @@ -565,12 +552,9 @@ def pxd(name): 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, '_libs.reduction': { - 'pyxfile': '_libs/reduction', - 'pxdfiles': ['_libs/util']}, + 'pyxfile': '_libs/reduction'}, '_libs.ops': { - 'pyxfile': '_libs/ops', - 'pxdfiles': ['_libs/util', - '_libs/missing']}, + 'pyxfile': '_libs/ops'}, '_libs.properties': { 'pyxfile': '_libs/properties', 'include': []}, @@ -585,101 +569,62 @@ def pxd(name): 'depends': _pxi_dep['sparse']}, '_libs.tslib': { 'pyxfile': '_libs/tslib', - 'pxdfiles': ['_libs/tslibs/conversion', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timestamps', - '_libs/tslibs/timezones', - '_libs/tslibs/nattype', - '_libs/tslibs/offsets'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.ccalendar': { 'pyxfile': '_libs/tslibs/ccalendar'}, '_libs.tslibs.conversion': { 'pyxfile': '_libs/tslibs/conversion', - 'pxdfiles': ['_libs/tslibs/nattype', - '_libs/tslibs/timezones', - '_libs/tslibs/timedeltas'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.fields': { 'pyxfile': '_libs/tslibs/fields', - 'pxdfiles': ['_libs/tslibs/ccalendar', - '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.frequencies': { - 'pyxfile': '_libs/tslibs/frequencies', - 'pxdfiles': []}, + 'pyxfile': '_libs/tslibs/frequencies'}, '_libs.tslibs.nattype': { - 'pyxfile': '_libs/tslibs/nattype', - 'pxdfiles': []}, + 'pyxfile': '_libs/tslibs/nattype'}, '_libs.tslibs.np_datetime': { 'pyxfile': '_libs/tslibs/np_datetime', 'depends': np_datetime_headers, 'sources': np_datetime_sources}, '_libs.tslibs.offsets': { 'pyxfile': '_libs/tslibs/offsets', - 'pxdfiles': ['_libs/tslibs/ccalendar', - '_libs/tslibs/conversion', - '_libs/tslibs/frequencies', - '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.parsing': { - 'pyxfile': '_libs/tslibs/parsing', - 'pxdfiles': []}, + 'pyxfile': '_libs/tslibs/parsing'}, '_libs.tslibs.period': { 'pyxfile': '_libs/tslibs/period', - 'pxdfiles': ['_libs/tslibs/ccalendar', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timezones', - '_libs/tslibs/nattype', - '_libs/tslibs/offsets'], 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], 'sources': np_datetime_sources + ['pandas/_libs/src/period_helper.c']}, '_libs.tslibs.resolution': { 'pyxfile': '_libs/tslibs/resolution', - 'pxdfiles': ['_libs/khash', - '_libs/tslibs/ccalendar', - '_libs/tslibs/frequencies', - '_libs/tslibs/timezones'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.strptime': { 'pyxfile': '_libs/tslibs/strptime', - 'pxdfiles': ['_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.timedeltas': { 'pyxfile': '_libs/tslibs/timedeltas', - 'pxdfiles': ['_libs/tslibs/nattype', - '_libs/tslibs/offsets'], 'depends': np_datetime_headers, 'sources': np_datetime_sources}, '_libs.tslibs.timestamps': { 'pyxfile': '_libs/tslibs/timestamps', - 'pxdfiles': ['_libs/tslibs/ccalendar', - '_libs/tslibs/conversion', - '_libs/tslibs/nattype', - '_libs/tslibs/offsets', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timezones'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.timezones': { - 'pyxfile': '_libs/tslibs/timezones', - 'pxdfiles': []}, + 'pyxfile': '_libs/tslibs/timezones'}, '_libs.testing': { 'pyxfile': '_libs/testing'}, '_libs.window': { 'pyxfile': '_libs/window', - 'pxdfiles': ['_libs/skiplist'], 'language': 'c++', 'suffix': '.cpp'}, '_libs.writers': { - 'pyxfile': '_libs/writers', - 'pxdfiles': []}, + 'pyxfile': '_libs/writers'}, 'io.sas._sas': { 'pyxfile': 'io/sas/sas'}, 'io.msgpack._packer': { @@ -711,10 +656,6 @@ def pxd(name): sources = [srcpath(data['pyxfile'], suffix=source_suffix, subdir='')] - pxds = [pxd(x) for x in data.get('pxdfiles', [])] - if suffix == '.pyx' and pxds: - sources.extend(pxds) - sources.extend(data.get('sources', [])) include = data.get('include', common_include) From d46fe91084e01c08d8a3860288a86f8b13799fbe Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 14 Jul 2018 11:34:55 -0700 Subject: [PATCH 6/8] fix signature of dummy cythonize --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 49b44c7afbbd7..320c0b8d640ae 100755 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ def is_platform_windows(): _CYTHON_INSTALLED = ver >= LooseVersion(min_cython_ver) except ImportError: _CYTHON_INSTALLED = False - cythonize = lambda x: x # dummy func + cythonize = lambda x, *args, **kwargs: x # dummy func # The import of Extension must be after the import of Cython, otherwise # we do not get the appropriately patched class. From 39afe28bac4c3ae726577af3da50441a50b79d5a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 16 Jul 2018 07:48:13 -0700 Subject: [PATCH 7/8] change env variable --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 320c0b8d640ae..270430eb2a51f 100755 --- a/setup.py +++ b/setup.py @@ -438,8 +438,8 @@ def get_tag(self): # enable coverage by building cython files by setting the environment variable -# "linetrace" (with a Truthy value) -linetrace = os.environ.get('linetrace', False) +# "PANDAS_CYTHON_COVERAGE" (with a Truthy value) +linetrace = os.environ.get('PANDAS_CYTHON_COVERAGE', False) CYTHON_TRACE = str(int(bool(linetrace))) # TODO: Maybe make this a CLI option for build_ext? From 5582fefb1db2752a0c5e68872ec9898728b7b8a5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 20 Jul 2018 07:08:25 -0700 Subject: [PATCH 8/8] remove comment, making an issue for it --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 270430eb2a51f..85c5970af018f 100755 --- a/setup.py +++ b/setup.py @@ -441,7 +441,6 @@ def get_tag(self): # "PANDAS_CYTHON_COVERAGE" (with a Truthy value) linetrace = os.environ.get('PANDAS_CYTHON_COVERAGE', False) CYTHON_TRACE = str(int(bool(linetrace))) -# TODO: Maybe make this a CLI option for build_ext? # Note: if not using `cythonize`, coverage can be enabled by # pinning `ext.cython_directives = directives` to each ext in extensions.