diff --git a/pandas/_libs/khash.pxd b/pandas/_libs/khash.pxd index 0d0c5ae058b21..e9f5766f78435 100644 --- a/pandas/_libs/khash.pxd +++ b/pandas/_libs/khash.pxd @@ -16,11 +16,11 @@ from numpy cimport ( cdef extern from "khash_python.h": const int KHASH_TRACE_DOMAIN - ctypedef uint32_t khint_t - ctypedef khint_t khiter_t + ctypedef uint32_t khuint_t + ctypedef khuint_t khiter_t ctypedef struct kh_pymap_t: - khint_t n_buckets, size, n_occupied, upper_bound + khuint_t n_buckets, size, n_occupied, upper_bound uint32_t *flags PyObject **keys size_t *vals @@ -28,15 +28,15 @@ cdef extern from "khash_python.h": kh_pymap_t* kh_init_pymap() void kh_destroy_pymap(kh_pymap_t*) void kh_clear_pymap(kh_pymap_t*) - khint_t kh_get_pymap(kh_pymap_t*, PyObject*) - void kh_resize_pymap(kh_pymap_t*, khint_t) - khint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*) - void kh_del_pymap(kh_pymap_t*, khint_t) + khuint_t kh_get_pymap(kh_pymap_t*, PyObject*) + void kh_resize_pymap(kh_pymap_t*, khuint_t) + khuint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*) + void kh_del_pymap(kh_pymap_t*, khuint_t) bint kh_exist_pymap(kh_pymap_t*, khiter_t) ctypedef struct kh_pyset_t: - khint_t n_buckets, size, n_occupied, upper_bound + khuint_t n_buckets, size, n_occupied, upper_bound uint32_t *flags PyObject **keys size_t *vals @@ -44,17 +44,17 @@ cdef extern from "khash_python.h": kh_pyset_t* kh_init_pyset() void kh_destroy_pyset(kh_pyset_t*) void kh_clear_pyset(kh_pyset_t*) - khint_t kh_get_pyset(kh_pyset_t*, PyObject*) - void kh_resize_pyset(kh_pyset_t*, khint_t) - khint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*) - void kh_del_pyset(kh_pyset_t*, khint_t) + khuint_t kh_get_pyset(kh_pyset_t*, PyObject*) + void kh_resize_pyset(kh_pyset_t*, khuint_t) + khuint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*) + void kh_del_pyset(kh_pyset_t*, khuint_t) bint kh_exist_pyset(kh_pyset_t*, khiter_t) ctypedef char* kh_cstr_t ctypedef struct kh_str_t: - khint_t n_buckets, size, n_occupied, upper_bound + khuint_t n_buckets, size, n_occupied, upper_bound uint32_t *flags kh_cstr_t *keys size_t *vals @@ -62,10 +62,10 @@ cdef extern from "khash_python.h": kh_str_t* kh_init_str() nogil void kh_destroy_str(kh_str_t*) nogil void kh_clear_str(kh_str_t*) nogil - khint_t kh_get_str(kh_str_t*, kh_cstr_t) nogil - void kh_resize_str(kh_str_t*, khint_t) nogil - khint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) nogil - void kh_del_str(kh_str_t*, khint_t) nogil + khuint_t kh_get_str(kh_str_t*, kh_cstr_t) nogil + void kh_resize_str(kh_str_t*, khuint_t) nogil + khuint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) nogil + void kh_del_str(kh_str_t*, khuint_t) nogil bint kh_exist_str(kh_str_t*, khiter_t) nogil @@ -74,16 +74,16 @@ cdef extern from "khash_python.h": int starts[256] kh_str_starts_t* kh_init_str_starts() nogil - khint_t kh_put_str_starts_item(kh_str_starts_t* table, char* key, - int* ret) nogil - khint_t kh_get_str_starts_item(kh_str_starts_t* table, char* key) nogil + khuint_t kh_put_str_starts_item(kh_str_starts_t* table, char* key, + int* ret) nogil + khuint_t kh_get_str_starts_item(kh_str_starts_t* table, char* key) nogil void kh_destroy_str_starts(kh_str_starts_t*) nogil - void kh_resize_str_starts(kh_str_starts_t*, khint_t) nogil + void kh_resize_str_starts(kh_str_starts_t*, khuint_t) nogil # sweep factorize ctypedef struct kh_strbox_t: - khint_t n_buckets, size, n_occupied, upper_bound + khuint_t n_buckets, size, n_occupied, upper_bound uint32_t *flags kh_cstr_t *keys PyObject **vals @@ -91,10 +91,10 @@ cdef extern from "khash_python.h": kh_strbox_t* kh_init_strbox() nogil void kh_destroy_strbox(kh_strbox_t*) nogil void kh_clear_strbox(kh_strbox_t*) nogil - khint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) nogil - void kh_resize_strbox(kh_strbox_t*, khint_t) nogil - khint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) nogil - void kh_del_strbox(kh_strbox_t*, khint_t) nogil + khuint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) nogil + void kh_resize_strbox(kh_strbox_t*, khuint_t) nogil + khuint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) nogil + void kh_del_strbox(kh_strbox_t*, khuint_t) nogil bint kh_exist_strbox(kh_strbox_t*, khiter_t) nogil diff --git a/pandas/_libs/khash_for_primitive_helper.pxi.in b/pandas/_libs/khash_for_primitive_helper.pxi.in index db8d3e0b19417..9073d87aa91cc 100644 --- a/pandas/_libs/khash_for_primitive_helper.pxi.in +++ b/pandas/_libs/khash_for_primitive_helper.pxi.in @@ -24,7 +24,7 @@ primitive_types = [('int64', 'int64_t'), cdef extern from "khash_python.h": ctypedef struct kh_{{name}}_t: - khint_t n_buckets, size, n_occupied, upper_bound + khuint_t n_buckets, size, n_occupied, upper_bound uint32_t *flags {{c_type}} *keys size_t *vals @@ -32,10 +32,10 @@ cdef extern from "khash_python.h": kh_{{name}}_t* kh_init_{{name}}() nogil void kh_destroy_{{name}}(kh_{{name}}_t*) nogil void kh_clear_{{name}}(kh_{{name}}_t*) nogil - khint_t kh_get_{{name}}(kh_{{name}}_t*, {{c_type}}) nogil - void kh_resize_{{name}}(kh_{{name}}_t*, khint_t) nogil - khint_t kh_put_{{name}}(kh_{{name}}_t*, {{c_type}}, int*) nogil - void kh_del_{{name}}(kh_{{name}}_t*, khint_t) nogil + khuint_t kh_get_{{name}}(kh_{{name}}_t*, {{c_type}}) nogil + void kh_resize_{{name}}(kh_{{name}}_t*, khuint_t) nogil + khuint_t kh_put_{{name}}(kh_{{name}}_t*, {{c_type}}, int*) nogil + void kh_del_{{name}}(kh_{{name}}_t*, khuint_t) nogil bint kh_exist_{{name}}(kh_{{name}}_t*, khiter_t) nogil diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h index bb56b2fe2d145..03b11f77580a5 100644 --- a/pandas/_libs/src/klib/khash.h +++ b/pandas/_libs/src/klib/khash.h @@ -134,32 +134,39 @@ int main() { #if UINT_MAX == 0xffffffffu -typedef unsigned int khint32_t; +typedef unsigned int khuint32_t; +typedef signed int khint32_t; #elif ULONG_MAX == 0xffffffffu -typedef unsigned long khint32_t; +typedef unsigned long khuint32_t; +typedef signed long khint32_t; #endif #if ULONG_MAX == ULLONG_MAX -typedef unsigned long khint64_t; +typedef unsigned long khuint64_t; +typedef signed long khint64_t; #else -typedef unsigned long long khint64_t; +typedef unsigned long long khuint64_t; +typedef signed long long khint64_t; #endif #if UINT_MAX == 0xffffu -typedef unsigned int khint16_t; +typedef unsigned int khuint16_t; +typedef signed int khint16_t; #elif USHRT_MAX == 0xffffu -typedef unsigned short khint16_t; +typedef unsigned short khuint16_t; +typedef signed short khint16_t; #endif #if UCHAR_MAX == 0xffu -typedef unsigned char khint8_t; +typedef unsigned char khuint8_t; +typedef signed char khint8_t; #endif typedef double khfloat64_t; typedef float khfloat32_t; -typedef khint32_t khint_t; -typedef khint_t khiter_t; +typedef khuint32_t khuint_t; +typedef khuint_t khiter_t; #define __ac_isempty(flag, i) ((flag[i>>5]>>(i&0x1fU))&1) #define __ac_isdel(flag, i) (0) @@ -172,15 +179,15 @@ typedef khint_t khiter_t; // specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp -khint32_t PANDAS_INLINE murmur2_32to32(khint32_t k){ - const khint32_t SEED = 0xc70f6907UL; +khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k){ + const khuint32_t SEED = 0xc70f6907UL; // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. - const khint32_t M_32 = 0x5bd1e995; + const khuint32_t M_32 = 0x5bd1e995; const int R_32 = 24; // Initialize the hash to a 'random' value - khint32_t h = SEED ^ 4; + khuint32_t h = SEED ^ 4; //handle 4 bytes: k *= M_32; @@ -204,15 +211,15 @@ khint32_t PANDAS_INLINE murmur2_32to32(khint32_t k){ // - the same case for 32bit and 64bit builds // - no performance difference could be measured compared to a possible x64-version -khint32_t PANDAS_INLINE murmur2_32_32to32(khint32_t k1, khint32_t k2){ - const khint32_t SEED = 0xc70f6907UL; +khuint32_t PANDAS_INLINE murmur2_32_32to32(khuint32_t k1, khuint32_t k2){ + const khuint32_t SEED = 0xc70f6907UL; // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. - const khint32_t M_32 = 0x5bd1e995; + const khuint32_t M_32 = 0x5bd1e995; const int R_32 = 24; // Initialize the hash to a 'random' value - khint32_t h = SEED ^ 4; + khuint32_t h = SEED ^ 4; //handle first 4 bytes: k1 *= M_32; @@ -238,9 +245,9 @@ khint32_t PANDAS_INLINE murmur2_32_32to32(khint32_t k1, khint32_t k2){ return h; } -khint32_t PANDAS_INLINE murmur2_64to32(khint64_t k){ - khint32_t k1 = (khint32_t)k; - khint32_t k2 = (khint32_t)(k >> 32); +khuint32_t PANDAS_INLINE murmur2_64to32(khuint64_t k){ + khuint32_t k1 = (khuint32_t)k; + khuint32_t k2 = (khuint32_t)(k >> 32); return murmur2_32_32to32(k1, k2); } @@ -262,23 +269,23 @@ static const double __ac_HASH_UPPER = 0.77; #define KHASH_DECLARE(name, khkey_t, khval_t) \ typedef struct { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ + khuint_t n_buckets, size, n_occupied, upper_bound; \ + khuint32_t *flags; \ khkey_t *keys; \ khval_t *vals; \ } kh_##name##_t; \ extern kh_##name##_t *kh_init_##name(); \ extern void kh_destroy_##name(kh_##name##_t *h); \ extern void kh_clear_##name(kh_##name##_t *h); \ - extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ - extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ - extern void kh_del_##name(kh_##name##_t *h, khint_t x); + extern khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern void kh_resize_##name(kh_##name##_t *h, khuint_t new_n_buckets); \ + extern khuint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khuint_t x); #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ typedef struct { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ + khuint_t n_buckets, size, n_occupied, upper_bound; \ + khuint32_t *flags; \ khkey_t *keys; \ khval_t *vals; \ } kh_##name##_t; \ @@ -296,14 +303,14 @@ static const double __ac_HASH_UPPER = 0.77; SCOPE void kh_clear_##name(kh_##name##_t *h) \ { \ if (h && h->flags) { \ - memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khuint32_t)); \ h->size = h->n_occupied = 0; \ } \ } \ - SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + SCOPE khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ { \ if (h->n_buckets) { \ - khint_t inc, k, i, last, mask; \ + khuint_t inc, k, i, last, mask; \ mask = h->n_buckets - 1; \ k = __hash_func(key); i = k & mask; \ inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \ @@ -314,17 +321,17 @@ static const double __ac_HASH_UPPER = 0.77; return __ac_iseither(h->flags, i)? h->n_buckets : i; \ } else return 0; \ } \ - SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + SCOPE void kh_resize_##name(kh_##name##_t *h, khuint_t new_n_buckets) \ { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ - khint32_t *new_flags = 0; \ - khint_t j = 1; \ + khuint32_t *new_flags = 0; \ + khuint_t j = 1; \ { \ kroundup32(new_n_buckets); \ if (new_n_buckets < 4) new_n_buckets = 4; \ - if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + if (h->size >= (khuint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ else { /* hash table size to be changed (shrink or expand); rehash */ \ - new_flags = (khint32_t*)KHASH_MALLOC(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + new_flags = (khuint32_t*)KHASH_MALLOC(__ac_fsize(new_n_buckets) * sizeof(khuint32_t)); \ + memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khuint32_t)); \ if (h->n_buckets < new_n_buckets) { /* expand */ \ h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \ if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \ @@ -336,12 +343,12 @@ static const double __ac_HASH_UPPER = 0.77; if (__ac_iseither(h->flags, j) == 0) { \ khkey_t key = h->keys[j]; \ khval_t val; \ - khint_t new_mask; \ + khuint_t new_mask; \ new_mask = new_n_buckets - 1; \ if (kh_is_map) val = h->vals[j]; \ __ac_set_isempty_true(h->flags, j); \ while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t inc, k, i; \ + khuint_t inc, k, i; \ k = __hash_func(key); \ i = k & new_mask; \ inc = __ac_inc(k, new_mask); \ @@ -367,18 +374,18 @@ static const double __ac_HASH_UPPER = 0.77; h->flags = new_flags; \ h->n_buckets = new_n_buckets; \ h->n_occupied = h->size; \ - h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + h->upper_bound = (khuint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ } \ } \ - SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + SCOPE khuint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ { \ - khint_t x; \ + khuint_t x; \ if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ { \ - khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ + khuint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ else { \ @@ -407,7 +414,7 @@ static const double __ac_HASH_UPPER = 0.77; } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ return x; \ } \ - SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ + SCOPE void kh_del_##name(kh_##name##_t *h, khuint_t x) \ { \ if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ __ac_set_isdel_true(h->flags, x); \ @@ -422,20 +429,23 @@ static const double __ac_HASH_UPPER = 0.77; /*! @function @abstract Integer hash function - @param key The integer [khint32_t] - @return The hash value [khint_t] + @param key The integer [khuint32_t] + @return The hash value [khuint_t] */ -#define kh_int_hash_func(key) (khint32_t)(key) +#define kh_int_hash_func(key) (khuint32_t)(key) /*! @function @abstract Integer comparison function */ #define kh_int_hash_equal(a, b) ((a) == (b)) /*! @function @abstract 64-bit integer hash function - @param key The integer [khint64_t] - @return The hash value [khint_t] + @param key The integer [khuint64_t] + @return The hash value [khuint_t] */ -#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) +PANDAS_INLINE khuint_t kh_int64_hash_func(khuint64_t key) +{ + return (khuint_t)((key)>>33^(key)^(key)<<11); +} /*! @function @abstract 64-bit integer comparison function */ @@ -446,16 +456,16 @@ static const double __ac_HASH_UPPER = 0.77; @param s Pointer to a null terminated string @return The hash value */ -PANDAS_INLINE khint_t __ac_X31_hash_string(const char *s) +PANDAS_INLINE khuint_t __ac_X31_hash_string(const char *s) { - khint_t h = *s; + khuint_t h = *s; if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; return h; } /*! @function @abstract Another interface to const char* hash function @param key Pointer to a null terminated string [const char*] - @return The hash value [khint_t] + @return The hash value [khuint_t] */ #define kh_str_hash_func(key) __ac_X31_hash_string(key) /*! @function @@ -463,7 +473,7 @@ PANDAS_INLINE khint_t __ac_X31_hash_string(const char *s) */ #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) -PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) +PANDAS_INLINE khuint_t __ac_Wang_hash(khuint_t key) { key += ~(key << 15); key ^= (key >> 10); @@ -473,7 +483,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) key ^= (key >> 16); return key; } -#define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key) +#define kh_int_hash_func2(k) __ac_Wang_hash((khuint_t)key) /* --- END OF HASH FUNCTIONS --- */ @@ -510,7 +520,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) @abstract Resize a hash table. @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] - @param s New size [khint_t] + @param s New size [khuint_t] */ #define kh_resize(name, h, s) kh_resize_##name(h, s) @@ -522,7 +532,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) @param r Extra return code: 0 if the key is present in the hash table; 1 if the bucket is empty (never used); 2 if the element in the bucket has been deleted [int*] - @return Iterator to the inserted element [khint_t] + @return Iterator to the inserted element [khuint_t] */ #define kh_put(name, h, k, r) kh_put_##name(h, k, r) @@ -531,7 +541,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] @param k Key [type of keys] - @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t] + @return Iterator to the found element, or kh_end(h) is the element is absent [khuint_t] */ #define kh_get(name, h, k) kh_get_##name(h, k) @@ -539,14 +549,14 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) @abstract Remove a key from the hash table. @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] - @param k Iterator to the element to be deleted [khint_t] + @param k Iterator to the element to be deleted [khuint_t] */ #define kh_del(name, h, k) kh_del_##name(h, k) /*! @function @abstract Test whether a bucket contains data. @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] + @param x Iterator to the bucket [khuint_t] @return 1 if containing data; 0 otherwise [int] */ #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) @@ -554,7 +564,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) /*! @function @abstract Get key given an iterator @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] + @param x Iterator to the bucket [khuint_t] @return Key [type of keys] */ #define kh_key(h, x) ((h)->keys[x]) @@ -562,7 +572,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) /*! @function @abstract Get value given an iterator @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] + @param x Iterator to the bucket [khuint_t] @return Value [type of values] @discussion For hash sets, calling this results in segfault. */ @@ -576,28 +586,28 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) /*! @function @abstract Get the start iterator @param h Pointer to the hash table [khash_t(name)*] - @return The start iterator [khint_t] + @return The start iterator [khuint_t] */ -#define kh_begin(h) (khint_t)(0) +#define kh_begin(h) (khuint_t)(0) /*! @function @abstract Get the end iterator @param h Pointer to the hash table [khash_t(name)*] - @return The end iterator [khint_t] + @return The end iterator [khuint_t] */ #define kh_end(h) ((h)->n_buckets) /*! @function @abstract Get the number of elements in the hash table @param h Pointer to the hash table [khash_t(name)*] - @return Number of elements in the hash table [khint_t] + @return Number of elements in the hash table [khuint_t] */ #define kh_size(h) ((h)->size) /*! @function @abstract Get the number of buckets in the hash table @param h Pointer to the hash table [khash_t(name)*] - @return Number of buckets in the hash table [khint_t] + @return Number of buckets in the hash table [khuint_t] */ #define kh_n_buckets(h) ((h)->n_buckets) @@ -615,25 +625,18 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) @param name Name of the hash table [symbol] @param khval_t Type of values [type] */ - -// we implicitly convert signed int to unsigned int, thus potential overflows -// for operations (<<,*,+) don't trigger undefined behavior, also >>-operator -// is implementation defined for signed ints if sign-bit is set. -// because we never really "get" the keys, there will be no convertion from -// unsigend int to (signed) int (which would be implementation defined behavior) -// this holds also for 64-, 16- and 8-bit integers #define KHASH_MAP_INIT_INT(name, khval_t) \ KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) #define KHASH_MAP_INIT_UINT(name, khval_t) \ - KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + KHASH_INIT(name, khuint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) /*! @function @abstract Instantiate a hash map containing 64-bit integer keys @param name Name of the hash table [symbol] */ #define KHASH_SET_INIT_UINT64(name) \ - KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + KHASH_INIT(name, khuint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) #define KHASH_SET_INIT_INT64(name) \ KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) @@ -644,7 +647,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) @param khval_t Type of values [type] */ #define KHASH_MAP_INIT_UINT64(name, khval_t) \ - KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + KHASH_INIT(name, khuint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) #define KHASH_MAP_INIT_INT64(name, khval_t) \ KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) @@ -658,7 +661,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) KHASH_INIT(name, khint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) #define KHASH_MAP_INIT_UINT16(name, khval_t) \ - KHASH_INIT(name, khint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + KHASH_INIT(name, khuint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) /*! @function @abstract Instantiate a hash map containing 8bit-integer keys @@ -669,7 +672,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) KHASH_INIT(name, khint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) #define KHASH_MAP_INIT_UINT8(name, khval_t) \ - KHASH_INIT(name, khint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + KHASH_INIT(name, khuint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index 8e4e61b4f3077..c67eff21a1ab1 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -75,14 +75,14 @@ void traced_free(void* ptr){ // predisposed to superlinear running times (see GH 36729 for comparison) -khint64_t PANDAS_INLINE asint64(double key) { - khint64_t val; +khuint64_t PANDAS_INLINE asuint64(double key) { + khuint64_t val; memcpy(&val, &key, sizeof(double)); return val; } -khint32_t PANDAS_INLINE asint32(float key) { - khint32_t val; +khuint32_t PANDAS_INLINE asuint32(float key) { + khuint32_t val; memcpy(&val, &key, sizeof(float)); return val; } @@ -90,7 +90,7 @@ khint32_t PANDAS_INLINE asint32(float key) { #define ZERO_HASH 0 #define NAN_HASH 0 -khint32_t PANDAS_INLINE kh_float64_hash_func(double val){ +khuint32_t PANDAS_INLINE kh_float64_hash_func(double val){ // 0.0 and -0.0 should have the same hash: if (val == 0.0){ return ZERO_HASH; @@ -99,11 +99,11 @@ khint32_t PANDAS_INLINE kh_float64_hash_func(double val){ if ( val!=val ){ return NAN_HASH; } - khint64_t as_int = asint64(val); + khuint64_t as_int = asuint64(val); return murmur2_64to32(as_int); } -khint32_t PANDAS_INLINE kh_float32_hash_func(float val){ +khuint32_t PANDAS_INLINE kh_float32_hash_func(float val){ // 0.0 and -0.0 should have the same hash: if (val == 0.0f){ return ZERO_HASH; @@ -112,7 +112,7 @@ khint32_t PANDAS_INLINE kh_float32_hash_func(float val){ if ( val!=val ){ return NAN_HASH; } - khint32_t as_int = asint32(val); + khuint32_t as_int = asuint32(val); return murmur2_32to32(as_int); } @@ -186,15 +186,15 @@ p_kh_str_starts_t PANDAS_INLINE kh_init_str_starts(void) { return result; } -khint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t* table, char* key, int* ret) { - khint_t result = kh_put_str(table->table, key, ret); +khuint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t* table, char* key, int* ret) { + khuint_t result = kh_put_str(table->table, key, ret); if (*ret != 0) { table->starts[(unsigned char)key[0]] = 1; } return result; } -khint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const char* key) { +khuint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const char* key) { unsigned char ch = *key; if (table->starts[ch]) { if (ch == '\0' || kh_get_str(table->table, key) != table->table->n_buckets) return 1; @@ -207,6 +207,6 @@ void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t* table) { KHASH_FREE(table); } -void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t* table, khint_t val) { +void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t* table, khuint_t val) { kh_resize_str(table->table, val); }