-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: Index with null value not serialized correctly to json #50400
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f26cf16
23cd66d
8f5e6e5
147cd88
ef6195a
9afd12b
a6de6ce
52e20ea
53165b3
d3cecc5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -276,6 +276,27 @@ static int is_simple_frame(PyObject *obj) { | |
Py_DECREF(mgr); | ||
return ret; | ||
} | ||
/* Report whether obj should be treated as a null value. | ||
Returns a nonzero value when obj is a float holding NaN, is None, | ||
is an NA-type object, or is a decimal-type object whose is_nan() | ||
method returns True; returns 0 otherwise. | ||
Returns -1 on error, i.e. when the is_nan() call itself fails. | ||
TODO: Consider unifying with checknull and co. | ||
in missing.pyx */ | ||
static int is_null_obj(PyObject* obj) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit but I think this should be called |
||
int is_null = 0; | ||
if (PyFloat_Check(obj)) { | ||
double fval = PyFloat_AS_DOUBLE(obj); | ||
is_null = npy_isnan(fval); | ||
} else if (obj == Py_None || object_is_na_type(obj)) { | ||
is_null = 1; | ||
} else if (object_is_decimal_type(obj)) { | ||
PyObject *is_null_obj = PyObject_CallMethod(obj, | ||
"is_nan", | ||
NULL); | ||
is_null = (is_null_obj == Py_True); | ||
if (!is_null_obj) { | ||
return -1; | ||
} | ||
Py_DECREF(is_null_obj); | ||
} | ||
return is_null; | ||
} | ||
|
||
static npy_int64 get_long_attr(PyObject *o, const char *attr) { | ||
// NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT | ||
|
@@ -1283,6 +1304,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
type_num = PyArray_TYPE(labels); | ||
|
||
for (i = 0; i < num; i++) { | ||
int is_null = 0; // Whether current val is a null | ||
item = PyArray_GETITEM(labels, dataptr); | ||
if (!item) { | ||
NpyArr_freeLabels(ret, num); | ||
|
@@ -1320,9 +1342,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
|
||
if (is_datetimelike) { | ||
if (nanosecVal == get_nat()) { | ||
len = 4; | ||
cLabel = PyObject_Malloc(len + 1); | ||
strncpy(cLabel, "null", len + 1); | ||
is_null = 1; | ||
} else { | ||
if (enc->datetimeIso) { | ||
if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { | ||
|
@@ -1348,25 +1368,41 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
len = strlen(cLabel); | ||
} | ||
} | ||
} else { // Fallback to string representation | ||
// Replace item with the string to keep it alive. | ||
Py_SETREF(item, PyObject_Str(item)); | ||
if (item == NULL) { | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} else { | ||
// NA values need special handling | ||
is_null = is_null_obj(item); | ||
if (is_null == -1) { | ||
// Something errored | ||
// Return to let the error surface | ||
return 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Surprised we return 0 here, but I see that you are just matching the pattern of the rest of the function. It should really be returning NULL in case of an error - it looks like this is not handled properly. But of course that is separate from this PR. |
||
} | ||
if (!is_null) { | ||
// Otherwise, fallback to string representation | ||
// Replace item with the string to keep it alive. | ||
Py_SETREF(item, PyObject_Str(item)); | ||
if (item == NULL) { | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} | ||
|
||
cLabel = (char *)PyUnicode_AsUTF8(item); | ||
len = strlen(cLabel); | ||
} | ||
} | ||
|
||
cLabel = (char *)PyUnicode_AsUTF8(item); | ||
len = strlen(cLabel); | ||
if (is_null) { | ||
len = 4; | ||
cLabel = PyObject_Malloc(len + 1); | ||
strncpy(cLabel, "null", len + 1); | ||
} | ||
|
||
// Add 1 to include NULL terminator | ||
ret[i] = PyObject_Malloc(len + 1); | ||
memcpy(ret[i], cLabel, len + 1); | ||
Py_DECREF(item); | ||
|
||
if (is_datetimelike) { | ||
if (is_datetimelike || is_null) { | ||
PyObject_Free(cLabel); | ||
} | ||
|
||
|
@@ -1512,8 +1548,20 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |
tc->type = JT_UTF8; | ||
return; | ||
} else if (object_is_decimal_type(obj)) { | ||
GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj); | ||
tc->type = JT_DOUBLE; | ||
/* Check for null, since null can't go thru double path */ | ||
PyObject *is_null_obj = PyObject_CallMethod(obj, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can this not be replaced with the function you are introducing? It seems like that should work and would keep the logic consistent. |
||
"is_nan", | ||
NULL); | ||
if (!is_null_obj) { | ||
goto INVALID; | ||
} | ||
if (is_null_obj == Py_False) { | ||
GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj); | ||
tc->type = JT_DOUBLE; | ||
} else { | ||
tc->type = JT_NULL; | ||
} | ||
Py_DECREF(is_null_obj); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In a case where |
||
return; | ||
} else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { | ||
if (object_is_nat_type(obj)) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a quick note in the docstring that this returns -1 on error?