diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 033f47f0c994d..7f7145347296c 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1012,6 +1012,8 @@ I/O
 - Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
 - Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
 - Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)
+- Bug in :meth:`DataFrame.to_json` where it would incorrectly use the string representations of NA-values instead of null when serializing an index (:issue:`31801`)
+- Bug in :meth:`DataFrame.to_json` where it would error when serializing ``Decimal("NaN")`` (:issue:`50400`)
 
 Period
 ^^^^^^
diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index a6f18e0aec4d9..513fa6abfc760 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -276,6 +276,27 @@ static int is_simple_frame(PyObject *obj) {
     Py_DECREF(mgr);
     return ret;
 }
+/* Return 1 if obj is a missing value (float NaN, None, an object accepted
+   by object_is_na_type, or a decimal whose is_nan() is true), 0 if it is
+   not, and -1 with a Python exception set if the is_nan() call failed.
+   TODO: Consider unifying with checknull and co. in missing.pyx */
+static int is_null_obj(PyObject *obj) {
+    int is_null = 0;
+    if (PyFloat_Check(obj)) {
+        double fval = PyFloat_AS_DOUBLE(obj);
+        is_null = npy_isnan(fval) != 0;  // keep the result in {0, 1}
+    } else if (obj == Py_None || object_is_na_type(obj)) {
+        is_null = 1;
+    } else if (object_is_decimal_type(obj)) {
+        PyObject *is_nan = PyObject_CallMethod(obj, "is_nan", NULL);
+        if (!is_nan) {
+            return -1;
+        }
+        is_null = (is_nan == Py_True);
+        Py_DECREF(is_nan);
+    }
+    return is_null;
+}
 
 static npy_int64 get_long_attr(PyObject *o, const char *attr) {
     // NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT
@@ -1283,6 +1304,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
     type_num = PyArray_TYPE(labels);
 
     for (i = 0; i < num; i++) {
+        int is_null = 0;  // Whether current val is a null
         item = PyArray_GETITEM(labels, dataptr);
         if (!item) {
             NpyArr_freeLabels(ret, num);
@@ -1320,9 +1342,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
 
         if (is_datetimelike) {
             if (nanosecVal == get_nat()) {
-                len = 4;
-                cLabel = PyObject_Malloc(len + 1);
-                strncpy(cLabel, "null", len + 1);
+                is_null = 1;
             } else {
                 if (enc->datetimeIso) {
                     if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) {
@@ -1348,17 +1368,36 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
                     len = strlen(cLabel);
                 }
             }
-        } else { // Fallback to string representation
-            // Replace item with the string to keep it alive.
-            Py_SETREF(item, PyObject_Str(item));
-            if (item == NULL) {
-                NpyArr_freeLabels(ret, num);
-                ret = 0;
-                break;
+        } else {
+            // NA values need special handling
+            is_null = is_null_obj(item);
+            if (is_null == -1) {
+                // is_null_obj failed with an exception set: release item and
+                // the labels built so far, as the other error paths do
+                Py_DECREF(item);
+                NpyArr_freeLabels(ret, num);
+                ret = 0;
+                break;
+            }
+            if (!is_null) {
+                // Otherwise, fallback to string representation
+                // Replace item with the string to keep it alive.
+                Py_SETREF(item, PyObject_Str(item));
+                if (item == NULL) {
+                    NpyArr_freeLabels(ret, num);
+                    ret = 0;
+                    break;
+                }
+
+                cLabel = (char *)PyUnicode_AsUTF8(item);
+                len = strlen(cLabel);
             }
+        }
 
-            cLabel = (char *)PyUnicode_AsUTF8(item);
-            len = strlen(cLabel);
+        if (is_null) {
+            len = 4;
+            cLabel = PyObject_Malloc(len + 1);
+            strncpy(cLabel, "null", len + 1);
         }
 
         // Add 1 to include NULL terminator
@@ -1366,7 +1405,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
         memcpy(ret[i], cLabel, len + 1);
         Py_DECREF(item);
 
-        if (is_datetimelike) {
+        if (is_datetimelike || is_null) {
             PyObject_Free(cLabel);
         }
 
@@ -1512,8 +1551,20 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
         tc->type = JT_UTF8;
         return;
     } else if (object_is_decimal_type(obj)) {
-        GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
-        tc->type = JT_DOUBLE;
+        /* Check for null, since null can't go thru double path */
+        PyObject *is_null_obj = PyObject_CallMethod(obj,
+                                                    "is_nan",
+                                                    NULL);
+        if (!is_null_obj) {
+            goto INVALID;
+        }
+        if (is_null_obj == Py_False) {
+            GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
+            tc->type = JT_DOUBLE;
+        } else {
+            tc->type = JT_NULL;
+        }
+        Py_DECREF(is_null_obj);
         return;
     } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
         if (object_is_nat_type(obj)) {
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index d9d76c2d72db3..160dd6340e44c 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1,6 +1,5 @@
 import datetime
 from datetime import timedelta
-from decimal import Decimal
 from io import StringIO
 import json
 import os
@@ -1766,15 +1765,16 @@ def test_to_s3(self, s3_resource, s3so):
             timeout -= 0.1
             assert timeout > 0, "Timed out waiting for file to appear on moto"
 
-    def test_json_pandas_nulls(self, nulls_fixture, request):
+    def test_json_pandas_nulls(self, nulls_fixture):
         # GH 31615
-        if isinstance(nulls_fixture, Decimal):
-            mark = pytest.mark.xfail(reason="not implemented")
-            request.node.add_marker(mark)
-
         result = DataFrame([[nulls_fixture]]).to_json()
         assert result == '{"0":{"0":null}}'
 
+    def test_json_pandas_index_nulls(self, nulls_fixture):
+        # GH 31801
+        result = Series([1], index=[nulls_fixture]).to_json()
+        assert result == '{"null":1}'
+
     def test_readjson_bool_series(self):
         # GH31464
         result = read_json("[true, true, false]", typ="series")