diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0ef5636a97d40..082963fe806ee 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -907,6 +907,7 @@ I/O - Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`) - Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`) - Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`) +- Bug in :meth:`DataFrame.to_json` where ``orient="split"`` would produce duplicate column names for a :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`50456`) Period ^^^^^^ diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index f2780d5fa6832..72f59ca8e753d 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -182,6 +182,20 @@ def to_json( indent=indent, ).write() + if orient == "split" and isinstance(obj, DataFrame): + if isinstance(obj.columns, MultiIndex): + lst = [] + # backwards of multindex.fromArray + for i in range(len(obj.columns[0])): + sub = [] + for j in range(len(obj.columns)): + sub.append(obj.columns[j][i]) + lst.append(sub) + newS = loads(s) + # fixes columns to original columns + newS["columns"] = lst + s = dumps(newS) + if lines: s = convert_to_line_delimits(s) diff --git a/pandas/tests/io/json/Untitled-1.ipynb b/pandas/tests/io/json/Untitled-1.ipynb new file mode 100644 index 0000000000000..a61008429ceef --- /dev/null +++ b/pandas/tests/io/json/Untitled-1.ipynb @@ -0,0 +1,19 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index aff09a62b0df3..4408ef93ca40b 100644 --- 
a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1528,13 +1528,6 @@ def test_timedelta_as_label(self, date_format, key): ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"), ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"), # TODO: the below have separate encoding procedures - pytest.param( - "split", - "", - marks=pytest.mark.xfail( - reason="Produces JSON but not in a consistent manner" - ), - ), pytest.param( "table", "", diff --git a/pp.py b/pp.py new file mode 100644 index 0000000000000..d6f29d682561b --- /dev/null +++ b/pp.py @@ -0,0 +1,10 @@ +"""Print the installed pandas version and make sure pytest is installed.""" +import subprocess +import sys + +import pandas + +print(pandas.__version__) +# ``!pip`` is IPython-only shell syntax and a SyntaxError in a plain script; +# run pip through the current interpreter so it targets this environment. +subprocess.check_call([sys.executable, "-m", "pip", "install", "pytest"])