diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 32b548e5f32f1..f3dac20758441 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -136,6 +136,7 @@ Other Enhancements
 - :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`).
 - :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
 - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
+- :func:`DataFrame.to_json` and :func:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)

 .. _whatsnew_0220.api_breaking:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ea4a645927d7b..79ba18140c651 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1603,7 +1603,8 @@ def _repr_latex_(self):

     def to_json(self, path_or_buf=None, orient=None, date_format=None,
                 double_precision=10, force_ascii=True, date_unit='ms',
-                default_handler=None, lines=False, compression=None):
+                default_handler=None, lines=False, compression=None,
+                index=True):
         """
         Convert the object to a JSON string.

@@ -1671,6 +1672,13 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,

             .. versionadded:: 0.21.0

+        index : boolean, default True
+            Whether to include the index values in the JSON string. Not
+            including the index (``index=False``) is only supported when
+            orient is 'split' or 'table'.
+
+            .. versionadded:: 0.22.0
+
         Returns
         -------
         same type as input object with filtered info axis
@@ -1723,7 +1731,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
                             double_precision=double_precision,
                             force_ascii=force_ascii, date_unit=date_unit,
                             default_handler=default_handler,
-                            lines=lines, compression=compression)
+                            lines=lines, compression=compression,
+                            index=index)

     def to_hdf(self, path_or_buf, key, **kwargs):
         """Write the contained data to an HDF5 file using HDFStore.
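As a quick illustration of the behaviour documented above (an editor's sketch, not part of the patch; the exact key ordering of the serialized string may differ):

    >>> import pandas as pd
    >>> df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
    >>> df.to_json(orient='split', index=False)
    '{"columns":["a","b"],"data":[[1,2],[4,5]]}'
    >>> df.to_json(orient='split')  # default index=True keeps the index
    '{"columns":["a","b"],"index":[0,1],"data":[[1,2],[4,5]]}'

With any other orient, passing ``index=False`` raises a ``ValueError``; the check lives in the pandas/io/json/json.py changes below.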
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index 21736673350d8..0e0aae0506809 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -28,7 +28,12 @@
 # interface to/from
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms',
-            default_handler=None, lines=False, compression=None):
+            default_handler=None, lines=False, compression=None,
+            index=True):
+
+    if not index and orient not in ['split', 'table']:
+        raise ValueError("'index=False' is only valid when 'orient' is "
+                         "'split' or 'table'")

     path_or_buf = _stringify_path(path_or_buf)
     if lines and orient != 'records':
@@ -49,7 +54,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
     s = writer(
         obj, orient=orient, date_format=date_format,
         double_precision=double_precision, ensure_ascii=force_ascii,
-        date_unit=date_unit, default_handler=default_handler).write()
+        date_unit=date_unit, default_handler=default_handler,
+        index=index).write()

     if lines:
         s = _convert_to_line_delimits(s)
@@ -69,7 +75,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
 class Writer(object):

     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None):
         self.obj = obj

         if orient is None:
@@ -81,6 +87,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         self.ensure_ascii = ensure_ascii
         self.date_unit = date_unit
         self.default_handler = default_handler
+        self.index = index

         self.is_copy = None
         self._format_axes()
@@ -89,14 +96,20 @@ def _format_axes(self):
         raise AbstractMethodError(self)

     def write(self):
+        return self._write(self.obj, self.orient, self.double_precision,
+                           self.ensure_ascii, self.date_unit,
+                           self.date_format == 'iso', self.default_handler)
+
+    def _write(self, obj, orient, double_precision, ensure_ascii,
+               date_unit, iso_dates, default_handler):
         return dumps(
-            self.obj,
-            orient=self.orient,
-            double_precision=self.double_precision,
-            ensure_ascii=self.ensure_ascii,
-            date_unit=self.date_unit,
-            iso_dates=self.date_format == 'iso',
-            default_handler=self.default_handler
+            obj,
+            orient=orient,
+            double_precision=double_precision,
+            ensure_ascii=ensure_ascii,
+            date_unit=date_unit,
+            iso_dates=iso_dates,
+            default_handler=default_handler
         )


@@ -108,6 +121,15 @@ def _format_axes(self):
             raise ValueError("Series index must be unique for orient="
                              "'{orient}'".format(orient=self.orient))

+    def _write(self, obj, orient, double_precision, ensure_ascii,
+               date_unit, iso_dates, default_handler):
+        if not self.index and orient == 'split':
+            obj = {"name": obj.name, "data": obj.values}
+        return super(SeriesWriter, self)._write(obj, orient,
+                                                double_precision,
+                                                ensure_ascii, date_unit,
+                                                iso_dates, default_handler)
+

 class FrameWriter(Writer):
     _default_orient = 'columns'
@@ -123,12 +145,22 @@ def _format_axes(self):
             raise ValueError("DataFrame columns must be unique for orient="
                              "'{orient}'.".format(orient=self.orient))

+    def _write(self, obj, orient, double_precision, ensure_ascii,
+               date_unit, iso_dates, default_handler):
+        if not self.index and orient == 'split':
+            obj = obj.to_dict(orient='split')
+            del obj["index"]
+        return super(FrameWriter, self)._write(obj, orient,
+                                               double_precision,
+                                               ensure_ascii, date_unit,
+                                               iso_dates, default_handler)
+

 class JSONTableWriter(FrameWriter):
     _default_orient = 'records'

     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None):
         """
         Adds a `schema` attribut with the Table Schema, resets
         the index (can't do in caller, because the schema inference needs
@@ -137,7 +169,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         """
         super(JSONTableWriter, self).__init__(
             obj, orient, date_format, double_precision, ensure_ascii,
-            date_unit, default_handler=default_handler)
+            date_unit, index, default_handler=default_handler)

         if date_format != 'iso':
             msg = ("Trying to write with `orient='table'` and "
@@ -146,7 +178,7 @@ def __init__(self, obj, orient, date_format, double_precision,
                    .format(fmt=date_format))
             raise ValueError(msg)

-        self.schema = build_table_schema(obj)
+        self.schema = build_table_schema(obj, index=self.index)

         # NotImplementd on a column MultiIndex
         if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
@@ -168,14 +200,24 @@ def __init__(self, obj, orient, date_format, double_precision,
         if is_period_dtype(obj.index):
             obj.index = obj.index.to_timestamp()

-        self.obj = obj.reset_index()
+        # exclude index from obj if index=False
+        if not self.index:
+            self.obj = obj.reset_index(drop=True)
+        else:
+            self.obj = obj.reset_index(drop=False)
         self.date_format = 'iso'
         self.orient = 'records'
-
-    def write(self):
-        data = super(JSONTableWriter, self).write()
+        self.index = index
+
+    def _write(self, obj, orient, double_precision, ensure_ascii,
+               date_unit, iso_dates, default_handler):
+        data = super(JSONTableWriter, self)._write(obj, orient,
+                                                   double_precision,
+                                                   ensure_ascii, date_unit,
+                                                   iso_dates,
+                                                   default_handler)
         serialized = '{{"schema": {schema}, "data": {data}}}'.format(
-            schema=dumps(self.schema), data=data)
+            schema=dumps(self.schema), data=data)
         return serialized
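The `_write` hooks above drop the index by reshaping the object before it reaches the ujson-backed `dumps`. A rough standalone sketch of that reshaping step (illustrative only, not the actual code path):

    import pandas as pd

    df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])

    # FrameWriter._write with index=False and orient='split':
    # serialize the dict form of the frame, minus its 'index' entry.
    payload = df.to_dict(orient='split')  # {'index': [0, 1], 'columns': [...], 'data': [...]}
    del payload['index']

    # SeriesWriter._write builds the reduced payload directly:
    s = pd.Series([1, 2, 3], name='A')
    series_payload = {'name': s.name, 'data': s.values}

For ``orient='table'`` the index is instead handled up front: ``build_table_schema`` is called with ``index=self.index`` and the frame is reset with ``reset_index(drop=True)``, so neither the schema nor the records carry index fields.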
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index fe447534efdc7..7cf3d6cd7b612 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -9,6 +9,7 @@
                     read_json, compat)
 from datetime import timedelta
 import pandas as pd
+import json

 from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
                                  assert_series_equal, network,
@@ -1147,3 +1148,64 @@ def test_data_frame_size_after_to_json(self):
         size_after = df.memory_usage(index=True, deep=True).sum()

         assert size_before == size_after
+
+    @pytest.mark.parametrize('data, expected', [
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
+         {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'),
+         {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
+                   index=[['a', 'b'], ['c', 'd']]),
+         {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
+        (Series([1, 2, 3], name='A'),
+         {'name': 'A', 'data': [1, 2, 3]}),
+        (Series([1, 2, 3], name='A').rename_axis('foo'),
+         {'name': 'A', 'data': [1, 2, 3]}),
+        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]),
+         {'name': 'A', 'data': [1, 2]}),
+    ])
+    def test_index_false_to_json_split(self, data, expected):
+        # GH 17394
+        # Testing index=False in to_json with orient='split'
+
+        result = data.to_json(orient='split', index=False)
+        result = json.loads(result)
+
+        assert result == expected
+
+    @pytest.mark.parametrize('data', [
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])),
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo')),
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
+                   index=[['a', 'b'], ['c', 'd']])),
+        (Series([1, 2, 3], name='A')),
+        (Series([1, 2, 3], name='A').rename_axis('foo')),
+        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']])),
+    ])
+    def test_index_false_to_json_table(self, data):
+        # GH 17394
+        # Testing index=False in to_json with orient='table'
+
+        result = data.to_json(orient='table', index=False)
+        result = json.loads(result)
+
+        expected = {
+            'schema': pd.io.json.build_table_schema(data, index=False),
+            'data': DataFrame(data).to_dict(orient='records')
+        }
+
+        assert result == expected
+
+    @pytest.mark.parametrize('orient', [
+        'records', 'index', 'columns', 'values'
+    ])
+    def test_index_false_error_to_json(self, orient):
+        # GH 17394
+        # Testing error message from to_json with index=False
+
+        df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
+
+        with tm.assert_raises_regex(ValueError, "'index=False' is only "
+                                                "valid when 'orient' is "
+                                                "'split' or 'table'"):
+            df.to_json(orient=orient, index=False)
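The tests above cover both supported orients and the guard added in pandas/io/json/json.py. Interactively, the error path looks roughly like this (editor's sketch; the message text is taken verbatim from the ValueError raised in the patch):

    >>> import pandas as pd
    >>> df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
    >>> df.to_json(orient='records', index=False)
    Traceback (most recent call last):
        ...
    ValueError: 'index=False' is only valid when 'orient' is 'split' or 'table'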