EHN: Add index parameter to to_json

reidy-p · reidy-p · commit df61ceeb6560 · 2017-12-01T13:42:32.000Z
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -45,6 +45,7 @@ Other Enhancements
 - Improved wording of ``ValueError`` raised in :func:`to_datetime` when ``unit=`` is passed with a non-convertible value (:issue:`14350`)
 - :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`)
 - :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`)
+- :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
 
 .. _whatsnew_0220.api_breaking:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1295,7 +1295,8 @@ def _repr_latex_(self):
 
     def to_json(self, path_or_buf=None, orient=None, date_format=None,
                 double_precision=10, force_ascii=True, date_unit='ms',
-                default_handler=None, lines=False, compression=None):
+                default_handler=None, lines=False, compression=None,
+                index=True):
         """
         Convert the object to a JSON string.
 
@@ -1363,6 +1364,13 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
 
             .. versionadded:: 0.21.0
 
+        index : boolean, default True
+            Whether to include the index values in the JSON string. A
+            ValueError will be thrown if index is False when orient is not
+            'split' or 'table'.
+
+            .. versionadded:: 0.22.0
+
         Returns
         -------
         same type as input object with filtered info axis
@@ -1415,7 +1423,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
                             double_precision=double_precision,
                             force_ascii=force_ascii, date_unit=date_unit,
                             default_handler=default_handler,
-                            lines=lines, compression=compression)
+                            lines=lines, compression=compression,
+                            index=index)
 
     def to_hdf(self, path_or_buf, key, **kwargs):
         """Write the contained data to an HDF5 file using HDFStore.
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
@@ -28,7 +28,12 @@
 # interface to/from
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms',
-            default_handler=None, lines=False, compression=None):
+            default_handler=None, lines=False, compression=None,
+            index=True):
+
+    if not index and orient not in ['split', 'table']:
+        raise ValueError("'index=False' is only valid when 'orient' is "
+                         "'split' or 'table'")
 
     path_or_buf = _stringify_path(path_or_buf)
     if lines and orient != 'records':
@@ -49,7 +54,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
     s = writer(
         obj, orient=orient, date_format=date_format,
         double_precision=double_precision, ensure_ascii=force_ascii,
-        date_unit=date_unit, default_handler=default_handler).write()
+        date_unit=date_unit, default_handler=default_handler,
+        index=index).write()
 
     if lines:
         s = _convert_to_line_delimits(s)
@@ -69,7 +75,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
 class Writer(object):
 
     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None):
         self.obj = obj
 
         if orient is None:
@@ -81,6 +87,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         self.ensure_ascii = ensure_ascii
         self.date_unit = date_unit
         self.default_handler = default_handler
+        self.index = index
 
         self.is_copy = None
         self._format_axes()
@@ -108,6 +115,19 @@ def _format_axes(self):
             raise ValueError("Series index must be unique for orient="
                              "'{orient}'".format(orient=self.orient))
 
+    def write(self):
+        if not self.index and self.orient == 'split':
+            self.obj = {"name": self.obj.name, "data": self.obj.values}
+        return dumps(
+            self.obj,
+            orient=self.orient,
+            double_precision=self.double_precision,
+            ensure_ascii=self.ensure_ascii,
+            date_unit=self.date_unit,
+            iso_dates=self.date_format == 'iso',
+            default_handler=self.default_handler
+        )
+
 
 class FrameWriter(Writer):
     _default_orient = 'columns'
@@ -123,12 +143,26 @@ def _format_axes(self):
             raise ValueError("DataFrame columns must be unique for orient="
                              "'{orient}'.".format(orient=self.orient))
 
+    def write(self):
+        if not self.index and self.orient == 'split':
+            self.obj = self.obj.to_dict(orient='split')
+            del self.obj["index"]
+        return dumps(
+            self.obj,
+            orient=self.orient,
+            double_precision=self.double_precision,
+            ensure_ascii=self.ensure_ascii,
+            date_unit=self.date_unit,
+            iso_dates=self.date_format == 'iso',
+            default_handler=self.default_handler
+        )
+
 
 class JSONTableWriter(FrameWriter):
     _default_orient = 'records'
 
     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None):
         """
         Adds a `schema` attribut with the Table Schema, resets
         the index (can't do in caller, because the schema inference needs
@@ -137,7 +171,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         """
         super(JSONTableWriter, self).__init__(
             obj, orient, date_format, double_precision, ensure_ascii,
-            date_unit, default_handler=default_handler)
+            date_unit, index, default_handler=default_handler)
 
         if date_format != 'iso':
             msg = ("Trying to write with `orient='table'` and "
@@ -146,7 +180,7 @@ def __init__(self, obj, orient, date_format, double_precision,
                    .format(fmt=date_format))
             raise ValueError(msg)
 
-        self.schema = build_table_schema(obj)
+        self.schema = build_table_schema(obj, index=self.index)
 
         # NotImplementd on a column MultiIndex
         if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
@@ -173,7 +207,17 @@ def __init__(self, obj, orient, date_format, double_precision,
         self.orient = 'records'
 
     def write(self):
-        data = super(JSONTableWriter, self).write()
+        if not self.index:
+            self.obj = self.obj.drop('index', axis=1)
+        data = dumps(
+            self.obj,
+            orient=self.orient,
+            double_precision=self.double_precision,
+            ensure_ascii=self.ensure_ascii,
+            date_unit=self.date_unit,
+            iso_dates=self.date_format == 'iso',
+            default_handler=self.default_handler
+        )
         serialized = '{{"schema": {schema}, "data": {data}}}'.format(
             schema=dumps(self.schema), data=data)
         return serialized
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -1147,3 +1147,80 @@ def test_data_frame_size_after_to_json(self):
         size_after = df.memory_usage(index=True, deep=True).sum()
 
         assert size_before == size_after
+
+    def test_index_false_to_json(self):
+        # GH 17394
+        # Testing index parameter in to_json
+        import json
+        df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
+
+        result = df.to_json(orient='split', index=False)
+        result = json.loads(result)
+
+        expected = {
+            'columns': ['a', 'b'],
+            'data': [[1, 2], [4, 5]]
+        }
+
+        assert result == expected
+
+        result = df.to_json(orient='table', index=False)
+        result = json.loads(result)
+
+        schema = {
+            'fields': [{'name': 'a', 'type': 'integer'},
+                       {'name': 'b', 'type': 'integer'}],
+            'pandas_version': '0.20.0'
+        }
+
+        expected = {
+            'schema': schema,
+            'data': [{'a': 1, 'b': 2}, {'a': 4, 'b': 5}]
+        }
+
+        assert result == expected
+
+        s = pd.Series([1, 2, 3], name='A')
+
+        result = s.to_json(orient='split', index=False)
+        result = json.loads(result)
+
+        expected = {
+            'name': 'A',
+            'data': [1, 2, 3]
+        }
+
+        assert result == expected
+
+        result = s.to_json(orient='table', index=False)
+        result = json.loads(result)
+
+        fields = [{'name': 'A', 'type': 'integer'}]
+
+        schema = {
+            'fields': fields,
+            'pandas_version': '0.20.0'
+        }
+
+        expected = {
+            'schema': schema,
+            'data': [{'A': 1}, {'A': 2}, {'A': 3}]
+        }
+
+        assert result == expected
+
+    @pytest.mark.parametrize('orient', [
+        ('records'),
+        ('index'),
+        ('columns'),
+        ('values'),
+    ])
+    def test_index_false_error_to_json(self, orient):
+        # GH 17394
+        # Testing error message from to_json with index=False
+        df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
+
+        with tm.assert_raises_regex(ValueError, "'index=False' is only "
+                                                "valid when 'orient' is "
+                                                "'split' or 'table'"):
+            df.to_json(orient=orient, index=False)