Skip to content

Commit df61cee

Browse files
committed
ENH: Add index parameter to to_json
1 parent 1eedcf6 commit df61cee

File tree

4 files changed

+140
-9
lines changed

4 files changed

+140
-9
lines changed

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Other Enhancements
4545
- Improved wording of ``ValueError`` raised in :func:`to_datetime` when ``unit=`` is passed with a non-convertible value (:issue:`14350`)
4646
- :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`)
4747
- :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`)
48+
- :func:`DataFrame.to_json` and :func:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
4849

4950
.. _whatsnew_0220.api_breaking:
5051

pandas/core/generic.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,7 +1295,8 @@ def _repr_latex_(self):
12951295

12961296
def to_json(self, path_or_buf=None, orient=None, date_format=None,
12971297
double_precision=10, force_ascii=True, date_unit='ms',
1298-
default_handler=None, lines=False, compression=None):
1298+
default_handler=None, lines=False, compression=None,
1299+
index=True):
12991300
"""
13001301
Convert the object to a JSON string.
13011302
@@ -1363,6 +1364,13 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
13631364
13641365
.. versionadded:: 0.21.0
13651366
1367+
index : boolean, default True
1368+
Whether to include the index values in the JSON string. A
1369+
ValueError will be raised if index is False when orient is not
1370+
'split' or 'table'.
1371+
1372+
.. versionadded:: 0.22.0
1373+
13661374
Returns
13671375
-------
13681376
same type as input object with filtered info axis
@@ -1415,7 +1423,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
14151423
double_precision=double_precision,
14161424
force_ascii=force_ascii, date_unit=date_unit,
14171425
default_handler=default_handler,
1418-
lines=lines, compression=compression)
1426+
lines=lines, compression=compression,
1427+
index=index)
14191428

14201429
def to_hdf(self, path_or_buf, key, **kwargs):
14211430
"""Write the contained data to an HDF5 file using HDFStore.

pandas/io/json/json.py

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,12 @@
2828
# interface to/from
2929
def to_json(path_or_buf, obj, orient=None, date_format='epoch',
3030
double_precision=10, force_ascii=True, date_unit='ms',
31-
default_handler=None, lines=False, compression=None):
31+
default_handler=None, lines=False, compression=None,
32+
index=True):
33+
34+
if not index and orient not in ['split', 'table']:
35+
raise ValueError("'index=False' is only valid when 'orient' is "
36+
"'split' or 'table'")
3237

3338
path_or_buf = _stringify_path(path_or_buf)
3439
if lines and orient != 'records':
@@ -49,7 +54,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
4954
s = writer(
5055
obj, orient=orient, date_format=date_format,
5156
double_precision=double_precision, ensure_ascii=force_ascii,
52-
date_unit=date_unit, default_handler=default_handler).write()
57+
date_unit=date_unit, default_handler=default_handler,
58+
index=index).write()
5359

5460
if lines:
5561
s = _convert_to_line_delimits(s)
@@ -69,7 +75,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
6975
class Writer(object):
7076

7177
def __init__(self, obj, orient, date_format, double_precision,
72-
ensure_ascii, date_unit, default_handler=None):
78+
ensure_ascii, date_unit, index, default_handler=None):
7379
self.obj = obj
7480

7581
if orient is None:
@@ -81,6 +87,7 @@ def __init__(self, obj, orient, date_format, double_precision,
8187
self.ensure_ascii = ensure_ascii
8288
self.date_unit = date_unit
8389
self.default_handler = default_handler
90+
self.index = index
8491

8592
self.is_copy = None
8693
self._format_axes()
@@ -108,6 +115,19 @@ def _format_axes(self):
108115
raise ValueError("Series index must be unique for orient="
109116
"'{orient}'".format(orient=self.orient))
110117

118+
def write(self):
    """
    Serialize the wrapped object to JSON text via ``dumps``.

    When ``self.index`` is falsy and ``self.orient`` is ``'split'``, only
    the ``name`` and ``data`` entries of the object are serialized, so the
    index is left out of the output.

    Returns
    -------
    The JSON text produced by ``dumps``.
    """
    # Build the payload in a local instead of rebinding ``self.obj``:
    # once ``self.obj`` has been replaced by a plain dict, a second call
    # to ``write()`` would fail looking up ``.name`` / ``.values`` on it.
    payload = self.obj
    if not self.index and self.orient == 'split':
        payload = {"name": payload.name, "data": payload.values}

    return dumps(
        payload,
        orient=self.orient,
        double_precision=self.double_precision,
        ensure_ascii=self.ensure_ascii,
        date_unit=self.date_unit,
        iso_dates=self.date_format == 'iso',
        default_handler=self.default_handler
    )
130+
111131

112132
class FrameWriter(Writer):
113133
_default_orient = 'columns'
@@ -123,12 +143,26 @@ def _format_axes(self):
123143
raise ValueError("DataFrame columns must be unique for orient="
124144
"'{orient}'.".format(orient=self.orient))
125145

146+
def write(self):
    """
    Serialize the wrapped frame to JSON text via ``dumps``.

    When ``self.index`` is falsy and ``self.orient`` is ``'split'``, the
    frame is first converted with ``to_dict(orient='split')`` and the
    ``"index"`` entry is removed, so the index is left out of the output.

    Returns
    -------
    The JSON text produced by ``dumps``.
    """
    # Work on a local so the writer keeps its original frame: rebinding
    # ``self.obj`` to the dict would make a second ``write()`` call fail
    # on ``.to_dict``.
    payload = self.obj
    if not self.index and self.orient == 'split':
        payload = payload.to_dict(orient='split')
        del payload["index"]

    return dumps(
        payload,
        orient=self.orient,
        double_precision=self.double_precision,
        ensure_ascii=self.ensure_ascii,
        date_unit=self.date_unit,
        iso_dates=self.date_format == 'iso',
        default_handler=self.default_handler
    )
159+
126160

127161
class JSONTableWriter(FrameWriter):
128162
_default_orient = 'records'
129163

130164
def __init__(self, obj, orient, date_format, double_precision,
131-
ensure_ascii, date_unit, default_handler=None):
165+
ensure_ascii, date_unit, index, default_handler=None):
132166
"""
133167
Adds a `schema` attribute with the Table Schema, resets
134168
the index (can't do in caller, because the schema inference needs
@@ -137,7 +171,7 @@ def __init__(self, obj, orient, date_format, double_precision,
137171
"""
138172
super(JSONTableWriter, self).__init__(
139173
obj, orient, date_format, double_precision, ensure_ascii,
140-
date_unit, default_handler=default_handler)
174+
date_unit, index, default_handler=default_handler)
141175

142176
if date_format != 'iso':
143177
msg = ("Trying to write with `orient='table'` and "
@@ -146,7 +180,7 @@ def __init__(self, obj, orient, date_format, double_precision,
146180
.format(fmt=date_format))
147181
raise ValueError(msg)
148182

149-
self.schema = build_table_schema(obj)
183+
self.schema = build_table_schema(obj, index=self.index)
150184

151185
# NotImplemented on a column MultiIndex
152186
if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
@@ -173,7 +207,17 @@ def __init__(self, obj, orient, date_format, double_precision,
173207
self.orient = 'records'
174208

175209
def write(self):
    """
    Serialize the table data together with its Table Schema.

    The data is dumped with ``dumps`` and embedded, next to the dumped
    ``self.schema``, in a ``{"schema": ..., "data": ...}`` JSON object.
    When ``self.index`` is falsy, the ``'index'`` column is dropped from
    the data before serialization.

    Returns
    -------
    str
        The combined ``schema`` / ``data`` JSON document.
    """
    # Drop into a local rather than rebinding ``self.obj``; otherwise a
    # second ``write()`` call would raise KeyError because the 'index'
    # column is already gone.
    frame = self.obj
    if not self.index:
        # NOTE(review): assumes the reset index lives in a column that is
        # literally named 'index'; a named index or a MultiIndex would
        # presumably produce other labels - confirm against __init__.
        frame = frame.drop('index', axis=1)

    data = dumps(
        frame,
        orient=self.orient,
        double_precision=self.double_precision,
        ensure_ascii=self.ensure_ascii,
        date_unit=self.date_unit,
        iso_dates=self.date_format == 'iso',
        default_handler=self.default_handler
    )
    serialized = '{{"schema": {schema}, "data": {data}}}'.format(
        schema=dumps(self.schema), data=data)
    return serialized

pandas/tests/io/json/test_pandas.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,3 +1147,80 @@ def test_data_frame_size_after_to_json(self):
11471147
size_after = df.memory_usage(index=True, deep=True).sum()
11481148

11491149
assert size_before == size_after
1150+
1151+
def test_index_false_to_json(self):
    """``index=False`` omits the index for orient='split' and 'table'."""
    # GH 17394
    # Testing index parameter in to_json
    import json

    frame = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])

    # DataFrame, orient='split': no "index" entry in the output
    split_frame = json.loads(frame.to_json(orient='split', index=False))
    assert split_frame == {
        'columns': ['a', 'b'],
        'data': [[1, 2], [4, 5]]
    }

    # DataFrame, orient='table': index absent from schema and records
    table_frame = json.loads(frame.to_json(orient='table', index=False))
    frame_schema = {
        'fields': [{'name': 'a', 'type': 'integer'},
                   {'name': 'b', 'type': 'integer'}],
        'pandas_version': '0.20.0'
    }
    assert table_frame == {
        'schema': frame_schema,
        'data': [{'a': 1, 'b': 2}, {'a': 4, 'b': 5}]
    }

    series = pd.Series([1, 2, 3], name='A')

    # Series, orient='split': only name and data remain
    split_series = json.loads(series.to_json(orient='split', index=False))
    assert split_series == {
        'name': 'A',
        'data': [1, 2, 3]
    }

    # Series, orient='table': single field, one record per value
    table_series = json.loads(series.to_json(orient='table', index=False))
    series_schema = {
        'fields': [{'name': 'A', 'type': 'integer'}],
        'pandas_version': '0.20.0'
    }
    assert table_series == {
        'schema': series_schema,
        'data': [{'A': 1}, {'A': 2}, {'A': 3}]
    }
1211+
1212+
@pytest.mark.parametrize('orient', ['records', 'index', 'columns', 'values'])
def test_index_false_error_to_json(self, orient):
    """``index=False`` is rejected for every orient but 'split'/'table'."""
    # GH 17394
    # Testing error message from to_json with index=False
    frame = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])

    expected_msg = ("'index=False' is only "
                    "valid when 'orient' is "
                    "'split' or 'table'")
    with tm.assert_raises_regex(ValueError, expected_msg):
        frame.to_json(orient=orient, index=False)

0 commit comments

Comments
 (0)