Skip to content

Commit 55170dd

Browse files
committed
Update read_json tests
Split out tests with lines=True into a separate test class. Parametrize tests. Replace """ comments with #.
1 parent a284187 commit 55170dd

File tree

1 file changed

+92
-91
lines changed

1 file changed

+92
-91
lines changed

pandas/tests/io/json/test_pandas.py

Lines changed: 92 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,62 @@ def test_tz_range_is_utc(self):
991991
df = DataFrame({'DT': dti})
992992
assert dumps(df, iso_dates=True) == dfexp
993993

994+
def test_latin_encoding(self):
995+
if compat.PY2:
996+
tm.assert_raises_regex(
997+
TypeError, r'\[unicode\] is not implemented as a table column')
998+
return
999+
1000+
# GH 13774
1001+
pytest.skip("encoding not implemented in .to_json(), "
1002+
"xref #13774")
1003+
1004+
values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
1005+
[b'E\xc9, 17', b'a', b'b', b'c'],
1006+
[b'EE, 17', b'', b'a', b'b', b'c'],
1007+
[b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
1008+
[b'', b'a', b'b', b'c'],
1009+
[b'\xf8\xfc', b'a', b'b', b'c'],
1010+
[b'A\xf8\xfc', b'', b'a', b'b', b'c'],
1011+
[np.nan, b'', b'b', b'c'],
1012+
[b'A\xf8\xfc', np.nan, b'', b'b', b'c']]
1013+
1014+
def _try_decode(x, encoding='latin-1'):
1015+
try:
1016+
return x.decode(encoding)
1017+
except AttributeError:
1018+
return x
1019+
1020+
# not sure how to remove latin-1 from code in python 2 and 3
1021+
values = [[_try_decode(x) for x in y] for y in values]
1022+
1023+
examples = []
1024+
for dtype in ['category', object]:
1025+
for val in values:
1026+
examples.append(Series(val, dtype=dtype))
1027+
1028+
def roundtrip(s, encoding='latin-1'):
1029+
with ensure_clean('test.json') as path:
1030+
s.to_json(path, encoding=encoding)
1031+
retr = read_json(path, encoding=encoding)
1032+
assert_series_equal(s, retr, check_categorical=False)
1033+
1034+
for s in examples:
1035+
roundtrip(s)
1036+
1037+
def test_data_frame_size_after_to_json(self):
1038+
# GH15344
1039+
df = DataFrame({'a': [str(1)]})
1040+
1041+
size_before = df.memory_usage(index=True, deep=True).sum()
1042+
df.to_json()
1043+
size_after = df.memory_usage(index=True, deep=True).sum()
1044+
1045+
assert size_before == size_after
1046+
1047+
1048+
class TestPandasJsonLines(object):
1049+
9941050
def test_read_jsonl(self):
9951051
# GH9180
9961052
result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
@@ -1038,27 +1094,26 @@ def test_to_jsonl(self):
10381094
assert result == expected
10391095
assert_frame_equal(pd.read_json(result, lines=True), df)
10401096

1041-
def test_readjson_chunks(self, lines_json_df):
1042-
"""Basic test that read_json(chunks=True) gives the same result as
1043-
read_json(chunks=False)"""
1097+
@pytest.mark.parametrize("chunksize", [1, 1.0])
1098+
def test_readjson_chunks(self, lines_json_df, chunksize):
1099+
# Basic test that read_json(chunks=True) gives the same result as
1100+
# read_json(chunks=False)
10441101
# GH17048: memory usage when lines=True
10451102

1046-
for cs in [1, 1.0]:
1103+
unchunked = pd.read_json(StringIO(lines_json_df), lines=True)
1104+
reader = pd.read_json(StringIO(lines_json_df), lines=True,
1105+
chunksize=chunksize)
1106+
chunked = pd.concat(reader)
10471107

1048-
unchunked = pd.read_json(StringIO(lines_json_df), lines=True)
1049-
chunked = pd.concat(
1050-
pd.read_json(StringIO(lines_json_df), lines=True, chunksize=cs)
1051-
)
1052-
1053-
assert_frame_equal(chunked, unchunked)
1108+
assert_frame_equal(chunked, unchunked)
10541109

10551110
def test_readjson_chunksize_requires_lines(self, lines_json_df):
10561111
msg = "chunksize can only be passed if lines=True"
10571112
with tm.assert_raises_regex(ValueError, msg):
10581113
pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2)
10591114

10601115
def test_readjson_chunks_series(self):
1061-
"""Test reading line-format JSON to Series with chunksize param"""
1116+
# Test reading line-format JSON to Series with chunksize param
10621117
s = pd.Series({'A': 1, 'B': 2})
10631118

10641119
strio = StringIO(s.to_json(lines=True, orient="records"))
@@ -1072,10 +1127,8 @@ def test_readjson_chunks_series(self):
10721127
assert_series_equal(chunked, unchunked)
10731128

10741129
def test_readjson_each_chunk(self, lines_json_df):
1075-
"""
1076-
Other tests check that the final result of read_json(chunksize=True) is
1077-
correct. This checks that the intermediate chunks read in are correct.
1078-
"""
1130+
# Other tests check that the final result of read_json(chunksize=True)
1131+
# is correct. This checks the intermediate chunks.
10791132
chunks = list(
10801133
pd.read_json(StringIO(lines_json_df), lines=True, chunksize=2)
10811134
)
@@ -1090,27 +1143,29 @@ def test_readjson_chunks_from_file(self):
10901143
unchunked = pd.read_json(path, lines=True)
10911144
assert_frame_equal(unchunked, chunked)
10921145

1093-
def test_readjson_chunks_closes(self):
1094-
for chunksize in [None, 1]:
1095-
with ensure_clean('test.json') as path:
1096-
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
1097-
df.to_json(path, lines=True, orient="records")
1098-
f = open(path, 'r')
1099-
if chunksize is not None:
1100-
pd.concat(pd.read_json(f, lines=True, chunksize=chunksize))
1101-
else:
1102-
pd.read_json(f, lines=True)
1103-
assert f.closed, \
1104-
"didn't close file with chunksize = %s" % chunksize
1146+
@pytest.mark.parametrize("chunksize", [None, 1])
1147+
def test_readjson_chunks_closes(self, chunksize):
1148+
with ensure_clean('test.json') as path:
1149+
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
1150+
df.to_json(path, lines=True, orient="records")
1151+
f = open(path, 'r')
1152+
if chunksize is not None:
1153+
pd.concat(pd.read_json(f, lines=True, chunksize=chunksize))
1154+
else:
1155+
pd.read_json(f, lines=True)
1156+
assert f.closed, \
1157+
"didn't close file with chunksize = %s" % chunksize
11051158

1106-
def test_readjson_invalid_chunksize(self, lines_json_df):
1159+
@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])
1160+
def test_readjson_invalid_chunksize(self, lines_json_df, chunksize):
11071161
msg = r"'chunksize' must be an integer >=1"
11081162

1109-
for cs in [0, -1, 2.2, 'foo']:
1110-
with tm.assert_raises_regex(ValueError, msg):
1111-
pd.read_json(StringIO(lines_json_df), lines=True, chunksize=cs)
1163+
with tm.assert_raises_regex(ValueError, msg):
1164+
pd.read_json(StringIO(lines_json_df), lines=True,
1165+
chunksize=chunksize)
11121166

1113-
def test_readjson_chunks_multiple_empty_lines(self):
1167+
@pytest.mark.parametrize("chunksize", [None, 1, 2])
1168+
def test_readjson_chunks_multiple_empty_lines(self, chunksize):
11141169
j = """
11151170
11161171
{"A":1,"B":4}
@@ -1127,62 +1182,8 @@ def test_readjson_chunks_multiple_empty_lines(self):
11271182
11281183
{"A":3,"B":6}
11291184
"""
1130-
for chunksize in [None, 1, 2]:
1131-
orig = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
1132-
test = pd.read_json(j, lines=True, chunksize=chunksize)
1133-
if chunksize is not None:
1134-
test = pd.concat(test)
1135-
tm.assert_frame_equal(orig, test, obj="chunksize: %s" % chunksize)
1136-
1137-
def test_latin_encoding(self):
1138-
if compat.PY2:
1139-
tm.assert_raises_regex(
1140-
TypeError, r'\[unicode\] is not implemented as a table column')
1141-
return
1142-
1143-
# GH 13774
1144-
pytest.skip("encoding not implemented in .to_json(), "
1145-
"xref #13774")
1146-
1147-
values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
1148-
[b'E\xc9, 17', b'a', b'b', b'c'],
1149-
[b'EE, 17', b'', b'a', b'b', b'c'],
1150-
[b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
1151-
[b'', b'a', b'b', b'c'],
1152-
[b'\xf8\xfc', b'a', b'b', b'c'],
1153-
[b'A\xf8\xfc', b'', b'a', b'b', b'c'],
1154-
[np.nan, b'', b'b', b'c'],
1155-
[b'A\xf8\xfc', np.nan, b'', b'b', b'c']]
1156-
1157-
def _try_decode(x, encoding='latin-1'):
1158-
try:
1159-
return x.decode(encoding)
1160-
except AttributeError:
1161-
return x
1162-
1163-
# not sure how to remove latin-1 from code in python 2 and 3
1164-
values = [[_try_decode(x) for x in y] for y in values]
1165-
1166-
examples = []
1167-
for dtype in ['category', object]:
1168-
for val in values:
1169-
examples.append(Series(val, dtype=dtype))
1170-
1171-
def roundtrip(s, encoding='latin-1'):
1172-
with ensure_clean('test.json') as path:
1173-
s.to_json(path, encoding=encoding)
1174-
retr = read_json(path, encoding=encoding)
1175-
assert_series_equal(s, retr, check_categorical=False)
1176-
1177-
for s in examples:
1178-
roundtrip(s)
1179-
1180-
def test_data_frame_size_after_to_json(self):
1181-
# GH15344
1182-
df = DataFrame({'a': [str(1)]})
1183-
1184-
size_before = df.memory_usage(index=True, deep=True).sum()
1185-
df.to_json()
1186-
size_after = df.memory_usage(index=True, deep=True).sum()
1187-
1188-
assert size_before == size_after
1185+
orig = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
1186+
test = pd.read_json(j, lines=True, chunksize=chunksize)
1187+
if chunksize is not None:
1188+
test = pd.concat(test)
1189+
tm.assert_frame_equal(orig, test, obj="chunksize: %s" % chunksize)

0 commit comments

Comments (0)