Skip to content

Commit 028d398

Browse files
committed
optimized list indexing and type hints added
1 parent fc4993f commit 028d398

File tree

1 file changed

+40
-28
lines changed

1 file changed

+40
-28
lines changed

pandas/io/json/_json.py

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from io import StringIO
44
from itertools import islice
55
import os
6-
from typing import Any, Callable, Optional, Type
6+
from typing import Any, Callable, Optional, Type, Iterator
77

88
import numpy as np
99

@@ -350,20 +350,20 @@ def _write(
350350
)
351351
def read_json(
352352
path_or_buf=None,
353-
orient=None,
354-
typ="frame",
353+
orient: str = None,
354+
typ: str = "frame",
355355
dtype=None,
356-
convert_axes=None,
356+
convert_axes: bool = None,
357357
convert_dates=True,
358-
keep_default_dates=True,
359-
numpy=False,
360-
precise_float=False,
361-
date_unit=None,
362-
encoding=None,
363-
lines=False,
364-
chunksize=None,
365-
compression="infer",
366-
nrows=None,
358+
keep_default_dates: bool = True,
359+
numpy: bool = False,
360+
precise_float: bool = False,
361+
date_unit: str = None,
362+
encoding: str = None,
363+
lines: bool = False,
364+
chunksize: Optional[int] = None,
365+
compression: str = "infer",
366+
nrows: int = None,
367367
):
368368
"""
369369
Convert a JSON string to pandas object.
@@ -495,10 +495,12 @@ def read_json(
495495
This can only be passed if `lines=True`.
496496
If this is None, the file will be read into memory all at once.
497497
498-
chunksize : int, optional
498+
nrows : int, default None
499499
The number of lines to read from the line-delimited JSON file.
500500
This can only be passed if `lines=True`.
501501
If this is None, all the rows will be returned.
502+
.. versionadded:: 1.1
503+
502504
503505
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
504506
For on-the-fly decompression of on-disk data. If 'infer', then use
@@ -632,20 +634,20 @@ class JsonReader(abc.Iterator):
632634
def __init__(
633635
self,
634636
filepath_or_buffer,
635-
orient,
636-
typ,
637+
orient: str,
638+
typ: str,
637639
dtype,
638-
convert_axes,
640+
convert_axes: bool,
639641
convert_dates,
640-
keep_default_dates,
641-
numpy,
642-
precise_float,
643-
date_unit,
644-
encoding,
645-
lines,
646-
chunksize,
647-
compression,
648-
nrows,
642+
keep_default_dates: bool,
643+
numpy: bool,
644+
precise_float: bool,
645+
date_unit: str,
646+
encoding: str,
647+
lines: bool,
648+
chunksize: Optional[int],
649+
compression: str,
650+
nrows: int,
649651
):
650652

651653
self.path_or_buf = filepath_or_buffer
@@ -732,6 +734,15 @@ def _combine_lines(self, lines) -> str:
732734
lines = filter(None, map(lambda x: x.strip(), lines))
733735
return "[" + ",".join(lines) + "]"
734736

737+
def _jsonstring_to_list_generaor(self, data: str) -> Iterator[str]:
738+
prev_index = -1
739+
while True:
740+
next_index = data.find("\n", prev_index + 1)
741+
if next_index < 0:
742+
break
743+
yield data[prev_index + 1 : next_index]
744+
prev_index = next_index
745+
735746
def read(self):
736747
"""
737748
Read the whole JSON input into a pandas object.
@@ -740,9 +751,10 @@ def read(self):
740751
obj = concat(self)
741752
elif self.lines:
742753
data = ensure_str(self.data)
743-
data = data.split("\n")
744754
if self.nrows:
745-
data = data[: self.nrows]
755+
data = list(islice(self._jsonstring_to_list_generaor(data), self.nrows))
756+
else:
757+
data = data.split("\n")
746758
obj = self._get_object_parser(self._combine_lines(data))
747759
else:
748760
obj = self._get_object_parser(self.data)

0 commit comments

Comments
 (0)