 from io import StringIO
 from itertools import islice
 import os
-from typing import Any, Callable, Optional, Type
+from typing import Any, Callable, Optional, Type, Iterator

 import numpy as np

@@ -350,20 +350,20 @@ def _write(
 )
 def read_json(
     path_or_buf=None,
-    orient=None,
-    typ="frame",
+    orient: str = None,
+    typ: str = "frame",
     dtype=None,
-    convert_axes=None,
+    convert_axes: bool = None,
     convert_dates=True,
-    keep_default_dates=True,
-    numpy=False,
-    precise_float=False,
-    date_unit=None,
-    encoding=None,
-    lines=False,
-    chunksize=None,
-    compression="infer",
-    nrows=None,
+    keep_default_dates: bool = True,
+    numpy: bool = False,
+    precise_float: bool = False,
+    date_unit: str = None,
+    encoding: str = None,
+    lines: bool = False,
+    chunksize: Optional[int] = None,
+    compression: str = "infer",
+    nrows: int = None,
 ):
     """
     Convert a JSON string to pandas object.
@@ -495,10 +495,12 @@ def read_json(
         This can only be passed if `lines=True`.
         If this is None, the file will be read into memory all at once.

-    chunksize : int, optional
+    nrows : int, default None
         The number of lines from the line-delimited jsonfile that has to be read.
         This can only be passed if `lines=True`.
         If this is None, all the rows will be returned.
+        .. versionadded:: 1.1
+

     compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
         For on-the-fly decompression of on-disk data. If 'infer', then use
@@ -632,20 +634,20 @@ class JsonReader(abc.Iterator):
     def __init__(
         self,
         filepath_or_buffer,
-        orient,
-        typ,
+        orient: str,
+        typ: str,
         dtype,
-        convert_axes,
+        convert_axes: bool,
         convert_dates,
-        keep_default_dates,
-        numpy,
-        precise_float,
-        date_unit,
-        encoding,
-        lines,
-        chunksize,
-        compression,
-        nrows,
+        keep_default_dates: bool,
+        numpy: bool,
+        precise_float: bool,
+        date_unit: str,
+        encoding: str,
+        lines: bool,
+        chunksize: Optional[int],
+        compression: str,
+        nrows: int,
     ):

         self.path_or_buf = filepath_or_buffer
@@ -732,6 +734,15 @@ def _combine_lines(self, lines) -> str:
         lines = filter(None, map(lambda x: x.strip(), lines))
         return "[" + ",".join(lines) + "]"

+    def _jsonstring_to_list_generator(self, data: str) -> Iterator[str]:
+        prev_index = -1
+        while True:
+            next_index = data.find("\n", prev_index + 1)
+            if next_index < 0:
+                break
+            yield data[prev_index + 1 : next_index]
+            prev_index = next_index
+
     def read(self):
         """
         Read the whole JSON input into a pandas object.
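The added helper scans the buffer with str.find and yields one line at a time, so islice can stop after nrows lines instead of splitting the entire string up front. Below is a minimal standalone sketch of that lazy line-splitting idea; the name iter_lines and the sample data are illustrative only and are not part of the pandas code.

from itertools import islice


def iter_lines(data: str):
    # Yield one newline-terminated line at a time instead of building
    # the full list that data.split("\n") would create.
    prev_index = -1
    while True:
        next_index = data.find("\n", prev_index + 1)
        if next_index < 0:
            break
        yield data[prev_index + 1 : next_index]
        prev_index = next_index


sample = '{"a": 1}\n{"a": 2}\n{"a": 3}\n'
# islice stops the generator after two lines; the rest of the buffer is never scanned.
print(list(islice(iter_lines(sample), 2)))  # ['{"a": 1}', '{"a": 2}']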
@@ -740,9 +751,10 @@ def read(self):
             obj = concat(self)
         elif self.lines:
             data = ensure_str(self.data)
-            data = data.split("\n")
             if self.nrows:
-                data = data[: self.nrows]
+                data = list(islice(self._jsonstring_to_list_generator(data), self.nrows))
+            else:
+                data = data.split("\n")
             obj = self._get_object_parser(self._combine_lines(data))
         else:
             obj = self._get_object_parser(self.data)
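From the public API, the effect of this change is that pandas.read_json only parses the requested number of lines when lines=True and nrows is given. A quick usage sketch, assuming pandas 1.1 or later, where the nrows argument is available:

from io import StringIO

import pandas as pd

# Three records of line-delimited JSON; nrows=2 parses only the first two.
buf = StringIO('{"a": 1}\n{"a": 2}\n{"a": 3}\n')
df = pd.read_json(buf, lines=True, nrows=2)
print(df)
#    a
# 0  1
# 1  2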