12
12
13
13
from pandas .core .dtypes .common import is_list_like
14
14
from pandas .errors import EmptyDataError
15
- from pandas .io .common import _is_url , urlopen , _validate_header_arg
15
+ from pandas .io .common import _is_url , _urlopen , _validate_header_arg
16
16
from pandas .io .parsers import TextParser
17
17
from pandas .compat import (lrange , lmap , u , string_types , iteritems ,
18
18
raise_with_traceback , binary_type )
@@ -113,7 +113,7 @@ def _get_skiprows(skiprows):
113
113
type (skiprows ).__name__ )
114
114
115
115
116
- def _read (obj ):
116
+ def _read (obj , session = None ):
117
117
"""Try to read from a url, file or string.
118
118
119
119
Parameters
@@ -125,8 +125,7 @@ def _read(obj):
125
125
raw_text : str
126
126
"""
127
127
if _is_url (obj ):
128
- with urlopen (obj ) as url :
129
- text = url .read ()
128
+ text , _ = _urlopen (obj , session = session )
130
129
elif hasattr (obj , 'read' ):
131
130
text = obj .read ()
132
131
elif isinstance (obj , char_types ):
@@ -201,12 +200,13 @@ class _HtmlFrameParser(object):
201
200
functionality.
202
201
"""
203
202
204
- def __init__ (self , io , match , attrs , encoding , displayed_only ):
203
+ def __init__ (self , io , match , attrs , encoding , displayed_only , session = None ):
205
204
self .io = io
206
205
self .match = match
207
206
self .attrs = attrs
208
207
self .encoding = encoding
209
208
self .displayed_only = displayed_only
209
+ self .session = session
210
210
211
211
def parse_tables (self ):
212
212
"""
@@ -590,7 +590,7 @@ def _parse_tfoot_tr(self, table):
590
590
return table .select ('tfoot tr' )
591
591
592
592
def _setup_build_doc (self ):
593
- raw_text = _read (self .io )
593
+ raw_text = _read (self .io , self . session )
594
594
if not raw_text :
595
595
raise ValueError ('No text parsed from document: {doc}'
596
596
.format (doc = self .io ))
@@ -713,7 +713,7 @@ def _build_doc(self):
713
713
714
714
try :
715
715
if _is_url (self .io ):
716
- with urlopen (self .io ) as f :
716
+ with _urlopen (self .io ) as f :
717
717
r = parse (f , parser = parser )
718
718
else :
719
719
# try to parse the input in the simplest way
@@ -887,9 +887,10 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
887
887
888
888
# hack around python 3 deleting the exception variable
889
889
retained = None
890
+ session = kwargs .get ('session' , None )
890
891
for flav in flavor :
891
892
parser = _parser_dispatch (flav )
892
- p = parser (io , compiled_match , attrs , encoding , displayed_only )
893
+ p = parser (io , compiled_match , attrs , encoding , displayed_only , session )
893
894
894
895
try :
895
896
tables = p .parse_tables ()
@@ -925,7 +926,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
925
926
skiprows = None , attrs = None , parse_dates = False ,
926
927
tupleize_cols = None , thousands = ',' , encoding = None ,
927
928
decimal = '.' , converters = None , na_values = None ,
928
- keep_default_na = True , displayed_only = True ):
929
+ keep_default_na = True , displayed_only = True , session = None ):
929
930
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
930
931
931
932
Parameters
@@ -1088,4 +1089,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
1088
1089
thousands = thousands , attrs = attrs , encoding = encoding ,
1089
1090
decimal = decimal , converters = converters , na_values = na_values ,
1090
1091
keep_default_na = keep_default_na ,
1091
- displayed_only = displayed_only )
1092
+ displayed_only = displayed_only , session = session )
0 commit comments