
Commit ae80099

implement session for excel, html, and json
1 parent df79b02 commit ae80099


3 files changed: +17 -13 lines changed


pandas/io/excel.py (+4 -2)

@@ -317,7 +317,8 @@ def read_excel(io,
                         "`sheet`")

     if not isinstance(io, ExcelFile):
-        io = ExcelFile(io, engine=engine)
+        session = kwds.get('session', None)
+        io = ExcelFile(io, engine=engine, session=session)

     return io.parse(
         sheet_name=sheet_name,
@@ -381,10 +382,11 @@ def __init__(self, io, **kwds):
         if engine is not None and engine != 'xlrd':
             raise ValueError("Unknown engine: {engine}".format(engine=engine))

+        session = kwds.pop('session', None)
         # If io is a url, want to keep the data as bytes so can't pass
         # to get_filepath_or_buffer()
         if _is_url(self._io):
-            io = _urlopen(self._io)
+            io, _ = _urlopen(self._io, session=session)
         elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
            io, _, _, _ = get_filepath_or_buffer(self._io)
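For context, a minimal usage sketch of what this change enables for read_excel: the session keyword is read from **kwds, forwarded to ExcelFile, and passed on to _urlopen when io is a URL. The sketch assumes session is a requests.Session (the object the patched pandas.io.common._urlopen is expected to accept); the URL and credentials are placeholders.

import pandas as pd
import requests

# Hypothetical example: reuse an authenticated HTTP session for the fetch.
session = requests.Session()
session.auth = ("user", "secret")            # placeholder credentials
session.headers.update({"User-Agent": "pandas-excel-reader"})

# read_excel picks 'session' out of **kwds and hands it to ExcelFile,
# which pops it and passes it to _urlopen for URL inputs.
df = pd.read_excel("https://example.com/reports/q1.xlsx",
                   sheet_name=0, session=session)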

pandas/io/html.py (+11 -10)

@@ -12,7 +12,7 @@

 from pandas.core.dtypes.common import is_list_like
 from pandas.errors import EmptyDataError
-from pandas.io.common import _is_url, urlopen, _validate_header_arg
+from pandas.io.common import _is_url, _urlopen, _validate_header_arg
 from pandas.io.parsers import TextParser
 from pandas.compat import (lrange, lmap, u, string_types, iteritems,
                            raise_with_traceback, binary_type)
@@ -113,7 +113,7 @@ def _get_skiprows(skiprows):
                     type(skiprows).__name__)


-def _read(obj):
+def _read(obj, session=None):
     """Try to read from a url, file or string.

     Parameters
@@ -125,8 +125,7 @@ def _read(obj):
     raw_text : str
     """
     if _is_url(obj):
-        with urlopen(obj) as url:
-            text = url.read()
+        text, _ = _urlopen(obj, session=session)
     elif hasattr(obj, 'read'):
         text = obj.read()
     elif isinstance(obj, char_types):
@@ -201,12 +200,13 @@ class _HtmlFrameParser(object):
     functionality.
     """

-    def __init__(self, io, match, attrs, encoding, displayed_only):
+    def __init__(self, io, match, attrs, encoding, displayed_only, session=None):
         self.io = io
         self.match = match
         self.attrs = attrs
         self.encoding = encoding
         self.displayed_only = displayed_only
+        self.session = session

     def parse_tables(self):
         """
@@ -590,7 +590,7 @@ def _parse_tfoot_tr(self, table):
         return table.select('tfoot tr')

     def _setup_build_doc(self):
-        raw_text = _read(self.io)
+        raw_text = _read(self.io, self.session)
         if not raw_text:
             raise ValueError('No text parsed from document: {doc}'
                              .format(doc=self.io))
@@ -713,7 +713,7 @@ def _build_doc(self):

         try:
             if _is_url(self.io):
-                with urlopen(self.io) as f:
+                with _urlopen(self.io) as f:
                     r = parse(f, parser=parser)
             else:
                 # try to parse the input in the simplest way
@@ -887,9 +887,10 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):

     # hack around python 3 deleting the exception variable
     retained = None
+    session = kwargs.get('session', None)
     for flav in flavor:
         parser = _parser_dispatch(flav)
-        p = parser(io, compiled_match, attrs, encoding, displayed_only)
+        p = parser(io, compiled_match, attrs, encoding, displayed_only, session)

         try:
             tables = p.parse_tables()
@@ -925,7 +926,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
               skiprows=None, attrs=None, parse_dates=False,
               tupleize_cols=None, thousands=',', encoding=None,
               decimal='.', converters=None, na_values=None,
-              keep_default_na=True, displayed_only=True):
+              keep_default_na=True, displayed_only=True, session=None):
     r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.

     Parameters
@@ -1088,4 +1089,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
                   thousands=thousands, attrs=attrs, encoding=encoding,
                   decimal=decimal, converters=converters, na_values=na_values,
                   keep_default_na=keep_default_na,
-                  displayed_only=displayed_only)
+                  displayed_only=displayed_only, session=session)
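Similarly, read_html now threads session through _parse into the flavor parsers, which hand it to _read and on to _urlopen for the HTTP request; note that the lxml _build_doc path above still calls _urlopen without the session, so in this patch it is only honored by parsers that fetch via _read. A minimal sketch, assuming session is a requests.Session and using placeholder URL and cookie values:

import pandas as pd
import requests

# Hypothetical example: fetch tables from a page that needs a login cookie.
session = requests.Session()
session.cookies.set("sessionid", "placeholder-cookie-value")

# 'session' travels read_html -> _parse -> parser(...) -> _read -> _urlopen.
tables = pd.read_html("https://example.com/members/stats",
                      match="Quarterly", session=session)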

pandas/io/json/json.py (+2 -1)

@@ -224,7 +224,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
 def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
               convert_axes=True, convert_dates=True, keep_default_dates=True,
               numpy=False, precise_float=False, date_unit=None, encoding=None,
-              lines=False, chunksize=None, compression='infer'):
+              lines=False, chunksize=None, compression='infer', session=None):
     """
     Convert a JSON string to pandas object

@@ -406,6 +406,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
     compression = _infer_compression(path_or_buf, compression)
     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
         path_or_buf, encoding=encoding, compression=compression,
+        session=session,
     )

     json_reader = JsonReader(
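Finally, read_json simply forwards the new session keyword to get_filepath_or_buffer, which performs the URL fetch before the JsonReader is constructed. A minimal sketch, assuming the patched get_filepath_or_buffer accepts a requests.Session; the endpoint and token are placeholders:

import pandas as pd
import requests

# Hypothetical example: read JSON from an API that expects a bearer token.
session = requests.Session()
session.headers["Authorization"] = "Bearer <token>"   # placeholder token

df = pd.read_json("https://example.com/api/records.json",
                  orient="records", session=session)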
