diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index cbd094ec4ef49..1a08d48f3948b 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -103,7 +103,7 @@ Indexing
I/O
^^^
--
+- :func:`read_html` now rewinds seekable IO objects after a parse failure, before attempting to parse with the next parser. If a parser raises an error and the object is not seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`)
-
-
diff --git a/pandas/io/html.py b/pandas/io/html.py
index 6f98683a1bff1..e1636d8007345 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -742,6 +742,18 @@ def _parse(flavor, io, match, attrs, encoding, **kwargs):
try:
tables = p.parse_tables()
except Exception as caught:
+ # if `io` is an io-like object, check if it's seekable
+ # and try to rewind it before trying the next parser
+ if hasattr(io, 'seekable') and io.seekable():
+ io.seek(0)
+ elif hasattr(io, 'seekable') and not io.seekable():
+ # if we couldn't rewind it, let the user know
+ raise ValueError('The flavor {} failed to parse your input. '
+ 'Since you passed a non-rewindable file '
+ 'object, we can\'t rewind it to try '
+ 'another parser. Try read_html() with a '
+ 'different flavor.'.format(flav))
+
retained = caught
else:
break
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 8dfae2733ef20..399cac905967e 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -967,3 +967,53 @@ def test_importcheck_thread_safety():
while helper_thread1.is_alive() or helper_thread2.is_alive():
pass
assert None is helper_thread1.err is helper_thread2.err
+
+
+def test_parse_failure_unseekable():
+ # Issue #17975
+ _skip_if_no('lxml')
+
+ class UnseekableStringIO(StringIO):
+ def seekable(self):
+ return False
+
+ good = UnseekableStringIO('''
+
''')
+ bad = UnseekableStringIO('''
+ ''')
+
+ assert read_html(good)
+ assert read_html(bad, flavor='bs4')
+
+ bad.seek(0)
+
+ with pytest.raises(ValueError,
+ match='passed a non-rewindable file object'):
+ read_html(bad)
+
+
+def test_parse_failure_rewinds():
+ # Issue #17975
+ _skip_if_no('lxml')
+
+ class MockFile(object):
+ def __init__(self, data):
+ self.data = data
+ self.at_end = False
+
+ def read(self, size=None):
+ data = '' if self.at_end else self.data
+ self.at_end = True
+ return data
+
+ def seek(self, offset):
+ self.at_end = False
+
+ def seekable(self):
+ return True
+
+ good = MockFile('')
+ bad = MockFile('')
+
+ assert read_html(good)
+ assert read_html(bad)