Skip to content

Commit 7829f05

Browse files
authored
DEP: Remove xlrd as being the default reader for xlsx (#39796)
1 parent ebc9327 commit 7829f05

File tree

5 files changed

+61
-22
lines changed

5 files changed

+61
-22
lines changed

doc/source/user_guide/io.rst

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2853,14 +2853,12 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
28532853
The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading
28542854
old-style ``.xls`` files.
28552855

2856-
Before pandas 1.2.0, the default argument ``engine=None`` to :func:`~pandas.read_excel`
2856+
Before pandas 1.3.0, the default argument ``engine=None`` to :func:`~pandas.read_excel`
28572857
would result in using the ``xlrd`` engine in many cases, including new
2858-
Excel 2007+ (``.xlsx``) files.
2859-
If `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__ is installed,
2860-
many of these cases will now default to using the ``openpyxl`` engine.
2861-
See the :func:`read_excel` documentation for more details.
2858+
Excel 2007+ (``.xlsx``) files. pandas will now default to using the
2859+
`openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__ engine.
28622860

2863-
Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+
2861+
It is strongly encouraged to install ``openpyxl`` to read Excel 2007+
28642862
(``.xlsx``) files.
28652863
**Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.**
28662864
This is no longer supported; switch to using ``openpyxl`` instead.

doc/source/whatsnew/v1.3.0.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ including other versions of pandas.
88

99
{{ header }}
1010

11+
.. warning::
12+
13+
When reading new Excel 2007+ (``.xlsx``) files, the default argument
14+
``engine=None`` to :func:`~pandas.read_excel` will now result in using the
15+
`openpyxl <https://openpyxl.readthedocs.io/en/stable/>`_ engine in all cases
16+
when the option :attr:`io.excel.xlsx.reader` is set to ``"auto"``.
17+
Previously, some cases would use the
18+
`xlrd <https://xlrd.readthedocs.io/en/latest/>`_ engine instead. See
19+
:ref:`What's new 1.2.0 <whatsnew_120>` for background on this change.
20+
1121
.. ---------------------------------------------------------------------------
1222
1323
Enhancements

pandas/io/excel/_base.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,9 @@
158158
``pyxlsb`` will be used.
159159
160160
.. versionadded:: 1.3.0
161-
- Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
162-
then ``openpyxl`` will be used.
163-
- Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised.
164-
- Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. This
165-
case will raise a ``ValueError`` in a future version of pandas.
161+
- Otherwise ``openpyxl`` will be used.
162+
163+
.. versionchanged:: 1.3.0
166164
167165
converters : dict, default None
168166
Dict of functions for converting values in certain columns. Keys can
@@ -1026,7 +1024,7 @@ class ExcelFile:
10261024
Parameters
10271025
----------
10281026
path_or_buffer : str, path object (pathlib.Path or py._path.local.LocalPath),
1029-
a file-like object, xlrd workbook or openpypl workbook.
1027+
a file-like object, xlrd workbook or openpyxl workbook.
10301028
If a string or path object, expected to be a path to a
10311029
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
10321030
engine : str, default None
@@ -1140,9 +1138,7 @@ def __init__(
11401138
stacklevel = 2
11411139
warnings.warn(
11421140
f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
1143-
f"only the xls format is supported. As a result, the "
1144-
f"openpyxl engine will be used if it is installed and the "
1145-
f"engine argument is not specified. Install "
1141+
f"only the xls format is supported. Install "
11461142
f"openpyxl instead.",
11471143
FutureWarning,
11481144
stacklevel=stacklevel,

pandas/io/excel/_util.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,6 @@ def get_default_engine(ext, mode="reader"):
6868
_default_writers["xlsx"] = "xlsxwriter"
6969
return _default_writers[ext]
7070
else:
71-
if (
72-
import_optional_dependency("openpyxl", errors="ignore") is None
73-
and import_optional_dependency("xlrd", errors="ignore") is not None
74-
):
75-
# if no openpyxl but xlrd installed, return xlrd
76-
# the version is handled elsewhere
77-
_default_readers["xlsx"] = "xlrd"
7871
return _default_readers[ext]
7972

8073

pandas/tests/io/excel/test_readers.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,30 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch):
125125
monkeypatch.chdir(datapath("io", "data", "excel"))
126126
monkeypatch.setattr(pd, "read_excel", func)
127127

128+
def test_engine_used(self, read_ext, engine, monkeypatch):
129+
# GH 38884
130+
def parser(self, *args, **kwargs):
131+
return self.engine
132+
133+
monkeypatch.setattr(pd.ExcelFile, "parse", parser)
134+
135+
expected_defaults = {
136+
"xlsx": "openpyxl",
137+
"xlsm": "openpyxl",
138+
"xlsb": "pyxlsb",
139+
"xls": "xlrd",
140+
"ods": "odf",
141+
}
142+
143+
with open("test1" + read_ext, "rb") as f:
144+
result = pd.read_excel(f)
145+
146+
if engine is not None:
147+
expected = engine
148+
else:
149+
expected = expected_defaults[read_ext[1:]]
150+
assert result == expected
151+
128152
def test_usecols_int(self, read_ext, df_ref):
129153
df_ref = df_ref.reindex(columns=["A", "B", "C"])
130154

@@ -1172,6 +1196,24 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch):
11721196
monkeypatch.chdir(datapath("io", "data", "excel"))
11731197
monkeypatch.setattr(pd, "ExcelFile", func)
11741198

1199+
def test_engine_used(self, read_ext, engine, monkeypatch):
1200+
expected_defaults = {
1201+
"xlsx": "openpyxl",
1202+
"xlsm": "openpyxl",
1203+
"xlsb": "pyxlsb",
1204+
"xls": "xlrd",
1205+
"ods": "odf",
1206+
}
1207+
1208+
with pd.ExcelFile("test1" + read_ext) as excel:
1209+
result = excel.engine
1210+
1211+
if engine is not None:
1212+
expected = engine
1213+
else:
1214+
expected = expected_defaults[read_ext[1:]]
1215+
assert result == expected
1216+
11751217
def test_excel_passes_na(self, read_ext):
11761218
with pd.ExcelFile("test4" + read_ext) as excel:
11771219
parsed = pd.read_excel(

0 commit comments

Comments
 (0)