Skip to content

Commit e602f7b

Browse files
authored
PERF: load plotting entrypoint only when necessary (#41503)
1 parent 963561b commit e602f7b

File tree

3 files changed

+81
-48
lines changed

3 files changed

+81
-48
lines changed

asv_bench/benchmarks/plotting.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
import importlib
2+
import sys
3+
14
import matplotlib
25
import numpy as np
6+
import pkg_resources
37

48
from pandas import (
59
DataFrame,
@@ -13,6 +17,8 @@
1317
except ImportError:
1418
from pandas.tools.plotting import andrews_curves
1519

20+
from pandas.plotting._core import _get_plot_backend
21+
1622
matplotlib.use("Agg")
1723

1824

@@ -99,4 +105,28 @@ def time_plot_andrews_curves(self):
99105
andrews_curves(self.df, "Name")
100106

101107

108+
class BackendLoading:
    """
    Benchmark resolving a plotting backend by name via ``_get_plot_backend``.

    ``setup`` fabricates an in-memory module called ``my_backend`` and adds
    several extra entries to pandas' ``pandas_plotting_backends`` entry map,
    so that the timed call has more than one registered entry to deal with.
    """

    # asv settings: a single sample of a single call, with no warm-up.
    # NOTE(review): presumably chosen so that the first (uncached) lookup is
    # what gets timed, since ``_get_plot_backend`` caches loaded backends.
    repeat = 1
    number = 1
    warmup_time = 0

    def setup(self):
        """Create the fake ``my_backend`` module and register entry points."""
        # Import the submodules explicitly: ``import importlib`` alone does
        # not guarantee that ``importlib.machinery`` and ``importlib.util``
        # are bound as attributes of the package.
        import importlib.machinery
        import importlib.util

        dist = pkg_resources.get_distribution("pandas")

        # Build an empty module object and give it the one attribute a
        # plotting backend is checked for: a top-level ``plot`` callable.
        spec = importlib.machinery.ModuleSpec("my_backend", None)
        mod = importlib.util.module_from_spec(spec)
        mod.plot = lambda *args, **kwargs: 1

        backends = pkg_resources.get_entry_map("pandas")
        my_entrypoint = pkg_resources.EntryPoint(
            "pandas_plotting_backend", mod.__name__, dist=dist
        )
        backends["pandas_plotting_backends"][mod.__name__] = my_entrypoint
        # Pad the group with ten more keys that all share the same entry point.
        for i in range(10):
            backends["pandas_plotting_backends"][str(i)] = my_entrypoint

        # Make ``my_backend`` importable by name without a file on disk.
        sys.modules["my_backend"] = mod

    def time_get_plot_backend(self):
        """Time one lookup of the fake backend by its name."""
        _get_plot_backend("my_backend")
130+
131+
102132
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,7 @@ Performance improvements
815815
- Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`)
816816
- Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`)
817817
- Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`)
818+
- Performance improvement in :meth:`Series.plot` and :meth:`DataFrame.plot` with entry point lazy loading (:issue:`41492`)
818819

819820
.. ---------------------------------------------------------------------------
820821

pandas/plotting/_core.py

Lines changed: 50 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from __future__ import annotations
22

33
import importlib
4+
import types
45
from typing import (
56
TYPE_CHECKING,
67
Sequence,
78
)
89

10+
import pkg_resources
11+
912
from pandas._config import get_option
1013

1114
from pandas._typing import IndexLabel
@@ -865,7 +868,7 @@ def _get_call_args(backend_name, data, args, kwargs):
865868
if args and isinstance(data, ABCSeries):
866869
positional_args = str(args)[1:-1]
867870
keyword_args = ", ".join(
868-
f"{name}={repr(value)}" for (name, default), value in zip(arg_def, args)
871+
f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)
869872
)
870873
msg = (
871874
"`Series.plot()` should not be called with positional "
@@ -876,7 +879,7 @@ def _get_call_args(backend_name, data, args, kwargs):
876879
)
877880
raise TypeError(msg)
878881

879-
pos_args = {name: value for value, (name, _) in zip(args, arg_def)}
882+
pos_args = {name: value for (name, _), value in zip(arg_def, args)}
880883
if backend_name == "pandas.plotting._matplotlib":
881884
kwargs = dict(arg_def, **pos_args, **kwargs)
882885
else:
@@ -1724,91 +1727,90 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
17241727
return self(kind="hexbin", x=x, y=y, C=C, **kwargs)
17251728

17261729

1727-
_backends = {}
1730+
_backends: dict[str, types.ModuleType] = {}
17281731

17291732

1730-
def _find_backend(backend: str):
1733+
def _load_backend(backend: str) -> types.ModuleType:
    """
    Load a pandas plotting backend.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either an entrypoint item registered
        with pkg_resources, "matplotlib", or a module name.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ImportError
        If ``backend`` is "matplotlib" and ``pandas.plotting._matplotlib``
        cannot be imported.
    ValueError
        If no entry point or importable module called ``backend`` exists, or
        if the loaded object has no top-level ``plot`` attribute. When the
        module-name fallback failed to import, that ``ImportError`` is
        attached as ``__cause__``.
    """
    if backend == "matplotlib":
        # Because matplotlib is an optional dependency and first-party backend,
        # we need to attempt an import here to raise an ImportError if needed.
        try:
            return importlib.import_module("pandas.plotting._matplotlib")
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None

    module: types.ModuleType | None = None
    import_error: ImportError | None = None

    # Only the entry point whose name matches is loaded; the others are
    # skipped without being imported.
    for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"):
        if entry_point.name == backend:
            module = entry_point.load()
            break
    else:
        # No matching entry point: fall back to the unregistered,
        # module name approach.
        try:
            module = importlib.import_module(backend)
        except ImportError as err:
            # Keep the failure so it can be chained onto the ValueError
            # below instead of being silently discarded.
            import_error = err

    # Validate that the interface is implemented when the option is set,
    # rather than at plot time.
    if module is not None and hasattr(module, "plot"):
        return module

    # ``from None`` (when no import was attempted or it succeeded) leaves the
    # error without a cause, exactly as before.
    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've "
        f"installed the package providing the '{backend}' entrypoint, or that "
        "the package has a top-level `.plot` method."
    ) from import_error
17791788

17801789

1781-
def _get_plot_backend(backend=None):
1790+
def _get_plot_backend(backend: str | None = None):
    """
    Return the plotting backend module to use.

    The backend is identified by a string: the ``backend`` argument when it
    is given (and truthy), otherwise the value of the ``plotting.backend``
    option. The module returned is the one that provides the top-level
    ``.plot`` method used by the rest of this file, for example
    ``pandas.plotting._matplotlib``.

    Loading is deferred until a backend is actually requested, as matplotlib
    is a soft dependency and pandas can be used without it being installed.

    Parameters
    ----------
    backend : str or None, default None
        Name of the backend to use; falls back to the ``plotting.backend``
        option when not provided.

    Returns
    -------
    module
        The backend module, taken from ``_backends`` if it was loaded before.

    Notes
    -----
    Modifies `_backends` with imported backend as a side effect.
    """
    name = backend or get_option("plotting.backend")

    # Fast path: this backend has already been loaded once.
    try:
        return _backends[name]
    except KeyError:
        pass

    # First request for this name: load it and remember it for next time.
    loaded = _load_backend(name)
    _backends[name] = loaded
    return loaded

0 commit comments

Comments
 (0)