From ad6a20267248afcbcd4d4e6c71aa959148ef5507 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Apr 2018 14:50:32 -0500 Subject: [PATCH 1/7] PERF: Faster Series.__getattribute__ Closes #19764 --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 6 ++++++ pandas/core/indexes/interval.py | 1 + pandas/core/indexes/numeric.py | 1 + pandas/core/indexes/period.py | 1 + pandas/tests/indexes/datetimelike.py | 4 ++++ pandas/tests/indexes/test_base.py | 4 ++++ pandas/tests/indexes/test_category.py | 4 ++++ pandas/tests/indexes/test_multi.py | 4 ++++ pandas/tests/indexes/test_numeric.py | 4 ++++ pandas/tests/indexes/test_range.py | 4 ++++ 12 files changed, 35 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ffa4f1068f84d..7082850488326 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -958,6 +958,7 @@ Performance Improvements - Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`) - Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`) - Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) +- Improved performance of ``Series.__getattribute__`` when the Series has certain index types. This manifiested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) - Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) .. 
_whatsnew_0230.docs: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 86342b6996abf..f78b2cb996f42 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4375,7 +4375,7 @@ def __getattr__(self, name): name in self._accessors): return object.__getattribute__(self, name) else: - if name in self._info_axis: + if self._info_axis._is_dotable and name in self._info_axis: return self[name] return object.__getattribute__(self, name) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2ceec1592d49b..c87ca9c3c8d04 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -244,6 +244,12 @@ class Index(IndexOpsMixin, PandasObject): _engine_type = libindex.ObjectEngine _accessors = set(['str']) + # Whether items can be selected from NDFrame. + # Some indexes (DatetimeIndex, Int64Index) cannot contain + # valid Python identifiers. Setting _is_dotable = False is an + # optimization. + # https://github.com/pandas-dev/pandas/issues/19764 + _is_dotable = True str = CachedAccessor("str", StringMethods) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 54800d0d76d2e..82123062cd57a 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -207,6 +207,7 @@ class IntervalIndex(IntervalMixin, Index): _typ = 'intervalindex' _comparables = ['name'] _attributes = ['name', 'closed'] + _is_dotable = False # can't contain Python identifiers # we would like our indexing holder to defer to us _defer_to_indexing = True diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 1fe0c8fa289e6..4267bde342932 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -31,6 +31,7 @@ class NumericIndex(Index): """ _is_numeric_dtype = True + _is_dotable = False # Can't contain Python identifiers def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False): diff --git a/pandas/core/indexes/period.py 
b/pandas/core/indexes/period.py index 4a224d4e6ee7f..a8ab01c2cb2b0 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -204,6 +204,7 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ _typ = 'periodindex' _attributes = ['name', 'freq'] + _is_dotable = False # Can't contain Python identifiers # define my properties & methods for delegation _other_ops = [] diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 7d01a2a70145d..20a03ccaff696 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -8,6 +8,10 @@ class DatetimeLike(Base): + def test_is_dotable(self): + idx = self.create_index() + assert idx._is_dotable is False + def test_shift_identity(self): idx = self.create_index() diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7cd880b51661d..bda58e9935cc4 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -66,6 +66,10 @@ def generate_index_types(self, skip_index_keys=[]): if key not in skip_index_keys: yield key, idx + def test_is_dotable(self): + idx = self.create_index() + assert idx._is_dotable is True + def test_new_axis(self): new_index = self.dateIndex[None, :] assert new_index.ndim == 2 diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index e9fddfde90348..7d72a121d34e0 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -33,6 +33,10 @@ def create_index(self, categories=None, ordered=False): return CategoricalIndex( list('aabbca'), categories=categories, ordered=ordered) + def test_is_dotable(self): + ci = self.create_index(categories=list('abcd')) + assert ci._is_dotable is True + def test_construction(self): ci = self.create_index(categories=list('abcd')) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 
da11ac645ae07..184aa1f576fbd 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -48,6 +48,10 @@ def setup_method(self, method): def create_index(self): return self.index + def test_is_dotable(self): + idx = self.create_index() + assert idx._is_dotable is True + def test_boolean_context_compat2(self): # boolean context compat diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index bafb6ae2e45f4..f09bc1550dca8 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -64,6 +64,10 @@ def test_index_rdiv_timedelta(self, scalar_td, index): class Numeric(Base): + def test_is_dotable(self): + idx = self.create_index() + assert idx._is_dotable is False + def test_numeric_compat(self): pass # override Base method diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 8990834ebe91a..a7ec91a6afe13 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -44,6 +44,10 @@ def check_binop(self, ops, scalars, idxs): expected = op(Int64Index(idx), scalar) tm.assert_index_equal(result, expected) + def test_is_dotable(self): + idx = self.create_index() + assert idx._is_dotable is False + def test_binops(self): ops = [operator.add, operator.sub, operator.mul, operator.floordiv, operator.truediv] From 55072a078bac11ba311ee8f94dd04db384a96f3a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Apr 2018 15:14:03 -0500 Subject: [PATCH 2/7] Renamed --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/tests/indexes/datetimelike.py | 4 ++-- pandas/tests/indexes/test_base.py | 4 ++-- pandas/tests/indexes/test_category.py | 4 ++-- pandas/tests/indexes/test_multi.py | 4 ++-- pandas/tests/indexes/test_numeric.py | 4 ++-- 
pandas/tests/indexes/test_range.py | 4 ++-- 12 files changed, 19 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 7082850488326..c2fdd9bac46e6 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -958,7 +958,7 @@ Performance Improvements - Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`) - Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`) - Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) -- Improved performance of ``Series.__getattribute__`` when the Series has certain index types. This manifiested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) +- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) - Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) .. _whatsnew_0230.docs: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f78b2cb996f42..588fba69ae497 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4375,7 +4375,7 @@ def __getattr__(self, name): name in self._accessors): return object.__getattribute__(self, name) else: - if self._info_axis._is_dotable and name in self._info_axis: + if self._info_axis.__can_hold_identifiers and name in self._info_axis: return self[name] return object.__getattribute__(self, name) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c87ca9c3c8d04..4f35e265f2abd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -246,10 +246,10 @@ class Index(IndexOpsMixin, PandasObject): _accessors = set(['str']) # Whether items can be selected from NDFrame. 
# Some indexes (DatetimeIndex, Int64Index) cannot contain - # valid Python identifiers. Setting _is_dotable = False is an + # valid Python identifiers. Setting __can_hold_identifiers = False is an # optimization. # https://github.com/pandas-dev/pandas/issues/19764 - _is_dotable = True + __can_hold_identifiers = True str = CachedAccessor("str", StringMethods) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 82123062cd57a..be9fca9b33c30 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -207,7 +207,7 @@ class IntervalIndex(IntervalMixin, Index): _typ = 'intervalindex' _comparables = ['name'] _attributes = ['name', 'closed'] - _is_dotable = False # can't contain Python identifiers + __can_hold_identifiers = False # can't contain Python identifiers # we would like our indexing holder to defer to us _defer_to_indexing = True diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 4267bde342932..f10c40adf8223 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -31,7 +31,7 @@ class NumericIndex(Index): """ _is_numeric_dtype = True - _is_dotable = False # Can't contain Python identifiers + __can_hold_identifiers = False # Can't contain Python identifiers def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a8ab01c2cb2b0..867ac136c2bbc 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -204,7 +204,7 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ _typ = 'periodindex' _attributes = ['name', 'freq'] - _is_dotable = False # Can't contain Python identifiers + __can_hold_identifiers = False # Can't contain Python identifiers # define my properties & methods for delegation _other_ops = [] diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 
20a03ccaff696..ad1ff6380503c 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -8,9 +8,9 @@ class DatetimeLike(Base): - def test_is_dotable(self): + def test__can_hold_identifiers(self): idx = self.create_index() - assert idx._is_dotable is False + assert idx.__can_hold_identifiers is False def test_shift_identity(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index bda58e9935cc4..303b13f1796db 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -66,9 +66,9 @@ def generate_index_types(self, skip_index_keys=[]): if key not in skip_index_keys: yield key, idx - def test_is_dotable(self): + def test__can_hold_identifiers(self): idx = self.create_index() - assert idx._is_dotable is True + assert idx.__can_hold_identifiers is True def test_new_axis(self): new_index = self.dateIndex[None, :] diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 7d72a121d34e0..324c8d63d45ec 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -33,9 +33,9 @@ def create_index(self, categories=None, ordered=False): return CategoricalIndex( list('aabbca'), categories=categories, ordered=ordered) - def test_is_dotable(self): + def test__can_hold_identifiers(self): ci = self.create_index(categories=list('abcd')) - assert ci._is_dotable is True + assert ci.__can_hold_identifiers is True def test_construction(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 184aa1f576fbd..a2b4dcc7aba95 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -48,9 +48,9 @@ def setup_method(self, method): def create_index(self): return self.index - def test_is_dotable(self): + def test__can_hold_identifiers(self): idx = self.create_index() - assert idx._is_dotable is True + assert idx.__can_hold_identifiers is True def 
test_boolean_context_compat2(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index f09bc1550dca8..dc687e623893f 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -64,9 +64,9 @@ def test_index_rdiv_timedelta(self, scalar_td, index): class Numeric(Base): - def test_is_dotable(self): + def test__can_hold_identifiers(self): idx = self.create_index() - assert idx._is_dotable is False + assert idx.__can_hold_identifiers is False def test_numeric_compat(self): pass # override Base method diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index a7ec91a6afe13..75722bfde70df 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -44,9 +44,9 @@ def check_binop(self, ops, scalars, idxs): expected = op(Int64Index(idx), scalar) tm.assert_index_equal(result, expected) - def test_is_dotable(self): + def test__can_hold_identifiers(self): idx = self.create_index() - assert idx._is_dotable is False + assert idx.__can_hold_identifiers is False def test_binops(self): ops = [operator.add, operator.sub, operator.mul, operator.floordiv, From ad871ce05b0d36f77582bb10fe62c100711f327e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Apr 2018 15:16:51 -0500 Subject: [PATCH 3/7] Linting --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 588fba69ae497..4b7225dd9a60b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4375,7 +4375,8 @@ def __getattr__(self, name): name in self._accessors): return object.__getattribute__(self, name) else: - if self._info_axis.__can_hold_identifiers and name in self._info_axis: + if (self._info_axis.__can_hold_identifiers and + name in self._info_axis): return self[name] return object.__getattribute__(self, name) From 4b7334c577f39aaecbb0285df4d32a1b7129c428 Mon Sep 17 00:00:00 
2001 From: Tom Augspurger Date: Thu, 26 Apr 2018 16:40:05 -0500 Subject: [PATCH 4/7] Fixed find-replace fail --- pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/tests/indexes/datetimelike.py | 4 ++-- pandas/tests/indexes/test_base.py | 4 ++-- pandas/tests/indexes/test_category.py | 4 ++-- pandas/tests/indexes/test_multi.py | 4 ++-- pandas/tests/indexes/test_numeric.py | 4 ++-- pandas/tests/indexes/test_range.py | 4 ++-- 11 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4b7225dd9a60b..e7de5b502750c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4375,7 +4375,7 @@ def __getattr__(self, name): name in self._accessors): return object.__getattribute__(self, name) else: - if (self._info_axis.__can_hold_identifiers and + if (self._info_axis._can_hold_identifiers and name in self._info_axis): return self[name] return object.__getattribute__(self, name) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4f35e265f2abd..b596698d844fa 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -246,10 +246,10 @@ class Index(IndexOpsMixin, PandasObject): _accessors = set(['str']) # Whether items can be selected from NDFrame. # Some indexes (DatetimeIndex, Int64Index) cannot contain - # valid Python identifiers. Setting __can_hold_identifiers = False is an + # valid Python identifiers. Setting _can_hold_identifiers = False is an # optimization. 
# https://github.com/pandas-dev/pandas/issues/19764 - __can_hold_identifiers = True + _can_hold_identifiers = True str = CachedAccessor("str", StringMethods) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index be9fca9b33c30..515f15e7c691a 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -207,7 +207,7 @@ class IntervalIndex(IntervalMixin, Index): _typ = 'intervalindex' _comparables = ['name'] _attributes = ['name', 'closed'] - __can_hold_identifiers = False # can't contain Python identifiers + _can_hold_identifiers = False # can't contain Python identifiers # we would like our indexing holder to defer to us _defer_to_indexing = True diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index f10c40adf8223..c50bc09b45904 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -31,7 +31,7 @@ class NumericIndex(Index): """ _is_numeric_dtype = True - __can_hold_identifiers = False # Can't contain Python identifiers + _can_hold_identifiers = False # Can't contain Python identifiers def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 867ac136c2bbc..5558616c93998 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -204,7 +204,7 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ _typ = 'periodindex' _attributes = ['name', 'freq'] - __can_hold_identifiers = False # Can't contain Python identifiers + _can_hold_identifiers = False # Can't contain Python identifiers # define my properties & methods for delegation _other_ops = [] diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index ad1ff6380503c..7b16877abe91f 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -8,9 +8,9 @@ class DatetimeLike(Base): - def 
test__can_hold_identifiers(self): + def test_can_hold_identifiers(self): idx = self.create_index() - assert idx.__can_hold_identifiers is False + assert idx._can_hold_identifiers is False def test_shift_identity(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 303b13f1796db..bc61a87048da0 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -66,9 +66,9 @@ def generate_index_types(self, skip_index_keys=[]): if key not in skip_index_keys: yield key, idx - def test__can_hold_identifiers(self): + def test_can_hold_identifiers(self): idx = self.create_index() - assert idx.__can_hold_identifiers is True + assert idx._can_hold_identifiers is True def test_new_axis(self): new_index = self.dateIndex[None, :] diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 324c8d63d45ec..401d9e5490f76 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -33,9 +33,9 @@ def create_index(self, categories=None, ordered=False): return CategoricalIndex( list('aabbca'), categories=categories, ordered=ordered) - def test__can_hold_identifiers(self): + def test_can_hold_identifiers(self): ci = self.create_index(categories=list('abcd')) - assert ci.__can_hold_identifiers is True + assert ci._can_hold_identifiers is True def test_construction(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index a2b4dcc7aba95..fde2ef47087fa 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -48,9 +48,9 @@ def setup_method(self, method): def create_index(self): return self.index - def test__can_hold_identifiers(self): + def test_can_hold_identifiers(self): idx = self.create_index() - assert idx.__can_hold_identifiers is True + assert idx._can_hold_identifiers is True def test_boolean_context_compat2(self): diff --git a/pandas/tests/indexes/test_numeric.py 
b/pandas/tests/indexes/test_numeric.py index dc687e623893f..d94c1f2889894 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -64,9 +64,9 @@ def test_index_rdiv_timedelta(self, scalar_td, index): class Numeric(Base): - def test__can_hold_identifiers(self): + def test_can_hold_identifiers(self): idx = self.create_index() - assert idx.__can_hold_identifiers is False + assert idx._can_hold_identifiers is False def test_numeric_compat(self): pass # override Base method diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 75722bfde70df..732e4947f65c2 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -44,9 +44,9 @@ def check_binop(self, ops, scalars, idxs): expected = op(Int64Index(idx), scalar) tm.assert_index_equal(result, expected) - def test__can_hold_identifiers(self): + def test_can_hold_identifiers(self): idx = self.create_index() - assert idx.__can_hold_identifiers is False + assert idx._can_hold_identifiers is False def test_binops(self): ops = [operator.add, operator.sub, operator.mul, operator.floordiv, From 857364f4112749a8f7e3131dc58b05e73d377e67 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 28 Apr 2018 11:46:18 -0500 Subject: [PATCH 5/7] asv --- asv_bench/benchmarks/series_methods.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 478aba278029c..3f6522c3403d9 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -121,3 +121,16 @@ def setup(self): def time_dir_strings(self): dir(self.s) + + +class SeriesGetattr(object): + # https://github.com/pandas-dev/pandas/issues/19764 + goal_time = 0.2 + + def setup(self): + self.s = Series(1, + index=date_range("2012-01-01", freq='s', + periods=int(1e6))) + + def time_series_datetimeindex_repr(self): + getattr(self.s, 'a', None) From 
e0710f304c53b0b2f027dc2cdec37a20084bed16 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 30 Apr 2018 06:03:35 -0500 Subject: [PATCH 6/7] Move to instance properties --- pandas/core/indexes/base.py | 23 +++++++++++++++++------ pandas/core/indexes/interval.py | 7 ++++++- pandas/core/indexes/numeric.py | 7 ++++++- pandas/core/indexes/period.py | 1 - 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b596698d844fa..3381755f753b1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -244,12 +244,6 @@ class Index(IndexOpsMixin, PandasObject): _engine_type = libindex.ObjectEngine _accessors = set(['str']) - # Whether items can be selected from NDFrame. - # Some indexes (DatetimeIndex, Int64Index) cannot contain - # valid Python identifiers. Setting _can_hold_identifiers = False is an - # optimization. - # https://github.com/pandas-dev/pandas/issues/19764 - _can_hold_identifiers = True str = CachedAccessor("str", StringMethods) @@ -2089,6 +2083,23 @@ def __getitem__(self, key): else: return result + @property + def _can_hold_identifiers(self): + """ + Whether the Index class *can* hold Python identifiers. + + This is useful for short-circuting lookups in NDFrame.__getattr__. + Some index-classes can't hold identifiers (NumericIndex, + DatetimeIndex), so there's no reason to search the index when a user + does `Series.foo`. + + Note that we don't care about `foo` here. This just a property + of the index class itself, nothing to do with an instance. 
+ + https://github.com/pandas-dev/pandas/issues/19764 + """ + return True + def append(self, other): """ Append a collection of Index options together diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 515f15e7c691a..cbc8ebe5ce2b2 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -207,7 +207,6 @@ class IntervalIndex(IntervalMixin, Index): _typ = 'intervalindex' _comparables = ['name'] _attributes = ['name', 'closed'] - _can_hold_identifiers = False # can't contain Python identifiers # we would like our indexing holder to defer to us _defer_to_indexing = True @@ -1304,6 +1303,12 @@ def __getitem__(self, value): return self._shallow_copy(left, right) + @property + def _can_hold_identifiers(self): + # perf: Intervals aren't valid Python identifiers. + # https://github.com/pandas-dev/pandas/issues/19764 + return False + # __repr__ associated methods are based on MultiIndex def _format_with_header(self, header, **kwargs): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index c50bc09b45904..dbb982318e4b0 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -31,7 +31,6 @@ class NumericIndex(Index): """ _is_numeric_dtype = True - _can_hold_identifiers = False # Can't contain Python identifiers def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False): @@ -115,6 +114,12 @@ def is_all_dates(self): """ return False + @property + def _can_hold_identifiers(self): + # perf: Numeric elements are not valid identifiers. + # https://github.com/pandas-dev/pandas/issues/19764 + return False + _num_index_shared_docs['class_descr'] = """ Immutable ndarray implementing an ordered, sliceable set. 
The basic object diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 5558616c93998..4a224d4e6ee7f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -204,7 +204,6 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ _typ = 'periodindex' _attributes = ['name', 'freq'] - _can_hold_identifiers = False # Can't contain Python identifiers # define my properties & methods for delegation _other_ops = [] From 1b598d0b99b7804f8c0c8cef21393bd7959764f4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 30 Apr 2018 19:10:03 -0500 Subject: [PATCH 7/7] function --- pandas/core/generic.py | 3 +-- pandas/core/indexes/base.py | 20 ++++++++------------ pandas/core/indexes/interval.py | 6 ------ pandas/core/indexes/numeric.py | 6 ------ pandas/tests/indexes/datetimelike.py | 3 ++- pandas/tests/indexes/test_base.py | 3 ++- pandas/tests/indexes/test_category.py | 5 +++-- pandas/tests/indexes/test_multi.py | 3 ++- pandas/tests/indexes/test_numeric.py | 3 ++- pandas/tests/indexes/test_range.py | 3 ++- 10 files changed, 22 insertions(+), 33 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e7de5b502750c..75216f12e5739 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4375,8 +4375,7 @@ def __getattr__(self, name): name in self._accessors): return object.__getattribute__(self, name) else: - if (self._info_axis._can_hold_identifiers and - name in self._info_axis): + if self._info_axis._can_hold_identifiers_and_holds_name(name): return self[name] return object.__getattribute__(self, name) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3381755f753b1..f208687a0cf1b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2083,22 +2083,18 @@ def __getitem__(self, key): else: return result - @property - def _can_hold_identifiers(self): + def _can_hold_identifiers_and_holds_name(self, name): """ - Whether the 
Index class *can* hold Python identifiers. - - This is useful for short-circuting lookups in NDFrame.__getattr__. - Some index-classes can't hold identifiers (NumericIndex, - DatetimeIndex), so there's no reason to search the index when a user - does `Series.foo`. - - Note that we don't care about `foo` here. This just a property - of the index class itself, nothing to do with an instance. + Faster check for ``name in self`` when we know `name` is a Python + identifier (e.g. in NDFrame.__getattr__, which hits this to support + . key lookup). For indexes that can't hold identifiers (everything + but object & categorical) we just return False. https://github.com/pandas-dev/pandas/issues/19764 """ - return True + if self.is_object() or self.is_categorical(): + return name in self + return False def append(self, other): """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index cbc8ebe5ce2b2..54800d0d76d2e 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1303,12 +1303,6 @@ def __getitem__(self, value): return self._shallow_copy(left, right) - @property - def _can_hold_identifiers(self): - # perf: Intervals aren't valid Python identifiers. - # https://github.com/pandas-dev/pandas/issues/19764 - return False - # __repr__ associated methods are based on MultiIndex def _format_with_header(self, header, **kwargs): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index dbb982318e4b0..1fe0c8fa289e6 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -114,12 +114,6 @@ def is_all_dates(self): """ return False - @property - def _can_hold_identifiers(self): - # perf: Numeric elements are not valid identifiers. - # https://github.com/pandas-dev/pandas/issues/19764 - return False - _num_index_shared_docs['class_descr'] = """ Immutable ndarray implementing an ordered, sliceable set. 
The basic object diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 7b16877abe91f..e32e18ea0ec4a 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -10,7 +10,8 @@ class DatetimeLike(Base): def test_can_hold_identifiers(self): idx = self.create_index() - assert idx._can_hold_identifiers is False + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False def test_shift_identity(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index bc61a87048da0..95ddf1f6cd8bd 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -68,7 +68,8 @@ def generate_index_types(self, skip_index_keys=[]): def test_can_hold_identifiers(self): idx = self.create_index() - assert idx._can_hold_identifiers is True + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True def test_new_axis(self): new_index = self.dateIndex[None, :] diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 401d9e5490f76..6a1a1a5bdba4f 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -34,8 +34,9 @@ def create_index(self, categories=None, ordered=False): list('aabbca'), categories=categories, ordered=ordered) def test_can_hold_identifiers(self): - ci = self.create_index(categories=list('abcd')) - assert ci._can_hold_identifiers is True + idx = self.create_index(categories=list('abcd')) + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True def test_construction(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index fde2ef47087fa..37f70090c179f 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -50,7 +50,8 @@ def create_index(self): def test_can_hold_identifiers(self): idx = self.create_index() - assert 
idx._can_hold_identifiers is True + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True def test_boolean_context_compat2(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index d94c1f2889894..49322d9b7abd6 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -66,7 +66,8 @@ class Numeric(Base): def test_can_hold_identifiers(self): idx = self.create_index() - assert idx._can_hold_identifiers is False + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False def test_numeric_compat(self): pass # override Base method diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 732e4947f65c2..38f4b341116b8 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -46,7 +46,8 @@ def check_binop(self, ops, scalars, idxs): def test_can_hold_identifiers(self): idx = self.create_index() - assert idx._can_hold_identifiers is False + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False def test_binops(self): ops = [operator.add, operator.sub, operator.mul, operator.floordiv,