Skip to content

Commit 9cbc179

Browse files
committed
Merge pull request #11575 from JanSchulz/prevent_new_accessor_attributes
Prevent adding new attributes to the accessors .str, .dt and .cat
2 parents 28bfb14 + 065415d commit 9cbc179

File tree

11 files changed

+118
-45
lines changed

11 files changed

+118
-45
lines changed

doc/source/whatsnew/v0.17.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ Bug Fixes
101101
- Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
102102

103103
- Bug in using ``DataFrame.ix`` with a multi-index indexer (:issue:`11372`)
104+
- Prevent adding new attributes to the accessors ``.str``, ``.dt`` and ``.cat``. Retrieving such
105+
a value was not possible, so error out on setting it. (:issue:`10673`)
104106

105107

106108
- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)

pandas/core/base.py

+25-36
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import pandas.core.nanops as nanops
88
import pandas.lib as lib
99
from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg
10-
from pandas.core.strings import StringMethods
1110
from pandas.core.common import AbstractMethodError
1211

1312
_shared_docs = dict()
@@ -111,6 +110,31 @@ def _reset_cache(self, key=None):
111110
else:
112111
self._cache.pop(key, None)
113112

113+
class NoNewAttributesMixin(object):
    """Mixin which prevents adding new attributes.

    Prevents additional attributes via ``xxx.attribute = "something"`` after
    a call to ``self._freeze()``. Mainly used to prevent the user from using
    wrong attributes on an accessor (``Series.cat/.str/.dt``).

    Only *new* names are rejected. After freezing, a name may still be
    assigned when it is ``_cache``, when it is already stored on the
    instance, or when it is defined on the class or any of its base classes.

    If you really want to add a new attribute at a later time, you need to
    use ``object.__setattr__(self, key, value)``.
    """

    def _freeze(self):
        """Prevent setting additional attributes from now on."""
        # Stored through object.__setattr__ so the flag bypasses our own
        # __setattr__ check; the literal name "__frozen" is not mangled
        # because it is passed as a string.
        object.__setattr__(self, "__frozen", True)

    # prevent adding any attribute via s.xxx.new_attribute = ...
    def __setattr__(self, key, value):
        """Set ``key`` to ``value`` unless that would add a new attribute.

        Raises
        ------
        AttributeError
            If the instance is frozen and ``key`` is not a known attribute.
        """
        if getattr(self, "__frozen", False):
            # Use dict lookups rather than getattr(): a property whose getter
            # raises would otherwise look like a missing attribute.
            known = (
                key == "_cache"  # _cache is used by a decorator
                or key in self.__dict__  # already set on the instance
                # walk the MRO: type(self).__dict__ alone ignores base classes
                or any(key in vars(klass) for klass in type(self).__mro__)
            )
            if not known:
                raise AttributeError(
                    "You cannot add any new attribute '{key}'".format(key=key))
        object.__setattr__(self, key, value)
136+
137+
114138
class PandasDelegate(PandasObject):
115139
""" an abstract base class for delegating methods/properties """
116140

@@ -547,41 +571,6 @@ def searchsorted(self, key, side='left'):
547571
#### needs tests/doc-string
548572
return self.values.searchsorted(key, side=side)
549573

550-
# string methods
551-
def _make_str_accessor(self):
552-
from pandas.core.series import Series
553-
from pandas.core.index import Index
554-
if isinstance(self, Series) and not com.is_object_dtype(self.dtype):
555-
# this really should exclude all series with any non-string values,
556-
# but that isn't practical for performance reasons until we have a
557-
# str dtype (GH 9343)
558-
raise AttributeError("Can only use .str accessor with string "
559-
"values, which use np.object_ dtype in "
560-
"pandas")
561-
elif isinstance(self, Index):
562-
# see scc/inferrence.pyx which can contain string values
563-
allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
564-
if self.inferred_type not in allowed_types:
565-
message = ("Can only use .str accessor with string values "
566-
"(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
567-
raise AttributeError(message)
568-
if self.nlevels > 1:
569-
message = "Can only use .str accessor with Index, not MultiIndex"
570-
raise AttributeError(message)
571-
return StringMethods(self)
572-
573-
str = AccessorProperty(StringMethods, _make_str_accessor)
574-
575-
def _dir_additions(self):
576-
return set()
577-
578-
def _dir_deletions(self):
579-
try:
580-
getattr(self, 'str')
581-
except AttributeError:
582-
return set(['str'])
583-
return set()
584-
585574
_shared_docs['drop_duplicates'] = (
586575
"""Return %(klass)s with duplicate values removed
587576

pandas/core/categorical.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas.compat import u
99

1010
from pandas.core.algorithms import factorize
11-
from pandas.core.base import PandasObject, PandasDelegate
11+
from pandas.core.base import PandasObject, PandasDelegate, NoNewAttributesMixin
1212
import pandas.core.common as com
1313
from pandas.core.missing import interpolate_2d
1414
from pandas.util.decorators import cache_readonly, deprecate_kwarg
@@ -1743,7 +1743,7 @@ def repeat(self, repeats):
17431743

17441744
##### The Series.cat accessor #####
17451745

1746-
class CategoricalAccessor(PandasDelegate):
1746+
class CategoricalAccessor(PandasDelegate, NoNewAttributesMixin):
17471747
"""
17481748
Accessor object for categorical properties of the Series values.
17491749
@@ -1768,6 +1768,7 @@ class CategoricalAccessor(PandasDelegate):
17681768
def __init__(self, values, index):
17691769
self.categorical = values
17701770
self.index = index
1771+
self._freeze()
17711772

17721773
def _delegate_property_get(self, name):
17731774
return getattr(self.categorical, name)

pandas/core/index.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
_values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype,
2727
_ensure_object, _ensure_int64, is_bool_indexer,
2828
is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype)
29+
from pandas.core.strings import StringAccessorMixin
2930
from pandas.core.config import get_option
3031
from pandas.io.common import PerformanceWarning
3132

@@ -64,7 +65,7 @@ def _new_Index(cls, d):
6465
and breaks __new__ """
6566
return cls.__new__(cls, **d)
6667

67-
class Index(IndexOpsMixin, PandasObject):
68+
class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
6869

6970
"""
7071
Immutable ndarray implementing an ordered, sliceable set. The basic object

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from pandas.core import generic, base
3333
from pandas.core.internals import SingleBlockManager
3434
from pandas.core.categorical import Categorical, CategoricalAccessor
35+
import pandas.core.strings as strings
3536
from pandas.tseries.common import (maybe_to_datetimelike,
3637
CombinedDatetimelikeProperties)
3738
from pandas.tseries.index import DatetimeIndex
@@ -85,7 +86,7 @@ def wrapper(self):
8586
# Series class
8687

8788

88-
class Series(base.IndexOpsMixin, generic.NDFrame):
89+
class Series(base.IndexOpsMixin, strings.StringAccessorMixin, generic.NDFrame,):
8990

9091
"""
9192
One-dimensional ndarray with axis labels (including time series).

pandas/core/strings.py

+43-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import numpy as np
22

33
from pandas.compat import zip
4-
from pandas.core.common import isnull, _values_from_object, is_bool_dtype, is_list_like
4+
from pandas.core.common import (isnull, _values_from_object, is_bool_dtype, is_list_like,
5+
is_categorical_dtype, is_object_dtype)
56
import pandas.compat as compat
7+
from pandas.core.base import AccessorProperty, NoNewAttributesMixin
68
from pandas.util.decorators import Appender, deprecate_kwarg
79
import re
810
import pandas.lib as lib
@@ -1044,7 +1046,7 @@ def do_copy(target):
10441046
return do_copy
10451047

10461048

1047-
class StringMethods(object):
1049+
class StringMethods(NoNewAttributesMixin):
10481050

10491051
"""
10501052
Vectorized string functions for Series and Index. NAs stay NA unless
@@ -1059,6 +1061,7 @@ class StringMethods(object):
10591061

10601062
def __init__(self, series):
10611063
self.series = series
1064+
self._freeze()
10621065

10631066
def __getitem__(self, key):
10641067
if isinstance(key, slice):
@@ -1542,3 +1545,41 @@ def rindex(self, sub, start=0, end=None):
15421545
isdecimal = _noarg_wrapper(lambda x: compat.u_safe(x).isdecimal(),
15431546
docstring=_shared_docs['ismethods'] %
15441547
_shared_docs['isdecimal'])
1548+
1549+
class StringAccessorMixin(object):
    """Mixin that adds a ``.str`` accessor to the inheriting class."""

    # string methods
    def _make_str_accessor(self):
        """Validate ``self`` and return a ``StringMethods`` wrapping it.

        Raises ``AttributeError`` for a Series whose dtype is not object, for
        an Index whose ``inferred_type`` is not one of the accepted kinds,
        and for an Index with more than one level.
        """
        # imported inside the method rather than at module import time
        from pandas.core.series import Series
        from pandas.core.index import Index

        # Ideally every Series holding any non-string value would be
        # rejected, but that is not practical performance-wise until a
        # dedicated str dtype exists (GH 9343).
        non_object_series = (isinstance(self, Series) and
                             not is_object_dtype(self.dtype))
        if non_object_series:
            raise AttributeError("Can only use .str accessor with string "
                                 "values, which use np.object_ dtype in "
                                 "pandas")

        if isinstance(self, Index):
            # inferred kinds accepted as possibly containing string values
            accepted = ('string', 'unicode', 'mixed', 'mixed-integer')
            if self.inferred_type not in accepted:
                raise AttributeError(
                    "Can only use .str accessor with string values "
                    "(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
            if self.nlevels > 1:
                raise AttributeError(
                    "Can only use .str accessor with Index, not MultiIndex")

        return StringMethods(self)

    str = AccessorProperty(StringMethods, _make_str_accessor)

    def _dir_additions(self):
        """Return the extra names to advertise in ``dir()``: none."""
        return set()

    def _dir_deletions(self):
        """Return ``set(['str'])`` when accessing ``.str`` raises
        ``AttributeError``, otherwise an empty set."""
        hidden = set()
        try:
            self.str
        except AttributeError:
            hidden.add('str')
        return hidden

pandas/tests/test_base.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pandas.compat as compat
77
import pandas as pd
88
from pandas.compat import u, StringIO
9-
from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate
9+
from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate, NoNewAttributesMixin
1010
import pandas.core.common as com
1111
from pandas.tseries.base import DatetimeIndexOpsMixin
1212
from pandas.util.testing import assertRaisesRegexp, assertIsInstance
@@ -908,6 +908,25 @@ def test_lookup_nan(self):
908908
self.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs)))
909909

910910

911+
class TestNoNewAttributesMixin(tm.TestCase):
912+
913+
def test_mixin(self):
914+
class T(NoNewAttributesMixin):
915+
pass
916+
917+
t = T()
918+
self.assertFalse(hasattr(t, "__frozen"))
919+
t.a = "test"
920+
self.assertEqual(t.a, "test")
921+
t._freeze()
922+
#self.assertTrue("__frozen" not in dir(t))
923+
self.assertIs(getattr(t, "__frozen"), True)
924+
def f():
925+
t.b = "test"
926+
self.assertRaises(AttributeError, f)
927+
self.assertFalse(hasattr(t, "b"))
928+
929+
911930
if __name__ == '__main__':
912931
import nose
913932

pandas/tests/test_categorical.py

+6
Original file line numberDiff line numberDiff line change
@@ -3634,6 +3634,12 @@ def test_cat_accessor_api(self):
36343634
invalid.cat
36353635
self.assertFalse(hasattr(invalid, 'cat'))
36363636

3637+
def test_cat_accessor_no_new_attributes(self):
3638+
# https://github.com/pydata/pandas/issues/10673
3639+
c = Series(list('aabbcde')).astype('category')
3640+
with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"):
3641+
c.cat.xlabel = "a"
3642+
36373643
def test_pickle_v0_14_1(self):
36383644

36393645
# we have the name warning

pandas/tests/test_series.py

+6
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,12 @@ def f():
247247
s.dt.hour[0] = 5
248248
self.assertRaises(com.SettingWithCopyError, f)
249249

250+
def test_dt_accessor_no_new_attributes(self):
251+
# https://github.com/pydata/pandas/issues/10673
252+
s = Series(date_range('20130101',periods=5,freq='D'))
253+
with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"):
254+
s.dt.xlabel = "a"
255+
250256
def test_strftime(self):
251257
# GH 10086
252258
s = Series(date_range('20130101', periods=5))

pandas/tests/test_strings.py

+6
Original file line numberDiff line numberDiff line change
@@ -2034,6 +2034,12 @@ def test_index_str_accessor_visibility(self):
20342034
with self.assertRaisesRegexp(AttributeError, message):
20352035
idx.str
20362036

2037+
def test_str_accessor_no_new_attributes(self):
2038+
# https://github.com/pydata/pandas/issues/10673
2039+
s = Series(list('aabbcde'))
2040+
with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"):
2041+
s.str.xlabel = "a"
2042+
20372043
def test_method_on_bytes(self):
20382044
lhs = Series(np.array(list('abc'), 'S1').astype(object))
20392045
rhs = Series(np.array(list('def'), 'S1').astype(object))

pandas/tseries/common.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
## datetimelike delegation ##
22

33
import numpy as np
4-
from pandas.core.base import PandasDelegate
4+
from pandas.core.base import PandasDelegate, NoNewAttributesMixin
55
from pandas.core import common as com
66
from pandas.tseries.index import DatetimeIndex
77
from pandas.tseries.period import PeriodIndex
@@ -59,12 +59,13 @@ def maybe_to_datetimelike(data, copy=False):
5959

6060
raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
6161

62-
class Properties(PandasDelegate):
62+
class Properties(PandasDelegate, NoNewAttributesMixin):
6363

6464
def __init__(self, values, index, name):
6565
self.values = values
6666
self.index = index
6767
self.name = name
68+
self._freeze()
6869

6970
def _delegate_property_get(self, name):
7071
from pandas import Series

0 commit comments

Comments
 (0)