Skip to content

Commit 065415d

Browse files
committed
Prevent adding new attributes to the accessors .str, .dt and .cat
This commit is mostly for the benefit of users who misspell things on the accessors. Assigning to `Series.str`, `Series.dt`, or `Series.cat` was not failing although you couldn't get the value back: ``` In[10]: a = pandas.Series(pandas.Categorical(list("abc"))) In[11]: a.cat.labels = [1,2] In[12]: a.cat.labels AttributeError: 'CategoricalAccessor' object has no attribute 'labels' ``` Now we fail early: ``` In[10]: a = pandas.Series(pandas.Categorical(list("abc"))) In[11]: a.cat.labels = [1,2] AttributeError: You cannot add any new attribute 'labels' ``` Refactor/add a StringAccessorMixin to break an import cycle.
1 parent 6c4d2c7 commit 065415d

File tree

11 files changed

+118
-45
lines changed

11 files changed

+118
-45
lines changed

doc/source/whatsnew/v0.17.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ Bug Fixes
7676
- Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`)
7777
- Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
7878
- Bug in using ``DataFrame.ix`` with a multi-index indexer(:issue:`11372`)
79+
- Prevent adding new attributes to the accessors ``.str``, ``.dt`` and ``.cat``. Retrieving such
80+
a value was not possible, so error out on setting it. (:issue:`10673`)
7981

8082

8183
- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)

pandas/core/base.py

+25-36
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import pandas.core.nanops as nanops
88
import pandas.lib as lib
99
from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg
10-
from pandas.core.strings import StringMethods
1110
from pandas.core.common import AbstractMethodError
1211

1312
_shared_docs = dict()
@@ -111,6 +110,31 @@ def _reset_cache(self, key=None):
111110
else:
112111
self._cache.pop(key, None)
113112

113+
class NoNewAttributesMixin(object):
114+
"""Mixin which prevents adding new attributes.
115+
116+
Prevents additional attributes via xxx.attribute = "something" after a call to
117+
`self._freeze()`. Mainly used to prevent the user from using wrong attributes
118+
on an accessor (`Series.cat/.str/.dt`).
119+
120+
If you really want to add a new attribute at a later time, you need to use
121+
`object.__setattr__(self, key, value)`.
122+
"""
123+
124+
def _freeze(self):
125+
"""Prevents setting additional attributes"""
126+
object.__setattr__(self, "__frozen", True)
127+
128+
129+
# prevent adding any attribute via s.xxx.new_attribute = ...
130+
def __setattr__(self, key, value):
131+
# _cache is used by a decorator
132+
# dict lookup instead of getattr, as getattr fails for getters which raise an error
133+
if getattr(self, "__frozen", False) and not (key in type(self).__dict__ or key == "_cache"):
134+
raise AttributeError( "You cannot add any new attribute '{key}'".format(key=key))
135+
object.__setattr__(self, key, value)
136+
137+
114138
class PandasDelegate(PandasObject):
115139
""" an abstract base class for delegating methods/properties """
116140

@@ -517,41 +541,6 @@ def searchsorted(self, key, side='left'):
517541
#### needs tests/doc-string
518542
return self.values.searchsorted(key, side=side)
519543

520-
# string methods
521-
def _make_str_accessor(self):
522-
from pandas.core.series import Series
523-
from pandas.core.index import Index
524-
if isinstance(self, Series) and not com.is_object_dtype(self.dtype):
525-
# this really should exclude all series with any non-string values,
526-
# but that isn't practical for performance reasons until we have a
527-
# str dtype (GH 9343)
528-
raise AttributeError("Can only use .str accessor with string "
529-
"values, which use np.object_ dtype in "
530-
"pandas")
531-
elif isinstance(self, Index):
532-
# see scc/inferrence.pyx which can contain string values
533-
allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
534-
if self.inferred_type not in allowed_types:
535-
message = ("Can only use .str accessor with string values "
536-
"(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
537-
raise AttributeError(message)
538-
if self.nlevels > 1:
539-
message = "Can only use .str accessor with Index, not MultiIndex"
540-
raise AttributeError(message)
541-
return StringMethods(self)
542-
543-
str = AccessorProperty(StringMethods, _make_str_accessor)
544-
545-
def _dir_additions(self):
546-
return set()
547-
548-
def _dir_deletions(self):
549-
try:
550-
getattr(self, 'str')
551-
except AttributeError:
552-
return set(['str'])
553-
return set()
554-
555544
_shared_docs['drop_duplicates'] = (
556545
"""Return %(klass)s with duplicate values removed
557546

pandas/core/categorical.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas.compat import u
99

1010
from pandas.core.algorithms import factorize
11-
from pandas.core.base import PandasObject, PandasDelegate
11+
from pandas.core.base import PandasObject, PandasDelegate, NoNewAttributesMixin
1212
import pandas.core.common as com
1313
from pandas.core.missing import interpolate_2d
1414
from pandas.util.decorators import cache_readonly, deprecate_kwarg
@@ -1717,7 +1717,7 @@ def repeat(self, repeats):
17171717

17181718
##### The Series.cat accessor #####
17191719

1720-
class CategoricalAccessor(PandasDelegate):
1720+
class CategoricalAccessor(PandasDelegate, NoNewAttributesMixin):
17211721
"""
17221722
Accessor object for categorical properties of the Series values.
17231723
@@ -1742,6 +1742,7 @@ class CategoricalAccessor(PandasDelegate):
17421742
def __init__(self, values, index):
17431743
self.categorical = values
17441744
self.index = index
1745+
self._freeze()
17451746

17461747
def _delegate_property_get(self, name):
17471748
return getattr(self.categorical, name)

pandas/core/index.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
_values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype,
2626
_ensure_object, _ensure_int64, is_bool_indexer,
2727
is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype)
28+
from pandas.core.strings import StringAccessorMixin
2829
from pandas.core.config import get_option
2930
from pandas.io.common import PerformanceWarning
3031

@@ -64,7 +65,7 @@ def _new_Index(cls, d):
6465
and breaks __new__ """
6566
return cls.__new__(cls, **d)
6667

67-
class Index(IndexOpsMixin, PandasObject):
68+
class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
6869

6970
"""
7071
Immutable ndarray implementing an ordered, sliceable set. The basic object

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from pandas.core import generic, base
3333
from pandas.core.internals import SingleBlockManager
3434
from pandas.core.categorical import Categorical, CategoricalAccessor
35+
import pandas.core.strings as strings
3536
from pandas.tseries.common import (maybe_to_datetimelike,
3637
CombinedDatetimelikeProperties)
3738
from pandas.tseries.index import DatetimeIndex
@@ -85,7 +86,7 @@ def wrapper(self):
8586
# Series class
8687

8788

88-
class Series(base.IndexOpsMixin, generic.NDFrame):
89+
class Series(base.IndexOpsMixin, strings.StringAccessorMixin, generic.NDFrame,):
8990

9091
"""
9192
One-dimensional ndarray with axis labels (including time series).

pandas/core/strings.py

+43-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import numpy as np
22

33
from pandas.compat import zip
4-
from pandas.core.common import isnull, _values_from_object, is_bool_dtype, is_list_like
4+
from pandas.core.common import (isnull, _values_from_object, is_bool_dtype, is_list_like,
5+
is_categorical_dtype, is_object_dtype)
56
import pandas.compat as compat
7+
from pandas.core.base import AccessorProperty, NoNewAttributesMixin
68
from pandas.util.decorators import Appender, deprecate_kwarg
79
import re
810
import pandas.lib as lib
@@ -1044,7 +1046,7 @@ def do_copy(target):
10441046
return do_copy
10451047

10461048

1047-
class StringMethods(object):
1049+
class StringMethods(NoNewAttributesMixin):
10481050

10491051
"""
10501052
Vectorized string functions for Series and Index. NAs stay NA unless
@@ -1059,6 +1061,7 @@ class StringMethods(object):
10591061

10601062
def __init__(self, series):
10611063
self.series = series
1064+
self._freeze()
10621065

10631066
def __getitem__(self, key):
10641067
if isinstance(key, slice):
@@ -1542,3 +1545,41 @@ def rindex(self, sub, start=0, end=None):
15421545
isdecimal = _noarg_wrapper(lambda x: compat.u_safe(x).isdecimal(),
15431546
docstring=_shared_docs['ismethods'] %
15441547
_shared_docs['isdecimal'])
1548+
1549+
class StringAccessorMixin(object):
1550+
""" Mixin to add a `.str` accessor to the class."""
1551+
1552+
# string methods
1553+
def _make_str_accessor(self):
1554+
from pandas.core.series import Series
1555+
from pandas.core.index import Index
1556+
if isinstance(self, Series) and not is_object_dtype(self.dtype):
1557+
# this really should exclude all series with any non-string values,
1558+
# but that isn't practical for performance reasons until we have a
1559+
# str dtype (GH 9343)
1560+
raise AttributeError("Can only use .str accessor with string "
1561+
"values, which use np.object_ dtype in "
1562+
"pandas")
1563+
elif isinstance(self, Index):
1564+
# see src/inference.pyx which can contain string values
1565+
allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
1566+
if self.inferred_type not in allowed_types:
1567+
message = ("Can only use .str accessor with string values "
1568+
"(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
1569+
raise AttributeError(message)
1570+
if self.nlevels > 1:
1571+
message = "Can only use .str accessor with Index, not MultiIndex"
1572+
raise AttributeError(message)
1573+
return StringMethods(self)
1574+
1575+
str = AccessorProperty(StringMethods, _make_str_accessor)
1576+
1577+
def _dir_additions(self):
1578+
return set()
1579+
1580+
def _dir_deletions(self):
1581+
try:
1582+
getattr(self, 'str')
1583+
except AttributeError:
1584+
return set(['str'])
1585+
return set()

pandas/tests/test_base.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pandas.compat as compat
77
import pandas as pd
88
from pandas.compat import u, StringIO
9-
from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate
9+
from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate, NoNewAttributesMixin
1010
import pandas.core.common as com
1111
from pandas.tseries.base import DatetimeIndexOpsMixin
1212
from pandas.util.testing import assertRaisesRegexp, assertIsInstance
@@ -825,6 +825,25 @@ def test_lookup_nan(self):
825825
self.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs)))
826826

827827

828+
class TestNoNewAttributesMixin(tm.TestCase):
829+
830+
def test_mixin(self):
831+
class T(NoNewAttributesMixin):
832+
pass
833+
834+
t = T()
835+
self.assertFalse(hasattr(t, "__frozen"))
836+
t.a = "test"
837+
self.assertEqual(t.a, "test")
838+
t._freeze()
839+
#self.assertTrue("__frozen" not in dir(t))
840+
self.assertIs(getattr(t, "__frozen"), True)
841+
def f():
842+
t.b = "test"
843+
self.assertRaises(AttributeError, f)
844+
self.assertFalse(hasattr(t, "b"))
845+
846+
828847
if __name__ == '__main__':
829848
import nose
830849

pandas/tests/test_categorical.py

+6
Original file line numberDiff line numberDiff line change
@@ -3625,6 +3625,12 @@ def test_cat_accessor_api(self):
36253625
invalid.cat
36263626
self.assertFalse(hasattr(invalid, 'cat'))
36273627

3628+
def test_cat_accessor_no_new_attributes(self):
3629+
# https://github.com/pydata/pandas/issues/10673
3630+
c = Series(list('aabbcde')).astype('category')
3631+
with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"):
3632+
c.cat.xlabel = "a"
3633+
36283634
def test_pickle_v0_14_1(self):
36293635

36303636
# we have the name warning

pandas/tests/test_series.py

+6
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,12 @@ def f():
247247
s.dt.hour[0] = 5
248248
self.assertRaises(com.SettingWithCopyError, f)
249249

250+
def test_dt_accessor_no_new_attributes(self):
251+
# https://github.com/pydata/pandas/issues/10673
252+
s = Series(date_range('20130101',periods=5,freq='D'))
253+
with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"):
254+
s.dt.xlabel = "a"
255+
250256
def test_strftime(self):
251257
# GH 10086
252258
s = Series(date_range('20130101', periods=5))

pandas/tests/test_strings.py

+6
Original file line numberDiff line numberDiff line change
@@ -2034,6 +2034,12 @@ def test_index_str_accessor_visibility(self):
20342034
with self.assertRaisesRegexp(AttributeError, message):
20352035
idx.str
20362036

2037+
def test_str_accessor_no_new_attributes(self):
2038+
# https://github.com/pydata/pandas/issues/10673
2039+
s = Series(list('aabbcde'))
2040+
with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"):
2041+
s.str.xlabel = "a"
2042+
20372043
def test_method_on_bytes(self):
20382044
lhs = Series(np.array(list('abc'), 'S1').astype(object))
20392045
rhs = Series(np.array(list('def'), 'S1').astype(object))

pandas/tseries/common.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
## datetimelike delegation ##
22

33
import numpy as np
4-
from pandas.core.base import PandasDelegate
4+
from pandas.core.base import PandasDelegate, NoNewAttributesMixin
55
from pandas.core import common as com
66
from pandas.tseries.index import DatetimeIndex
77
from pandas.tseries.period import PeriodIndex
@@ -59,12 +59,13 @@ def maybe_to_datetimelike(data, copy=False):
5959

6060
raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
6161

62-
class Properties(PandasDelegate):
62+
class Properties(PandasDelegate, NoNewAttributesMixin):
6363

6464
def __init__(self, values, index, name):
6565
self.values = values
6666
self.index = index
6767
self.name = name
68+
self._freeze()
6869

6970
def _delegate_property_get(self, name):
7071
from pandas import Series

0 commit comments

Comments
 (0)