Skip to content

Commit b766ad9

Browse files
committed
API: let +, * and - pass thru but eval via Python
But give a warning suggesting a better way to do it
1 parent 9c92636 commit b766ad9

File tree

5 files changed

+85
-15
lines changed

5 files changed

+85
-15
lines changed

doc/source/release.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,8 @@ API Changes
156156
- ``to_excel`` now converts ``np.inf`` into a string representation,
157157
customizable by the ``inf_rep`` keyword argument (Excel has no native inf
158158
representation) (:issue:`6782`)
159-
- Arithmetic ops are now disallowed when passed two bool dtype Series or
160-
DataFrames (:issue:`6762`).
159+
- Arithmetic ops on bool dtype arrays/scalars now give a warning indicating
160+
that they are evaluated in Python space (:issue:`6762`, :issue:`7210`).
161161
- Added ``nunique`` and ``value_counts`` functions to ``Index`` for counting unique elements. (:issue:`6734`)
162162

163163
- ``DataFrame.plot`` and ``Series.plot`` now support a ``table`` keyword for plotting ``matplotlib.Table``. The ``table`` keyword can receive the following values.

doc/source/v0.14.0.txt

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,17 +186,18 @@ API changes
186186
- Added ``factorize`` functions to ``Index`` and ``Series`` to get indexer and unique values (:issue:`7090`)
187187
- ``describe`` on a DataFrame with a mix of Timestamp and string like objects returns a different Index (:issue:`7088`).
188188
Previously the index was unintentionally sorted.
189-
- arithmetic operations with **only** ``bool`` dtypes now raise an error
190-
(:issue:`7011`, :issue:`6762`, :issue:`7015`)
189+
- arithmetic operations with **only** ``bool`` dtypes warn for ``+``, ``-``,
190+
and ``*`` operations and raise for ``/``, ``//`` and ``**`` (:issue:`7011`, :issue:`6762`,
191+
:issue:`7015`, :issue:`7210`)
191192

192193
.. code-block:: python
193194

194195
x = pd.Series(np.random.rand(10) > 0.5)
195196
y = True
196-
x * y
197+
x + y # warning generated: should do x | y instead
198+
x / y # this raises because it doesn't make sense
197199

198-
# this now raises for arith ops like ``+``, ``*``, etc.
199-
NotImplementedError: operator '*' not implemented for bool dtypes
200+
NotImplementedError: operator '/' not implemented for bool dtypes
200201

201202

202203
.. _whatsnew_0140.display:

pandas/computation/expressions.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
77
"""
88

9+
import warnings
910
import numpy as np
1011
from pandas.core.common import _values_from_object
1112
from distutils.version import LooseVersion
@@ -170,11 +171,23 @@ def _has_bool_dtype(x):
170171
return isinstance(x, (bool, np.bool_))
171172

172173

173-
def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('+', '*', '-', '/',
174-
'//', '**'))):
175-
if op_str in not_allowed and _has_bool_dtype(a) and _has_bool_dtype(b):
176-
raise NotImplementedError("operator %r not implemented for bool "
177-
"dtypes" % op_str)
174+
def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('/', '//', '**')),
                      unsupported=None):
    """Decide how an arithmetic op on two bool operands should be handled.

    Parameters
    ----------
    op_str : string form of the operator being evaluated (e.g. ``'+'``)
    a, b : the two operands
    not_allowed : operators that raise when both operands are bool
    unsupported : mapping from an arithmetic operator to the logical operator
        suggested in the warning; defaults to ``+ -> |``, ``* -> &``,
        ``- -> ^``

    Returns
    -------
    ``False`` when both operands are bool and ``op_str`` is in ``unsupported``
    (a warning is issued first), ``True`` otherwise.

    Raises
    ------
    NotImplementedError
        when both operands are bool and ``op_str`` is in ``not_allowed``
    """
    if unsupported is None:
        unsupported = {'+': '|', '*': '&', '-': '^'}

    # Only the all-bool case is special; anything else passes straight through.
    if not (_has_bool_dtype(a) and _has_bool_dtype(b)):
        return True

    if op_str in unsupported:
        suggestion = unsupported[op_str]
        message = ("evaluating in Python space because the %r operator"
                   " is not supported by numexpr for the bool "
                   "dtype, use %r instead" % (op_str, suggestion))
        warnings.warn(message)
        return False

    if op_str in not_allowed:
        raise NotImplementedError("operator %r not implemented for bool "
                                  "dtypes" % op_str)

    return True
178191

179192

180193
def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True,
@@ -193,7 +206,7 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True,
193206
return the results
194207
use_numexpr : whether to try to use numexpr (default True)
195208
"""
196-
_bool_arith_check(op_str, a, b)
209+
use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
197210
if use_numexpr:
198211
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error,
199212
**eval_kwargs)

pandas/tests/test_expressions.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,8 @@ def testit():
343343
def test_bool_ops_raise_on_arithmetic(self):
344344
df = DataFrame({'a': np.random.rand(10) > 0.5,
345345
'b': np.random.rand(10) > 0.5})
346-
names = 'add', 'mul', 'sub', 'div', 'truediv', 'floordiv', 'pow'
347-
ops = '+', '*', '-', '/', '/', '//', '**'
346+
names = 'div', 'truediv', 'floordiv', 'pow'
347+
ops = '/', '/', '//', '**'
348348
msg = 'operator %r not implemented for bool dtypes'
349349
for op, name in zip(ops, names):
350350
if not compat.PY3 or name != 'div':
@@ -369,6 +369,49 @@ def test_bool_ops_raise_on_arithmetic(self):
369369
with tm.assertRaisesRegexp(TypeError, err_msg):
370370
f(df, True)
371371

372+
def test_bool_ops_warn_on_arithmetic(self):
    """``+``, ``*`` and ``-`` on bool data warn and match ``|``, ``&``, ``^``."""
    size = 10
    df = DataFrame({'a': np.random.rand(size) > 0.5,
                    'b': np.random.rand(size) > 0.5})

    # arithmetic operator symbol paired with its ``operator`` function name
    arith_ops = (('+', 'add'), ('*', 'mul'), ('-', 'sub'))
    # arithmetic symbol -> equivalent logical symbol -> ``operator`` name
    logical_symbol = {'+': '|', '*': '&', '-': '^'}
    logical_name = {'|': 'or_', '&': 'and_', '^': 'xor'}

    # (left operand, right operand, comparison helper for the result type)
    cases = (
        (df, df, tm.assert_frame_equal),
        (df.a, df.b, tm.assert_series_equal),
        (df.a, True, tm.assert_series_equal),
        (False, df.a, tm.assert_series_equal),
        (False, df, tm.assert_frame_equal),
        (df, True, tm.assert_frame_equal),
    )

    for symbol, arith_name in arith_ops:
        arith = getattr(operator, arith_name)
        logical = getattr(operator, logical_name[logical_symbol[symbol]])

        with tm.use_numexpr(True, min_elements=5):
            for left, right, check_equal in cases:
                with tm.assert_produces_warning():
                    result = arith(left, right)
                    expected = logical(left, right)
                check_equal(result, expected)
414+
372415

373416
if __name__ == '__main__':
374417
import nose

pandas/util/testing.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
raise_with_traceback, httplib
3636
)
3737

38+
from pandas.computation import expressions as expr
39+
3840
from pandas import bdate_range
3941
from pandas.tseries.index import DatetimeIndex
4042
from pandas.tseries.period import PeriodIndex
@@ -1576,3 +1578,14 @@ def __enter__(self):
15761578
def __exit__(self, exc_type, exc_value, traceback):
15771579

15781580
np.random.set_state(self.start_state)
1581+
1582+
1583+
@contextmanager
def use_numexpr(use, min_elements=expr._MIN_ELEMENTS):
    """Temporarily override the numexpr settings held by ``expr``.

    Parameters
    ----------
    use : value passed to ``expr.set_use_numexpr`` for the duration of the
        ``with`` block
    min_elements : value assigned to ``expr._MIN_ELEMENTS`` for the duration
        of the ``with`` block; the default is the value ``expr._MIN_ELEMENTS``
        had when this function was defined

    The previous ``expr._USE_NUMEXPR`` and ``expr._MIN_ELEMENTS`` values are
    restored on exit, including when the body of the ``with`` block raises.
    """
    olduse = expr._USE_NUMEXPR
    oldmin = expr._MIN_ELEMENTS
    expr.set_use_numexpr(use)
    expr._MIN_ELEMENTS = min_elements
    try:
        yield
    finally:
        # Restore even if the body raised (e.g. a failing assertion), so the
        # override cannot leak into whatever runs next.
        expr._MIN_ELEMENTS = oldmin
        expr.set_use_numexpr(olduse)

0 commit comments

Comments
 (0)