Skip to content

Commit f6f06aa

Browse files
committed
Merge pull request #5343 from jreback/assign_eval
ENH: allow in-line expression assignment with df.eval
2 parents f41c102 + 640d4c9 commit f6f06aa

File tree

7 files changed

+134
-17
lines changed

7 files changed

+134
-17
lines changed

doc/source/enhancingperf.rst

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -441,18 +441,27 @@ The ``DataFrame.eval`` method (Experimental)
441441
In addition to the top level :func:`~pandas.eval` function you can also
442442
evaluate an expression in the "context" of a ``DataFrame``.
443443

444-
445444
.. ipython:: python
446445
447446
df = DataFrame(randn(5, 2), columns=['a', 'b'])
448447
df.eval('a + b')
449448
450-
451449
Any expression that is a valid :func:`~pandas.eval` expression is also a valid
452450
``DataFrame.eval`` expression, with the added benefit that *you don't have to
453451
prefix the name of the* ``DataFrame`` *to the column you're interested in
454452
evaluating*.
455453

454+
In addition, you can perform in-line assignment of columns within an expression.
455+
This can allow for *formulaic evaluation*. Only a single assignment is permitted.
456+
The assignment target can be a new column name or an existing column name; it must be string-like (a single name).
457+
458+
.. ipython:: python
459+
460+
df = DataFrame(dict(a = range(5), b = range(5,10)))
461+
df.eval('c=a+b')
462+
df.eval('d=a+b+c')
463+
df.eval('a=1')
464+
df
456465
457466
Local Variables
458467
~~~~~~~~~~~~~~~

doc/source/release.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ Experimental Features
7272
``numexpr`` behind the scenes. This results in large speedups for complicated
7373
expressions involving large DataFrames/Series.
7474
- :class:`~pandas.DataFrame` has a new :meth:`~pandas.DataFrame.eval` that
75-
evaluates an expression in the context of the ``DataFrame``.
75+
evaluates an expression in the context of the ``DataFrame``; allows
76+
inline expression assignment.
7677
- A :meth:`~pandas.DataFrame.query` method has been added that allows
7778
you to select elements of a ``DataFrame`` using a natural query syntax nearly
7879
identical to Python syntax.

pandas/computation/eval.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ def _convert_expression(expr):
113113

114114

115115
def eval(expr, parser='pandas', engine='numexpr', truediv=True,
116-
local_dict=None, global_dict=None, resolvers=None, level=2):
116+
local_dict=None, global_dict=None, resolvers=None, level=2,
117+
target=None):
117118
"""Evaluate a Python expression as a string using various backends.
118119
119120
The following arithmetic operations are supported: ``+``, ``-``, ``*``,
@@ -169,6 +170,8 @@ def eval(expr, parser='pandas', engine='numexpr', truediv=True,
169170
level : int, optional
170171
The number of prior stack frames to traverse and add to the current
171172
scope. Most users will **not** need to change this parameter.
173+
target : a target object for assignment, optional, default is None
174+
Essentially a passed-in resolver; when ``expr`` is an assignment, the result is stored into this object.
172175
173176
Returns
174177
-------
@@ -194,7 +197,7 @@ def eval(expr, parser='pandas', engine='numexpr', truediv=True,
194197

195198
# get our (possibly passed-in) scope
196199
env = _ensure_scope(global_dict=global_dict, local_dict=local_dict,
197-
resolvers=resolvers, level=level)
200+
resolvers=resolvers, level=level, target=target)
198201

199202
parsed_expr = Expr(expr, engine=engine, parser=parser, env=env,
200203
truediv=truediv)
@@ -203,4 +206,10 @@ def eval(expr, parser='pandas', engine='numexpr', truediv=True,
203206
eng = _engines[engine]
204207
eng_inst = eng(parsed_expr)
205208
ret = eng_inst.evaluate()
209+
210+
# assign if needed
211+
if env.target is not None and parsed_expr.assigner is not None:
212+
env.target[parsed_expr.assigner] = ret
213+
return None
214+
206215
return ret

pandas/computation/expr.py

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@
2121
_arith_ops_syms, _unary_ops_syms, is_term)
2222
from pandas.computation.ops import _reductions, _mathops, _LOCAL_TAG
2323
from pandas.computation.ops import Op, BinOp, UnaryOp, Term, Constant, Div
24+
from pandas.computation.ops import UndefinedVariableError
2425

2526

2627
def _ensure_scope(level=2, global_dict=None, local_dict=None, resolvers=None,
27-
**kwargs):
28+
target=None, **kwargs):
2829
"""Ensure that we are grabbing the correct scope."""
2930
return Scope(gbls=global_dict, lcls=local_dict, level=level,
30-
resolvers=resolvers)
31+
resolvers=resolvers, target=target)
3132

3233

3334
def _check_disjoint_resolver_names(resolver_keys, local_keys, global_keys):
@@ -88,20 +89,23 @@ class Scope(StringMixin):
8889
resolver_keys : frozenset
8990
"""
9091
__slots__ = ('globals', 'locals', 'resolvers', '_global_resolvers',
91-
'resolver_keys', '_resolver', 'level', 'ntemps')
92+
'resolver_keys', '_resolver', 'level', 'ntemps', 'target')
9293

93-
def __init__(self, gbls=None, lcls=None, level=1, resolvers=None):
94+
def __init__(self, gbls=None, lcls=None, level=1, resolvers=None, target=None):
9495
self.level = level
9596
self.resolvers = tuple(resolvers or [])
9697
self.globals = dict()
9798
self.locals = dict()
99+
self.target = target
98100
self.ntemps = 1 # number of temporary variables in this scope
99101

100102
if isinstance(lcls, Scope):
101103
ld, lcls = lcls, dict()
102104
self.locals.update(ld.locals.copy())
103105
self.globals.update(ld.globals.copy())
104106
self.resolvers += ld.resolvers
107+
if ld.target is not None:
108+
self.target = ld.target
105109
self.update(ld.level)
106110

107111
frame = sys._getframe(level)
@@ -130,9 +134,10 @@ def __init__(self, gbls=None, lcls=None, level=1, resolvers=None):
130134

131135
def __unicode__(self):
132136
return com.pprint_thing("locals: {0}\nglobals: {0}\nresolvers: "
133-
"{0}".format(list(self.locals.keys()),
134-
list(self.globals.keys()),
135-
list(self.resolver_keys)))
137+
"{0}\ntarget: {0}".format(list(self.locals.keys()),
138+
list(self.globals.keys()),
139+
list(self.resolver_keys),
140+
self.target))
136141

137142
def __getitem__(self, key):
138143
return self.resolve(key, globally=False)
@@ -417,6 +422,7 @@ def __init__(self, env, engine, parser, preparser=_preparse):
417422
self.engine = engine
418423
self.parser = parser
419424
self.preparser = preparser
425+
self.assigner = None
420426

421427
def visit(self, node, **kwargs):
422428
if isinstance(node, string_types):
@@ -575,9 +581,33 @@ def visit_Slice(self, node, **kwargs):
575581
return slice(lower, upper, step)
576582

577583
def visit_Assign(self, node, **kwargs):
578-
cmpr = ast.Compare(ops=[ast.Eq()], left=node.targets[0],
579-
comparators=[node.value])
580-
return self.visit(cmpr)
584+
"""
585+
support a single assignment node, like
586+
587+
c = a + b
588+
589+
set the assigner at the top level, must be a Name node which
590+
might or might not exist in the resolvers
591+
592+
"""
593+
594+
if len(node.targets) != 1:
595+
raise SyntaxError('can only assign a single expression')
596+
if not isinstance(node.targets[0], ast.Name):
597+
raise SyntaxError('left hand side of an assignment must be a single name')
598+
if self.env.target is None:
599+
raise ValueError('cannot assign without a target object')
600+
601+
try:
602+
assigner = self.visit(node.targets[0], **kwargs)
603+
except (UndefinedVariableError):
604+
assigner = node.targets[0].id
605+
606+
self.assigner = getattr(assigner,'name',assigner)
607+
if self.assigner is None:
608+
raise SyntaxError('left hand side of an assignment must be a single resolvable name')
609+
610+
return self.visit(node.value, **kwargs)
581611

582612
def visit_Attribute(self, node, **kwargs):
583613
attr = node.attr
@@ -669,7 +699,7 @@ def visitor(x, y):
669699
return reduce(visitor, operands)
670700

671701

672-
_python_not_supported = frozenset(['Assign', 'Dict', 'Call', 'BoolOp',
702+
_python_not_supported = frozenset(['Dict', 'Call', 'BoolOp',
673703
'In', 'NotIn'])
674704
_numexpr_supported_calls = frozenset(_reductions + _mathops)
675705

@@ -712,6 +742,10 @@ def __init__(self, expr, engine='numexpr', parser='pandas', env=None,
712742
self.terms = self.parse()
713743
self.truediv = truediv
714744

745+
@property
746+
def assigner(self):
747+
return getattr(self._visitor,'assigner',None)
748+
715749
def __call__(self):
716750
self.env.locals['truediv'] = self.truediv
717751
return self.terms(self.env)

pandas/computation/pytables.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,11 @@ def visit_USub(self, node, **kwargs):
389389
def visit_Index(self, node, **kwargs):
390390
return self.visit(node.value).value
391391

392+
def visit_Assign(self, node, **kwargs):
393+
cmpr = ast.Compare(ops=[ast.Eq()], left=node.targets[0],
394+
comparators=[node.value])
395+
return self.visit(cmpr)
396+
392397
def visit_Subscript(self, node, **kwargs):
393398
value = self.visit(node.value)
394399
slobj = self.visit(node.slice)

pandas/computation/tests/test_eval.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from pandas.computation.ops import (_binary_ops_dict, _unary_ops_dict,
2525
_special_case_arith_ops_syms,
2626
_arith_ops_syms, _bool_ops_syms)
27+
from pandas.computation.common import NameResolutionError
2728
import pandas.computation.expr as expr
2829
import pandas.util.testing as tm
2930
from pandas.util.testing import (assert_frame_equal, randbool,
@@ -1151,9 +1152,65 @@ def test_assignment_fails(self):
11511152
df = DataFrame(np.random.randn(5, 3), columns=list('abc'))
11521153
df2 = DataFrame(np.random.randn(5, 3))
11531154
expr1 = 'df = df2'
1154-
self.assertRaises(NotImplementedError, self.eval, expr1,
1155+
self.assertRaises(ValueError, self.eval, expr1,
11551156
local_dict={'df': df, 'df2': df2})
11561157

1158+
def test_assignment_column(self):
1159+
skip_if_no_ne('numexpr')
1160+
df = DataFrame(np.random.randn(5, 2), columns=list('ab'))
1161+
orig_df = df.copy()
1162+
1163+
# multiple assignees
1164+
self.assertRaises(SyntaxError, df.eval, 'd c = a + b')
1165+
1166+
# invalid assignees
1167+
self.assertRaises(SyntaxError, df.eval, 'd,c = a + b')
1168+
self.assertRaises(SyntaxError, df.eval, 'Timestamp("20131001") = a + b')
1169+
1170+
# single assignment - existing variable
1171+
expected = orig_df.copy()
1172+
expected['a'] = expected['a'] + expected['b']
1173+
df = orig_df.copy()
1174+
df.eval('a = a + b')
1175+
assert_frame_equal(df,expected)
1176+
1177+
# single assignment - new variable
1178+
expected = orig_df.copy()
1179+
expected['c'] = expected['a'] + expected['b']
1180+
df = orig_df.copy()
1181+
df.eval('c = a + b')
1182+
assert_frame_equal(df,expected)
1183+
1184+
# with a local name overlap
1185+
def f():
1186+
df = orig_df.copy()
1187+
a = 1
1188+
df.eval('a = 1 + b')
1189+
return df
1190+
1191+
df = f()
1192+
expected = orig_df.copy()
1193+
expected['a'] = 1 + expected['b']
1194+
assert_frame_equal(df,expected)
1195+
1196+
df = orig_df.copy()
1197+
def f():
1198+
a = 1
1199+
df.eval('a=a+b')
1200+
self.assertRaises(NameResolutionError, f)
1201+
1202+
# multiple assignment
1203+
df = orig_df.copy()
1204+
df.eval('c = a + b')
1205+
self.assertRaises(SyntaxError, df.eval, 'c = a = b')
1206+
1207+
# explicit targets
1208+
df = orig_df.copy()
1209+
self.eval('c = df.a + df.b', local_dict={'df' : df}, target=df)
1210+
expected = orig_df.copy()
1211+
expected['c'] = expected['a'] + expected['b']
1212+
assert_frame_equal(df,expected)
1213+
11571214
def test_basic_period_index_boolean_expression(self):
11581215
df = mkdf(2, 2, data_gen_f=f, c_idx_type='p', r_idx_type='i')
11591216

pandas/core/frame.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,12 +1797,14 @@ def eval(self, expr, **kwargs):
17971797
>>> from pandas import DataFrame
17981798
>>> df = DataFrame(randn(10, 2), columns=list('ab'))
17991799
>>> df.eval('a + b')
1800+
>>> df.eval('c=a + b')
18001801
"""
18011802
resolvers = kwargs.pop('resolvers', None)
18021803
if resolvers is None:
18031804
index_resolvers = self._get_resolvers()
18041805
resolvers = [self, index_resolvers]
18051806
kwargs['local_dict'] = _ensure_scope(resolvers=resolvers, **kwargs)
1807+
kwargs['target'] = self
18061808
return _eval(expr, **kwargs)
18071809

18081810
def _slice(self, slobj, axis=0, raise_on_error=False, typ=None):

0 commit comments

Comments
 (0)