From 6794d8d038913de8e33293d021b92e2b13fc92f1 Mon Sep 17 00:00:00 2001 From: onesandzeroes Date: Sat, 15 Nov 2014 09:28:15 +1100 Subject: [PATCH 1/6] New eval keyword to disallow assignment --- pandas/computation/eval.py | 13 ++++++++++--- pandas/core/frame.py | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index e3096a85ca7d7..6ec09ea324e24 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -138,7 +138,7 @@ def _check_for_locals(expr, stack_level, parser): def eval(expr, parser='pandas', engine='numexpr', truediv=True, local_dict=None, global_dict=None, resolvers=(), level=0, - target=None): + target=None, assignment_allowed=True): """Evaluate a Python expression as a string using various backends. The following arithmetic operations are supported: ``+``, ``-``, ``*``, @@ -196,6 +196,9 @@ def eval(expr, parser='pandas', engine='numexpr', truediv=True, scope. Most users will **not** need to change this parameter. target : a target object for assignment, optional, default is None essentially this is a passed in resolver + assignment_allowed : bool + Whether the eval should be able to modify the input through + assigment. Returns ------- @@ -236,7 +239,11 @@ def eval(expr, parser='pandas', engine='numexpr', truediv=True, # assign if needed if env.target is not None and parsed_expr.assigner is not None: - env.target[parsed_expr.assigner] = ret - return None + if assignment_allowed: + env.target[parsed_expr.assigner] = ret + return None + else: + raise ValueError("Expression includes assignment statement: \n" + "\t not allowed from DataFrame.query") return ret diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bb192aeca5b6d..5d598e02947c6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1935,7 +1935,7 @@ def query(self, expr, **kwargs): >>> df[df.a > df.b] # same result as the previous expression """ kwargs['level'] = kwargs.pop('level', 0) + 1 - res = self.eval(expr, **kwargs) + res = self.eval(expr, assignment_allowed=False, **kwargs) try: return self.loc[res] From afe2d479d9d4269a84dbe621eb6826d48fb8c330 Mon Sep 17 00:00:00 2001 From: onesandzeroes Date: Sun, 16 Nov 2014 15:48:17 +1100 Subject: [PATCH 2/6] Add test --- pandas/tests/test_frame.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index cfa0ed1a11772..855e2300685a8 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -15225,6 +15225,16 @@ def test_query_builtin(self): result = df.query('sin > 5', engine=engine, parser=parser) tm.assert_frame_equal(expected, result) + def test_assignment_not_allowed(self): + df = DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c']}) + a_before = df['a'].copy() + self.assertRaisesRegexp( + ValueError, 'assignment statement', df.query, 'a=1', + engine=self.engine, parser=self.parser + ) + a_after = df['a'].copy() + assert_series_equal(a_before, a_after) + class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython): From 79e356d9c3dcceb38e9f9f4a581bbe600a36e5a2 Mon Sep 17 00:00:00 2001 From: onesandzeroes Date: Thu, 26 Mar 2015 21:25:54 +1100 Subject: [PATCH 3/6] Revert "New eval keyword to disallow assignment" This reverts commit 655fb5e5be3981b43ff146d68a0fa53e75d98dd1. --- pandas/computation/eval.py | 13 +++---------- pandas/core/frame.py | 2 +- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 6ec09ea324e24..e3096a85ca7d7 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -138,7 +138,7 @@ def _check_for_locals(expr, stack_level, parser): def eval(expr, parser='pandas', engine='numexpr', truediv=True, local_dict=None, global_dict=None, resolvers=(), level=0, - target=None, assignment_allowed=True): + target=None): """Evaluate a Python expression as a string using various backends. The following arithmetic operations are supported: ``+``, ``-``, ``*``, @@ -196,9 +196,6 @@ def eval(expr, parser='pandas', engine='numexpr', truediv=True, scope. Most users will **not** need to change this parameter. target : a target object for assignment, optional, default is None essentially this is a passed in resolver - assignment_allowed : bool - Whether the eval should be able to modify the input through - assigment. Returns ------- @@ -239,11 +236,7 @@ def eval(expr, parser='pandas', engine='numexpr', truediv=True, # assign if needed if env.target is not None and parsed_expr.assigner is not None: - if assignment_allowed: - env.target[parsed_expr.assigner] = ret - return None - else: - raise ValueError("Expression includes assignment statement: \n" - "\t not allowed from DataFrame.query") + env.target[parsed_expr.assigner] = ret + return None return ret diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5d598e02947c6..bb192aeca5b6d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1935,7 +1935,7 @@ def query(self, expr, **kwargs): >>> df[df.a > df.b] # same result as the previous expression """ kwargs['level'] = kwargs.pop('level', 0) + 1 - res = self.eval(expr, assignment_allowed=False, **kwargs) + res = self.eval(expr, **kwargs) try: return self.loc[res] From 3fe7951de74b1a21f0437f3ba02b431f56137989 Mon Sep 17 00:00:00 2001 From: onesandzeroes Date: Thu, 26 Mar 2015 21:52:42 +1100 Subject: [PATCH 4/6] Add new specialized query parser --- pandas/computation/expr.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index b6a1fcbec8339..73ccd11e966d9 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -589,6 +589,7 @@ def visitor(x, y): _python_not_supported = frozenset(['Dict', 'Call', 'BoolOp', 'In', 'NotIn']) _numexpr_supported_calls = frozenset(_reductions + _mathops) +_query_not_supported = frozenset(['Assign']) @disallow((_unsupported_nodes | _python_not_supported) - @@ -602,6 +603,17 @@ def __init__(self, env, engine, parser, super(PandasExprVisitor, self).__init__(env, engine, parser, preparser) +@disallow((_unsupported_nodes | _python_not_supported | _query_not_supported) - + (_boolop_nodes | frozenset(['BoolOp', 'Attribute', 'In', 'NotIn', + 'Tuple']))) +class PandasQueryExprVisitor(BaseExprVisitor): + + def __init__(self, env, engine, parser, + preparser=partial(_preparse, f=compose(_replace_locals, + _replace_booleans))): + super(PandasQueryExprVisitor, self).__init__(env, engine, parser, preparser) + + @disallow(_unsupported_nodes | _python_not_supported | frozenset(['Not'])) class PythonExprVisitor(BaseExprVisitor): @@ -659,4 +671,5 @@ def names(self): return frozenset(term.name for term in com.flatten(self.terms)) -_parsers = {'python': PythonExprVisitor, 'pandas': PandasExprVisitor} +_parsers = {'python': PythonExprVisitor, 'pandas': PandasExprVisitor, + 'pandas_query': PandasQueryExprVisitor} From 67a44c55823dbde7e45f7ba956dace760e04333c Mon Sep 17 00:00:00 2001 From: onesandzeroes Date: Thu, 26 Mar 2015 21:54:13 +1100 Subject: [PATCH 5/6] Use 'pandas_query' parser by default for queries --- pandas/core/frame.py | 1 + pandas/tests/test_frame.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bb192aeca5b6d..163a78d0a28a8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1935,6 +1935,7 @@ def query(self, expr, **kwargs): >>> df[df.a > df.b] # same result as the previous expression """ kwargs['level'] = kwargs.pop('level', 0) + 1 + kwargs['parser'] = kwargs.pop('parser', 'pandas_query') res = self.eval(expr, **kwargs) try: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 855e2300685a8..3dffc2c28727b 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -15210,7 +15210,7 @@ class TestDataFrameQueryPythonPandas(TestDataFrameQueryNumExprPandas): def setUpClass(cls): super(TestDataFrameQueryPythonPandas, cls).setUpClass() cls.engine = 'python' - cls.parser = 'pandas' + cls.parser = 'pandas_query' cls.frame = _frame.copy() def test_query_builtin(self): From 70d8345d38f68f9460ad6dbe16af91ddda5796cd Mon Sep 17 00:00:00 2001 From: onesandzeroes Date: Thu, 26 Mar 2015 21:54:42 +1100 Subject: [PATCH 6/6] Update unit test --- pandas/tests/test_frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 3dffc2c28727b..eda6d833f2b64 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -15225,12 +15225,12 @@ def test_query_builtin(self): result = df.query('sin > 5', engine=engine, parser=parser) tm.assert_frame_equal(expected, result) - def test_assignment_not_allowed(self): + def test_query_with_assign_statement(self): df = DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c']}) a_before = df['a'].copy() self.assertRaisesRegexp( - ValueError, 'assignment statement', df.query, 'a=1', - engine=self.engine, parser=self.parser + NotImplementedError, "'Assign' nodes are not implemented", + df.query, 'a=1', engine=self.engine, parser=self.parser ) a_after = df['a'].copy() assert_series_equal(a_before, a_after)