Skip to content

Commit 8c579b1

Browse files
bpo-32856: Optimize the assignment idiom in comprehensions. (GH-16814)
Now `for y in [expr]` in comprehensions is as fast as a simple assignment `y = expr`.
1 parent 0cc6b5e commit 8c579b1

File tree

8 files changed

+145
-18
lines changed

8 files changed

+145
-18
lines changed

Doc/whatsnew/3.9.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,17 @@ case), and one used ``__VENV_NAME__`` instead.
315315
Optimizations
316316
=============
317317

318+
* Optimized the idiom for assigning a temporary variable in comprehensions.
319+
Now ``for y in [expr]`` in comprehensions is as fast as a simple assignment
320+
``y = expr``. For example:
321+
322+
sums = [s for s in [0] for x in data for s in [s + x]]
323+
324+
Unlike the ``:=`` operator, this idiom does not leak a variable to the
325+
outer scope.
326+
327+
(Contributed by Serhiy Storchaka in :issue:`32856`.)
328+
318329

319330
Build and C API Changes
320331
=======================

Lib/test/test_dictcomps.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,5 +111,22 @@ def add_call(pos, value):
111111
self.assertEqual(actual, expected)
112112
self.assertEqual(actual_calls, expected_calls)
113113

114+
def test_assignment_idiom_in_comprehensions(self):
115+
expected = {1: 1, 2: 4, 3: 9, 4: 16}
116+
actual = {j: j*j for i in range(4) for j in [i+1]}
117+
self.assertEqual(actual, expected)
118+
expected = {3: 2, 5: 6, 7: 12, 9: 20}
119+
actual = {j+k: j*k for i in range(4) for j in [i+1] for k in [j+1]}
120+
self.assertEqual(actual, expected)
121+
expected = {3: 2, 5: 6, 7: 12, 9: 20}
122+
actual = {j+k: j*k for i in range(4) for j, k in [(i+1, i+2)]}
123+
self.assertEqual(actual, expected)
124+
125+
def test_star_expression(self):
126+
expected = {0: 0, 1: 1, 2: 4, 3: 9}
127+
self.assertEqual({i: i*i for i in [*range(4)]}, expected)
128+
self.assertEqual({i: i*i for i in (*range(4),)}, expected)
129+
130+
114131
if __name__ == "__main__":
115132
unittest.main()

Lib/test/test_genexps.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,22 @@
1515
>>> list((i,j) for i in range(4) for j in range(i) )
1616
[(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2)]
1717
18+
Test the idiom for temporary variable assignment in comprehensions.
19+
20+
>>> list((j*j for i in range(4) for j in [i+1]))
21+
[1, 4, 9, 16]
22+
>>> list((j*k for i in range(4) for j in [i+1] for k in [j+1]))
23+
[2, 6, 12, 20]
24+
>>> list((j*k for i in range(4) for j, k in [(i+1, i+2)]))
25+
[2, 6, 12, 20]
26+
27+
Not assignment
28+
29+
>>> list((i*i for i in [*range(4)]))
30+
[0, 1, 4, 9]
31+
>>> list((i*i for i in (*range(4),)))
32+
[0, 1, 4, 9]
33+
1834
Make sure the induction variable is not exposed
1935
2036
>>> i = 20

Lib/test/test_listcomps.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,22 @@
1616
>>> [(i,j) for i in range(4) for j in range(i)]
1717
[(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2)]
1818
19+
Test the idiom for temporary variable assignment in comprehensions.
20+
21+
>>> [j*j for i in range(4) for j in [i+1]]
22+
[1, 4, 9, 16]
23+
>>> [j*k for i in range(4) for j in [i+1] for k in [j+1]]
24+
[2, 6, 12, 20]
25+
>>> [j*k for i in range(4) for j, k in [(i+1, i+2)]]
26+
[2, 6, 12, 20]
27+
28+
Not assignment
29+
30+
>>> [i*i for i in [*range(4)]]
31+
[0, 1, 4, 9]
32+
>>> [i*i for i in (*range(4),)]
33+
[0, 1, 4, 9]
34+
1935
Make sure the induction variable is not exposed
2036
2137
>>> i = 20

Lib/test/test_peepholer.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,20 @@ def f(x):
495495
return 6
496496
self.check_lnotab(f)
497497

498+
def test_assignment_idiom_in_comprehensions(self):
499+
def listcomp():
500+
return [y for x in a for y in [f(x)]]
501+
self.assertEqual(count_instr_recursively(listcomp, 'FOR_ITER'), 1)
502+
def setcomp():
503+
return {y for x in a for y in [f(x)]}
504+
self.assertEqual(count_instr_recursively(setcomp, 'FOR_ITER'), 1)
505+
def dictcomp():
506+
return {y: y for x in a for y in [f(x)]}
507+
self.assertEqual(count_instr_recursively(dictcomp, 'FOR_ITER'), 1)
508+
def genexpr():
509+
return (y for x in a for y in [f(x)])
510+
self.assertEqual(count_instr_recursively(genexpr, 'FOR_ITER'), 1)
511+
498512

499513
class TestBuglets(unittest.TestCase):
500514

Lib/test/test_setcomps.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,22 @@
2121
>>> list(sorted({(i,j) for i in range(4) for j in range(i)}))
2222
[(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2)]
2323
24+
Test the idiom for temporary variable assignment in comprehensions.
25+
26+
>>> sorted({j*j for i in range(4) for j in [i+1]})
27+
[1, 4, 9, 16]
28+
>>> sorted({j*k for i in range(4) for j in [i+1] for k in [j+1]})
29+
[2, 6, 12, 20]
30+
>>> sorted({j*k for i in range(4) for j, k in [(i+1, i+2)]})
31+
[2, 6, 12, 20]
32+
33+
Not assignment
34+
35+
>>> sorted({i*i for i in [*range(4)]})
36+
[0, 1, 4, 9]
37+
>>> sorted({i*i for i in (*range(4),)})
38+
[0, 1, 4, 9]
39+
2440
Make sure the induction variable is not exposed
2541
2642
>>> i = 20
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Optimized the idiom for assigning a temporary variable in comprehensions.
2+
Now ``for y in [expr]`` in comprehensions is as fast as a simple assignment
3+
``y = expr``.

Python/compile.c

Lines changed: 52 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,13 @@ static int compiler_set_qualname(struct compiler *);
212212
static int compiler_sync_comprehension_generator(
213213
struct compiler *c,
214214
asdl_seq *generators, int gen_index,
215+
int depth,
215216
expr_ty elt, expr_ty val, int type);
216217

217218
static int compiler_async_comprehension_generator(
218219
struct compiler *c,
219220
asdl_seq *generators, int gen_index,
221+
int depth,
220222
expr_ty elt, expr_ty val, int type);
221223

222224
static PyCodeObject *assemble(struct compiler *, int addNone);
@@ -4343,22 +4345,24 @@ compiler_call_helper(struct compiler *c,
43434345
static int
43444346
compiler_comprehension_generator(struct compiler *c,
43454347
asdl_seq *generators, int gen_index,
4348+
int depth,
43464349
expr_ty elt, expr_ty val, int type)
43474350
{
43484351
comprehension_ty gen;
43494352
gen = (comprehension_ty)asdl_seq_GET(generators, gen_index);
43504353
if (gen->is_async) {
43514354
return compiler_async_comprehension_generator(
4352-
c, generators, gen_index, elt, val, type);
4355+
c, generators, gen_index, depth, elt, val, type);
43534356
} else {
43544357
return compiler_sync_comprehension_generator(
4355-
c, generators, gen_index, elt, val, type);
4358+
c, generators, gen_index, depth, elt, val, type);
43564359
}
43574360
}
43584361

43594362
static int
43604363
compiler_sync_comprehension_generator(struct compiler *c,
43614364
asdl_seq *generators, int gen_index,
4365+
int depth,
43624366
expr_ty elt, expr_ty val, int type)
43634367
{
43644368
/* generate code for the iterator, then each of the ifs,
@@ -4386,12 +4390,38 @@ compiler_sync_comprehension_generator(struct compiler *c,
43864390
}
43874391
else {
43884392
/* Sub-iter - calculate on the fly */
4389-
VISIT(c, expr, gen->iter);
4390-
ADDOP(c, GET_ITER);
4393+
/* Fast path for the temporary variable assignment idiom:
4394+
for y in [f(x)]
4395+
*/
4396+
asdl_seq *elts;
4397+
switch (gen->iter->kind) {
4398+
case List_kind:
4399+
elts = gen->iter->v.List.elts;
4400+
break;
4401+
case Tuple_kind:
4402+
elts = gen->iter->v.Tuple.elts;
4403+
break;
4404+
default:
4405+
elts = NULL;
4406+
}
4407+
if (asdl_seq_LEN(elts) == 1) {
4408+
expr_ty elt = asdl_seq_GET(elts, 0);
4409+
if (elt->kind != Starred_kind) {
4410+
VISIT(c, expr, elt);
4411+
start = NULL;
4412+
}
4413+
}
4414+
if (start) {
4415+
VISIT(c, expr, gen->iter);
4416+
ADDOP(c, GET_ITER);
4417+
}
4418+
}
4419+
if (start) {
4420+
depth++;
4421+
compiler_use_next_block(c, start);
4422+
ADDOP_JREL(c, FOR_ITER, anchor);
4423+
NEXT_BLOCK(c);
43914424
}
4392-
compiler_use_next_block(c, start);
4393-
ADDOP_JREL(c, FOR_ITER, anchor);
4394-
NEXT_BLOCK(c);
43954425
VISIT(c, expr, gen->target);
43964426

43974427
/* XXX this needs to be cleaned up...a lot! */
@@ -4405,7 +4435,7 @@ compiler_sync_comprehension_generator(struct compiler *c,
44054435

44064436
if (++gen_index < asdl_seq_LEN(generators))
44074437
if (!compiler_comprehension_generator(c,
4408-
generators, gen_index,
4438+
generators, gen_index, depth,
44094439
elt, val, type))
44104440
return 0;
44114441

@@ -4420,18 +4450,18 @@ compiler_sync_comprehension_generator(struct compiler *c,
44204450
break;
44214451
case COMP_LISTCOMP:
44224452
VISIT(c, expr, elt);
4423-
ADDOP_I(c, LIST_APPEND, gen_index + 1);
4453+
ADDOP_I(c, LIST_APPEND, depth + 1);
44244454
break;
44254455
case COMP_SETCOMP:
44264456
VISIT(c, expr, elt);
4427-
ADDOP_I(c, SET_ADD, gen_index + 1);
4457+
ADDOP_I(c, SET_ADD, depth + 1);
44284458
break;
44294459
case COMP_DICTCOMP:
44304460
/* With '{k: v}', k is evaluated before v, so we do
44314461
the same. */
44324462
VISIT(c, expr, elt);
44334463
VISIT(c, expr, val);
4434-
ADDOP_I(c, MAP_ADD, gen_index + 1);
4464+
ADDOP_I(c, MAP_ADD, depth + 1);
44354465
break;
44364466
default:
44374467
return 0;
@@ -4440,15 +4470,18 @@ compiler_sync_comprehension_generator(struct compiler *c,
44404470
compiler_use_next_block(c, skip);
44414471
}
44424472
compiler_use_next_block(c, if_cleanup);
4443-
ADDOP_JABS(c, JUMP_ABSOLUTE, start);
4444-
compiler_use_next_block(c, anchor);
4473+
if (start) {
4474+
ADDOP_JABS(c, JUMP_ABSOLUTE, start);
4475+
compiler_use_next_block(c, anchor);
4476+
}
44454477

44464478
return 1;
44474479
}
44484480

44494481
static int
44504482
compiler_async_comprehension_generator(struct compiler *c,
44514483
asdl_seq *generators, int gen_index,
4484+
int depth,
44524485
expr_ty elt, expr_ty val, int type)
44534486
{
44544487
comprehension_ty gen;
@@ -4492,9 +4525,10 @@ compiler_async_comprehension_generator(struct compiler *c,
44924525
NEXT_BLOCK(c);
44934526
}
44944527

4528+
depth++;
44954529
if (++gen_index < asdl_seq_LEN(generators))
44964530
if (!compiler_comprehension_generator(c,
4497-
generators, gen_index,
4531+
generators, gen_index, depth,
44984532
elt, val, type))
44994533
return 0;
45004534

@@ -4509,18 +4543,18 @@ compiler_async_comprehension_generator(struct compiler *c,
45094543
break;
45104544
case COMP_LISTCOMP:
45114545
VISIT(c, expr, elt);
4512-
ADDOP_I(c, LIST_APPEND, gen_index + 1);
4546+
ADDOP_I(c, LIST_APPEND, depth + 1);
45134547
break;
45144548
case COMP_SETCOMP:
45154549
VISIT(c, expr, elt);
4516-
ADDOP_I(c, SET_ADD, gen_index + 1);
4550+
ADDOP_I(c, SET_ADD, depth + 1);
45174551
break;
45184552
case COMP_DICTCOMP:
45194553
/* With '{k: v}', k is evaluated before v, so we do
45204554
the same. */
45214555
VISIT(c, expr, elt);
45224556
VISIT(c, expr, val);
4523-
ADDOP_I(c, MAP_ADD, gen_index + 1);
4557+
ADDOP_I(c, MAP_ADD, depth + 1);
45244558
break;
45254559
default:
45264560
return 0;
@@ -4583,7 +4617,7 @@ compiler_comprehension(struct compiler *c, expr_ty e, int type,
45834617
ADDOP_I(c, op, 0);
45844618
}
45854619

4586-
if (!compiler_comprehension_generator(c, generators, 0, elt,
4620+
if (!compiler_comprehension_generator(c, generators, 0, 0, elt,
45874621
val, type))
45884622
goto error_in_scope;
45894623

0 commit comments

Comments
 (0)