@@ -329,12 +329,22 @@ def default_lib_path(data_dir: str, pyversion: Tuple[int, int],
329
329
('data_json' , str ), # path of <id>.data.json
330
330
('suppressed' , List [str ]), # dependencies that weren't imported
331
331
('flags' , Optional [List [str ]]), # build flags
332
+ ('dep_prios' , List [int ]),
332
333
])
333
- # NOTE: dependencies + suppressed == all unreachable imports;
334
+ # NOTE: dependencies + suppressed == all reachable imports;
334
335
# suppressed contains those reachable imports that were prevented by
335
336
# --silent-imports or simply not found.
336
337
337
338
339
# Import priorities.  ("Top-level" here includes imports inside a class
# body.)  These give a more predictable order in which the nodes of an
# import cycle are processed: lower number == higher priority.
PRI_HIGH = 5   # top-level "from X import blah"
PRI_MED = 10   # top-level "import X"
PRI_LOW = 20   # either form, but inside a function
PRI_ALL = 99   # sentinel: include every priority
338
348
class BuildManager :
339
349
"""This class holds shared state for building a mypy program.
340
350
@@ -395,12 +405,13 @@ def __init__(self, data_dir: str,
395
405
self .missing_modules = set () # type: Set[str]
396
406
397
407
def all_imported_modules_in_file (self ,
398
- file : MypyFile ) -> List [Tuple [str , int ]]:
408
+ file : MypyFile ) -> List [Tuple [int , str , int ]]:
399
409
"""Find all reachable import statements in a file.
400
410
401
- Return list of tuples (module id, import line number) for all modules
402
- imported in file.
411
+ Return list of tuples (priority, module id, import line number)
412
+ for all modules imported in file; lower numbers == higher priority .
403
413
"""
414
+
404
415
def correct_rel_imp (imp : Union [ImportFrom , ImportAll ]) -> str :
405
416
"""Function to correct for relative imports."""
406
417
file_id = file .fullname ()
@@ -415,21 +426,23 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
415
426
416
427
return new_id
417
428
418
- res = [] # type: List[Tuple[str, int]]
429
+ res = [] # type: List[Tuple[int, str, int]]
419
430
for imp in file .imports :
420
431
if not imp .is_unreachable :
421
432
if isinstance (imp , Import ):
433
+ pri = PRI_MED if imp .is_top_level else PRI_LOW
422
434
for id , _ in imp .ids :
423
- res .append ((id , imp .line ))
435
+ res .append ((pri , id , imp .line ))
424
436
elif isinstance (imp , ImportFrom ):
425
437
cur_id = correct_rel_imp (imp )
426
438
pos = len (res )
427
439
all_are_submodules = True
428
440
# Also add any imported names that are submodules.
441
+ pri = PRI_MED if imp .is_top_level else PRI_LOW
429
442
for name , __ in imp .names :
430
443
sub_id = cur_id + '.' + name
431
444
if self .is_module (sub_id ):
432
- res .append ((sub_id , imp .line ))
445
+ res .append ((pri , sub_id , imp .line ))
433
446
else :
434
447
all_are_submodules = False
435
448
# If all imported names are submodules, don't add
@@ -438,9 +451,12 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
438
451
# cur_id is also a dependency, and we should
439
452
# insert it *before* any submodules.
440
453
if not all_are_submodules :
441
- res .insert (pos , ((cur_id , imp .line )))
454
+ pri = PRI_HIGH if imp .is_top_level else PRI_LOW
455
+ res .insert (pos , ((pri , cur_id , imp .line )))
442
456
elif isinstance (imp , ImportAll ):
443
- res .append ((correct_rel_imp (imp ), imp .line ))
457
+ pri = PRI_HIGH if imp .is_top_level else PRI_LOW
458
+ res .append ((pri , correct_rel_imp (imp ), imp .line ))
459
+
444
460
return res
445
461
446
462
def is_module (self , id : str ) -> bool :
@@ -773,16 +789,18 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
773
789
data_json ,
774
790
meta .get ('suppressed' , []),
775
791
meta .get ('flags' ),
792
+ meta .get ('dep_prios' , []),
776
793
)
777
794
if (m .id != id or m .path != path or
778
795
m .mtime is None or m .size is None or
779
796
m .dependencies is None or m .data_mtime is None ):
780
797
return None
781
798
782
- # Metadata generated by older mypy version and no flags were saved
783
- if m .flags is None :
799
+ # Ignore cache if generated by an older mypy version.
800
+ if m .flags is None or len ( m . dependencies ) != len ( m . dep_prios ) :
784
801
return None
785
802
803
+ # Ignore cache if (relevant) flags aren't the same.
786
804
cached_flags = select_flags_affecting_cache (m .flags )
787
805
current_flags = select_flags_affecting_cache (manager .flags )
788
806
if cached_flags != current_flags :
@@ -821,6 +839,7 @@ def random_string():
821
839
822
840
def write_cache (id : str , path : str , tree : MypyFile ,
823
841
dependencies : List [str ], suppressed : List [str ],
842
+ dep_prios : List [int ],
824
843
manager : BuildManager ) -> None :
825
844
"""Write cache files for a module.
826
845
@@ -830,6 +849,7 @@ def write_cache(id: str, path: str, tree: MypyFile,
830
849
tree: the fully checked module data
831
850
dependencies: module IDs on which this module depends
832
851
suppressed: module IDs which were suppressed as dependencies
852
+ dep_prios: priorities (parallel array to dependencies)
833
853
manager: the build manager (for pyversion, log/trace)
834
854
"""
835
855
path = os .path .abspath (path )
@@ -859,6 +879,7 @@ def write_cache(id: str, path: str, tree: MypyFile,
859
879
'dependencies' : dependencies ,
860
880
'suppressed' : suppressed ,
861
881
'flags' : manager .flags ,
882
+ 'dep_prios' : dep_prios ,
862
883
}
863
884
with open (meta_json_tmp , 'w' ) as f :
864
885
json .dump (meta , f , sort_keys = True )
@@ -1031,6 +1052,7 @@ class State:
1031
1052
tree = None # type: Optional[MypyFile]
1032
1053
dependencies = None # type: List[str]
1033
1054
suppressed = None # type: List[str] # Suppressed/missing dependencies
1055
+ priorities = None # type: Dict[str, int]
1034
1056
1035
1057
# Map each dependency to the line number where it is first imported
1036
1058
dep_line_map = None # type: Dict[str, int]
@@ -1132,6 +1154,9 @@ def __init__(self,
1132
1154
# compare them to the originals later.
1133
1155
self .dependencies = list (self .meta .dependencies )
1134
1156
self .suppressed = list (self .meta .suppressed )
1157
+ assert len (self .meta .dependencies ) == len (self .meta .dep_prios )
1158
+ self .priorities = {id : pri
1159
+ for id , pri in zip (self .meta .dependencies , self .meta .dep_prios )}
1135
1160
self .dep_line_map = {}
1136
1161
else :
1137
1162
# Parse the file (and then some) to get the dependencies.
@@ -1267,8 +1292,10 @@ def parse_file(self) -> None:
1267
1292
# Also keep track of each dependency's source line.
1268
1293
dependencies = []
1269
1294
suppressed = []
1295
+ priorities = {} # type: Dict[str, int] # id -> priority
1270
1296
dep_line_map = {} # type: Dict[str, int] # id -> line
1271
- for id , line in manager .all_imported_modules_in_file (self .tree ):
1297
+ for pri , id , line in manager .all_imported_modules_in_file (self .tree ):
1298
+ priorities [id ] = min (pri , priorities .get (id , PRI_ALL ))
1272
1299
if id == self .id :
1273
1300
continue
1274
1301
# Omit missing modules, as otherwise we could not type-check
@@ -1299,6 +1326,7 @@ def parse_file(self) -> None:
1299
1326
# for differences (e.g. --silent-imports).
1300
1327
self .dependencies = dependencies
1301
1328
self .suppressed = suppressed
1329
+ self .priorities = priorities
1302
1330
self .dep_line_map = dep_line_map
1303
1331
self .check_blockers ()
1304
1332
@@ -1338,8 +1366,10 @@ def type_check(self) -> None:
1338
1366
1339
1367
def write_cache (self ) -> None :
1340
1368
if self .path and INCREMENTAL in self .manager .flags and not self .manager .errors .is_errors ():
1369
+ dep_prios = [self .priorities .get (dep , PRI_HIGH ) for dep in self .dependencies ]
1341
1370
write_cache (self .id , self .path , self .tree ,
1342
1371
list (self .dependencies ), list (self .suppressed ),
1372
+ dep_prios ,
1343
1373
self .manager )
1344
1374
1345
1375
@@ -1408,10 +1438,9 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
1408
1438
# dependencies) to roots (those from which everything else can be
1409
1439
# reached).
1410
1440
for ascc in sccs :
1411
- # Sort the SCC's nodes in *reverse* order of encounter.
1412
- # This is a heuristic for handling import cycles.
1441
+ # Order the SCC's nodes using a heuristic.
1413
1442
# Note that ascc is a set, and scc is a list.
1414
- scc = sorted ( ascc , key = lambda id : - graph [ id ]. order )
1443
+ scc = order_ascc ( graph , ascc )
1415
1444
# If builtins is in the list, move it last. (This is a bit of
1416
1445
# a hack, but it's necessary because the builtins module is
1417
1446
# part of a small cycle involving at least {builtins, abc,
@@ -1420,6 +1449,12 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
1420
1449
if 'builtins' in ascc :
1421
1450
scc .remove ('builtins' )
1422
1451
scc .append ('builtins' )
1452
+ if manager .flags .count (VERBOSE ) >= 2 :
1453
+ for id in scc :
1454
+ manager .trace ("Priorities for %s:" % id ,
1455
+ " " .join ("%s:%d" % (x , graph [id ].priorities [x ])
1456
+ for x in graph [id ].dependencies
1457
+ if x in ascc and x in graph [id ].priorities ))
1423
1458
# Because the SCCs are presented in topological sort order, we
1424
1459
# don't need to look at dependencies recursively for staleness
1425
1460
# -- the immediate dependencies are sufficient.
@@ -1446,7 +1481,7 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
1446
1481
# cache file is newer than any scc node's cache file.
1447
1482
oldest_in_scc = min (graph [id ].meta .data_mtime for id in scc )
1448
1483
newest_in_deps = 0 if not deps else max (graph [dep ].meta .data_mtime for dep in deps )
1449
- if manager .flags .count (VERBOSE ) >= 2 : # Dump all mtimes for extreme debugging.
1484
+ if manager .flags .count (VERBOSE ) >= 3 : # Dump all mtimes for extreme debugging.
1450
1485
all_ids = sorted (ascc | deps , key = lambda id : graph [id ].meta .data_mtime )
1451
1486
for id in all_ids :
1452
1487
if id in scc :
@@ -1486,6 +1521,53 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
1486
1521
process_stale_scc (graph , scc )
1487
1522
1488
1523
1524
def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]:
    """Come up with the ideal processing order within an SCC.

    Using the priorities assigned by all_imported_modules_in_file(),
    try to reduce the cycle to a DAG, by omitting arcs representing
    dependencies of lower priority.

    In the simplest case, if we have A <--> B where A has a top-level
    "import B" (medium priority) but B only has the reverse "import A"
    inside a function (low priority), we turn the cycle into a DAG by
    dropping the B --> A arc, which leaves only A --> B.

    If all arcs have the same priority, we fall back to sorting by
    reverse global order (the order in which modules were first
    encountered).

    The algorithm is recursive, as follows: when arcs of different
    priorities are present, drop all arcs of the lowest priority,
    identify SCCs in the resulting graph, and apply the algorithm to
    each SCC thus found.  The recursion is bounded because at each
    recursion the spread in priorities is (at least) one less.

    In practice there are only a few priority levels (currently
    N=3) and in the worst case we just carry out the same algorithm
    for finding SCCs N times.  Thus the complexity is no worse than
    the complexity of the original SCC-finding algorithm -- see
    strongly_connected_components() below for a reference.
    """
    if len(ascc) == 1:
        return list(ascc)
    # Collect the distinct priorities of all intra-SCC arcs below pri_max.
    pri_spread = set()  # type: Set[int]
    for id in ascc:
        state = graph[id]
        for dep in state.dependencies:
            if dep in ascc:
                # Missing priority defaults to PRI_HIGH (never dropped).
                pri = state.priorities.get(dep, PRI_HIGH)
                if pri < pri_max:
                    pri_spread.add(pri)
    if len(pri_spread) == 1:
        # Filtered dependencies are uniform -- order by global order.
        return sorted(ascc, key=lambda id: -graph[id].order)
    # Drop all arcs of the lowest priority by tightening pri_max, then
    # recompute SCCs and recurse into each of them.
    pri_max = max(pri_spread)
    sccs = sorted_components(graph, ascc, pri_max)
    # The recursion is bounded by the len(pri_spread) check above.
    return [s for ss in sccs for s in order_ascc(graph, ss, pri_max)]
1569
+
1570
+
1489
1571
def process_fresh_scc (graph : Graph , scc : List [str ]) -> None :
1490
1572
"""Process the modules in one SCC from their cached data."""
1491
1573
for id in scc :
@@ -1517,7 +1599,9 @@ def process_stale_scc(graph: Graph, scc: List[str]) -> None:
1517
1599
graph [id ].write_cache ()
1518
1600
1519
1601
1520
- def sorted_components (graph : Graph ) -> List [AbstractSet [str ]]:
1602
+ def sorted_components (graph : Graph ,
1603
+ vertices : Optional [AbstractSet [str ]] = None ,
1604
+ pri_max : int = PRI_ALL ) -> List [AbstractSet [str ]]:
1521
1605
"""Return the graph's SCCs, topologically sorted by dependencies.
1522
1606
1523
1607
The sort order is from leaves (nodes without dependencies) to
@@ -1527,17 +1611,17 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]:
1527
1611
dependencies that aren't present in graph.keys() are ignored.
1528
1612
"""
1529
1613
# Compute SCCs.
1530
- vertices = set ( graph )
1531
- edges = { id : [ dep for dep in st . dependencies if dep in graph ]
1532
- for id , st in graph . items () }
1614
+ if vertices is None :
1615
+ vertices = set ( graph )
1616
+ edges = { id : deps_filtered ( graph , vertices , id , pri_max ) for id in vertices }
1533
1617
sccs = list (strongly_connected_components (vertices , edges ))
1534
1618
# Topsort.
1535
1619
sccsmap = {id : frozenset (scc ) for scc in sccs for id in scc }
1536
1620
data = {} # type: Dict[AbstractSet[str], Set[AbstractSet[str]]]
1537
1621
for scc in sccs :
1538
1622
deps = set () # type: Set[AbstractSet[str]]
1539
1623
for id in scc :
1540
- deps .update (sccsmap [x ] for x in graph [ id ]. dependencies if x in graph )
1624
+ deps .update (sccsmap [x ] for x in deps_filtered ( graph , vertices , id , pri_max ) )
1541
1625
data [frozenset (scc )] = deps
1542
1626
res = []
1543
1627
for ready in topsort (data ):
@@ -1554,7 +1638,17 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]:
1554
1638
return res
1555
1639
1556
1640
1557
- def strongly_connected_components (vertices : Set [str ],
1641
def deps_filtered(graph: Graph, vertices: AbstractSet[str], id: str, pri_max: int) -> List[str]:
    """Filter dependencies for id with pri < pri_max."""
    if id not in vertices:
        return []
    state = graph[id]
    result = []  # type: List[str]
    for dep in state.dependencies:
        if dep not in vertices:
            continue
        # A dependency without a recorded priority counts as PRI_HIGH.
        if state.priorities.get(dep, PRI_HIGH) < pri_max:
            result.append(dep)
    return result
1649
+
1650
+
1651
+ def strongly_connected_components (vertices : AbstractSet [str ],
1558
1652
edges : Dict [str , List [str ]]) -> Iterator [Set [str ]]:
1559
1653
"""Compute Strongly Connected Components of a directed graph.
1560
1654
0 commit comments