# topsort - dependency (topological) sorting and cycle finding functions
# Copyright (C) 2007 RADLogic
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation;
# version 2.1 of the License.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# See http://www.fsf.org/licensing/licenses/lgpl.txt for full license text.
"""Provide toplogical sorting (i.e. dependency sorting) functions.
The topsort function is based on code posted on Usenet by Tim Peters.
Modifications:
- added doctests
- changed some bits to use current Python idioms
(listcomp instead of filter, +=/-=, inherit from Exception)
- added a topsort_levels version that ports items in each dependency level
into a sub-list
- added find_cycles to aid in cycle debugging
Run this module directly to run the doctests (unittests).
Make sure they all pass before checking in any modifications.
Requires Python >= 2.2
(For Python 2.2 also requires separate sets.py module)
This requires the rad_util.py module.
"""
# Provide support for Python 2.2*
from __future__ import generators
__version__ = '$Revision: 0.9 $'
__date__ = '$Date: 2007/03/27 04:15:26 $'
__credits__ = '''Tim Peters -- original topsort code
Tim Wegener -- doctesting, updating to current idioms, topsort_levels,
find_cycles
'''
# Make Python 2.3 sets look like Python 2.4 sets.
try:
set
except NameError:
from sets import Set as set
from rad_util import is_rotated
class CycleError(Exception):
"""Cycle Error"""
pass
def topsort(pairlist):
"""Topologically sort a list of (parent, child) pairs.
Return a list of the elements in dependency order (parent to child order).
>>> print topsort( [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)] )
[1, 2, 3, 5, 4, 6]
>>> print topsort( [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)] )
[1, 2, 3, 4, 5, 6]
>>> print topsort( [(1,2), (2,3), (3,2)] )
Traceback (most recent call last):
CycleError: ([1], {2: 1, 3: 1}, {2: [3], 3: [2]})
"""
num_parents = {} # element -> # of predecessors
children = {} # element -> list of successors
for parent, child in pairlist:
# Make sure every element is a key in num_parents.
if not num_parents.has_key( parent ):
num_parents[parent] = 0
if not num_parents.has_key( child ):
num_parents[child] = 0
# Since child has a parent, increment child's num_parents count.
num_parents[child] += 1
# ... and parent gains a child.
children.setdefault(parent, []).append(child)
# Suck up everything without a parent.
answer = [x for x in num_parents.keys() if num_parents[x] == 0]
# For everything in answer, knock down the parent count on its children.
# Note that answer grows *in* the loop.
for parent in answer:
del num_parents[parent]
if children.has_key( parent ):
for child in children[parent]:
num_parents[child] -= 1
if num_parents[child] == 0:
answer.append( child )
# Following "del" isn't needed; just makes
# CycleError details easier to grasp.
del children[parent]
if num_parents:
# Everything in num_parents has at least one child ->
# there's a cycle.
raise CycleError(answer, num_parents, children)
return answer
def topsort_levels(pairlist):
"""Topologically sort a list of (parent, child) pairs into depth levels.
This returns a generator.
Turn this into a an iterator using the iter built-in function.
(if you iterate over the iterator, each element gets generated when
it is asked for, rather than generating the whole list up-front.)
Each generated element is a list of items at that dependency level.
>>> dependency_pairs = [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)]
>>> for level in iter(topsort_levels( dependency_pairs )):
... print level
[1]
[2, 3]
[4, 5]
[6]
>>> dependency_pairs = [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)]
>>> for level in iter(topsort_levels( dependency_pairs )):
... print level
[1]
[2, 3]
[4]
[5]
[6]
>>> dependency_pairs = [(1,2), (2,3), (3,4), (4, 3)]
>>> try:
... for level in iter(topsort_levels( dependency_pairs )):
... print level
... except CycleError, exc:
... print 'CycleError:', exc
[1]
[2]
CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
The cycle error should look like.
CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
# todo: Make the doctest more robust (i.e. handle arbitrary dict order).
"""
num_parents = {} # element -> # of predecessors
children = {} # element -> list of successors
for parent, child in pairlist:
# Make sure every element is a key in num_parents.
if not num_parents.has_key( parent ):
num_parents[parent] = 0
if not num_parents.has_key( child ):
num_parents[child] = 0
# Since child has a parent, increment child's num_parents count.
num_parents[child] += 1
# ... and parent gains a child.
children.setdefault(parent, []).append(child)
return topsort_levels_core(num_parents, children)
def topsort_levels_core(num_parents, children):
"""Topologically sort a bunch of interdependent items based on dependency.
This returns a generator.
Turn this into a an iterator using the iter built-in function.
(if you iterate over the iterator, each element gets generated when
it is asked for, rather than generating the whole list up-front.)
Each generated element is a list of items at that dependency level.
>>> list(topsort_levels_core(
... {1: 0, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2},
... {1: [2, 3, 5, 6], 2: [5], 3: [4], 4: [], 5: [6]}))
[[1], [2, 3], [4, 5], [6]]
>>> list(topsort_levels_core(
... {1: 0, 2: 2, 3: 1},
... {1: [2], 2: [3], 3: [2]}))
Traceback (most recent call last):
CycleError: ({2: 1, 3: 1}, {2: [3], 3: [2]})
This function has a more complicated interface than topsort_levels,
but is useful if the data is easier to generate in this form.
Arguments:
num_parents -- key: item, value: number of parents (predecessors)
children -- key: item, value: list of children (successors)
"""
while 1:
# Suck up everything without a predecessor.
level_parents = [x for x in num_parents.keys() if num_parents[x] == 0]
if not level_parents:
break
# Offer the next generated item,
# which is a list of the items at this dependency level.
yield level_parents
# For everything item in this level,
# decrement the parent count,
# since we have accounted for its parent.
for level_parent in level_parents:
del num_parents[level_parent]
if children.has_key(level_parent):
for level_parent_child in children[level_parent]:
num_parents[level_parent_child] -= 1
del children[level_parent]
if num_parents:
# Everything in num_parents has at least one child ->
# there's a cycle.
raise CycleError(num_parents, children)
else:
# This is the end of the generator.
raise StopIteration
def find_cycles(parent_children):
"""Yield cycles. Each result is a list of items comprising a cycle.
Use a 'stack' based approach to find all the cycles.
This is a generator, so yields each cycle as it finds it.
It is implicit that the last item in each cycle list is a parent of the
first item (thereby forming a cycle).
Arguments:
parent_children -- parent -> collection of children
Simplest cycle:
>>> cycles = list(find_cycles({'A': ['B'], 'B': ['A']}))
>>> len(cycles)
1
>>> cycle = cycles[0]
>>> cycle.sort()
>>> print cycle
['A', 'B']
Simplest cycle with extra baggage at the start and the end:
>>> cycles = list(find_cycles(parent_children={'A': ['B'],
... 'B': ['C'],
... 'C': ['B', 'D'],
... 'D': [],
... }))
>>> len(cycles)
1
>>> cycle = cycles[0]
>>> cycle.sort()
>>> print cycle
['B', 'C']
Double cycle:
>>> cycles = list(find_cycles(parent_children={'A': ['B'],
... 'B': ['C1', 'C2'],
... 'C1': ['D1'],
... 'D1': ['E1'],
... 'E1': ['D1'],
... 'C2': ['D2'],
... 'D2': ['E2'],
... 'E2': ['D2'],
... }))
>>> len(cycles)
2
>>> for cycle in cycles:
... cycle.sort()
>>> cycles.sort()
>>> cycle1 = cycles[0]
>>> cycle1.sort()
>>> print cycle1
['D1', 'E1']
>>> cycle2 = cycles[1]
>>> cycle2.sort()
>>> print cycle2
['D2', 'E2']
Simple cycle with children not specified for one item:
# todo: Should this barf instead?
>>> cycles = list(find_cycles(parent_children={'A': ['B'],
... 'B': ['A'],
... 'C': ['D']}))
>>> len(cycles)
1
>>> cycle = cycles[0]
>>> cycle.sort()
>>> print cycle
['A', 'B']
Diamond cycle
>>> cycles = list(find_cycles(parent_children={'A': ['B1', 'B2'],
... 'B1': ['C'],
... 'B2': ['C'],
... 'C': ['A', 'B1']}))
>>> len(cycles)
3
>>> sorted_cycles = []
>>> for cycle in cycles:
... cycle = list(cycle)
... cycle.sort()
... sorted_cycles.append(cycle)
>>> sorted_cycles.sort()
>>> for cycle in sorted_cycles:
... print cycle
['A', 'B1', 'C']
['A', 'B2', 'C']
['B1', 'C']
Hairy case (order can matter if something is wrong):
(Note order of B and C in the list.)
>>> cycles = list(find_cycles(parent_children={
... 'TD': ['DD'],
... 'TC': ['DC'],
... 'DC': ['DQ'],
... 'C': ['DQ'],
... 'DQ': ['IA', 'TO'],
... 'IA': ['A'],
... 'A': ['B', 'C'],
... }))
>>> len(cycles)
1
>>> cycle = cycles[0]
>>> cycle.sort()
>>> print cycle
['A', 'C', 'DQ', 'IA']
"""
cycles = []
visited_nodes = set()
for parent in parent_children:
if parent in visited_nodes:
# This node is part of a path that has already been traversed.
continue
paths = [[parent]]
while paths:
path = paths.pop()
parent = path[-1]
try:
children = parent_children[parent]
except KeyError:
continue
for child in children:
# Keeping a set of the path nodes, for O(1) lookups at the
# expense of more memory and complexity, actually makes speed
# worse. (Due to construction of sets.)
# This is O(N).
if child in path:
# This is a cycle.
cycle = path[path.index(child):]
# Check that this is not a dup cycle.
is_dup = False
for other_cycle in cycles:
if is_rotated(other_cycle, cycle):
is_dup = True
break
if not is_dup:
cycles.append(cycle)
yield cycle
else:
# Push this new path onto the 'stack'.
# This is probably the most expensive part of the algorithm
# (a list copy).
paths.append(path + [child])
# Mark the node as visited.
visited_nodes.add(child)
if __name__ == '__main__':
# Run the doctest tests.
import sys
import doctest
doctest.testmod(sys.modules['__main__'])