From: Sapan Bhatia Date: Mon, 1 Sep 2014 21:41:19 +0000 (-0400) Subject: Added generic depth first search for parallel execution X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=b1d8a0f47e60e705883e86e2b9921d7cc1acf377;p=plstackapi.git Added generic depth first search for parallel execution --- diff --git a/planetstack/ec2_observer/toposort.py b/planetstack/ec2_observer/toposort.py index a2c9389..6d28f4a 100644 --- a/planetstack/ec2_observer/toposort.py +++ b/planetstack/ec2_observer/toposort.py @@ -10,6 +10,103 @@ import pdb from datetime import datetime from collections import defaultdict +# Assumes that there are no empty dependencies +# in the graph. E.g. Foo -> [] +def dfs(graph, visit): + nodes = graph.keys() + edge_nodes = set() + + for n in nodes: + edge_nodes|=set(graph[n]) + + print 'Edge nodes=',edge_nodes + print 'Nodes=',nodes + + sinks = list(edge_nodes - set(nodes)) + sources = list(set(nodes) - edge_nodes) + + print 'Sinks =',sinks + print 'Sources =',sources + + nodes.extend(sinks) + + for s in sinks: + graph[s]=[] + + visited = set(sources) + stack = sources + while stack: + current = stack.pop() + visit(current) + for node in graph[current]: + if node not in visited: + stack.append(node) + visited.add(node) + + +# Topological sort + find connected components +# Notes: +# - Uses a stack instead of recursion +# - Forfeits optimization involving tracking currently visited nodes + +def toposort_with_components(g, steps=None): + # Get set of all nodes, including those without outgoing edges + keys = set(g.keys()) + values = set({}) + for v in g.values(): + values=values | set(v) + + all_nodes=list(keys|values) + if (not steps): + steps = all_nodes + + # Final order + order = {} + + # Index of newest component + cidx = 0 + + # DFS stack, not using recursion + stack = [] + + # Unmarked set + unmarked = all_nodes + + # visiting = [] - skip, don't expect 1000s of nodes, |E|/|V| is small + + while unmarked: + stack.insert(0,unmarked[0]) # push first unmarked + + while (stack): + n = stack[0] + add = True + try: + for m in g[n]: + if (m in unmarked): + if (m not in stack): + add = False + stack.insert(0,m) + else: + # Should not happen, if so there's a loop + print 'Loop at %s'%m + except KeyError: + pass + if (add): + if (n in steps): + order.append(n) + item = stack.pop(0) + unmarked.remove(item) + + noorder = list(set(steps) - set(order)) + + # Steps that do not have an order get pushed as + # separate components that run in parallel. + for s in noorder: + order['%d'%cidx] = [s] + cidx+=1 + + return order + noorder + # Topological sort # Notes: # - Uses a stack instead of recursion