Added generic depth first search for parallel execution
[plstackapi.git] / planetstack / ec2_observer / toposort.py
1 #!/usr/bin/python
2
3 import time
4 import traceback
5 import commands
6 import threading
7 import json
8 import pdb
9
10 from datetime import datetime
11 from collections import defaultdict
12
13 # Assumes that there are no empty dependencies
14 # in the graph. E.g. Foo -> []
15 def dfs(graph, visit):
16         nodes = graph.keys()
17         edge_nodes = set()
18
19         for n in nodes:
20                 edge_nodes|=set(graph[n])
21
22         print 'Edge nodes=',edge_nodes
23         print 'Nodes=',nodes
24
25         sinks = list(edge_nodes - set(nodes))
26         sources = list(set(nodes) - edge_nodes)
27         
28         print 'Sinks =',sinks
29         print 'Sources =',sources
30
31         nodes.extend(sinks)
32
33         for s in sinks:
34                 graph[s]=[]
35
36         visited = set(sources)
37         stack = sources
38         while stack:
39                 current = stack.pop()
40                 visit(current)
41                 for node in graph[current]:
42                         if node not in visited:
43                                 stack.append(node)
44                                 visited.add(node)
45
46
47 # Topological sort + find connected components
48 # Notes:
49 # - Uses a stack instead of recursion
50 # - Forfeits optimization involving tracking currently visited nodes
51
52 def toposort_with_components(g, steps=None):
53         # Get set of all nodes, including those without outgoing edges
54         keys = set(g.keys())
55         values = set({})
56         for v in g.values():
57                 values=values | set(v)
58         
59         all_nodes=list(keys|values)
60         if (not steps):
61                 steps = all_nodes
62
63         # Final order
64         order = {}
65
66         # Index of newest component
67         cidx = 0
68
69         # DFS stack, not using recursion
70         stack = []
71
72         # Unmarked set
73         unmarked = all_nodes
74
75         # visiting = [] - skip, don't expect 1000s of nodes, |E|/|V| is small
76
77         while unmarked:
78                 stack.insert(0,unmarked[0]) # push first unmarked
79
80                 while (stack):
81                         n = stack[0]
82                         add = True
83                         try:
84                                 for m in g[n]:
85                                         if (m in unmarked):
86                                                 if (m not in stack):
87                                                         add = False
88                                                         stack.insert(0,m)
89                                                 else:
90                                                         # Should not happen, if so there's a loop
91                                                         print 'Loop at %s'%m
92                         except KeyError:
93                                 pass
94                         if (add):
95                                 if (n in steps):
96                                         order.append(n)
97                                 item = stack.pop(0)
98                                 unmarked.remove(item)
99
100         noorder = list(set(steps) - set(order))
101
102         # Steps that do not have an order get pushed as
103         # separate components that run in parallel.
104         for s in noorder:
105                 order['%d'%cidx] = [s]
106                 cidx+=1
107
108         return order + noorder
109
110 # Topological sort
111 # Notes:
112 # - Uses a stack instead of recursion
113 # - Forfeits optimization involving tracking currently visited nodes
114 def toposort(g, steps=None):
115         # Get set of all nodes, including those without outgoing edges
116         keys = set(g.keys())
117         values = set({})
118         for v in g.values():
119                 values=values | set(v)
120         
121         all_nodes=list(keys|values)
122         if (not steps):
123                 steps = all_nodes
124
125         # Final order
126         order = []
127
128         # DFS stack, not using recursion
129         stack = []
130
131         # Unmarked set
132         unmarked = all_nodes
133
134         # visiting = [] - skip, don't expect 1000s of nodes, |E|/|V| is small
135
136         while unmarked:
137                 stack.insert(0,unmarked[0]) # push first unmarked
138
139                 while (stack):
140                         n = stack[0]
141                         add = True
142                         try:
143                                 for m in g[n]:
144                                         if (m in unmarked):
145                                                 if (m not in stack):
146                                                         add = False
147                                                         stack.insert(0,m)
148                                                 else:
149                                                         # Should not happen, if so there's a loop
150                                                         print 'Loop at %s'%m
151                         except KeyError:
152                                 pass
153                         if (add):
154                                 if (n in steps):
155                                         order.append(n)
156                                 item = stack.pop(0)
157                                 unmarked.remove(item)
158
159         noorder = list(set(steps) - set(order))
160         return order + noorder
161
162 def main():
163         graph_file=open('planetstack.deps').read()
164         g = json.loads(graph_file)
165         print toposort(g)
166
167 if (__name__=='__main__'):
168         main()
169
170 #print toposort({'a':'b','b':'c','c':'d','d':'c'},['d','c','b','a'])