3 # Copyright (c) 2004 The Trustees of Princeton University (Trustees).
5 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
13 from threading import *
17 # Global config options
20 from util.process import *
26 # Correlates input with policy to form actions
# Mail settings.  FROM is presumably the sender address for outgoing mail,
# and the "%s" in the two templates is filled in by code outside this
# section (presumably with a site identifier) -- confirm against the callers.
FROM = "support@planet-lab.org"
TECHEMAIL = "tech-%s@sites.planet-lab.org"
PIEMAIL = "pi-%s@sites.planet-lab.org"

# PLCAPI XML-RPC endpoint.
XMLRPC_SERVER = "https://www.planet-lab.org/PLCAPI/"
47 # Time between comon refresh
49 # Time to refresh DB and remove unused entries
51 # Time between policy enforce/update
52 #POLSLEEP=43200 #12hrs
# Global registry of all running threads, keyed by thread name. Any thread
# added to it will be monitored.
58 # Seconds between checking threads
# Logging setup for the monitor.
# File handler: appends to LOG, accepts DEBUG and above, timestamped lines.
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
fh = logging.FileHandler(LOG, mode='a')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)

# The "monitor" logger itself also passes DEBUG and above.
logger = logging.getLogger("monitor")
logger.setLevel(logging.DEBUG)
72 Usage: %s [OPTIONS]...
75 -d, --debug Enable debugging (default: %s)
76 --status Print memory usage statistics and exit
77 -h, --help This message
78 """.lstrip() % (sys.argv[0], debug)
82 Launches threads and adds them to the runningthreads global list.
83 Assigns name for thread, starts.
def startThread(fnct, name):
    """Register a thread under `name`, label it, and start it.

    fnct -- the thread object to launch.  Despite the parameter name it is
            used as a thread: setName() and start() are called on it.
    name -- string used both as the key in the global `runningthreads`
            registry and as the thread's name.

    An exception raised while starting is logged at ERROR level and is
    not re-raised; the thread stays in `runningthreads` either way.
    """
    runningthreads[name] = fnct
    fnct.setName(name)
    try:
        logger.info("Starting thread " + name)
        fnct.start()
    except Exception:
        # The handler used to log `error`, a name that does not exist, so
        # a failed start raised NameError from inside the handler instead
        # of being logged.  str() is required because the caught value is
        # an exception object, not a string.
        # sys.exc_info() is used so this parses on every Python version.
        err = sys.exc_info()[1]
        logger.error("Thread: " + name + " " + str(err))
96 Watches threads and catches exceptions. Each launched thread is
97 watched and state is logged.
99 class ThreadWatcher(Thread):
101 Thread.__init__(self)
106 time.sleep(WATCHSLEEP)
def checkThreads(self):
    """Drop dead threads from the global `runningthreads` registry.

    Every registered thread that is no longer alive is reported at ERROR
    level and then removed from the registry.
    """
    # Iterate over a snapshot: entries are deleted inside the loop, and
    # deleting from a dict while iterating a live view of it raises
    # RuntimeError on Python 3.  The old loop was only safe because
    # Python 2's keys() happens to return a copy.
    for name, thread in list(runningthreads.items()):
        if thread.isAlive():
            continue
        # Thread found dead: log it and remove it from the registry.
        logger.error("***********Thread died: %s**********" % name)
        del runningthreads[name]
119 Thread.__init__(self)
126 Start threads, do some housekeeping, then daemonize.
130 global status, logger
133 longopts = ["debug", "status", "help"]
134 (opts, argv) = getopt.getopt(sys.argv[1:], "dvf:s:ph", longopts)
135 except getopt.GetoptError, err:
136 print "Error: " + err.msg
140 for (opt, optval) in opts:
141 if opt == "-d" or opt == "--debug":
143 print "Running in DEBUG mode: NO EMAILS SENT AND NO SLICES SQUEEZED."
144 elif opt == "--status":
145 #print summary(names)
153 # writepid("monitor")
155 # Init stuff. Watch Threads to see if they die. Perhaps send email?
156 logger.info('Monitor Started')
157 startThread(ThreadWatcher(), "Watcher")
160 # Nodes to check. Queue of all sick nodes.
161 toCheck = Queue.Queue()
162 # Nodes that are sick w/o tickets
163 sickNoTicket = Queue.Queue()
164 # Comon DB of all nodes
166 # Nodes that are down. Use this to maintain DB; cleanup.
167 #alldown = Queue.Queue()
# Nodes we've emailed.
# host -> (type of email, time)
176 # Event based. Add to queue(toCheck) and hosts are queried.
177 rt1 = rt.RT(tickets, toCheck, sickNoTicket)
178 rt2 = rt.RT(tickets, toCheck, sickNoTicket)
179 rt3 = rt.RT(tickets, toCheck, sickNoTicket)
180 rt4 = rt.RT(tickets, toCheck, sickNoTicket)
181 rt5 = rt.RT(tickets, toCheck, sickNoTicket)
182 # Kind of a hack. Cleans the DB for stale entries and updates db.
183 clean = Thread(target=rt5.cleanTickets)
184 # Poll Comon. Refreshes Comon data every COSLEEP seconds
185 cm1 = comon.Comon(cdb, toCheck)
187 # Actually digest the info and do something with it.
188 pol = policy.Policy(cm1, sickNoTicket, emailed)
190 # Load emailed sites from last run.
191 pol.emailedStore("LOAD")
194 startThread(rt1,"rt1")
195 startThread(rt2,"rt2")
196 startThread(rt3,"rt3")
197 startThread(rt4,"rt4")
198 startThread(rt5,"rt5")
199 startThread(clean,"cleanrt5")
202 startThread(cm1,"comon")
204 # Wait for threads to init. Probably should join, but work on that later.
207 # Start Sending Emails
208 startThread(pol, "policy")
211 while (sickNoTicket.empty() == False) or (toCheck.empty() == False):
216 # Store state of emails
217 pol.emailedStore("WRITE")
222 logger.info('Monitor Exitted')
224 # removepid("monitor")
227 if __name__ == '__main__':
230 except KeyboardInterrupt:
231 print "Killed. Exitting."
232 logger.info('Monitor Killed')