3 # Copyright (c) 2004 The Trustees of Princeton University (Trustees).
5 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
13 from threading import *
18 from util.process import *
24 # Correlates input with policy to form actions
# Sender address (presumably used as the From: header of outgoing mail -- confirm).
FROM = "support@planet-lab.org"

# Recipient address templates. The single %s is filled in by the caller
# (presumably with a site identifier -- confirm against the mailing code).
TECHEMAIL = "tech-%s@sites.planet-lab.org"
PIEMAIL = "pi-%s@sites.planet-lab.org"

# URL of the PLCAPI XML-RPC endpoint.
XMLRPC_SERVER = 'https://www.planet-lab.org/PLCAPI/'
47 # Time between comon refresh
49 # Time to refresh DB and remove unused entries
51 # Time between policy enforce/update
52 #POLSLEEP=43200 #12hrs
55 # Global table of all running threads, keyed by thread name. Any thread
56 # added to it will be monitored.
58 # Seconds between checking threads
# Logging setup: a "monitor" logger and a file handler appending to LOG,
# both accepting everything from DEBUG upwards.
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')

fh = logging.FileHandler(LOG, mode='a')
fh.setFormatter(formatter)
fh.setLevel(logging.DEBUG)

logger = logging.getLogger("monitor")
logger.setLevel(logging.DEBUG)
72 Usage: %s [OPTIONS]...
75 -d, --debug Enable debugging (default: %s)
76 --status Print memory usage statistics and exit
77 -h, --help This message
78 """.lstrip() % (sys.argv[0], debug)
82 Launches threads and adds them to the runningthreads global list.
83 Assigns name for thread, starts.
def startThread(fnct, name):
    """
    Register a thread in the global runningthreads table and start it.

    fnct -- the thread object to launch; it must provide setName() and start()
    name -- key under which the thread is stored, also used as its thread name

    Returns nothing. An exception raised while starting the thread is logged
    and swallowed, so one failing thread does not abort the caller; the
    thread's entry is left in runningthreads.
    """
    runningthreads[name] = fnct
    runningthreads[name].setName(name)
    try:
        logger.info("Starting thread " + name)
        runningthreads[name].start()
    except Exception:
        # Fetch the active exception via sys.exc_info() so this handler does
        # not depend on a particular "except ... as/," syntax.
        import sys
        err = sys.exc_info()[1]
        # str() is required: an exception object cannot be concatenated to a
        # string directly.
        logger.error("Thread: " + name + " " + str(err))
96 Watches threads and catches exceptions. Each launched thread is
97 watched and state is logged.
99 class ThreadWatcher(Thread):
101 Thread.__init__(self)
106 time.sleep(WATCHSLEEP)
def checkThreads(self):
    """
    Scan the global runningthreads table and drop every thread that is no
    longer alive, logging an error for each one removed.
    """
    # keys() returns a list snapshot under Python 2, which is what makes
    # deleting entries inside the loop safe.
    for name in runningthreads.keys():
        worker = runningthreads[name]
        if worker.isAlive():
            continue
        # Thread found dead: report it and stop tracking it.
        logger.error("Thread Died: %s" % (name))
        del runningthreads[name]
119 Thread.__init__(self)
126 Start threads, do some housekeeping, then daemonize.
130 global debug, status, logger
133 longopts = ["debug", "status", "help"]
134 (opts, argv) = getopt.getopt(sys.argv[1:], "dvf:s:ph", longopts)
135 except getopt.GetoptError, err:
136 print "Error: " + err.msg
140 for (opt, optval) in opts:
141 if opt == "-d" or opt == "--debug":
143 elif opt == "--status":
144 #print summary(names)
152 # writepid("monitor")
154 # Init stuff. Watch Threads to see if they die. Perhaps send email?
155 logger.info('Monitor Started')
156 startThread(ThreadWatcher(), "Watcher")
159 # Nodes to check. Queue of all sick nodes.
160 toCheck = Queue.Queue()
161 # Nodes that are sick w/o tickets
162 sickNoTicket = Queue.Queue()
163 # Comon DB of all nodes
165 # Nodes that are down. Use this to maintain DB; cleanup.
166 #alldown = Queue.Queue()
169 # Nodes we've emailed.
170 # host -> (type of email, time)
175 # Event based. Add to queue(toCheck) and hosts are queried.
176 rt1 = rt.RT(tickets, toCheck, sickNoTicket)
177 rt2 = rt.RT(tickets, toCheck, sickNoTicket)
178 rt3 = rt.RT(tickets, toCheck, sickNoTicket)
179 rt4 = rt.RT(tickets, toCheck, sickNoTicket)
180 rt5 = rt.RT(tickets, toCheck, sickNoTicket)
181 # Kind of a hack. Cleans the DB for stale entries and updates db.
182 clean = Thread(target=rt5.cleanTickets)
183 # Poll Comon. Refreshes Comon data every COSLEEP seconds
184 cm1 = comon.Comon(cdb, toCheck)
186 # Actually digest the info and do something with it.
187 pol = policy.Policy(cm1, sickNoTicket, emailed)
189 # Load emailed sites from last run.
190 pol.emailedStore("LOAD")
193 startThread(rt1,"rt1")
194 startThread(rt2,"rt2")
195 startThread(rt3,"rt3")
196 startThread(rt4,"rt4")
197 startThread(rt5,"rt5")
198 startThread(clean,"cleanrt5")
201 startThread(cm1,"comon")
203 # Wait for threads to init. Probably should join, but work on that later.
206 # Start Sending Emails
207 startThread(pol, "policy")
210 while (sickNoTicket.empty() == False) or (toCheck.empty() == False):
217 # Store state of emails
218 pol.emailedStore("WRITE")
223 logger.info('Monitor Exitted')
225 # removepid("monitor")
228 if __name__ == '__main__':
231 except KeyboardInterrupt:
232 print "Killed. Exitting."
233 logger.info('Monitor Killed')