2 # Copyright (c) 2004 The Trustees of Princeton University (Trustees).
4 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
10 #from monitor import *
11 from threading import *
22 logger = logging.getLogger("monitor")
24 # Time to enforce policy
27 # Days between emails (enforce 'squeeze' after this time).
32 # DNS, kinda down (sick)
33 # clock, kinda down (sick)
34 # Full disk, going to be down
38 # suspend slice creation
41 def __init__(self, comonthread, sickNoTicket, emailed):
42 self.cmn = comonthread
43 # host -> (type of email, time of email)
44 self.emailed = emailed
45 # all sick nodes w/o tickets
46 self.sickNoTicket = sickNoTicket
49 #def getAllSick(self):
50 # for bucket in self.cmn.comonbkts.keys():
51 # for host in getattr(self.cmn, bucket):
52 # if host not in self.cursickw.keys():
53 # self.cursick.put(host)
59 # Get list of nodes in debug from PLC
60 #dbgNodes = NodesDebug()
62 node = self.sickNoTicket.get(block = True)
64 id = mailer.siteId(node)
67 logger.info("loginbase for %s not found" %node)
68 elif node not in self.emailed.keys():
70 if node in self.cmn.down:
71 logger.debug("POLICY: Emailing (down) " + node)
72 self.emailed[node] = ("down", time.localtime())
73 msg = emailTxt.mailtxt.DOWN \
75 mailer.email(node + " down", msg,
76 "tech-" + id + "@sites.planet-lab.org")
80 if node in self.cmn.ssh:
81 logger.debug("POLICY: Emailing (ssh) " + node)
82 self.emailed[node] = ("ssh", time.localtime())
83 msg = emailTxt.mailtxt.SSH \
85 mailer.email(node + " down", msg,
86 "tech-" + id + "@sites.planet-lab.org")
90 if node in self.cmn.dns:
91 logger.debug("POLICY: Emailing (dns)" + node)
92 self.emailed[node] = ("dns", time.localtime())
93 msg = emailTxt.mailtxt.DNS \
95 mailer.email("Please update DNS used by " \
97 "tech-" + id + "@sites.planet-lab.org")
102 Prints, logs, and emails status of up nodes, down nodes, and buckets.
108 Store/Load the record of sent emails (self.emailed) via pickle. action is LOAD or WRITE.
110 def emailedStore(self, action):
114 logger.info("Found and reading " + DAT)
115 self.emailed.update(pickle.load(f))
116 if action == "WRITE":
118 logger.info("Writing " + DAT)
119 pickle.dump(self.emailed, f)
121 except Exception, err:
122 logger.info("Problem with DAT, %s" %err)
129 Returns list of hostnames of nodes whose boot_state is dbg, as reported by PLC
133 api = xmlrpclib.Server(XMLRPC_SERVER, verbose=False)
134 anon = {'AuthMethod': "anonymous"}
135 allnodes = api.AnonAdmGetNodes(anon, [], ['hostname','boot_state'])
136 for node in allnodes:
137 if node['boot_state'] == 'dbg': dbgNodes.append(node['hostname'])
138 logger.info("%s nodes in debug according to PLC." %len(dbgNodes))
145 logger.setLevel(logging.DEBUG)
146 ch = logging.StreamHandler()
147 ch.setLevel(logging.DEBUG)
148 formatter = logging.Formatter('%(message)s')
149 ch.setFormatter(formatter)
150 logger.addHandler(ch)
154 a = Policy(None, tmp)
155 a.emailedStore("LOAD")
159 if __name__ == '__main__':
161 XMLRPC_SERVER = 'https://www.planet-lab.org/PLCAPI/'
164 except KeyboardInterrupt:
165 print "Killed. Exitting."
166 logger.info('Monitor Killed')