2 # Copyright (c) 2004 The Trustees of Princeton University (Trustees).
4 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
10 #from monitor import *
11 from threading import *
20 #Hack to auth structure
24 logger = logging.getLogger("monitor")
26 # Time to enforce policy
29 # Days between emails (enforce 'squeeze' after this time).
32 # Where to email the summary
33 SUMTO = "faiyaza@cs.princeton.edu"
34 TECHEMAIL="tech-%s@sites.planet-lab.org"
35 PIEMAIL="pi-%s@sites.planet-lab.org"
36 SLICEMAIL="%s@slices.planet-lab.org"
37 PLCEMAIL="support@planet-lab.org"
46 # DNS, kinda down (sick)
47 # clock, kinda down (sick)
48 # Full disk, going to be down
52 # suspend slice creation
55 def __init__(self, comonthread, sickNoTicket, emailed):
56 self.cmn = comonthread
57 # host -> (type of email, time of email)
58 self.emailed = emailed
59 # all sick nodes w/o tickets
60 self.sickNoTicket = sickNoTicket
63 #def getAllSick(self):
64 # for bucket in self.cmn.comonbkts.keys():
65 # for host in getattr(self.cmn, bucket):
66 # if host not in self.cursickw.keys():
67 # self.cursick.put(host)
73 # Get list of nodes in debug from PLC
74 #dbgNodes = NodesDebug()
75 global TECHEMAIL, PIEMAIL
76 node = self.sickNoTicket.get(block = True)
78 id = mailer.siteId(node)
80 # Send appropriate message for node if in appropriate bucket.
81 # If we know where to send a message
83 logger.info("loginbase for %s not found" %node)
84 # And we didn't email already.
86 # If first email, send to Tech
87 target = [TECHEMAIL % id]
89 # If disk is foobarred, PLC should check it.
90 if (node in self.cmn.filerw) and \
91 (node not in self.emailed.keys()):
93 logger.info("Emailing PLC for " + node)
95 # If in dbg, set to rins, then reboot. Inform PLC.
96 if (node in self.cmn.dbg):
97 logger.info("Node in dbg - " + node)
100 # If it's a disk, email PLC; don't bother going through this loop.
101 if (node in self.emailed.keys()) and \
102 (node not in self.cmn.filerw):
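103 # If we emailed before, how many days ago? NOTE(review): this subtracts day-of-month fields only, so the result is wrong (even negative) across a month boundary.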
104 delta = time.localtime()[2] - self.emailed[node][1][2]
105 # If at least PITHRESH days have passed, but fewer than SLICETHRESH
106 if (delta >= PITHRESH) and (delta < SLICETHRESH):
107 logger.info("Emailing PI for " + node)
108 target.append(PIEMAIL % id)
109 # If at least PITHRESH and more than SLICETHRESH. NOTE(review): delta == SLICETHRESH satisfies neither this test nor the one above.
110 if (delta >= PITHRESH) and (delta > SLICETHRESH):
111 logger.info("Emailing slices for " + node)
112 # Email slices at site.
113 slices = mailer.slices(id)
116 target.append(SLICEMAIL % slice)
118 # Find the bucket the node is in and send the appropriate email
119 # to the appropriate list of people.
120 for bkt in self.cmn.comonbkts.keys():
121 if (node in getattr(self.cmn, bkt)):
122 # Send predefined message for that bucket.
123 logger.info("POLICY: Emailing (%s) %s - %s"\
124 %(bkt, node, target))
125 tmp = getattr(emailTxt.mailtxt, bkt)
126 sbj = tmp[0] % {'hostname': node}
127 msg = tmp[1] % {'hostname': node}
128 mailer.email(sbj, msg, target)
129 self.emailed[node] = (bkt , time.localtime())
134 Prints, logs, and emails status of up nodes, down nodes, and buckets.
137 sub = "Monitor Summary"
138 msg = "\nThe following nodes were acted upon: \n\n"
139 for (node, (type, date)) in self.emailed.items():
140 msg +="%s\t(%s)\t%s:%s:%s\n" %(node,type,date[3],date[4],date[5])
141 mailer.email(sub, msg, [SUMTO])
146 Store/Load state of emails. When, where, what.
148 def emailedStore(self, action):
152 logger.info("Found and reading " + DAT)
153 self.emailed.update(pickle.load(f))
154 if action == "WRITE":
156 logger.debug("Writing " + DAT)
157 pickle.dump(self.emailed, f)
159 except Exception, err:
160 logger.info("Problem with DAT, %s" %err)
165 self.emailedStore("WRITE")
167 Returns list of nodes in dbg as reported by PLC
171 api = xmlrpclib.Server(XMLRPC_SERVER, verbose=False)
172 anon = {'AuthMethod': "anonymous"}
173 allnodes = api.AnonAdmGetNodes(anon, [], ['hostname','boot_state'])
174 for node in allnodes:
175 if node['boot_state'] == 'dbg': dbgNodes.append(node['hostname'])
176 logger.info("%s nodes in debug according to PLC." %len(dbgNodes))
183 logger.setLevel(logging.DEBUG)
184 ch = logging.StreamHandler()
185 ch.setLevel(logging.DEBUG)
186 formatter = logging.Formatter('%(message)s')
187 ch.setFormatter(formatter)
188 logger.addHandler(ch)
192 #a = Policy(None, tmp)
193 #a.emailedStore("LOAD")
195 print siteId("princetoan")
198 if __name__ == '__main__':
200 XMLRPC_SERVER = 'https://www.planet-lab.org/PLCAPI/'
203 except KeyboardInterrupt:
204 print "Killed. Exitting."
205 logger.info('Monitor Killed')