import logging
import mailer
import emailTxt
+import pickle
+import xml, xmlrpclib
+import Queue
+
+DAT="./monitor.dat"
logger = logging.getLogger("monitor")
# Time to enforce policy
POLSLEEP = 7200
+# Days between emails (enforce 'squeeze' after this time).
+SQUEEZE = 3
# IF:
# no SSH, down.
# bad disk, down
# suspend slice creation
# kill slices
class Policy(Thread):
- def __init__(self, comonthread, tickets):
+ def __init__(self, comonthread, sickNoTicket, emailed):
self.cmn = comonthread
- self.tickets = tickets
- # host - > time of email
- self.emailed = {}
- # all sick nodes w/ tickets
- self.cursickw = tickets
+ # host - > (time of email, type of email)
+ self.emailed = emailed
# all sick nodes w/o tickets
- self.cursick = []
+ self.sickNoTicket = sickNoTicket
Thread.__init__(self)
- '''
- Gets all nodes without tickets and puts them in self.cursick
- '''
- def getAllSick(self):
- self.cursick = []
- for bucket in self.cmn.comonbkts.keys():
- for host in getattr(self.cmn, bucket):
- if host not in self.cursickw.keys():
- if host not in self.cursick:
- self.cursick.append(host)
- logger.debug("Nodes sick wo tickets %s " % len(self.cursick))
+
+ #def getAllSick(self):
+ # for bucket in self.cmn.comonbkts.keys():
+ # for host in getattr(self.cmn, bucket):
+ # if host not in self.cursickw.keys():
+ # self.cursick.put(host)
'''
- Acts on sick nodes.
+ Acts on sick nodes
'''
- def emailSick(self):
- for node in self.cmn.ssh:
- if node in self.cursick:
- if node not in self.emailed.keys():
- logger.debug("Emailing " + node)
- try:
- self.emailed[node] = "ssh"
- mailer.email('DISREGARD',
- emailTxt.mailtxt.STANDARD % {'hostname': node},
- "tech-" + mailer.siteId(node) + "@sites.planet-lab.org")
- except Exception, err:
- logger.info(err)
+ def emailsick(self):
+ # Get list of nodes in debug from PLC
+ #dbgNodes = NodesDebug()
+
+ node = self.sickNoTicket.get(block = True)
+ # Get the login base
+ id = mailer.siteId(node)
+
+ if not id:
+ logger.info("loginbase for %s not found" %node)
+ elif node not in self.emailed.keys():
+ # Email about Down.
+ if node in self.cmn.down:
+ logger.debug("POLICY: Emailing (down) " + node)
+ self.emailed[node] = ("down", time.localtime())
+ msg = emailTxt.mailtxt.DOWN \
+ % {'hostname': node}
+ mailer.email(node + " down", msg,
+ "tech-" + id + "@sites.planet-lab.org")
+ return
+
+ # Email about no SSH.
+ if node in self.cmn.ssh:
+ logger.debug("POLICY: Emailing (ssh) " + node)
+ self.emailed[node] = ("ssh", time.localtime())
+ msg = emailTxt.mailtxt.SSH \
+ % {'hostname': node}
+ mailer.email(node + " down", msg,
+ "tech-" + id + "@sites.planet-lab.org")
+ return
+
+ # Email about DNS
+ if node in self.cmn.dns:
+ logger.debug("POLICY: Emailing (dns)" + node)
+ self.emailed[node] = ("dns", time.localtime())
+ msg = emailTxt.mailtxt.DNS \
+ % {'hostname': node}
+ mailer.email("Please update DNS used by " \
+ + node, msg,
+ "tech-" + id + "@sites.planet-lab.org")
+ return
+
'''
Prints, logs, and emails status of up nodes, down nodes, and buckets.
def status(self):
return 0
+ '''
+ Store/Load state of emails. When, where, what.
+ '''
+ def emailedStore(self, action):
+ try:
+ if action == "LOAD":
+ f = open(DAT, "r+")
+ logger.info("Found and reading " + DAT)
+ self.emailed.update(pickle.load(f))
+ if action == "WRITE":
+ f = open(DAT, "w")
+ logger.info("Writing " + DAT)
+ pickle.dump(self.emailed, f)
+ f.close()
+ except Exception, err:
+ logger.info("Problem with DAT, %s" %err)
+
def run(self):
- #while 1:
- self.getAllSick()
- self.emailSick()
+ while 1:
+ self.emailsick()
+
+'''
+Returns list of nodes in dbg as reported by PLC
+'''
+def NodesDebug():
+ dbgNodes = []
+ api = xmlrpclib.Server(XMLRPC_SERVER, verbose=False)
+ anon = {'AuthMethod': "anonymous"}
+ allnodes = api.AnonAdmGetNodes(anon, [], ['hostname','boot_state'])
+ for node in allnodes:
+ if node['boot_state'] == 'dbg': dbgNodes.append(node['hostname'])
+ logger.info("%s nodes in debug according to PLC." %len(dbgNodes))
+ return dbgNodes
+
+
+
+
+def main():
+ logger.setLevel(logging.DEBUG)
+ ch = logging.StreamHandler()
+ ch.setLevel(logging.DEBUG)
+ formatter = logging.Formatter('%(message)s')
+ ch.setFormatter(formatter)
+ logger.addHandler(ch)
+
+ #print NodesDebug()
+ tmp = Queue.Queue()
+ a = Policy(None, tmp)
+ a.emailedStore("LOAD")
+ print a.emailed
+
+ os._exit(0)
+if __name__ == '__main__':
+ import os
+ XMLRPC_SERVER = 'https://www.planet-lab.org/PLCAPI/'
+ try:
+ main()
+ except KeyboardInterrupt:
+ print "Killed. Exitting."
+ logger.info('Monitor Killed')
+ os._exit(0)