#
# Faiyaz Ahmed <faiyaza@cs.princeton.edu>
#
-# $Id: emailTxt.py,v 1.9 2007/08/08 13:26:46 soltesz Exp $
+# $Id: emailTxt.py,v 1.10 2007/08/29 17:26:50 soltesz Exp $
#
ERROR- This is an error state, where there is absolutely no contact
with PlanetLab.
""")
+
+ # Two-string message template, presumably (subject, body) like the other
+ # templates in this file -- confirm against the class header. The first
+ # string interpolates %(loginbase)s and the second %(hostname_list)s, so
+ # the mapping used to format it must supply both keys.
+ nmreset =("""NM Reset at %(loginbase)s""",
+ """
+Monitor restarted NM on the following machines:
+
+%(hostname_list)s
+
+ """)
+
# TODO: need reminder versions for repeats...
newdown=[newdown_one, newdown_two, newdown_three]
newbootcd=[newbootcd_one, newbootcd_two, newbootcd_three]
newthankyou=[thankyou,thankyou,thankyou]
+ NMReset=[nmreset,nmreset,nmreset]
down=("""PlanetLab node %(hostname)s down.""", """As part of PlanetLab node monitoring, we noticed %(hostname)s has been down for %(days)s days.
count += 1
print "%d %s %s" % (count, nodename, externalState['nodes'][pcu_id]['values'])
- soltesz.dbDump(config.dbname, externalState, 'php')
+ soltesz.dbDump(config.dbname, externalState)
# this will be called when an exception occurs within a thread
def handle_exception(request, result):
def main():
global externalState
- externalState = soltesz.if_cached_else(1, config.dbname, lambda : externalState, 'php')
+ externalState = soltesz.if_cached_else(1, config.dbname, lambda : externalState)
cohash = {}
if config.increment:
if config.filename == "":
print "Calling API GetPCUs() : refresh(%s)" % config.refresh
l_pcus = soltesz.if_cached_else_refresh(1,
- config.refresh, "pculist", lambda : plc.GetPCUs(), 'php')
+ config.refresh, "pculist", lambda : plc.GetPCUs())
l_pcus = [pcu['pcu_id'] for pcu in l_pcus]
else:
l_pcus = config.getListFromFile(config.filename)
except Exception, err:
print "Exception: %s" % err
print "Saving data... exitting."
- soltesz.dbDump(config.dbname, externalState, 'php')
+ soltesz.dbDump(config.dbname, externalState)
sys.exit(0)
--- /dev/null
+#!/usr/bin/python
+import soltesz
+from config import config
+from optparse import OptionParser
+from printbadbysite import *
+
+
+def main():
+ # Load the database named by config.dbname, build one value dict per node,
+ # sort the rows with the comparator selected by the config.cmp* flags, feed
+ # each formatted row to fields_to_html(), and finally print the counts held
+ # in `categories` for a fixed list of category names on one comma-separated
+ # line.
+ # NOTE(review): cmpPing/cmpSSH/..., fields_to_html and `categories` are not
+ # defined in this file; presumably they come from
+ # `from printbadbysite import *` -- confirm.
+ # NOTE(review): leading whitespace is flattened in this patch text, so the
+ # nesting of the statements below (in particular the `continue` and the
+ # trailing category summary) must be checked against the real file.
+ db = soltesz.dbLoad(config.dbname)
+
+ ## Field widths used for printing
+ # NOTE(review): these values are interpolated with %d below, which turns
+ # 10.65 into 10. If 10.65 was meant as "width 10, at most 65 characters",
+ # the precision part is currently dropped -- confirm the intent.
+ maxFieldLengths = { 'nodename' : -45,
+ 'ping' : 6,
+ 'ssh' : 6,
+ 'pcu' : 7,
+ 'category' : 9,
+ 'state' : 5,
+ 'kernel' : 10.65,
+ 'comonstats' : 5,
+ 'plcsite' : 12,
+ 'bootcd' : 10.65}
+ ## create format string based on config.fields
+ # Each requested field becomes a "%(name)<width>s" directive; the directives
+ # are joined with single spaces in the order given by config.fields.
+ # A field name missing from maxFieldLengths raises KeyError here.
+ fields = {}
+ format = ""
+ for f in config.fields.split(','):
+ fields[f] = "%%(%s)%ds" % (f, maxFieldLengths[f])
+ for f in config.fields.split(','):
+ format += fields[f] + " "
+
+
+ d_n = db['nodes']
+ l_nodes = d_n.keys()
+
+ # category by site
+ #bysite = {}
+ #for nodename in l_nodes:
+ # if 'plcsite' in d_n[nodename]['values'] and \
+ # 'login_base' in d_n[nodename]['values']['plcsite']:
+ # loginbase = d_n[nodename]['values']['plcsite']['login_base']
+ # if loginbase not in bysite:
+ # bysite[loginbase] = []
+ # d_n[nodename]['values']['nodename'] = nodename
+ # bysite[loginbase].append(d_n[nodename]['values'])
+
+ # d2 was an array of [{node}, {}, ...]
+ # the bysite is a loginbase dict of [{node}, {node}]
+ # Build d2: a shallow copy of each node's 'values' dict plus 'nodename' and
+ # 'site_string'. Only nodes whose plcsite entry reports status "SUCCESS"
+ # are kept; all others are skipped without any output.
+ d2 = []
+ for nodename in l_nodes:
+ vals=d_n[nodename]['values']
+ v = {}
+ v.update(vals)
+ v['nodename'] = nodename
+ if 'plcsite' in vals and 'status' in vals['plcsite'] and vals['plcsite']['status'] == "SUCCESS":
+ site_string = "<b>%-20s</b> %2s nodes :: %2s of %4s slices" % ( \
+ vals['plcsite']['login_base'],
+ vals['plcsite']['num_nodes'],
+ vals['plcsite']['num_slices'],
+ vals['plcsite']['max_slices'])
+ v['site_string'] = site_string
+ d2.append(v)
+ else:
+ #print "ERROR: ", nodename, vals, "<br>"
+ pass
+ #site_string = "<b>UNKNOWN</b>"
+
+
+ # The first true flag wins; with no flag set the rows are ordered by
+ # cmpCategory.
+ if config.cmpping:
+ d2.sort(cmp=cmpPing)
+ elif config.cmpssh:
+ d2.sort(cmp=cmpSSH)
+ elif config.cmpcategory:
+ d2.sort(cmp=cmpCategory)
+ elif config.cmpstate:
+ d2.sort(cmp=cmpState)
+ elif config.cmpdays:
+ d2.sort(cmp=cmpDays)
+ elif config.cmpkernel:
+ d2.sort(cmp=cmpUname)
+ else:
+ d2.sort(cmp=cmpCategory)
+
+
+ for row in d2:
+ # site_string is read here but not used again in the visible code.
+ site_string = row['site_string']
+ vals = row
+ # convert uname values into a single kernel version string
+ # A value starting with "Linux" is replaced by its third
+ # whitespace-separated token; any other non-empty value is moved to
+ # 'ssherror' and 'kernel' is blanked.
+ if 'kernel' in vals:
+ kernel = vals['kernel'].split()
+ if len(kernel) > 0:
+ if kernel[0] == "Linux":
+ vals['kernel'] = kernel[2]
+ else:
+ vals['ssherror'] = vals['kernel']
+ vals['kernel'] = ""
+ else:
+ vals['ssherror'] = ""
+ vals['kernel'] = ""
+ continue
+
+ # NOTE(review): `str` shadows the builtin and `fields` rebinds the dict
+ # built above to a list of tokens; the return value `s` is not used in
+ # the visible code.
+ str = format % vals
+ fields = str.split()
+ #print "<tr>"
+ s = fields_to_html(fields, vals)
+
+ # Make sure every reported category has an entry (defaulting to 0), then
+ # print the counts in this fixed order, each followed by a comma.
+ keys = categories.keys()
+ for cat in ['BOOT-ALPHA', 'BOOT-PROD', 'BOOT-OLDBOOTCD', 'DEBUG-ALPHA',
+ 'DEBUG-PROD', 'DEBUG-OLDBOOTCD', 'DOWN-ERROR']:
+ if cat not in keys:
+ categories[cat] = 0
+ keys = categories.keys()
+ for cat in ['BOOT-ALPHA', 'BOOT-PROD', 'BOOT-OLDBOOTCD', 'DEBUG-ALPHA',
+ 'DEBUG-PROD', 'DEBUG-OLDBOOTCD', 'DOWN-ERROR']:
+ if cat in keys:
+ print "%d," % categories[cat],
+ print ""
+import cgi
+if __name__ == '__main__':
+    # Command-line entry point: declare the options and their defaults, wrap
+    # the parser in the project's config object, parse argv and run main().
+    parser = OptionParser()
+    # Every flag that main() reads gets an explicit default, including
+    # cmpkernel, so the sort selection never depends on an implicit None.
+    parser.set_defaults(cmpdays=False,
+                        comon="sshstatus",
+                        fields="nodename,ping,ssh,pcu,category,state,kernel,bootcd",
+                        dbname="findbad", # -070724-1",
+                        cmpping=False,
+                        cmpssh=False,
+                        cmpcategory=False,
+                        cmpkernel=False,
+                        cmpstate=False)
+    # --fields has to store into 'fields'. With dest="dbname" it silently
+    # replaced the database name and could never change the column list.
+    parser.add_option("", "--fields", dest="fields", help="")
+    parser.add_option("", "--dbname", dest="dbname", help="")
+    parser.add_option("", "--days", dest="cmpdays", action="store_true", help="")
+    parser.add_option("", "--ping", dest="cmpping", action="store_true", help="")
+    parser.add_option("", "--ssh", dest="cmpssh", action="store_true", help="")
+    parser.add_option("", "--category", dest="cmpcategory", action="store_true", help="")
+    parser.add_option("", "--kernel", dest="cmpkernel", action="store_true", help="")
+    parser.add_option("", "--state", dest="cmpstate", action="store_true", help="")
+    parser.add_option("", "--comon", dest="comon", help="")
+    # Rebinds the module-level name `config` (the imported callable) to the
+    # object it returns; main() reads its settings through that global name.
+    config = config(parser)
+    config.parse_args()
+    main()
--- /dev/null
+#!/usr/bin/python
+
+import plc
+from config import config
+import soltesz
+import sys
+
+# Call the imported `config` callable with no arguments and rebind the
+# module-level name `config` to the result; create_plcdb() below reads
+# config.cachenodes from it.
+config = config()
+
+def dsites_from_lsites(l_sites):
+    """Index a list of site records by login_base and by site_id.
+
+    l_sites -- iterable of dicts carrying at least 'login_base' and
+               'site_id'.
+
+    Returns (d_sites, id2lb), where d_sites maps login_base -> site record
+    and id2lb maps site_id -> login_base.
+
+    Two records sharing a login_base are fatal: a message is written to
+    stderr and the process exits with status 1.
+    """
+    d_sites = {}
+    id2lb = {}
+    for site in l_sites:
+        login_base = site['login_base']
+        if login_base in d_sites:
+            # Say why we are giving up instead of exiting without a trace.
+            sys.stderr.write("Two sites have the same login_base value %s!\n" % login_base)
+            sys.exit(1)
+        d_sites[login_base] = site
+        id2lb[site['site_id']] = login_base
+    return (d_sites, id2lb)
+
+def dsn_from_dsln(d_sites, id2lb, l_nodes):
+    """Group node records under the login_base of the site they belong to.
+
+    d_sites -- dict mapping login_base -> site record
+    id2lb   -- dict mapping site_id -> login_base
+    l_nodes -- iterable of node dicts carrying at least 'hostname' and
+               'site_id'
+
+    Returns (dsn, hn2lb). dsn[login_base] holds 'plc' (the site record),
+    'monitor' (an empty dict) and one entry per hostname of the form
+    {'plc': node, 'comon': {}, 'monitor': {}}. hn2lb maps
+    hostname -> login_base.
+
+    Raises Exception when a node's site_id is not a key of id2lb; the
+    known site_id -> login_base pairs are printed first.
+    """
+    dsn = {}
+    hn2lb = {}
+    for node in l_nodes:
+        # this won't reach sites without nodes, which I guess isn't a problem.
+        site_id = node['site_id']
+        # Test membership on the dict itself; .keys() would build a list for
+        # every node.
+        if site_id not in id2lb:
+            for i in id2lb:
+                print("%s   %s" % (i, id2lb[i]))
+            # %s rather than %d: a None or non-numeric site_id must not turn
+            # this report into a TypeError that hides the real problem.
+            raise Exception("Node has missing site id!! %s %s" % (node['hostname'], site_id))
+        login_base = id2lb[site_id]
+
+        if login_base not in dsn:
+            dsn[login_base] = {
+                'plc': d_sites[login_base],
+                'monitor': {},  # event log, or something
+            }
+
+        hostname = node['hostname']
+        dsn[login_base][hostname] = {'plc': node, 'comon': {}, 'monitor': {}}
+
+        hn2lb[hostname] = login_base
+    return (dsn, hn2lb)
+
+def create_plcdb():
+    """Fetch sites and nodes from PLC, index them, and optionally cache them.
+
+    Calls plc.getSites and plc.getNodes with the filter {'peer_id': None},
+    indexes the results with dsites_from_lsites() and dsn_from_dsln() and,
+    when config.cachenodes is true, stores the hostname -> login_base map
+    and the raw node and site lists through soltesz.dbDump.
+
+    Returns the node list obtained from plc.getNodes.
+    Exits the process with status 1 when no sites are returned.
+    """
+    # get sites, and stats
+    l_sites = plc.getSites({'peer_id':None}, ['login_base', 'site_id'])
+    if not l_sites:
+        # Nothing to index; say so rather than exiting without a trace.
+        sys.stderr.write("create_plcdb: plc.getSites returned no sites\n")
+        sys.exit(1)
+    (d_sites, id2lb) = dsites_from_lsites(l_sites)
+
+    # get nodes at each site
+    l_nodes = plc.getNodes({'peer_id':None}, ['hostname', 'site_id', 'version', 'last_updated', 'date_created', 'last_contact', 'pcu_ids'])
+    # Only the hostname map is stored below; the per-site index is built (and
+    # thereby validated) but not kept.
+    (_plcdb, hn2lb) = dsn_from_dsln(d_sites, id2lb, l_nodes)
+
+    # save information for future.
+    if config.cachenodes:
+        soltesz.dbDump("plcdb_hn2lb", hn2lb)
+        soltesz.dbDump("l_plcnodes", l_nodes)
+        soltesz.dbDump("l_plcsites", l_sites)
+
+    return l_nodes
+
+
+# Script entry point: build the PLC site/node index (and cache it when the
+# configuration asks for that) only when executed directly, not on import.
+if "__main__" == __name__:
+    create_plcdb()