From: Stephen Soltesz <soltesz@cs.princeton.edu> Date: Thu, 18 Dec 2008 00:57:49 +0000 (+0000) Subject: modified *list templates with abbreviated information X-Git-Tag: Monitor-2.0-0~7 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=bbdd1222ad57a915bbb3d872a1cf1da759ef85e3;p=monitor.git modified *list templates with abbreviated information consolidated *view templates into a single template, pcuview. should rename it. updated findbad/findbadpcu to update the global round only after data collection is complete. this solves the 'no information' errors when new scan is started. --- diff --git a/findbad.py b/findbad.py index c7449d2..4d1beed 100755 --- a/findbad.py +++ b/findbad.py @@ -19,6 +19,7 @@ from monitor.wrapper import plc, plccache from nodequery import verify,query_to_dict,node_select import traceback +from nodecommon import nmap_port_status #print "starting sqlfindbad.py" # QUERY all nodes. @@ -35,6 +36,19 @@ round = 1 global_round = round count = 0 +def collectNMAP(nodename, cohash): + #### RUN NMAP ############################### + values = {} + nmap = util.command.CMD() + print "nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename + (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename) + # NOTE: an empty / error value for oval, will still work. 
+ (values['port_status'], continue_probe) = nmap_port_status(oval) + + values['date_checked'] = datetime.now() + + return (nodename, values) + def collectPingAndSSH(nodename, cohash): ### RUN PING ###################### ping = command.CMD() @@ -45,9 +59,9 @@ def collectPingAndSSH(nodename, cohash): if oval == "": # An error occurred - values['ping'] = "NOPING" + values['ping_status'] = False else: - values['ping'] = "PING" + values['ping_status'] = True try: for port in [22, 806]: @@ -55,13 +69,13 @@ def collectPingAndSSH(nodename, cohash): (oval, errval) = ssh.run_noexcept2(""" <<\EOF echo "{" - echo ' "kernel":"'`uname -a`'",' + echo ' "kernel_version":"'`uname -a`'",' echo ' "bmlog":"'`ls /tmp/bm.log`'",' - echo ' "bootcd":"'`cat /mnt/cdrom/bootme/ID`'",' - echo ' "nm":"'`ps ax | grep nm.py | grep -v grep`'",' - echo ' "readonlyfs":"'`touch /var/log/monitor 2>&1`'",' - echo ' "dns":"'`host boot.planet-lab.org 2>&1`'",' - echo ' "princeton_comon":"'`ls -d /vservers/princeton_comon`'",' + echo ' "bootcd_version":"'`cat /mnt/cdrom/bootme/ID`'",' + echo ' "nm_status":"'`ps ax | grep nm.py | grep -v grep`'",' + echo ' "fs_status":"'`touch /var/log/monitor 2>&1`'",' + echo ' "dns_status":"'`host boot.planet-lab.org 2>&1`'",' + echo ' "princeton_comon_dir":"'`ls -d /vservers/princeton_comon`'",' ID=`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'` echo ' "princeton_comon_running":"'`ls -d /proc/virtual/$ID`'",' @@ -69,20 +83,20 @@ def collectPingAndSSH(nodename, cohash): echo "}" EOF """) - values['ssherror'] = errval + values['ssh_error'] = errval if len(oval) > 0: #print "OVAL: %s" % oval values.update(eval(oval)) - values['sshport'] = port + values['ssh_portused'] = port break else: - values.update({'kernel': "", 'bmlog' : "", 'bootcd' : '', - 'nm' : '', - 'readonlyfs' : '', - 'dns' : '', - 'princeton_comon' : "", + values.update({'kernel_version': "", 'bmlog' : "", 'bootcd_version' : '', + 'nm_status' : '', + 'fs_status' : '', + 
'dns_status' : '', + 'princeton_comon_dir' : "", 'princeton_comon_running' : "", - 'princeton_comon_procs' : "", 'sshport' : None}) + 'princeton_comon_procs' : "", 'ssh_portused' : None}) except: print traceback.print_exc() sys.exit(1) @@ -94,79 +108,79 @@ EOF """) #errval = "" #(oval, errval) = ssh.run_noexcept('echo `uname -a ; ls /tmp/bm.log`') - oval = values['kernel'] + oval = values['kernel_version'] if "2.6.17" in oval or "2.6.2" in oval: - values['ssh'] = 'SSH' - values['category'] = 'PROD' + values['ssh_status'] = True + values['observed_category'] = 'PROD' if "bm.log" in values['bmlog']: - values['state'] = 'DEBUG' + values['observed_status'] = 'DEBUG' else: - values['state'] = 'BOOT' + values['observed_status'] = 'BOOT' elif "2.6.12" in oval or "2.6.10" in oval: - values['ssh'] = 'SSH' - values['category'] = 'OLDPROD' + values['ssh_status'] = True + values['observed_category'] = 'OLDPROD' if "bm.log" in values['bmlog']: - values['state'] = 'DEBUG' + values['observed_status'] = 'DEBUG' else: - values['state'] = 'BOOT' + values['observed_status'] = 'BOOT' # NOTE: on 2.6.8 kernels, with 4.2 bootstrapfs, the chroot command fails. I have no idea why. elif "2.4" in oval or "2.6.8" in oval: b_getbootcd_id = False - values['ssh'] = 'SSH' - values['category'] = 'OLDBOOTCD' - values['state'] = 'DEBUG' + values['ssh_status'] = True + values['observed_category'] = 'OLDBOOTCD' + values['observed_status'] = 'DEBUG' elif oval != "": - values['ssh'] = 'SSH' - values['category'] = 'UNKNOWN' + values['ssh_status'] = True + values['observed_category'] = 'UNKNOWN' if "bm.log" in values['bmlog']: - values['state'] = 'DEBUG' + values['observed_status'] = 'DEBUG' else: - values['state'] = 'BOOT' + values['observed_status'] = 'BOOT' else: # An error occurred. 
b_getbootcd_id = False - values['ssh'] = 'NOSSH' - values['category'] = 'ERROR' - values['state'] = 'DOWN' + values['ssh_status'] = False + values['observed_category'] = 'ERROR' + values['observed_status'] = 'DOWN' val = errval.strip() - values['ssherror'] = val - values['kernel'] = "" + values['ssh_error'] = val + values['kernel_version'] = "" - #values['kernel'] = val + #values['kernel_version'] = val if b_getbootcd_id: # try to get BootCD for all nodes that are not 2.4 nor inaccessible #(oval, errval) = ssh.run_noexcept('cat /mnt/cdrom/bootme/ID') - oval = values['bootcd'] + oval = values['bootcd_version'] if "BootCD" in oval: - values['bootcd'] = oval + values['bootcd_version'] = oval if "v2" in oval and \ ( nodename is not "planetlab1.cs.unc.edu" and \ nodename is not "planetlab2.cs.unc.edu" ): - values['category'] = 'OLDBOOTCD' + values['observed_category'] = 'OLDBOOTCD' else: - values['bootcd'] = "" + values['bootcd_version'] = "" else: - values['bootcd'] = "" + values['bootcd_version'] = "" # TODO: get bm.log for debug nodes. 
# 'zcat /tmp/bm.log' #(oval, errval) = ssh.run_noexcept('ps ax | grep nm.py | grep -v grep') - oval = values['nm'] + oval = values['nm_status'] if "nm.py" in oval: - values['nm'] = "Y" + values['nm_status'] = "Y" else: - values['nm'] = "N" + values['nm_status'] = "N" continue_slice_check = True #(oval, errval) = ssh.run_noexcept('ls -d /vservers/princeton_comon') - oval = values['princeton_comon'] - if "princeton_comon" in oval: - values['princeton_comon'] = True + oval = values['princeton_comon_dir'] + if "princeton_comon_dir" in oval: + values['princeton_comon_dir'] = True else: - values['princeton_comon'] = False + values['princeton_comon_dir'] = False continue_slice_check = False if continue_slice_check: @@ -189,9 +203,9 @@ EOF """) if nodename in cohash: - values['comonstats'] = cohash[nodename] + values['comon_stats'] = cohash[nodename] else: - values['comonstats'] = {'resptime': '-1', + values['comon_stats'] = {'resptime': '-1', 'uptime': '-1', 'sshstatus': '-1', 'lastcotop': '-1', @@ -208,7 +222,11 @@ EOF """) except: traceback.print_exc() plc_lock.release() - values['plcnode'] = d_node + values['plc_node_stats'] = d_node + + ##### NMAP ################### + (n, v) = collectNMAP(nodename, None) + values.update(v) ### GET PLC PCU ###################### site_id = -1 @@ -220,7 +238,7 @@ EOF """) site_id = d_node['site_id'] - values['pcu'] = d_pcu + values['plc_pcuid'] = d_pcu ### GET PLC SITE ###################### plc_lock.acquire() @@ -234,8 +252,8 @@ EOF """) traceback.print_exc() plc_lock.release() - values['plcsite'] = d_site - values['date_checked'] = time.time() + values['plc_site_stats'] = d_site + values['date_checked'] = datetime.now() except: print traceback.print_exc() @@ -248,9 +266,9 @@ def recordPingAndSSH(request, result): try: if values is not None: - fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", - if_new_set={'round' : global_round}) - global_round = fbsync.round + #fbsync = 
FindbadNodeRecordSync.findby_or_create(hostname="global", + # if_new_set={'round' : global_round}) + #global_round = fbsync.round fbnodesync = FindbadNodeRecordSync.findby_or_create(hostname=nodename, if_new_set={'round' : global_round}) @@ -262,43 +280,38 @@ def recordPingAndSSH(request, result): fbrec = FindbadNodeRecord.findby_or_create( round=global_round, hostname=nodename) - before = fbrec.to_dict() - print "BEFORE, ", before - fbrec.flush() - time.sleep(2) - print "Setting VALUES" - fbrec.set( date_checked=datetime.fromtimestamp(values['date_checked']), - loginbase=values['loginbase'], - kernel_version=values['kernel'], - bootcd_version=values['bootcd'], - nm_status=values['nm'], - fs_status=values['readonlyfs'], - dns_status=values['dns'], - princeton_comon_dir=values['princeton_comon'], - princeton_comon_running=values['princeton_comon_running'], - princeton_comon_procs=values['princeton_comon_procs'], - plc_node_stats = values['plcnode'], - plc_site_stats = values['plcsite'], - plc_pcuid = values['pcu'], - comon_stats = values['comonstats'], - ping_status = (values['ping'] == "PING"), - ssh_portused = values['sshport'], - ssh_status = (values['ssh'] == "SSH"), - ssh_error = values['ssherror'], - observed_status = values['state'], - observed_category = values['category']) - after = fbrec.to_dict() - print "AFTER , ", after - - for v in before.keys(): - if before[v] == after[v]: - print "SAME FOR KEY %s" % v - print "%s : %s\t%s" % ( v, before[v], after[v] ) + + fbrec.set( **values ) + #date_checked=values['date_checked'], + #loginbase=values['loginbase'], + #kernel_version=values['kernel_version'], + #bootcd_version=values['bootcd_version'], + #nm_status=values['nm_status'], + #fs_status=values['fs_status'], + #dns_status=values['dns_status'], + #princeton_comon_dir=values['princeton_comon_dir'], + #princeton_comon_running=values['princeton_comon_running'], + #princeton_comon_procs=values['princeton_comon_procs'], + #plc_node_stats = 
values['plc_node_stats'], + #plc_site_stats = values['plc_site_stats'], + #plc_pcuid = values['plc_pcuid'], + #comon_stats = values['comon_stats'], + #ping_status = values['ping_status'], + #ssh_portused = values['ssh_portused'], + #ssh_status = values['ssh_status'], + #ssh_error = values['ssh_error'], + #observed_status = values['observed_status'], + #observed_category = values['observed_category']) + + #for v in before.keys(): + # if before[v] == after[v]: + # print "SAME FOR KEY %s" % v + # print "%s : %s\t%s" % ( v, before[v], after[v] ) fbrec.flush() fbnodesync.round = global_round fbnodesync.flush() - fbsync.flush() + #fbsync.flush() count += 1 print "%d %s %s" % (count, nodename, values) @@ -312,6 +325,16 @@ def handle_exception(request, result): for i in result: print "Result: %s" % i +def externalprobe(hostname): + try: + (nodename, values) = collectNMAP(hostname, {}) + recordPingAndSSH(None, (nodename, values)) + session.flush() + return True + except: + print traceback.print_exc() + return False + def probe(hostname): try: (nodename, values) = collectPingAndSSH(hostname, {}) @@ -335,7 +358,7 @@ def checkAndRecordState(l_nodes, cohash): node_round = fbnodesync.round fbnodesync.flush() - if node_round < global_round: + if node_round < global_round or config.force: # recreate node stats when refreshed #print "%s" % nodename req = threadpool.WorkRequest(collectPingAndSSH, [nodename, cohash], {}, @@ -378,9 +401,6 @@ def main(): if config.increment: # update global round number to force refreshes across all nodes global_round += 1 - fbsync.round = global_round - - fbsync.flush() cotop = comon.Comon() # lastcotop measures whether cotop is actually running. 
this is a better @@ -417,6 +437,11 @@ def main(): checkAndRecordState(l_nodes, cohash) + if config.increment: + # update global round number to force refreshes across all nodes + fbsync.round = global_round + fbsync.flush() + return 0 @@ -425,13 +450,16 @@ if __name__ == '__main__': parser = parsermodule.getParser(['nodesets']) - parser.set_defaults( increment=False, dbname="findbad", cachenodes=False) + parser.set_defaults( increment=False, dbname="findbad", cachenodes=False, + force=False,) parser.add_option("", "--cachenodes", action="store_true", help="Cache node lookup from PLC") parser.add_option("", "--dbname", dest="dbname", metavar="FILE", help="Specify the name of the database to which the information is saved") parser.add_option("-i", "--increment", action="store_true", dest="increment", help="Increment round number to force refresh or retry") + parser.add_option("", "--force", action="store_true", dest="force", + help="Force probe without incrementing global 'round'.") parser = parsermodule.getParser(['defaults'], parser) diff --git a/findbadpcu.py b/findbadpcu.py index 468107d..0d06d1e 100755 --- a/findbadpcu.py +++ b/findbadpcu.py @@ -20,25 +20,13 @@ from monitor import database from monitor import util from monitor.wrapper import plc, plccache from nodequery import pcu_select +from nodecommon import nmap_port_status plc_lock = threading.Lock() global_round = 1 errorState = {} count = 0 -def nmap_port_status(status): - ps = {} - l_nmap = status.split() - ports = l_nmap[4:] - - continue_probe = False - for port in ports: - results = port.split('/') - ps[results[0]] = results[1] - if results[1] == "open": - continue_probe = True - return (ps, continue_probe) - def get_pcu(pcuname): plc_lock.acquire() try: @@ -176,7 +164,16 @@ def collectPingAndSSH(pcuname, cohash): if b_except or not continue_probe: return (None, None, None) - + #### RUN NMAP ############################### + if continue_probe: + nmap = util.command.CMD() + print "nmap -oG - -P0 
-p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats']) + (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats'])) + # NOTE: an empty / error value for oval, will still work. + (values['port_status'], continue_probe) = nmap_port_status(oval) + else: + values['port_status'] = None + #### COMPLETE ENTRY ####################### values['entry_complete'] = [] @@ -203,7 +200,8 @@ def collectPingAndSSH(pcuname, cohash): # If there are no nodes associated with this PCU, then we cannot continue. if len(values['plc_pcu_stats']['node_ids']) == 0: continue_probe = False - values['entry_complete'] += ['NoNodeIds'] + values['entry_complete'] += ['nodeids'] + #### DNS and IP MATCH ####################### if values['plc_pcu_stats']['hostname'] is not None and values['plc_pcu_stats']['hostname'] is not "" and \ @@ -230,19 +228,11 @@ def collectPingAndSSH(pcuname, cohash): values['plc_pcu_stats']['hostname'] = "No_entry_in_DB" continue_probe = False - #### RUN NMAP ############################### - if continue_probe: - nmap = util.command.CMD() - (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats'])) - # NOTE: an empty / error value for oval, will still work. 
- (values['port_status'], continue_probe) = nmap_port_status(oval) - else: - values['port_status'] = None - ###### DRY RUN ############################ if 'node_ids' in values['plc_pcu_stats'] and len(values['plc_pcu_stats']['node_ids']) > 0: - rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0], values, continue_probe, 1, True) + rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0], + values, 1, True) else: rb_ret = "Not_Run" # No nodes to test" @@ -268,15 +258,15 @@ def recordPingAndSSH(request, result): if values is not None: pcu_id = int(nodename) - fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, - if_new_set={'round': global_round}) - global_round = fbsync.round + #fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, + # if_new_set={'round': global_round}) + #global_round = fbsync.round fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : global_round}) fbrec = FindbadPCURecord( date_checked=datetime.fromtimestamp(values['date_checked']), - round=fbsync.round, + round=global_round, plc_pcuid=pcu_id, plc_pcu_stats=values['plc_pcu_stats'], dns_status=values['dns_status'], @@ -287,7 +277,7 @@ def recordPingAndSSH(request, result): fbnodesync.round = global_round fbnodesync.flush() - fbsync.flush() + #fbsync.flush() fbrec.flush() count += 1 @@ -379,7 +369,7 @@ def main(): l_pcus = [pcu for pcu in sets.Set(pcus)] elif config.nodelist == None and config.pcuid == None: - print "Calling API GetPCUs() : refresh(%s)" % config.refresh + print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls l_pcus = [pcu['pcu_id'] for pcu in l_pcus] elif config.nodelist is not None: l_pcus = util.file.getListFromFile(config.nodelist) @@ -391,11 +381,15 @@ def main(): if config.increment: # update global round number to force refreshes across all nodes global_round += 1 - fbsync.round = global_round - fbsync.flush() checkAndRecordState(l_pcus, cohash) + if config.increment: + # 
update global round number to force refreshes across all nodes + fbsync.round = global_round + fbsync.flush() + session.flush() + return 0 diff --git a/nodebad.py b/nodebad.py index baa016c..f9f6edf 100755 --- a/nodebad.py +++ b/nodebad.py @@ -14,6 +14,7 @@ from monitor import config from monitor.wrapper import plc,plccache from monitor.const import MINUP from monitor.database.info.model import FindbadNodeRecord, HistoryNodeRecord +from monitor.database.dborm import mon_session as session from monitor.model import * @@ -54,6 +55,10 @@ def checkAndRecordState(l_nodes, l_plcnodes): print traceback.print_exc() continue + if not noderec: + print "none object for %s"% nodename + continue + node_state = noderec.observed_status if noderec.plc_node_stats: boot_state = noderec.plc_node_stats['boot_state'] @@ -80,6 +85,7 @@ def checkAndRecordState(l_nodes, l_plcnodes): # replace with another operations that also commits all pending ops, such # as session.commit() or flush() or something print HistoryNodeRecord.query.count() + session.flush() return True diff --git a/nodecommon.py b/nodecommon.py index 082550b..051cd61 100644 --- a/nodecommon.py +++ b/nodecommon.py @@ -122,6 +122,20 @@ def getvalue(fb, path): return None return values +def nmap_port_status(status): + ps = {} + l_nmap = status.split() + ports = l_nmap[4:] + + continue_probe = False + for port in ports: + results = port.split('/') + ps[results[0]] = results[1] + if results[1] == "open": + continue_probe = True + return (ps, continue_probe) + + def nodegroup_display(node, fbdata, conf=None): node['current'] = get_current_state(fbdata) diff --git a/nodequery.py b/nodequery.py index 48a5f73..bcebf15 100755 --- a/nodequery.py +++ b/nodequery.py @@ -256,7 +256,7 @@ def query_to_dict(query): def pcu_in(fbdata): #if 'plcnode' in fbdata: if 'plc_node_stats' in fbdata: - if 'pcu_ids' in fbdata['plc_node_stats']: + if fbdata['plc_node_stats'] and 'pcu_ids' in fbdata['plc_node_stats']: if 
len(fbdata['plc_node_stats']['pcu_ids']) > 0: return True return False @@ -275,19 +275,28 @@ def pcu_select(str_query, nodelist=None): dict_query = query_to_dict(str_query) print "dict_query", dict_query - - for noderec in fbquery: - if nodelist is not None: - if noderec.hostname not in nodelist: continue - - fb_nodeinfo = noderec.to_dict() - if pcu_in(fb_nodeinfo): - pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=get(fb_nodeinfo, 'plc_node_stats.pcu_ids')[0]).first() - if pcurec: - pcuinfo = pcurec.to_dict() - if verify(dict_query, pcuinfo): - nodenames.append(noderec.hostname) - pcunames.append(pcuinfo['plc_pcuid']) + print 'length %s' % len(fbpcuquery.all()) + + for pcurec in fbpcuquery: + pcuinfo = pcurec.to_dict() + if verify(dict_query, pcuinfo): + #nodenames.append(noderec.hostname) + #print 'appending %s' % pcuinfo['plc_pcuid'] + pcunames.append(pcuinfo['plc_pcuid']) + + #for noderec in fbquery: + # if nodelist is not None: + # if noderec.hostname not in nodelist: continue +# +# fb_nodeinfo = noderec.to_dict() +# if pcu_in(fb_nodeinfo): +# pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=get(fb_nodeinfo, +# 'plc_node_stats.pcu_ids')[0]).first() +# if pcurec: +# pcuinfo = pcurec.to_dict() +# if verify(dict_query, pcuinfo): +# nodenames.append(noderec.hostname) +# pcunames.append(pcuinfo['plc_pcuid']) return (nodenames, pcunames) def node_select(str_query, nodelist=None, fb=None): diff --git a/pcubad.py b/pcubad.py index b31599f..6ca478f 100755 --- a/pcubad.py +++ b/pcubad.py @@ -11,6 +11,7 @@ from pcucontrol import reboot from monitor import parser as parsermodule from monitor import config from monitor.database.info.model import HistoryPCURecord, FindbadPCURecord +from monitor.database.dborm import mon_session as session from monitor.wrapper import plc,plccache from monitor.const import MINUP @@ -93,6 +94,7 @@ def checkAndRecordState(l_pcus, l_plcpcus): # replace with another operations that also commits all pending ops, such # as session.commit() or 
flush() or something print HistoryPCURecord.query.count() + session.flush() return True diff --git a/pcucontrol/models/racadm.py b/pcucontrol/models/racadm.py index 8dec875..f4e69dc 100755 --- a/pcucontrol/models/racadm.py +++ b/pcucontrol/models/racadm.py @@ -100,7 +100,9 @@ def racadm_reboot(host, username, password, dryrun, state="powercycle"): from optparse import OptionParser parser = OptionParser() -parser.set_defaults(ip="", user="", password="", state="powercycle") +parser.set_defaults(ip="", user="", password="", dryrun=False, state="powercycle") +parser.add_option("-d", "", dest="dryrun", action="store_true", + help="enable dryrun tests. no action is taken") parser.add_option("-r", "", dest="ip", metavar="nodename.edu", help="A single node name to add to the nodegroup") parser.add_option("-u", "", dest="user", metavar="username", @@ -117,6 +119,6 @@ if __name__ == '__main__': options.user is not "" and \ options.password is not "": - racadm_reboot(options.ip, options.user, options.password, False, options.state) + racadm_reboot(options.ip, options.user, options.password, options.dryrun, options.state) else: parser.print_help() diff --git a/pcucontrol/reboot.py b/pcucontrol/reboot.py index 04fe4da..decaf1d 100755 --- a/pcucontrol/reboot.py +++ b/pcucontrol/reboot.py @@ -120,6 +120,7 @@ class Transport: HTTP = 3 HTTPS = 4 IPAL = 5 + DRAC = 6 TELNET_TIMEOUT = 120 @@ -231,7 +232,10 @@ class PCUControl(Transport,PCUModel,PCURecord): PCURecord.__init__(self, plc_pcu_record) type = None if self.port_status: - if '22' in supported_ports and self.port_status['22'] == "open": + # NOTE: prefer racadm port over ssh + if '5869' in supported_ports and self.port_status['5869'] == "open": + type = Transport.DRAC# DRAC cards user this port. 
+ elif '22' in supported_ports and self.port_status['22'] == "open": type = Transport.SSH elif '23' in supported_ports and self.port_status['23'] == "open": type = Transport.TELNET @@ -240,9 +244,6 @@ class PCUControl(Transport,PCUModel,PCURecord): type = Transport.HTTPS elif '80' in supported_ports and self.port_status['80'] == "open": type = Transport.HTTP - elif '5869' in supported_ports and self.port_status['5869'] == "open": - # For DRAC cards. Racadm opens this port. - type = Transport.HTTP elif '9100' in supported_ports and self.port_status['9100'] == "open": type = Transport.IPAL elif '16992' in supported_ports and self.port_status['16992'] == "open": @@ -343,7 +344,13 @@ class IPAL(PCUControl): def run(self, node_port, dryrun): if self.type == Transport.IPAL: - return self.run_ipal(node_port, dryrun) + ret = self.run_ipal(node_port, dryrun) + if ret != 0: + ret2 = self.run_telnet(node_port, dryrun) + if ret2 != 0: + return ret + return ret2 + return ret elif self.type == Transport.TELNET: return self.run_telnet(node_port, dryrun) else: @@ -636,10 +643,53 @@ class IntelAMT(PCUControl): return cmd.system(cmd_str, self.TELNET_TIMEOUT) class DRAC(PCUControl): + supported_ports = [22,443,5869] def run(self, node_port, dryrun): + if self.type == Transport.DRAC: + print "trying racadm_reboot..." + return racadm_reboot(self.host, self.username, self.password, node_port, dryrun) + elif self.type == Transport.SSH: + return self.run_ssh(node_port, dryrun) + else: + raise ExceptionNoTransport("No implementation for open ports") - print "trying racadm_reboot..." 
- racadm_reboot(self.host, self.username, self.password, node_port, dryrun) + def run_ssh(self, node_port, dryrun): + ssh_options="-o StrictHostKeyChecking=no "+\ + "-o PasswordAuthentication=yes "+\ + "-o PubkeyAuthentication=no" + s = pxssh.pxssh() + if not s.login(self.host, self.username, self.password, ssh_options, + original_prompts="Dell", login_timeout=TELNET_TIMEOUT): + raise ExceptionPassword("Invalid Password") + + print "logging in..." + s.send("\r\n\r\n") + try: + # Testing Reboot ? + #index = s.expect(["DRAC 5", "[%s]#" % self.username ]) + # NOTE: be careful to escape any characters used by 're.compile' + index = s.expect(["\$", "\[%s\]#" % self.username ]) + print "INDEX:", index + if dryrun: + if index == 0: + s.send("racadm getsysinfo") + elif index == 1: + s.send("getsysinfo") + else: + if index == 0: + s.send("racadm serveraction powercycle") + elif index == 1: + s.send("serveraction powercycle") + + s.send("exit") + + except pexpect.EOF: + raise ExceptionPrompt("EOF before expected Prompt") + except pexpect.TIMEOUT: + print s + raise ExceptionPrompt("Timeout before expected Prompt") + + s.close() return 0 @@ -1080,7 +1130,7 @@ class ePowerSwitchOld(PCUControl): return 0 class ManualPCU(PCUControl): - supported_ports = [22,23,80,443,9100,16992] + supported_ports = [22,23,80,443] def run(self, node_port, dryrun): if not dryrun: @@ -1291,7 +1341,7 @@ def racadm_reboot(host, username, password, port, dryrun): logger.debug("runcmd raised exception %s" % err) if verbose: logger.debug(err) - return -1 + return err def pcu_name(pcu): if pcu['hostname'] is not None and pcu['hostname'] is not "": @@ -1372,6 +1422,8 @@ class Unknown(PCUControl): supported_ports = [22,23,80,443,5869,9100,16992] def model_to_object(modelname): + if modelname is None: + return ManualPCU if "AMT" in modelname: return IntelAMT elif "BayTech" in modelname: diff --git a/web/MonitorWeb/monitorweb/controllers.py b/web/MonitorWeb/monitorweb/controllers.py index e5d0da2..a3e3021 
100644 --- a/web/MonitorWeb/monitorweb/controllers.py +++ b/web/MonitorWeb/monitorweb/controllers.py @@ -1,5 +1,6 @@ import turbogears as tg from turbogears import controllers, expose, flash, exception_handler +from turbogears import widgets from cherrypy import request, response import cherrypy # from monitorweb import model @@ -20,28 +21,52 @@ from monitorweb.templates.links import * import findbad -def format_ports(pcu): + +def query_to_dict(query): + """ take a url query string and chop it up """ + val = {} + query_fields = query.split('&') + for f in query_fields: + (k,v) = urllib.splitvalue(f) + val[k] = v + + return val + +def format_ports(data, pcumodel=None): retval = [] - if pcu.port_status and len(pcu.port_status.keys()) > 0 : - obj = reboot.model_to_object(pcu.plc_pcu_stats['model']) - for port in obj.supported_ports: + filtered_length=0 + + if pcumodel: + supported_ports=reboot.model_to_object(pcumodel).supported_ports + else: + # ports of a production node + supported_ports=[22,80,806] + + if data and len(data.keys()) > 0 : + for port in supported_ports: try: - state = pcu.port_status[str(port)] + state = data[str(port)] except: state = "unknown" + + if state == "filtered": + filtered_length += 1 retval.append( (port, state) ) if retval == []: retval = [( "Closed/Filtered", "state" )] + if filtered_length == len(supported_ports): + retval = [( "All Filtered", "state" )] + return retval def format_pcu_shortstatus(pcu): status = "error" if pcu: if pcu.reboot_trial_status == str(0): - status = "ok" + status = "Ok" elif pcu.reboot_trial_status == "NetDown" or pcu.reboot_trial_status == "Not_Run": status = pcu.reboot_trial_status else: @@ -56,21 +81,44 @@ def prep_pcu_for_display(pcu): except: pcu.loginbase = "unknown" - pcu.ports = format_ports(pcu) + pcu.ports = format_ports(pcu.port_status, pcu.plc_pcu_stats['model']) pcu.status = format_pcu_shortstatus(pcu) + #print pcu.entry_complete + pcu.entry_complete_str = pcu.entry_complete + 
#pcu.entry_complete_str += "".join([ f[0] for f in pcu.entry_complete.split() ]) + if pcu.dns_status == "NOHOSTNAME": + pcu.dns_short_status = 'NoHost' + elif pcu.dns_status == "DNS-OK": + pcu.dns_short_status = 'Ok' + elif pcu.dns_status == "DNS-NOENTRY": + pcu.dns_short_status = 'NoEntry' + elif pcu.dns_status == "NO-DNS-OR-IP": + pcu.dns_short_status = 'NoHostOrIP' + elif pcu.dns_status == "DNS-MISMATCH": + pcu.dns_short_status = 'Mismatch' + +class NodeWidget(widgets.Widget): + pass + def prep_node_for_display(node): if node.plc_pcuid: pcu = FindbadPCURecord.get_latest_by(plc_pcuid=node.plc_pcuid).first() if pcu: node.pcu_status = pcu.reboot_trial_status + node.pcu_short_status = format_pcu_shortstatus(pcu) + node.pcu = pcu + prep_pcu_for_display(node.pcu) else: + node.pcu_short_status = "none" node.pcu_status = "nodata" - node.pcu_short_status = format_pcu_shortstatus(pcu) + node.pcu = None else: node.pcu_status = "nopcu" node.pcu_short_status = "none" + node.pcu = None + if node.kernel_version: node.kernel = node.kernel_version.split()[2] @@ -81,7 +129,20 @@ def prep_node_for_display(node): node.loginbase = site_id2lb[node.plc_node_stats['site_id']] except: node.loginbase = "unknown" - + + if node.loginbase: + node.site = HistorySiteRecord.by_loginbase(node.loginbase) + + node.history = HistoryNodeRecord.by_hostname(node.hostname) + + if node.port_status: + node.ports = format_ports(node.port_status) + try: + exists = node.plc_node_stats['last_contact'] + except: + node.plc_node_stats = {'last_contact' : None} + + class Root(controllers.RootController): @expose(template="monitorweb.templates.welcome") @@ -91,7 +152,7 @@ class Root(controllers.RootController): flash("Your application is now running") return dict(now=time.ctime()) - @expose(template="monitorweb.templates.nodeview") + @expose(template="monitorweb.templates.pcuview") def nodeview(self, hostname=None): nodequery=[] if hostname: @@ -100,7 +161,7 @@ class Root(controllers.RootController): 
prep_node_for_display(node) nodequery += [node] - return dict(nodequery=nodequery) + return self.pcuview(None, hostname) # dict(nodequery=nodequery) @expose(template="monitorweb.templates.nodelist") def node(self, filter='BOOT'): @@ -116,7 +177,7 @@ class Root(controllers.RootController): if node.observed_status != 'DOWN': filtercount[node.observed_status] += 1 else: - if node.plc_node_stats['last_contact'] != None: + if node.plc_node_stats and node.plc_node_stats['last_contact'] != None: filtercount[node.observed_status] += 1 else: filtercount['neverboot'] += 1 @@ -129,7 +190,7 @@ class Root(controllers.RootController): else: query.append(node) elif filter == "neverboot": - if node.plc_node_stats['last_contact'] == None: + if not node.plc_node_stats or node.plc_node_stats['last_contact'] == None: query.append(node) elif filter == "pending": # TODO: look in message logs... @@ -137,15 +198,22 @@ class Root(controllers.RootController): elif filter == "all": query.append(node) - return dict(now=time.ctime(), query=query, fc=filtercount) + widget = NodeWidget(template='monitorweb.templates.node_template') + return dict(now=time.ctime(), query=query, fc=filtercount, nodewidget=widget) def nodeaction_handler(self, tg_exceptions=None): """Handle any kind of error.""" refurl = request.headers.get("Referer",link("pcu")) print refurl + # TODO: do this more intelligently... 
- if len(urllib.splitquery(refurl)) > 1: - pcuid = urllib.splitvalue(urllib.splitquery(refurl)[1])[1] + uri_fields = urllib.splitquery(refurl) + if uri_fields[1] is not None: + val = query_to_dict(uri_fields[1]) + if 'pcuid' in val: + pcuid = val['pcuid'] + elif 'hostname' in val: + pcuid = FindbadNodeRecord.get_latest_by(hostname=val['hostname']).first().plc_pcuid else: pcuid=None @@ -155,7 +223,6 @@ class Root(controllers.RootController): print pcuid return self.pcuview(pcuid, **dict(exceptions=tg_exceptions)) - #return dict(pcuquery=[], nodequery=[], exceptions=tg_exceptions) def nodeaction(self, **data): for item in data.keys(): @@ -167,8 +234,11 @@ class Root(controllers.RootController): flash("No hostname given in submitted data") return - if 'submit' in data: - action = data['submit'] + if 'submit' in data or 'type' in data: + try: + action = data['submit'] + except: + action = data['type'] else: flash("No submit action given in submitted data") return @@ -178,43 +248,86 @@ class Root(controllers.RootController): ret = reboot.reboot_str(str(hostname)) print ret if ret: raise RuntimeError("Error using PCU: " + ret) + flash("Reboot appeared to work. All at most 5 minutes. 
Run ExternalScan to check current status.") - elif action == "ExternalProbe": - raise RuntimeError("THIS IS A PROBLEM") - - elif action == "DeepProbe": + elif action == "ExternalScan": + findbad.externalprobe(str(hostname)) + flash("External Scan Successful!") + elif action == "InternalScan": findbad.probe(str(hostname)) + flash("Internal Scan Successful!") else: # unknown action - flash("Unknown action given") + raise RuntimeError("Unknown action given") return # TODO: add form validation @expose(template="monitorweb.templates.pcuview") @exception_handler(nodeaction_handler,"isinstance(tg_exceptions,RuntimeError)") - def pcuview(self, pcuid=None, **data): + def pcuview(self, loginbase=None, pcuid=None, hostname=None, **data): + sitequery=[] pcuquery=[] nodequery=[] - if 'submit' in data.keys(): + exceptions = None + + for key in data: + print key, data[key] + + if 'submit' in data.keys() or 'type' in data.keys(): + if hostname: data['hostname'] = hostname self.nodeaction(**data) if 'exceptions' in data: exceptions = data['exceptions'] - else: - exceptions = None - if pcuid: + if loginbase: + sitequery = [HistorySiteRecord.by_loginbase(loginbase)] + pcus = {} + for plcnode in site_lb2hn[loginbase]: + for node in FindbadNodeRecord.get_latest_by(hostname=plcnode['hostname']): + # NOTE: reformat some fields. 
+ prep_node_for_display(node) + nodequery += [node] + if node.plc_pcuid: # not None + pcu = FindbadPCURecord.get_latest_by(plc_pcuid=node.plc_pcuid).first() + prep_pcu_for_display(pcu) + pcus[node.plc_pcuid] = pcu + + for pcuid_key in pcus: + pcuquery += [pcus[pcuid_key]] + + if pcuid and hostname is None: + print "pcuid: %s" % pcuid for pcu in FindbadPCURecord.get_latest_by(plc_pcuid=pcuid): # NOTE: count filter prep_pcu_for_display(pcu) pcuquery += [pcu] - for nodename in pcu.plc_pcu_stats['nodenames']: - print "query for %s" % nodename - node = FindbadNodeRecord.get_latest_by(hostname=nodename).first() - print "%s" % node - if node: - prep_node_for_display(node) - nodequery += [node] - return dict(pcuquery=pcuquery, nodequery=nodequery, exceptions=exceptions) + if 'site_id' in pcu.plc_pcu_stats: + sitequery = [HistorySiteRecord.by_loginbase(pcu.loginbase)] + + if 'nodenames' in pcu.plc_pcu_stats: + for nodename in pcu.plc_pcu_stats['nodenames']: + print "query for %s" % nodename + q = FindbadNodeRecord.get_latest_by(hostname=nodename) + node = q.first() + print "%s" % node.port_status + print "%s" % node.to_dict() + print "%s" % len(q.all()) + if node: + prep_node_for_display(node) + nodequery += [node] + + if hostname and pcuid is None: + for node in FindbadNodeRecord.get_latest_by(hostname=hostname): + # NOTE: reformat some fields. 
+ prep_node_for_display(node) + sitequery = [node.site] + nodequery += [node] + if node.plc_pcuid: # not None + pcu = FindbadPCURecord.get_latest_by(plc_pcuid=node.plc_pcuid).first() + prep_pcu_for_display(pcu) + pcuquery += [pcu] + + return dict(sitequery=sitequery, pcuquery=pcuquery, nodequery=nodequery, exceptions=exceptions) @expose(template="monitorweb.templates.pculist") def pcu(self, filter='all'): diff --git a/web/MonitorWeb/monitorweb/static/css/style.css b/web/MonitorWeb/monitorweb/static/css/style.css index 7bb4078..df07184 100644 --- a/web/MonitorWeb/monitorweb/static/css/style.css +++ b/web/MonitorWeb/monitorweb/static/css/style.css @@ -57,12 +57,18 @@ a.info span{display: none} a.info:hover span{ /*the span will display just on :hover state*/ display:block; position:absolute; - top:2em; left:2em; width:15em; + top:1em; left:-7em; width: 100%; border:1px solid #AAA; color:#DDD; background-color:black; text-align: center} +div#legend a:hover span {display: block; + float: left; width: 30em; + padding: 5px; margin: 5px; z-index: 100; + color: #333; background: white; + font: 10px Verdana, sans-serif; text-align: left;} + div#links a:hover span {display: block; /*position: absolute; top: 200px; left: 0; width: 125px;*/ /*position: relative; top: 0px; left: 40; width: 30em;*/ @@ -84,14 +90,14 @@ a.right { float: right; } #portfiltered { background-color: gold; } #dns-DNS-OK { background-color: lightgreen; } -#dns-NOHOSTNAME { background-color: white; } +/*#dns-NOHOSTNAME { background-color: white; }*/ #dns-DNS-MISMATCH { background-color: gold; } #dns-DNS-NOENTRY { background-color: indianred; } #dns-NO-DNS-OR-IP { background-color: indianred; } #status-NetDown { background-color: lightgrey; } #status-Not_Run { background-color: lightgrey; } -#status-ok { background-color: darkseagreen; } +#status-Ok { background-color: darkseagreen; } #status-0 { background-color: darkseagreen; } #status-error { background-color: indianred; } #status-none { 
background-color: white; } @@ -196,8 +202,6 @@ h2 { span.code { font-size: 120%; /*font-weight: bold;*/ - margin: 20 20 20 20; - padding: 20 20 20 20; } #status_block { diff --git a/web/MonitorWeb/monitorweb/templates/actionlist.kid b/web/MonitorWeb/monitorweb/templates/actionlist.kid index 843906a..eb79269 100644 --- a/web/MonitorWeb/monitorweb/templates/actionlist.kid +++ b/web/MonitorWeb/monitorweb/templates/actionlist.kid @@ -4,6 +4,7 @@ layout_params['page_title'] = "Monitor Node View" from monitor.util import diff_time from monitor import config from time import mktime +from links import * def zabbix_event_ack_link(eventid): return "http://" + config.MONITOR_HOSTNAME + "/zabbix/acknow.php?eventid=" + str(eventid) @@ -40,7 +41,7 @@ def zabbix_event_ack_link(eventid): <tbody> <tr py:for="i,node in enumerate(query)" class="${i%2 and 'odd' or 'even'}" > <td></td> - <td><a href="siteview?loginbase=${node[0]}">${node[0]}</a></td> + <td><a href="${link('pcuview', loginbase=node[0])}">${node[0]}</a></td> <td nowrap="true" py:content="node[1]"></td> <td nowrap='true' id="severity-${node[3]}" py:content="node[2]"></td> <td nowrap='true' py:content="diff_time(int(node[4]))"></td> diff --git a/web/MonitorWeb/monitorweb/templates/nodelist.kid b/web/MonitorWeb/monitorweb/templates/nodelist.kid index 669f02f..5b4e7c3 100644 --- a/web/MonitorWeb/monitorweb/templates/nodelist.kid +++ b/web/MonitorWeb/monitorweb/templates/nodelist.kid @@ -28,26 +28,13 @@ from links import * <thead> <tr> <th mochi:format="int"></th> - <th mochi:format="str">Site</th> - <th>Hostname</th> - <th>ping</th> - <!--th>ssh</th--> - <th>pcu</th> - <th>status</th> - <th>kernel</th> - <th>last_contact</th> + ${nodewidget.display(node=None, header=True)} </tr> </thead> <tbody> <tr py:for="i,node in enumerate(query)" class="${i%2 and 'odd' or 'even'}" > <td></td> - <td><a href="${link('siteview', loginbase=node.loginbase)}">${node.loginbase}</a></td> - <td nowrap="true"><a target="_top" 
href="${link('nodeview', hostname=node.hostname)}" py:content="node.hostname"></a></td> - <td py:content="node.ping_status"></td> - <td id="status-${node.pcu_short_status}" py:content="node.pcu_short_status"></td> - <td py:content="node.observed_status"></td> - <td nowrap="true" py:content="node.kernel"></td> - <td py:content="diff_time(node.plc_node_stats['last_contact'])"></td> + ${nodewidget.display(node=node, header=None)} </tr> </tbody> </table> diff --git a/web/MonitorWeb/monitorweb/templates/nodeview.kid b/web/MonitorWeb/monitorweb/templates/nodeview.kid index 354761c..dc2820e 100644 --- a/web/MonitorWeb/monitorweb/templates/nodeview.kid +++ b/web/MonitorWeb/monitorweb/templates/nodeview.kid @@ -2,6 +2,8 @@ <?python layout_params['page_title'] = "Monitor Node View" from monitor.util import diff_time +from time import mktime +from pcucontrol.reboot import pcu_name, model_to_object from links import * ?> <html py:layout="'sitemenu.kid'" @@ -18,8 +20,8 @@ from links import * <th>Hostname</th> <th>ping</th> <!--th>ssh</th--> - <th>pcu</th> <th>kernel</th> + <th>last_change</th> <th>last_contact</th> </tr> </thead> @@ -34,16 +36,47 @@ from links import * <span class="icon">${node.hostname}</span></a> </td> <td py:content="node.ping_status"></td> - <td py:if="node.pcu_short_status != 'none'" id="status-${node.pcu_short_status}"> - <a href="${link('pcuview', pcuid=node.plc_node_stats['pcu_ids'])}">${node.pcu_short_status}</a></td> - <td py:if="node.pcu_short_status == 'none'" id="status-${node.pcu_short_status}"> - ${node.pcu_short_status}</td> <td nowrap="true" py:content="node.kernel"></td> + <td py:content="diff_time(mktime(node.history.last_changed.timetuple()))"></td> <td py:content="diff_time(node.plc_node_stats['last_contact'])"></td> </tr> </tbody> </table> - <h3 py:if="node.pcu_short_status != 'none'">PCU Status</h3> + <h3 py:if="node.pcu is not None">Controlling PCU</h3> + <table py:if="node.pcu is not None" id="sortable_table" class="datagrid" border="1" 
width="100%"> + <thead> + <tr> + <th mochi:format="int"></th> + <th>PCU Name</th> + <th>Model</th> + <th width="80%">Test Results</th> + </tr> + </thead> + <tbody> + <?python pcu = node.pcu ?> + <tr> + <td></td> + <td nowrap="true" > + <a class="ext-link" href="${plc_pcu_uri_id(pcu.plc_pcu_stats['pcu_id'])}"> + <span class="icon">${pcu_name(pcu.plc_pcu_stats)}</span> + </a> + </td> + <td py:content="pcu.plc_pcu_stats['model']"></td> + <td width="20%" nowrap='true' align='center' id="status-${node.pcu_short_status}"> + <div id="links"> + <a class="info" py:if="'error' in node.pcu_short_status" + href="${link('pcuview', pcuid=node.plc_pcuid)}"> + Error<span><pre>${node.pcu.reboot_trial_status}</pre></span></a> + <a py:if="'error' not in node.pcu_short_status and 'none' not in node.pcu_short_status" + href="${link('pcuview', pcuid=node.plc_pcuid)}" + py:content="node.pcu_short_status">Reboot Status</a> + <span py:if="'none' in node.pcu_short_status" + py:content="node.pcu_short_status">Reboot Status</span> + </div> + </td> + </tr> + </tbody> + </table> <h3>Actions Taken</h3> </div> diff --git a/web/MonitorWeb/monitorweb/templates/pculist.kid b/web/MonitorWeb/monitorweb/templates/pculist.kid index 99ad41a..f6043dd 100644 --- a/web/MonitorWeb/monitorweb/templates/pculist.kid +++ b/web/MonitorWeb/monitorweb/templates/pculist.kid @@ -29,8 +29,7 @@ from links import * <th mochi:format="int"></th> <th mochi:format="str">Site</th> <th>PCU Name</th> - <th>Missing Fields</th> - <th>DNS Status</th> + <th>Config</th> <th nowrap='true' >Port Status</th> <th nowrap='true' width="80%">Test Results</th> <th>Model</th> @@ -40,7 +39,13 @@ from links import * <tbody> <tr py:for="i,node in enumerate(query)" class="${i%2 and 'odd' or 'even'}" > <td></td> - <td><a href="${link('siteview', loginbase=node.loginbase)}">${node.loginbase}</a></td> + <td nowrap='true'> + <div class='oneline'> + <a class='left' href="${link('pcuview', loginbase=node.loginbase)}">${node.loginbase}</a> + <a 
class='right' href="${plc_site_uri(node.loginbase)}"> + <img style='display: inline' border='0' src="static/images/extlink.gif" align='right'/></a> + </div> + </td> <td nowrap='true'> <div class='oneline'> <a class='left' href="${link('pcuview', pcuid=node.plc_pcuid)}">${pcu_name(node.plc_pcu_stats)}</a> @@ -48,9 +53,8 @@ from links import * <img style='display: inline' border='0' src="static/images/extlink.gif" align='right'/></a> </div> </td> - <td py:content="node.entry_complete"></td> - <td id="dns-${node.dns_status}" py:content="node.dns_status"></td> - <td> + <td py:content="node.entry_complete_str"></td> + <td nowrap='true'> <span py:for="port,state in node.ports" id="port${state}" py:content="'%s, ' % port">80</span> </td> @@ -58,7 +62,7 @@ from links import * <div id="links"> <a class="info" py:if="'error' in node.status" href="${link('pcuview', pcuid=node.plc_pcuid)}"> - Error Message<span><pre>${node.reboot_trial_status}</pre></span></a> + Error<span><pre>${node.reboot_trial_status}</pre></span></a> <a py:if="'error' not in node.status" href="${link('pcuview', pcuid=node.plc_pcuid)}" py:content="node.status">Reboot Status</a> diff --git a/web/MonitorWeb/monitorweb/templates/pcuview.kid b/web/MonitorWeb/monitorweb/templates/pcuview.kid index 4eed424..5bf82b8 100644 --- a/web/MonitorWeb/monitorweb/templates/pcuview.kid +++ b/web/MonitorWeb/monitorweb/templates/pcuview.kid @@ -12,16 +12,40 @@ from links import * xmlns:mochi="http://www.mochi.org"> <div py:match="item.tag == 'content'"> - <h3>PCU Status</h3> - <table id="sortable_table" class="datagrid" border="1" width="100%"> + <h3 py:if="len(sitequery) > 0">Site Status</h3> + <table py:if="len(sitequery) > 0" id="sub-table" border="1" width="100%"> + <thead> + <tr> + <th>Site name</th> + <th>Enabled</th> + <th>Penalty</th> + <th>Slices/Max</th> + <th>Nodes/Total</th> + <th>Status</th> + </tr> + </thead> + <tbody> + <tr py:for="i,site in enumerate(sitequery)" class="${i%2 and 'odd' or 'even'}" > + <td 
nowrap="true"><a class="ext-link" href="${plc_site_uri(site.loginbase)}"> + <span class="icon">${site.loginbase}</span></a> + </td> + <td py:content="site.enabled"></td> + <td>n/a</td> + <td>${site.slices_used}/${site.slices_total}</td> + <td>${site.nodes_up} / ${site.nodes_total}</td> + <td id="site-${site.status}" py:content="diff_time(mktime(site.last_changed.timetuple()))"></td> + </tr> + </tbody> + </table> + <h3 py:if="len(pcuquery) != 0" >PCU Status</h3> + <table py:if="len(pcuquery) != 0" id="sortable_table" class="datagrid" border="1" width="100%"> <thead> <tr> <th mochi:format="int"></th> - <th mochi:format="str">Site</th> <th>PCU Name</th> <th>Missing Fields</th> <th>DNS Status</th> - <th>Port Status</th> + <th nowrap='true'>Port Status</th> <th width="80%">Test Results</th> <th>Model</th> <th>Nodes</th> @@ -30,10 +54,6 @@ from links import * <tbody> <tr py:for="i,pcu in enumerate(pcuquery)" class="${i%2 and 'odd' or 'even'}" > <td></td> - <td><a class="ext-link" href="${plc_site_uri_id(pcu.plc_pcu_stats['site_id'])}"> - <span class="icon">${pcu.loginbase}</span> - </a> - </td> <td nowrap="true" > <a class="ext-link" href="${plc_pcu_uri_id(pcu.plc_pcu_stats['pcu_id'])}"> <span class="icon">${pcu_name(pcu.plc_pcu_stats)}</span> @@ -41,62 +61,91 @@ from links import * </td> <td py:content="pcu.entry_complete"></td> <td id="dns-${pcu.dns_status}" py:content="pcu.dns_status"></td> - <td> + <td nowrap='true'> <span py:for="port,state in pcu.ports" id="port${state}" py:content="'%s, ' % port">80</span> </td> - <td width="40" id="status-${pcu.status}"><pre py:content="pcu.reboot_trial_status"></pre></td> + <td width="40" id="status-${pcu.status}"><pre class="results" py:content="pcu.reboot_trial_status"></pre></td> <td py:content="pcu.plc_pcu_stats['model']"></td> <td py:content="len(pcu.plc_pcu_stats['node_ids'])"></td> </tr> </tbody> </table> - <h4>Convenience Calls</h4> - <?python - if len(pcuquery) == 0: pcu = None - ?> - <div py:if="pcu is not None" 
class="code"> - <span py:for="port,state in pcu.ports"> - <span class="code" py:if="port == 22 and state == 'open'"> - ssh -o PasswordAuthentication=yes -o PubkeyAuthentication=no - ${pcu.plc_pcu_stats['username']}@${pcu_name(pcu.plc_pcu_stats)} - </span> - <span class="code" py:if="port == 23 and state == 'open'"> - telnet ${pcu_name(pcu.plc_pcu_stats)} - </span> - <span class="code" py:if="port == 80 and state == 'open'"> - <a href="http://${pcu_name(pcu.plc_pcu_stats)}">http://${pcu_name(pcu.plc_pcu_stats)}</a> - </span> - <span class="code" py:if="port == 443 and state == 'open'"> - <a href="https://${pcu_name(pcu.plc_pcu_stats)}">https://${pcu_name(pcu.plc_pcu_stats)}</a> - <br/> - /usr/share/monitor/racadm.py -r ${pcu.plc_pcu_stats['ip']} - -u ${pcu.plc_pcu_stats['username']} -p '${pcu.plc_pcu_stats['password']}' - <br/> - /usr/share/monitor/pcucontrol/models/hpilo/locfg.pl - -f /usr/share/monitor/pcucontrol/models/hpilo/iloxml/Reset_Server.xml - -s ${pcu_name(pcu.plc_pcu_stats)} - -u ${pcu.plc_pcu_stats['username']} - -p '${pcu.plc_pcu_stats['password']} ' | grep MESSAGE" - </span> - <span class="code" py:if="port == 16992 and state == 'open'"> - /usr/share/monitor/pcucontrol/models/intelamt/remoteControl -A - -verbose 'http://${pcu_name(pcu.plc_pcu_stats)}:16992/RemoteControlService' - -user admin -pass '${pcu.plc_pcu_stats['password']}' - </span> - </span> - </div> - <h3>Controls</h3> - <table id="sortable_table" class="datagrid" border="1" width="100%"> + <div class="oneline" id="legend" py:if="len(pcuquery) == 0"> + <em>There no PCUs associated with this host.</em> + </div> + <div class="oneline" id="legend" py:if="len(pcuquery) > 0"> + <em>Legend: </em> + <a class="info" href="#">DNS Status<span> + <table border="1" align="center" width="100%"> + <tr><th colspan="2">Legend for 'DNS Status'</th></tr> + + <tr><td id="dns-DNS-OK">DNS-OK</td> + <td>This indicates that the DNS name and registered IP address match.</td> + </tr> + <tr><td 
id="dns-DNS-MISMATCH">DNS-MISMATCH</td> + <td>Sometimes, the registered IP and DNS IP address do not match. + In these cases it is not clear which is correct, + so an error is flagged.</td> + </tr> + <tr><td id="dns-DNS-NOENTRY">DNS-NOENTRY</td> + <td>While a hostname is provided in the registration, the hostname is not actually registered in DNS.</td> + </tr> + <tr><td id="dns-NOHOSTNAME">NOHOSTNAME</td> + <td>While we prefer that a hostname be registered, it is not + strictly required, since simply the IP address, if it is static, is enough to access the PCU.</td> + </tr> + </table> + </span> </a> + <a class="info" href="#">Port Status<span> + <table border="1" align="center" width="100%"> + <tr><th colspan="2">Legend for 'Port Status'</th></tr> + + <tr><td id="portopen">Open</td> + <td>Green port numbers are believed to be open.</td> + </tr> + <tr><td id="portfiltered">Filtered</td> + <td>Gold port numbers are believed to be filtered or simply offline.</td> + </tr> + <tr><td id="portclosed">Closed</td> + <td>Finally, red ports appear to be closed.</td> + </tr> + </table> + </span> </a> + <a class="info" href="#">Test Results<span> + <table border="1" align="center" width="100%"> + <tr><th colspan="2">Legend for 'Test Results'</th></tr> + + <tr><td id="status-0">OK</td> + <td>The PCU is accessible, and short of actually rebooting the node, everything appears to work.</td> + </tr> + <tr><td id="status-NetDown">NetDown</td> + <td>The PCU is inaccessible from the PlanetLab address block 128.112.139.0/25, or it is simply offline.</td> + </tr> + <tr><td id="status-Not_Run">Not_Run</td> + <td>Previous errors, such as DNS or an incomplete configuration prevented the actual test from begin performed.</td> + </tr> + <tr><td id="status-error">Other Errors</td> + <td>Other errors are reported by the test that are more specific to the block encountered by the script.</td> + </tr> + </table> + </span> </a> + </div> + <h3>Nodes</h3> + <p py:if="len(nodequery) == 0"> + There 
are no registered nodes for this site. + </p> + <table py:if="len(nodequery) > 0" id="sortable_table" class="datagrid" border="1" width="100%"> <thead> <tr> <th mochi:format="int"></th> <th>Hostname</th> <th>last_contact</th> <th>Last_checked</th> - <th>External Probe</th> - <th>Internal Probe</th> - <th>Reboot</th> + <th nowrap='true'>Port Status</th> + <th></th> + <th></th> + <th></th> </tr> </thead> <tbody> @@ -108,26 +157,33 @@ from links import * </td> <td py:content="diff_time(node.plc_node_stats['last_contact'])"></td> <td py:content="diff_time(mktime(node.date_checked.timetuple()))"></td> + <td> + <span py:for="port,state in node.ports" + id="port${state}" py:content="'%s, ' % port">80</span> + </td> <td> <!-- TODO: add some values/code to authenticate the operation. --> - <form action="${link('pcuview', pcuid=pcu.plc_pcuid)}" name="nodeaction" method='post'> + <form action="${link('pcuview', hostname=node.hostname)}" name="externalscan${i}" method='post'> <input type='hidden' name='hostname' value='${node.hostname}'/> - <input type='submit' name='submit' value='ExternalProbe' /> + <input type='hidden' name='type' value='ExternalScan' /> </form> + <a onclick='document.externalscan${i}.submit();' href="javascript: void(1);">ExternalScan</a> </td> <td> <!-- TODO: add some values/code to authenticate the operation. --> - <form action="${link('pcuview', pcuid=pcu.plc_pcuid)}" name="nodeaction" method='post'> + <form action="${link('pcuview', hostname=node.hostname)}" name="internalscan${i}" method='post'> <input type='hidden' name='hostname' value='${node.hostname}'/> - <input type='submit' name='submit' value='DeepProbe' /> + <input type='hidden' name='type' value='InternalScan' /> </form> + <a onclick='javascript: document.internalscan${i}.submit();' href="javascript: void(1);">InternalScan</a> </td> - <td> + <td py:if="len(pcuquery) > 0"> <!-- TODO: add some values/code to authenticate the operation. 
--> - <form action="${link('pcuview', pcuid=pcu.plc_pcuid)}" name="nodeaction" method='post'> + <form action="${link('pcuview', pcuid=pcu.plc_pcuid)}" name="reboot${i}" method='post'> <input type='hidden' name='hostname' value='${node.hostname}'/> - <input type='submit' name='submit' value='Reboot' /> + <input type='hidden' name='type' value='Reboot' /> </form> + <a onclick='javascript: document.reboot${i}.submit();' href="javascript: void(1);">Reboot</a> </td> </tr> </tbody> @@ -137,58 +193,46 @@ from links import * </div> <div id="status_block" class="flash" py:if="value_of('tg_flash', None)" py:content="tg_flash"></div> - <h3>Legend</h3> - - <table border="1" align="center" width="80%"> - <tr><th colspan="2">Legend for 'DNS Status'</th></tr> - - <tr><td id="dns-DNS-OK">DNS-OK</td> - <td>This indicates that the DNS name and registered IP address match.</td> - </tr> - <tr><td id="dns-DNS-MISMATCH">DNS-MISMATCH</td> - <td>Sometimes, the registered IP and DNS IP address do not match. - In these cases it is not clear which is correct, - so an error is flagged.</td> - </tr> - <tr><td id="dns-DNS-NOENTRY">DNS-NOENTRY</td> - <td>While a hostname is provided in the registration, the hostname is not actually registered in DNS.</td> - </tr> - <tr><td id="dns-NOHOSTNAME">NOHOSTNAME</td> - <td>While we prefer that a hostname be registered, it is not - strictly required, since simply the IP address, if it is static, is enough to access the PCU.</td> - </tr> - <tr><td> </td></tr> - <!--/table> - <table border=1--> - <tr><th colspan="2">Legend for 'Port Status'</th></tr> - - <tr><td id="portopen">Open</td> - <td>Green port numbers are believed to be open.</td> - </tr> - <tr><td id="portfiltered">Filtered</td> - <td>Gold port numbers are believed to be filtered or simply offline.</td> - </tr> - <tr><td id="portclosed">Closed</td> - <td>Finally, red ports appear to be closed.</td> - </tr> - <tr><td> </td></tr> - <!--/table> - <table border=1--> - <tr><th colspan="2">Legend for 
'Test Results'</th></tr> - - <tr><td id="status-0">OK</td> - <td>The PCU is accessible, and short of actually rebooting the node, everything appears to work.</td> - </tr> - <tr><td id="status-NetDown">NetDown</td> - <td>The PCU is inaccessible from the PlanetLab address block 128.112.139.0/25, or it is simply offline.</td> - </tr> - <tr><td id="status-Not_Run">Not_Run</td> - <td>Previous errors, such as DNS or an incomplete configuration prevented the actual test from begin performed.</td> - </tr> - <tr><td id="status-error">Other Errors</td> - <td>Other errors are reported by the test that are more specific to the block encountered by the script.</td> - </tr> - </table> + <h4 py:if="len(pcuquery) > 0">Convenience Calls</h4> + <?python + if len(pcuquery) == 0: pcu = None + ?> + <div py:if="pcu is not None" class="code"> + <span py:for="port,state in pcu.ports"> + <span class="code" py:if="port == 22 and state == 'open'"> + ssh -o PasswordAuthentication=yes -o PubkeyAuthentication=no + ${pcu.plc_pcu_stats['username']}@${pcu_name(pcu.plc_pcu_stats)} + </span> + <span class="code" py:if="port == 23 and state == 'open'"> + telnet ${pcu_name(pcu.plc_pcu_stats)} + </span> + <span class="code" py:if="port == 80 and state == 'open'"> + <a href="http://${pcu_name(pcu.plc_pcu_stats)}">http://${pcu_name(pcu.plc_pcu_stats)}</a> + </span> + <span class="code" py:if="port == 443 and state == 'open'"> + <br/> + <a href="https://${pcu_name(pcu.plc_pcu_stats)}">https://${pcu_name(pcu.plc_pcu_stats)}</a> + <br/> + curl -s --form 'user=${pcu.plc_pcu_stats['username']}' + --form 'password=${pcu.plc_pcu_stats['password']}' + --insecure https://${pcu_name(pcu.plc_pcu_stats)}/cgi-bin/webcgi/index + <br/> + /usr/share/monitor/pcucontrol/models/racadm.py -r ${pcu.plc_pcu_stats['ip']} + -u ${pcu.plc_pcu_stats['username']} -p '${pcu.plc_pcu_stats['password']}' + <br/> + /usr/share/monitor/pcucontrol/models/hpilo/locfg.pl + -f /usr/share/monitor/pcucontrol/models/hpilo/iloxml/Reset_Server.xml 
+ -s ${pcu_name(pcu.plc_pcu_stats)} + -u ${pcu.plc_pcu_stats['username']} + -p '${pcu.plc_pcu_stats['password']} ' | grep MESSAGE" + </span> + <span class="code" py:if="port == 16992 and state == 'open'"> + /usr/share/monitor/pcucontrol/models/intelamt/remoteControl -A + -verbose 'http://${pcu_name(pcu.plc_pcu_stats)}:16992/RemoteControlService' + -user admin -pass '${pcu.plc_pcu_stats['password']}' + </span> + </span> + </div> </div> diff --git a/web/MonitorWeb/monitorweb/templates/sitelist.kid b/web/MonitorWeb/monitorweb/templates/sitelist.kid index 50b296e..a9b7685 100644 --- a/web/MonitorWeb/monitorweb/templates/sitelist.kid +++ b/web/MonitorWeb/monitorweb/templates/sitelist.kid @@ -1,6 +1,8 @@ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <?python layout_params['page_title'] = "Monitor Site List" +from monitor.util import diff_time +from time import mktime from links import * ?> <html py:layout="'sitemenu.kid'" @@ -26,18 +28,28 @@ from links import * <tr> <th></th> <th>Site name</th> - <th>Status</th> - <th mochi:format="int">Slices (created / max)</th> - <th mochi:format="int">Nodes (online / registered)</th> + <th>Enabled</th> + <th>Penalty</th> + <th mochi:format="int">Slices/Max</th> + <th mochi:format="int">Nodes/Total</th> + <th>Last Change</th> </tr> </thead> <tbody> <tr py:for="i,site in enumerate(query)" class="${i%2 and 'odd' or 'even'}" > <td></td> - <td nowrap="true"><a href="${link('siteview', loginbase=site.loginbase)}">${site.loginbase}</a></td> - <td id="site-${site.status}" py:content="site.last_changed"></td> + <td nowrap="true"> + <div class='oneline'> + <a class='left' href="${link('pcuview', loginbase=site.loginbase)}">${site.loginbase}</a> + <a class='right' href="${plc_site_uri(site.loginbase)}"> + <img style='display: inline' border='0' src="static/images/extlink.gif" align='right'/></a> + </div> + </td> + <td py:content="site.enabled"></td> + <td>n/a</td> 
<td>${site.slices_used}/${site.slices_total}</td> <td>${site.nodes_up} / ${site.nodes_total}</td> + <td id="site-${site.status}" py:content="diff_time(mktime(site.last_changed.timetuple()))"></td> </tr> </tbody> </table> diff --git a/web/MonitorWeb/monitorweb/templates/siteview.kid b/web/MonitorWeb/monitorweb/templates/siteview.kid index 039a2b7..0999b31 100644 --- a/web/MonitorWeb/monitorweb/templates/siteview.kid +++ b/web/MonitorWeb/monitorweb/templates/siteview.kid @@ -2,6 +2,7 @@ <?python layout_params['page_title'] = "Monitor Site View" from monitor.util import diff_time +from time import mktime from links import * ?> <html py:layout="'sitemenu.kid'" @@ -14,10 +15,11 @@ from links import * <thead> <tr> <th>Site name</th> - <th>Status</th> <th>Enabled</th> - <th>Slices (created / max)</th> - <th>Nodes (online / registered)</th> + <th>Penalty</th> + <th>Slices/Max</th> + <th>Nodes/Total</th> + <th>Status</th> </tr> </thead> <tbody> @@ -25,15 +27,19 @@ from links import * <td nowrap="true"><a class="ext-link" href="${plc_site_uri(site.loginbase)}"> <span class="icon">${site.loginbase}</span></a> </td> - <td id="site-${site.status}" py:content="site.last_changed"></td> - <td py:content="site.enabled"></td> - <td>${site.slices_used}/${site.slices_total}</td> - <td>${site.nodes_up} / ${site.nodes_total}</td> + <td py:content="site.enabled"></td> + <td>n/a</td> + <td>${site.slices_used}/${site.slices_total}</td> + <td>${site.nodes_up} / ${site.nodes_total}</td> + <td id="site-${site.status}" py:content="diff_time(mktime(site.last_changed.timetuple()))"></td> </tr> </tbody> </table> <h3>Node List</h3> - <table id="sortable_table" class="datagrid" border="1" width="100%"> + <p py:if="len(nodequery) == 0"> + There are no registered nodes for this PCU. 
+ </p> + <table py:if="len(nodequery) > 0" id="sortable_table" class="datagrid" border="1" width="100%"> <thead> <tr> <th mochi:format="int"></th> @@ -49,7 +55,7 @@ from links import * <tr py:for="i,node in enumerate(nodequery)" class="${i%2 and 'odd' or 'even'}" > <td></td> <td id="node-${node.observed_status}" nowrap="true"> - <a href="${link('nodeview', hostname=node.hostname)}" py:content="node.hostname">your.host.org</a></td> + <a href="${link('pcuview', hostname=node.hostname)}" py:content="node.hostname">your.host.org</a></td> <td py:content="node.ping_status"></td> <td py:if="node.pcu_short_status != 'none'" id="status-${node.pcu_short_status}"> <a href="${link('pcuview', pcuid=node.plc_node_stats['pcu_ids'])}">${node.pcu_short_status}</a></td>