From 3f501b69f366b8b6c62d35d6aea4ebf5fa0f1192 Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Sat, 13 Dec 2008 00:19:20 +0000 Subject: [PATCH] many improvements. improved views and links between them. improved pcuview added testing functions for rebooting, and probing the node state in real-time. improved pcucontrol/reboot.py object model. improved findbad.py ability to update a single record. more needed here. --- findbad.py | 77 ++- findbadpcu.py | 50 +- getconf.py | 1 + monitor-default.conf | 1 + monitor/database/info/findbad.py | 4 +- monitor/wrapper/plc.py | 8 + monitor/wrapper/plccache.py | 4 +- pcucontrol/reboot.py | 450 ++++++++++-------- setup.py | 2 +- web/MonitorWeb/monitorweb/controllers.py | 80 +++- .../monitorweb/static/css/style.css | 56 ++- web/MonitorWeb/monitorweb/templates/links.py | 5 +- .../monitorweb/templates/nodelist.kid | 17 +- .../monitorweb/templates/nodeview.kid | 6 +- .../monitorweb/templates/pculist.kid | 36 +- .../monitorweb/templates/pcuview.kid | 112 ++++- .../monitorweb/templates/sitelist.kid | 13 +- .../monitorweb/templates/sitemenu.kid | 13 +- .../monitorweb/templates/siteview.kid | 7 +- zabbix/ext_boot_state | 5 +- 20 files changed, 647 insertions(+), 300 deletions(-) diff --git a/findbad.py b/findbad.py index 77dd120..c7449d2 100755 --- a/findbad.py +++ b/findbad.py @@ -20,7 +20,7 @@ from monitor.wrapper import plc, plccache from nodequery import verify,query_to_dict,node_select import traceback -print "starting sqlfindbad.py" +#print "starting sqlfindbad.py" # QUERY all nodes. COMON_COTOPURL= "http://summer.cs.princeton.edu/status/tabulator.cgi?" 
+ \ "table=table_nodeview&" + \ @@ -254,34 +254,51 @@ def recordPingAndSSH(request, result): fbnodesync = FindbadNodeRecordSync.findby_or_create(hostname=nodename, if_new_set={'round' : global_round}) - fbrec = FindbadNodeRecord( - date_checked=datetime.fromtimestamp(values['date_checked']), + # NOTE: This code will either add a new record for the new global_round, + # OR it will find the previous value, and update it + # with new information. + # The data that is 'lost' is not that important, b/c older + # history still exists. + fbrec = FindbadNodeRecord.findby_or_create( round=global_round, - hostname=nodename, - loginbase=values['loginbase'], - kernel_version=values['kernel'], - bootcd_version=values['bootcd'], - nm_status=values['nm'], - fs_status=values['readonlyfs'], - dns_status=values['dns'], - princeton_comon_dir=values['princeton_comon'], - princeton_comon_running=values['princeton_comon_running'], - princeton_comon_procs=values['princeton_comon_procs'], - plc_node_stats = values['plcnode'], - plc_site_stats = values['plcsite'], - plc_pcuid = values['pcu'], - comon_stats = values['comonstats'], - ping_status = (values['ping'] == "PING"), - ssh_portused = values['sshport'], - ssh_status = (values['ssh'] == "SSH"), - ssh_error = values['ssherror'], - observed_status = values['state'], - observed_category = values['category'], - ) + hostname=nodename) + before = fbrec.to_dict() + print "BEFORE, ", before + fbrec.flush() + time.sleep(2) + print "Setting VALUES" + fbrec.set( date_checked=datetime.fromtimestamp(values['date_checked']), + loginbase=values['loginbase'], + kernel_version=values['kernel'], + bootcd_version=values['bootcd'], + nm_status=values['nm'], + fs_status=values['readonlyfs'], + dns_status=values['dns'], + princeton_comon_dir=values['princeton_comon'], + princeton_comon_running=values['princeton_comon_running'], + princeton_comon_procs=values['princeton_comon_procs'], + plc_node_stats = values['plcnode'], + plc_site_stats = 
values['plcsite'], + plc_pcuid = values['pcu'], + comon_stats = values['comonstats'], + ping_status = (values['ping'] == "PING"), + ssh_portused = values['sshport'], + ssh_status = (values['ssh'] == "SSH"), + ssh_error = values['ssherror'], + observed_status = values['state'], + observed_category = values['category']) + after = fbrec.to_dict() + print "AFTER , ", after + + for v in before.keys(): + if before[v] == after[v]: + print "SAME FOR KEY %s" % v + print "%s : %s\t%s" % ( v, before[v], after[v] ) + + fbrec.flush() fbnodesync.round = global_round fbnodesync.flush() fbsync.flush() - fbrec.flush() count += 1 print "%d %s %s" % (count, nodename, values) @@ -295,6 +312,16 @@ def handle_exception(request, result): for i in result: print "Result: %s" % i +def probe(hostname): + try: + (nodename, values) = collectPingAndSSH(hostname, {}) + recordPingAndSSH(None, (nodename, values)) + session.flush() + return True + except: + print traceback.print_exc() + return False + def checkAndRecordState(l_nodes, cohash): global global_round diff --git a/findbadpcu.py b/findbadpcu.py index 070ddac..468107d 100755 --- a/findbadpcu.py +++ b/findbadpcu.py @@ -26,7 +26,7 @@ global_round = 1 errorState = {} count = 0 -def nmap_portstatus(status): +def nmap_port_status(status): ps = {} l_nmap = status.split() ports = l_nmap[4:] @@ -179,31 +179,31 @@ def collectPingAndSSH(pcuname, cohash): #### COMPLETE ENTRY ####################### - values['complete_entry'] = [] + values['entry_complete'] = [] #if values['protocol'] is None or values['protocol'] is "": - # values['complete_entry'] += ["protocol"] + # values['entry_complete'] += ["protocol"] if values['plc_pcu_stats']['model'] is None or values['plc_pcu_stats']['model'] is "": - values['complete_entry'] += ["model"] + values['entry_complete'] += ["model"] # Cannot continue due to this condition continue_probe = False if values['plc_pcu_stats']['password'] is None or values['plc_pcu_stats']['password'] is "": - 
values['complete_entry'] += ["password"] + values['entry_complete'] += ["password"] # Cannot continue due to this condition continue_probe = False - if len(values['complete_entry']) > 0: + if len(values['entry_complete']) > 0: continue_probe = False if values['plc_pcu_stats']['hostname'] is None or values['plc_pcu_stats']['hostname'] is "": - values['complete_entry'] += ["hostname"] + values['entry_complete'] += ["hostname"] if values['plc_pcu_stats']['ip'] is None or values['plc_pcu_stats']['ip'] is "": - values['complete_entry'] += ["ip"] + values['entry_complete'] += ["ip"] # If there are no nodes associated with this PCU, then we cannot continue. if len(values['plc_pcu_stats']['node_ids']) == 0: continue_probe = False - values['complete_entry'] += ['NoNodeIds'] + values['entry_complete'] += ['NoNodeIds'] #### DNS and IP MATCH ####################### if values['plc_pcu_stats']['hostname'] is not None and values['plc_pcu_stats']['hostname'] is not "" and \ @@ -212,21 +212,21 @@ def collectPingAndSSH(pcuname, cohash): try: ipaddr = socket.gethostbyname(values['plc_pcu_stats']['hostname']) if ipaddr == values['plc_pcu_stats']['ip']: - values['dnsmatch'] = "DNS-OK" + values['dns_status'] = "DNS-OK" else: - values['dnsmatch'] = "DNS-MISMATCH" + values['dns_status'] = "DNS-MISMATCH" continue_probe = False except Exception, err: - values['dnsmatch'] = "DNS-NOENTRY" + values['dns_status'] = "DNS-NOENTRY" values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip'] #print err else: if values['plc_pcu_stats']['ip'] is not None and values['plc_pcu_stats']['ip'] is not "": - values['dnsmatch'] = "NOHOSTNAME" + values['dns_status'] = "NOHOSTNAME" values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip'] else: - values['dnsmatch'] = "NO-DNS-OR-IP" + values['dns_status'] = "NO-DNS-OR-IP" values['plc_pcu_stats']['hostname'] = "No_entry_in_DB" continue_probe = False @@ -235,14 +235,14 @@ def collectPingAndSSH(pcuname, cohash): nmap = util.command.CMD() 
(oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats'])) # NOTE: an empty / error value for oval, will still work. - (values['portstatus'], continue_probe) = nmap_portstatus(oval) + (values['port_status'], continue_probe) = nmap_port_status(oval) else: - values['portstatus'] = None + values['port_status'] = None ###### DRY RUN ############################ if 'node_ids' in values['plc_pcu_stats'] and len(values['plc_pcu_stats']['node_ids']) > 0: - rb_ret = reboot.reboot_test(values['plc_pcu_stats']['nodenames'][0], values, continue_probe, 1, True) + rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0], values, continue_probe, 1, True) else: rb_ret = "Not_Run" # No nodes to test" @@ -255,6 +255,7 @@ def collectPingAndSSH(pcuname, cohash): print "____________________________________" errors['traceback'] = traceback.format_exc() print errors['traceback'] + values['reboot'] = errors['traceback'] values['date_checked'] = time.time() return (pcuname, values, errors) @@ -278,9 +279,9 @@ def recordPingAndSSH(request, result): round=fbsync.round, plc_pcuid=pcu_id, plc_pcu_stats=values['plc_pcu_stats'], - dns_status=values['dnsmatch'], - port_status=values['portstatus'], - entry_complete=" ".join(values['complete_entry']), + dns_status=values['dns_status'], + port_status=values['port_status'], + entry_complete=" ".join(values['entry_complete']), reboot_trial_status="%s" % values['reboot'], ) fbnodesync.round = global_round @@ -398,6 +399,7 @@ def main(): return 0 +print "main" if __name__ == '__main__': import logging logger = logging.getLogger("monitor") @@ -416,7 +418,7 @@ if __name__ == '__main__': site=None, dbname="findbadpcus", cachenodes=False, - refresh=False, + cachecalls=True, force=False, ) parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE", @@ -432,7 +434,7 @@ if __name__ == '__main__': help="Cache node lookup from PLC") parser.add_option("", 
"--dbname", dest="dbname", metavar="FILE", help="Specify the name of the database to which the information is saved") - parser.add_option("", "--refresh", action="store_true", dest="refresh", + parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls", help="Refresh the cached values") parser.add_option("-i", "--increment", action="store_true", dest="increment", help="Increment round number to force refresh or retry") @@ -440,6 +442,10 @@ if __name__ == '__main__': help="Force probe without incrementing global 'round'.") parser = parsermodule.getParser(['defaults'], parser) config = parsermodule.parse_args(parser) + if hasattr(config, 'cachecalls') and not config.cachecalls: + # NOTE: if explicitly asked, refresh cached values. + print "Reloading PLCCache" + plccache.init() try: # NOTE: evidently, there is a bizarre interaction between iLO and ssh # when LANG is set... Do not know why. Unsetting LANG, fixes the problem. diff --git a/getconf.py b/getconf.py index 1f84674..32641fb 100755 --- a/getconf.py +++ b/getconf.py @@ -15,6 +15,7 @@ def getconf(hostname, force=False, media=None): f.write( api.AdmGenerateNodeConfFile(n[0]['node_id']) ) f.close() print os.system("cd bootcd; ./build.sh -f %s.txt -t iso -o /plc/data/var/www/html/bootcds/%s.iso &> /dev/null" % ( hostname, hostname)) + print "cd bootcd; ./build.sh -f %s.txt -t usb_partition -o /plc/data/var/www/html/bootcds/%s-partition.usb &> /dev/null" % ( hostname, hostname) print os.system("cd bootcd; ./build.sh -f %s.txt -t usb_partition -o /plc/data/var/www/html/bootcds/%s-partition.usb &> /dev/null" % ( hostname, hostname)) else: # assume that the images have already been generated.. 
diff --git a/monitor-default.conf b/monitor-default.conf index 967a6ae..7414d41 100644 --- a/monitor-default.conf +++ b/monitor-default.conf @@ -42,6 +42,7 @@ zabbix_dburi=postgres://zabbixuser:<...>@localhost:5432/zabbix cachetime=60 [commandline] +embedded=True echo=False debug=0 mail=1 diff --git a/monitor/database/info/findbad.py b/monitor/database/info/findbad.py index 566c2ae..e58ef3a 100644 --- a/monitor/database/info/findbad.py +++ b/monitor/database/info/findbad.py @@ -32,7 +32,7 @@ class FindbadNodeRecord(Entity): fbsync = FindbadNodeRecordSync.get_by(hostname="global") if fbsync: kwargs['round'] = fbsync.round - return cls.query.filter_by(**kwargs) + return cls.query.filter_by(**kwargs).order_by(FindbadNodeRecord.date_checked.desc()) else: return [] @@ -95,7 +95,7 @@ class FindbadPCURecord(Entity): def get_latest_by(cls, **kwargs): fbsync = FindbadPCURecordSync.get_by(plc_pcuid=0) kwargs['round'] = fbsync.round - return cls.query.filter_by(**kwargs) + return cls.query.filter_by(**kwargs).order_by(FindbadPCURecord.date_checked.desc()) # ACCOUNTING date_checked = Field(DateTime) round = Field(Int,default=0) diff --git a/monitor/wrapper/plc.py b/monitor/wrapper/plc.py index 255e3c1..c8ea068 100644 --- a/monitor/wrapper/plc.py +++ b/monitor/wrapper/plc.py @@ -87,6 +87,11 @@ class CachedPLC(PLC): def run_or_returncached(*params): cachename = self._param_to_str(name, *params) #print "cachename is %s" % cachename + if hasattr(config, 'refresh'): + refresh = config.refresh + else: + refresh = False + if 'Get' in name: if not database.cachedRecently(cachename): load_old_cache = False @@ -207,7 +212,10 @@ def getpcu(nodename): anon = {'AuthMethod': "anonymous"} nodeinfo = api.GetNodes(auth.auth, {"hostname": nodename}, ["pcu_ids", "ports"])[0] if nodeinfo['pcu_ids']: + print nodeinfo sitepcu = api.GetPCUs(auth.auth, nodeinfo['pcu_ids'])[0] + print sitepcu + print nodeinfo["ports"] sitepcu[nodename] = nodeinfo["ports"][0] return sitepcu else: diff --git 
a/monitor/wrapper/plccache.py b/monitor/wrapper/plccache.py index 45f879c..96b5646 100755 --- a/monitor/wrapper/plccache.py +++ b/monitor/wrapper/plccache.py @@ -14,7 +14,8 @@ def dsites_from_lsites(l_sites): id2lb[site['site_id']] = site['login_base'] else: #print "Two sites have the same login_base value %s!" % site['login_base'] - sys.exit(1) + #sys.exit(1) + continue return (d_sites, id2lb) def dsn_from_dsln(d_sites, id2lb, l_nodes): @@ -114,6 +115,7 @@ def create_plcdb(): l_sites = plc.getSites({'peer_id':None}, ['login_base', 'site_id', 'abbreviated_name', 'latitude', 'longitude', 'max_slices', 'slice_ids', 'node_ids' ]) if len(l_sites) == 0: + print "no sites! exiting..." sys.exit(1) (d_sites,id2lb) = dsites_from_lsites(l_sites) diff --git a/pcucontrol/reboot.py b/pcucontrol/reboot.py index bfb7f3c..04fe4da 100755 --- a/pcucontrol/reboot.py +++ b/pcucontrol/reboot.py @@ -26,6 +26,8 @@ sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh") import pcucontrol.transports.pyssh as pyssh from monitor import config +from monitor.database.info.model import FindbadPCURecord + # Timeouts in seconds TELNET_TIMEOUT = 45 @@ -101,9 +103,9 @@ class PCUModel(PCU): # This class captures the observed pcu records from FindBadPCUs.py class PCURecord: def __init__(self, pcu_record_dict): - for field in ['nodenames', 'portstatus', - 'dnsmatch', - 'complete_entry', ]: + for field in ['port_status', + 'dns_status', + 'entry_complete', ]: if field in pcu_record_dict: if field == "reboot": self.__setattr__("reboot_str", pcu_record_dict[field]) @@ -116,7 +118,8 @@ class Transport: TELNET = 1 SSH = 2 HTTP = 3 - IPAL = 4 + HTTPS = 4 + IPAL = 5 TELNET_TIMEOUT = 120 @@ -227,26 +230,27 @@ class PCUControl(Transport,PCUModel,PCURecord): PCUModel.__init__(self, plc_pcu_record) PCURecord.__init__(self, plc_pcu_record) type = None - if self.portstatus: - if '22' in supported_ports and self.portstatus['22'] == "open": + if self.port_status: + if '22' in supported_ports and 
self.port_status['22'] == "open": type = Transport.SSH - elif '23' in supported_ports and self.portstatus['23'] == "open": + elif '23' in supported_ports and self.port_status['23'] == "open": type = Transport.TELNET - elif '80' in supported_ports and self.portstatus['80'] == "open": - type = Transport.HTTP - elif '443' in supported_ports and self.portstatus['443'] == "open": + # NOTE: prefer https over http + elif '443' in supported_ports and self.port_status['443'] == "open": + type = Transport.HTTPS + elif '80' in supported_ports and self.port_status['80'] == "open": type = Transport.HTTP - elif '5869' in supported_ports and self.portstatus['5869'] == "open": + elif '5869' in supported_ports and self.port_status['5869'] == "open": # For DRAC cards. Racadm opens this port. type = Transport.HTTP - elif '9100' in supported_ports and self.portstatus['9100'] == "open": + elif '9100' in supported_ports and self.port_status['9100'] == "open": type = Transport.IPAL - elif '16992' in supported_ports and self.portstatus['16992'] == "open": + elif '16992' in supported_ports and self.port_status['16992'] == "open": type = Transport.HTTP else: raise ExceptionPort("Unsupported Port: No transport from open ports") else: - raise Exception("No Portstatus: No transport because no open ports") + raise ExceptionPort("No Portstatus: No transport because no open ports") Transport.__init__(self, type, verbose) def run(self, node_port, dryrun): @@ -259,19 +263,25 @@ class PCUControl(Transport,PCUModel,PCURecord): except ExceptionNotFound, err: return "error: " + str(err) except ExceptionPassword, err: - return "password exception: " + str(err) + return "Password exception: " + str(err) except ExceptionTimeout, err: - return "timeout exception: " + str(err) + return "Timeout exception: " + str(err) except ExceptionUsername, err: - return "exception: no username prompt: " + str(err) + return "No username prompt: " + str(err) except ExceptionSequence, err: - return "sequence error: " + 
str(err) + return "Sequence error: " + str(err) except ExceptionPrompt, err: - return "prompt exception: " + str(err) + return "Prompt exception: " + str(err) + except ExceptionNoTransport, err: + return "No Transport: " + str(err) except ExceptionPort, err: - return "no ports exception: " + str(err) + return "No ports exception: " + str(err) except socket.error, err: return "socket error: timeout: " + str(err) + except urllib2.HTTPError, err: + return "HTTPError: " + str(err) + except urllib2.URLError, err: + return "URLError: " + str(err) except EOFError, err: if self.verbose: logger.debug("reboot: EOF") @@ -337,7 +347,7 @@ class IPAL(PCUControl): elif self.type == Transport.TELNET: return self.run_telnet(node_port, dryrun) else: - raise Exception("Unimplemented Transport for IPAL") + raise ExceptionNoTransport("Unimplemented Transport for IPAL") def run_telnet(self, node_port, dryrun): # TELNET version of protocol... @@ -439,39 +449,30 @@ class IPAL(PCUControl): s.close() return 0 +class APCControl(PCUControl): + supported_ports = [22,23,80,443] + reboot_sequence = [] -class APCEurope(PCUControl): def run(self, node_port, dryrun): - self.open(self.host, self.username) - self.sendPassword(self.password) - - self.ifThenSend("\r\n> ", "1", ExceptionPassword) - self.ifThenSend("\r\n> ", "2") - self.ifThenSend("\r\n> ", str(node_port)) - # 3- Immediate Reboot - self.ifThenSend("\r\n> ", "3") - - if not dryrun: - self.ifThenSend("Enter 'YES' to continue or to cancel", - "YES\r\n", - ExceptionSequence) + print "RUNNING!!!!!!!!!!!!" + if self.type == Transport.HTTPS or self.type == Transport.HTTP: + print "APC via http...." + return self.run_http_or_https(node_port, dryrun) else: - self.ifThenSend("Enter 'YES' to continue or to cancel", - "", ExceptionSequence) - self.ifThenSend("Press to continue...", "", ExceptionSequence) - - self.close() - return 0 + print "APC via telnet/ssh...." 
+ return self.run_telnet_or_ssh(node_port, dryrun) -class APCBrazil(PCUControl): - def run(self, node_port, dryrun): + def run_telnet_or_ssh(self, node_port, dryrun): self.open(self.host, self.username) self.sendPassword(self.password) - self.ifThenSend("\r\n> ", "1", ExceptionPassword) - self.ifThenSend("\r\n> ", str(node_port)) - # 4- Immediate Reboot - self.ifThenSend("\r\n> ", "4") + first = True + for val in self.reboot_sequence: + if first: + self.ifThenSend("\r\n> ", val, ExceptionPassword) + first = False + else: + self.ifThenSend("\r\n> ", val) if not dryrun: self.ifThenSend("Enter 'YES' to continue or to cancel", @@ -485,115 +486,135 @@ class APCBrazil(PCUControl): self.close() return 0 -class APCBerlin(PCUControl): - def run(self, node_port, dryrun): - self.open(self.host, self.username) - self.sendPassword(self.password) - - self.ifThenSend("\r\n> ", "1", ExceptionPassword) - self.ifThenSend("\r\n> ", "2") - self.ifThenSend("\r\n> ", "1") - self.ifThenSend("\r\n> ", str(node_port)) - # 3- Immediate Reboot - self.ifThenSend("\r\n> ", "3") - + def run_http_or_https(self, node_port, dryrun): if not dryrun: - self.ifThenSend("Enter 'YES' to continue or to cancel", - "YES\r\n", - ExceptionSequence) - else: - self.ifThenSend("Enter 'YES' to continue or to cancel", - "", ExceptionSequence) - self.ifThenSend("Press to continue...", "", ExceptionSequence) + # send reboot signal. + # TODO: send a ManualPCU() reboot request for this PCU. + # NOTE: this model defies automation because the port numbering + # and the form numbers are not consistent across models. There is + # no direct mapping from port# to form#. + return "Manual Reboot Required" - self.close() - return 0 + else: + # TODO: also send message for https, since that doesn't work this way... 
+ if self.type == Transport.HTTPS: + cmd = self.get_https_cmd() + elif self.type == Transport.HTTP: + cmd = self.get_http_cmd() + else: + raise ExceptionNoTransport("Unsupported transport for http command") -class APCFolsom(PCUControl): - def run(self, node_port, dryrun): - self.open(self.host, self.username) - self.sendPassword(self.password) + cmd = cmd % ( self.username, self.password, self.host) + print "CMD: %s" % cmd - self.ifThenSend("\r\n> ", "1", ExceptionPassword) - self.ifThenSend("\r\n> ", "2") - self.ifThenSend("\r\n> ", "1") - self.ifThenSend("\r\n> ", str(node_port)) - self.ifThenSend("\r\n> ", "1") + p = os.popen(cmd) + result = p.read() + if len(result.split('\n')) > 2: + self.logout() + return 0 + else: + # NOTE: an error has occurred, so no need to log out. + print "RESULT: ", result + return result - # 3- Immediate Reboot - self.ifThenSend("\r\n> ", "3") + def get_https_cmd(self): + version = self.get_version() + print "VERSION: %s" % version + if "AP96" in version: + cmd = "curl -s --insecure --user '%s:%s' https://%s/outlets.htm " + \ + " | grep -E '^[^<]+' " + \ + " | grep -v 'Protected Object' " + else: + # NOTE: no other case known right now... + cmd = "curl -s --insecure --user '%s:%s' https://%s/outlets.htm " + \ + " | grep -E '^[^<]+' " + \ + " | grep -v 'Protected Object' " + + return cmd + + def get_http_cmd(self): + version = self.get_version() + print "VERSION: %s" % version + if "AP7900" in version: + cmd = "curl -s --anyauth --user '%s:%s' http://%s/rPDUout.htm | grep -E '^[^<]+'" + elif "AP7920" in version: + cmd = "curl -s --anyauth --user '%s:%s' http://%s/ms3out.htm | grep -E '^[^<]+' " + else: + # default case... + print "USING DEFAULT" + cmd = "curl -s --anyauth --user '%s:%s' http://%s/ms3out.htm | grep -E '^[^<]+' " + + return cmd + + def get_version(self): + # NOTE: this command returns and formats all data. 
+ #cmd = """curl -s --anyauth --user '%s:%s' http://%s/about.htm """ + + # """ | sed -e "s/<[^>]*>//g" -e "s/ //g" -e "/^$/d" """ + + # """ | awk '{line=$0 ; if ( ! /:/ && length(pline) > 0 ) \ + # { print pline, line } else { pline=line} }' """ + + # """ | grep Model """ + + # NOTE: we may need to return software version, no model version to + # know which file to request on the server. + + if self.type == Transport.HTTP: + cmd = """curl -s --anyauth --user '%s:%s' http://%s/about.htm """ + \ + """ | sed -e "s/<[^>]*>//g" -e "s/ //g" -e "/^$/d" """ + \ + """ | grep -E "AP[[:digit:]]+" """ + #""" | grep -E "v[[:digit:]].*" """ + elif self.type == Transport.HTTPS: + cmd = """curl -s --insecure --user '%s:%s' https://%s/about.htm """ + \ + """ | sed -e "s/<[^>]*>//g" -e "s/ //g" -e "/^$/d" """ + \ + """ | grep -E "AP[[:digit:]]+" """ + #""" | grep -E "v[[:digit:]].*" """ + else: + raise ExceptionNoTransport("Unsupported transport to get version") - if not dryrun: - self.ifThenSend("Enter 'YES' to continue or to cancel", - "YES\r\n", - ExceptionSequence) + cmd = cmd % ( self.username, self.password, self.host) + p = os.popen(cmd) + result = p.read() + return result.strip() + + def logout(self): + # NOTE: log out again, to allow other uses to access the machine. 
+ if self.type == Transport.HTTP: + cmd = """curl -s --anyauth --user '%s:%s' http://%s/logout.htm """ + \ + """ | grep -E '^[^<]+' """ + elif self.type == Transport.HTTPS: + cmd = """curl -s --insecure --user '%s:%s' http://%s/logout.htm """ + \ + """ | grep -E '^[^<]+' """ else: - self.ifThenSend("Enter 'YES' to continue or to cancel", - "", ExceptionSequence) - self.ifThenSend("Press to continue...", "", ExceptionSequence) + raise ExceptionNoTransport("Unsupported transport to logout") - self.close() - return 0 + cmd = cmd % ( self.username, self.password, self.host) + p = os.popen(cmd) + print p.read() -class APCMaster(PCUControl): - supported_ports = [22,23] +class APCControl12p3(APCControl): def run(self, node_port, dryrun): - print "Rebooting %s" % self.host - self.open(self.host, self.username) - self.sendPassword(self.password) + self.reboot_sequence = ["1", "2", str(node_port), "3"] + return super(APCControl12p3, self).run(node_port, dryrun) - # 1- Device Manager - self.ifThenSend("\r\n> ", "1", ExceptionPassword) - # 3- Outlet Control/Config - self.ifThenSend("\r\n> ", "3") - # n- Outlet n - self.ifThenSend("\r\n> ", str(node_port)) - # 1- Control Outlet - self.ifThenSend("\r\n> ", "1") - # 3- Immediate Reboot - self.ifThenSend("\r\n> ", "3") - - if not dryrun: - self.ifThenSend("Enter 'YES' to continue or to cancel", - "YES\r\n", - ExceptionSequence) - else: - self.ifThenSend("Enter 'YES' to continue or to cancel", - "", ExceptionSequence) - self.ifThenSend("Press to continue...", "", ExceptionSequence) - - self.close() - return 0 +class APCControl1p4(APCControl): + def run(self, node_port, dryrun): + self.reboot_sequence = ["1", str(node_port), "4"] + return super(APCControl1p4, self).run(node_port, dryrun) -class APC(PCUControl): - def __init__(self, plc_pcu_record, verbose): - PCUControl.__init__(self, plc_pcu_record, verbose) +class APCControl121p3(APCControl): + def run(self, node_port, dryrun): + self.reboot_sequence = ["1", "2", "1", 
str(node_port), "3"] + return super(APCControl121p3, self).run(node_port, dryrun) - self.master = APCMaster(plc_pcu_record, verbose) - self.folsom = APCFolsom(plc_pcu_record, verbose) - self.europe = APCEurope(plc_pcu_record, verbose) +class APCControl121p1(APCControl): + def run(self, node_port, dryrun): + self.reboot_sequence = ["1", "2", "1", str(node_port), "1", "3"] + return super(APCControl121p1, self).run(node_port, dryrun) +class APCControl13p13(APCControl): def run(self, node_port, dryrun): - try_again = True - sleep_time = 1 + self.reboot_sequence = ["1", "3", str(node_port), "1", "3"] + return super(APCControl13p13, self).run(node_port, dryrun) - for pcu in [self.master, self.europe, self.folsom]: - if try_again: - try: - print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*" - try_again = False - print "sleeping 5" - time.sleep(sleep_time) - ret = pcu.reboot(node_port, dryrun) - except ExceptionSequence, err: - del pcu - sleep_time = 130 - try_again = True - - if try_again: - return "Unknown reboot sequence for APC PCU" - else: - return ret class IntelAMT(PCUControl): supported_ports = [16992] @@ -614,7 +635,7 @@ class IntelAMT(PCUControl): print cmd_str return cmd.system(cmd_str, self.TELNET_TIMEOUT) -class DRACRacAdm(PCUControl): +class DRAC(PCUControl): def run(self, node_port, dryrun): print "trying racadm_reboot..." 
@@ -622,7 +643,7 @@ class DRACRacAdm(PCUControl): return 0 -class DRAC(PCUControl): +class DRACDefault(PCUControl): supported_ports = [22,443,5869] def run(self, node_port, dryrun): self.open(self.host, self.username) @@ -645,6 +666,15 @@ class DRAC(PCUControl): class HPiLO(PCUControl): supported_ports = [22,443] def run(self, node_port, dryrun): + if self.type == Transport.SSH: + return self.run_ssh(node_port, dryrun) + elif self.type == Transport.HTTP or self.type == Transport.HTTPS: + return self.run_https(node_port, dryrun) + else: + raise ExceptionNoTransport("Unimplemented Transport for HPiLO %s" % self.type) + + def run_ssh(self, node_port, dryrun): + self.open(self.host, self.username) self.sendPassword(self.password) @@ -662,11 +692,8 @@ class HPiLO(PCUControl): self.close() return 0 - -class HPiLOHttps(PCUControl): - supported_ports = [22,443] - def run(self, node_port, dryrun): + def run_https(self, node_port, dryrun): locfg = command.CMD() @@ -694,7 +721,7 @@ class HPiLOHttps(PCUControl): return 0 -class BayTechAU(PCUControl): +class BayTechRPC3NC(PCUControl): def run(self, node_port, dryrun): self.open(self.host, self.username, None, "Enter user name:") self.sendPassword(self.password, "Enter Password:") @@ -712,7 +739,7 @@ class BayTechAU(PCUControl): self.close() return 0 -class BayTechGeorgeTown(PCUControl): +class BayTechRPC16(PCUControl): def run(self, node_port, dryrun): self.open(self.host, self.username, None, "Enter user name:") self.sendPassword(self.password, "Enter Password:") @@ -765,6 +792,7 @@ class BayTechCtrlCUnibe(PCUControl): if index == 0: print "Reboot %d" % node_port + time.sleep(5) s.send("Reboot %d\r\n" % node_port) time.sleep(5) @@ -916,7 +944,7 @@ class WTIIPS4(PCUControl): self.close() return 0 -class ePowerSwitchGood(PCUControl): +class ePowerSwitchNew(PCUControl): # NOTE: # The old code used Python's HTTPPasswordMgrWithDefaultRealm() # For some reason this both doesn't work and in some cases, actually @@ -1016,7 +1044,7 @@ 
class ePowerSwitchOld(PCUControl): self.close() return 0 -class ePowerSwitch(PCUControl): +class ePowerSwitchOld(PCUControl): supported_ports = [80] def run(self, node_port, dryrun): self.url = "http://%s:%d/" % (self.host,80) @@ -1062,6 +1090,9 @@ class ManualPCU(PCUControl): pass return 0 +class PM211MIP(ManualPCU): + supported_ports = [80,443] + ### rebooting european BlackBox PSE boxes # Thierry Parmentelat - May 11 2005 # tested on 4-ports models known as PSE505-FR @@ -1270,18 +1301,15 @@ def pcu_name(pcu): else: return None -#import database -from monitor import database -fb = None def get_pcu_values(pcu_id): - global fb - if fb == None: - # this shouldn't be loaded each time... - fb = database.dbLoad("findbadpcus") - + print "pcuid: %s" % pcu_id try: - values = fb['nodes']["id_%s" % pcu_id]['values'] + pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=pcu_id).first() + if pcurec: + values = pcurec.to_dict() + else: + values = None except: values = None @@ -1289,26 +1317,49 @@ def get_pcu_values(pcu_id): def reboot(nodename): return reboot_policy(nodename, True, False) + +def reboot_str(nodename): + global verbose + continue_probe = True + dryrun=False + + pcu = plc.getpcu(nodename) + if not pcu: + logger.debug("no pcu for %s" % nodename) + print "no pcu for %s" % nodename + return False # "%s has no pcu" % nodename + + values = get_pcu_values(pcu['pcu_id']) + if values == None: + logger.debug("No values for pcu probe %s" % nodename) + print "No values for pcu probe %s" % nodename + return False #"no info for pcu_id %s" % pcu['pcu_id'] + + # Try the PCU first + logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model'])) + + ret = reboot_test_new(nodename, values, verbose, dryrun) + return ret def reboot_policy(nodename, continue_probe, dryrun): global verbose pcu = plc.getpcu(nodename) if not pcu: - logger.debug("no pcu for %s" % hostname) - print "no pcu for %s" % hostname + logger.debug("no pcu for %s" % nodename) + print "no pcu for %s" % nodename 
return False # "%s has no pcu" % nodename values = get_pcu_values(pcu['pcu_id']) if values == None: - logger.debug("No values for pcu probe %s" % hostname) - print "No values for pcu probe %s" % hostname + logger.debug("No values for pcu probe %s" % nodename) + print "No values for pcu probe %s" % nodename return False #"no info for pcu_id %s" % pcu['pcu_id'] # Try the PCU first logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model'])) - ret = reboot_test(nodename, values, continue_probe, verbose, dryrun) + ret = reboot_test_new(nodename, values, verbose, dryrun) if ret != 0: print ret @@ -1323,27 +1374,51 @@ class Unknown(PCUControl): def model_to_object(modelname): if "AMT" in modelname: return IntelAMT - elif "DS4-RPC" in modelname: + elif "BayTech" in modelname: return BayTech - elif "ilo2" in modelname: + elif "HPiLO" in modelname: return HPiLO - elif "IP-41x" in modelname: + elif "IPAL" in modelname: return IPAL - elif "AP79xx" in modelname or "Masterswitch" in modelname: - return APCMaster + elif "APC" in modelname: + return APCControl elif "DRAC" in modelname: return DRAC elif "WTI" in modelname: return WTIIPS4 elif "ePowerSwitch" in modelname: - return ePowerSwitch - elif "ipmi" in modelname: + return ePowerSwitchNew + elif "IPMI" in modelname: return IPMI - elif "bbsemaverick" in modelname: + elif "BlackBoxPSMaverick" in modelname: return BlackBoxPSMaverick + elif "PM211MIP" in modelname: + return PM211MIP + elif "ManualPCU" in modelname: + return ManualPCU else: + print "UNKNOWN model %s"%modelname return Unknown +def reboot_test_new(nodename, values, verbose, dryrun): + rb_ret = "" + if 'plc_pcu_stats' in values: + values.update(values['plc_pcu_stats']) + + try: + modelname = values['model'] + if modelname: + object = eval('%s(values, verbose, ["22", "23", "80", "443", "9100", "16992", "5869"])' % modelname) + rb_ret = object.reboot(values[nodename], dryrun) + else: + rb_ret = "Not_Run" + # TODO: how to handle the weird, georgetown pcus, the 
drac faults, and ilo faults + except ExceptionPort, err: + rb_ret = str(err) + + return rb_ret + + def reboot_test(nodename, values, continue_probe, verbose, dryrun): rb_ret = "" if 'plc_pcu_stats' in values: @@ -1362,23 +1437,23 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): # TODO: make a more robust version of APC if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]: - apc = APCEurope(values, verbose, ['22', '23']) + apc = APCControl12p3(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) elif values['pcu_id'] in [1110,86]: - apc = APCBrazil(values, verbose, ['22', '23']) + apc = APCControl1p4(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) elif values['pcu_id'] in [1221,1225,1220,1192]: - apc = APCBerlin(values, verbose, ['22', '23']) + apc = APCControl121p3(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) elif values['pcu_id'] in [1173,1240,47,1363,1405,1401,1372,1371]: - apc = APCFolsom(values, verbose, ['22', '23']) + apc = APCControl121p1(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) else: - apc = APCMaster(values, verbose, ['22', '23']) + apc = APCControl13p13(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) # BayTech DS4-RPC @@ -1389,7 +1464,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): rb_ret = baytech.reboot(values[nodename], dryrun) elif values['pcu_id'] in [93]: - baytech = BayTechAU(values, verbose, ['22', '23']) + baytech = BayTechRPC3NC(values, verbose, ['22', '23']) rb_ret = baytech.reboot(values[nodename], dryrun) elif values['pcu_id'] in [1057]: @@ -1401,10 +1476,10 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): # This pcu sometimes doesn't present the 'Username' prompt, # unless you immediately try again... 
try: - baytech = BayTechGeorgeTown(values, verbose, ['22', '23']) + baytech = BayTechRPC16(values, verbose, ['22', '23']) rb_ret = baytech.reboot(values[nodename], dryrun) except: - baytech = BayTechGeorgeTown(values, verbose, ['22', '23']) + baytech = BayTechRPC16(values, verbose, ['22', '23']) rb_ret = baytech.reboot(values[nodename], dryrun) else: baytech = BayTech(values, verbose, ['22', '23']) @@ -1424,13 +1499,13 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): # DRAC ssh elif continue_probe and values['model'].find("DRAC") >= 0: - # TODO: I don't think DRACRacAdm will throw an exception for the + # TODO: I don't think DRAC will throw an exception for the # default method to catch... try: - drac = DRACRacAdm(values, verbose, ['443', '5869']) + drac = DRAC(values, verbose, ['443', '5869']) rb_ret = drac.reboot(0, dryrun) except: - drac = DRAC(values, verbose, ['22']) + drac = DRACDefault(values, verbose, ['22']) rb_ret = drac.reboot(0, dryrun) elif continue_probe and values['model'].find("WTI IPS-4") >= 0: @@ -1455,13 +1530,13 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): elif continue_probe and values['model'].find("ePowerSwitch") >=0: # TODO: allow a different port than http 80. 
if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]: - eps = ePowerSwitchGood(values, verbose, ['80']) + eps = ePowerSwitchNew(values, verbose, ['80']) elif values['pcu_id'] in [1003]: # OLD EPOWER print "OLD EPOWER" - eps = ePowerSwitch(values, verbose, ['80']) + eps = ePowerSwitchOld(values, verbose, ['80']) else: - eps = ePowerSwitchGood(values, verbose, ['80']) + eps = ePowerSwitchNew(values, verbose, ['80']) rb_ret = eps.reboot(values[nodename], dryrun) elif continue_probe and values['pcu_id'] in [1122]: @@ -1472,7 +1547,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): rb_ret = "Unsupported_PCU" elif continue_probe == False: - if 'portstatus' in values: + if 'port_status' in values: rb_ret = "NetDown" else: rb_ret = "Not_Run" @@ -1519,6 +1594,5 @@ def main(): print err if __name__ == '__main__': - import plc logger = logging.getLogger("monitor") main() diff --git a/setup.py b/setup.py index 407f5bf..19532fa 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup packages=['monitor', 'monitor.database', 'monitor.database.zabbixapi', - 'monitor.database.info', 'monitor.pcu', 'monitor.sources', + 'monitor.database.info', 'monitor.sources', 'monitor.util', 'monitor.wrapper' ] print packages diff --git a/web/MonitorWeb/monitorweb/controllers.py b/web/MonitorWeb/monitorweb/controllers.py index 2f7b3a6..e5d0da2 100644 --- a/web/MonitorWeb/monitorweb/controllers.py +++ b/web/MonitorWeb/monitorweb/controllers.py @@ -1,5 +1,7 @@ import turbogears as tg -from turbogears import controllers, expose, flash +from turbogears import controllers, expose, flash, exception_handler +from cherrypy import request, response +import cherrypy # from monitorweb import model # import logging # log = logging.getLogger("monitorweb.controllers") @@ -14,6 +16,10 @@ from monitor.wrapper.plccache import plcdb_id2lb as site_id2lb from monitor.wrapper.plccache import plcdb_hn2lb as site_hn2lb from monitor.wrapper.plccache import plcdb_lb2hn 
as site_lb2hn +from monitorweb.templates.links import * + +import findbad + def format_ports(pcu): retval = [] if pcu.port_status and len(pcu.port_status.keys()) > 0 : @@ -132,17 +138,83 @@ class Root(controllers.RootController): query.append(node) return dict(now=time.ctime(), query=query, fc=filtercount) + + def nodeaction_handler(self, tg_exceptions=None): + """Handle any kind of error.""" + refurl = request.headers.get("Referer",link("pcu")) + print refurl + # TODO: do this more intelligently... + if len(urllib.splitquery(refurl)) > 1: + pcuid = urllib.splitvalue(urllib.splitquery(refurl)[1])[1] + else: + pcuid=None + + cherry_trail = cherrypy._cputil.get_object_trail() + for i in cherry_trail: + print "trail: ", i + print pcuid + return self.pcuview(pcuid, **dict(exceptions=tg_exceptions)) + #return dict(pcuquery=[], nodequery=[], exceptions=tg_exceptions) + + def nodeaction(self, **data): + for item in data.keys(): + print "%s %s" % ( item, data[item] ) + + if 'hostname' in data: + hostname = data['hostname'] + else: + flash("No hostname given in submitted data") + return + + if 'submit' in data: + action = data['submit'] + else: + flash("No submit action given in submitted data") + return + + if action == "Reboot": + print "REBOOT: %s" % hostname + ret = reboot.reboot_str(str(hostname)) + print ret + if ret: raise RuntimeError("Error using PCU: " + ret) + + elif action == "ExternalProbe": + raise RuntimeError("THIS IS A PROBLEM") + + elif action == "DeepProbe": + findbad.probe(str(hostname)) + else: + # unknown action + flash("Unknown action given") + return + + # TODO: add form validation @expose(template="monitorweb.templates.pcuview") - def pcuview(self, pcuid=None): + @exception_handler(nodeaction_handler,"isinstance(tg_exceptions,RuntimeError)") + def pcuview(self, pcuid=None, **data): pcuquery=[] + nodequery=[] + if 'submit' in data.keys(): + self.nodeaction(**data) + if 'exceptions' in data: + exceptions = data['exceptions'] + else: + exceptions = 
None + if pcuid: for pcu in FindbadPCURecord.get_latest_by(plc_pcuid=pcuid): # NOTE: count filter prep_pcu_for_display(pcu) pcuquery += [pcu] - - return dict(pcuquery=pcuquery) + for nodename in pcu.plc_pcu_stats['nodenames']: + print "query for %s" % nodename + node = FindbadNodeRecord.get_latest_by(hostname=nodename).first() + print "%s" % node + if node: + prep_node_for_display(node) + nodequery += [node] + return dict(pcuquery=pcuquery, nodequery=nodequery, exceptions=exceptions) @expose(template="monitorweb.templates.pculist") def pcu(self, filter='all'): diff --git a/web/MonitorWeb/monitorweb/static/css/style.css b/web/MonitorWeb/monitorweb/static/css/style.css index 64370c6..7bb4078 100644 --- a/web/MonitorWeb/monitorweb/static/css/style.css +++ b/web/MonitorWeb/monitorweb/static/css/style.css @@ -26,15 +26,56 @@ tr.even td {background-color:#fff;} font-size: 180%; } + pre.results { + margin: 0 auto 0 auto; +} + a.link, a, a.active { color: #369; + display: inline; + page-break-after: avoid; + page-break-before: avoid; } + /*@media screen {*/ a.ext-link .icon { background: URL(../images/extlink.gif) left center no-repeat; padding-left: 16px; } + +a.info{ + position:relative; /*this is the key*/ + z-index:24; + color:#000; + } + +a.info:hover{z-index:25;} + +a.info span{display: none} + +a.info:hover span{ /*the span will display just on :hover state*/ + display:block; + position:absolute; + top:2em; left:2em; width:15em; + border:1px solid #AAA; + color:#DDD; + background-color:black; + text-align: center} + +div#links a:hover span {display: block; + /*position: absolute; top: 200px; left: 0; width: 125px;*/ + /*position: relative; top: 0px; left: 40; width: 30em;*/ + float: left; width: 30em; + padding: 5px; margin: 5px; z-index: 100; + color: #AAA; background: black; + font: 10px Verdana, sans-serif; text-align: center;} + + +div.oneline { clear : both; } +a.left { float: left; } +a.right { float: right; } + /** html a.ext-link .icon { display: 
inline-block; }*/ /*}*/ @@ -148,11 +189,15 @@ h2 { .code { font-family: monospace; + width: 100%; + background: #eee; } span.code { - font-weight: bold; - background: #eee; + font-size: 120%; + /*font-weight: bold;*/ + margin: 20 20 20 20; + padding: 20 20 20 20; } #status_block { @@ -172,6 +217,13 @@ span.code { background: #eef URL('../images/info.png') left center no-repeat; border: 1px solid #cce; } +.error { + margin: 0.5em auto 0.5em auto; + padding: 15px 10px 15px 55px; + width: 450px; + background: #e30 URL('../images/widgets.png') left center no-repeat; + border: 1px solid #c00; +} .fielderror { color: red; diff --git a/web/MonitorWeb/monitorweb/templates/links.py b/web/MonitorWeb/monitorweb/templates/links.py index 0c9203d..6b47bb1 100644 --- a/web/MonitorWeb/monitorweb/templates/links.py +++ b/web/MonitorWeb/monitorweb/templates/links.py @@ -1,4 +1,5 @@ from monitor import config +import turbogears as tg import urllib def plc_node_uri(hostname): @@ -37,8 +38,8 @@ def query_to_path(**kwargs): tgpath += '?' 
+ query_string return tgpath -def link(base, ext=True, **kwargs): - if ext: +def link(base, **kwargs): + if config.embedded: str = "?query=" + base + query_to_path(**kwargs) else: str = tg.url(base, **kwargs) diff --git a/web/MonitorWeb/monitorweb/templates/nodelist.kid b/web/MonitorWeb/monitorweb/templates/nodelist.kid index cb62ec1..669f02f 100644 --- a/web/MonitorWeb/monitorweb/templates/nodelist.kid +++ b/web/MonitorWeb/monitorweb/templates/nodelist.kid @@ -3,6 +3,7 @@ layout_params['page_title'] = "Monitor Node List" from monitor.util import diff_time from time import mktime +from links import * ?> - Production(${fc['BOOT']}) - Debug(${fc['DEBUG']}) - Down(${fc['DOWN']}) - Never Booted(${fc['neverboot']}) - Pending Reply(${fc['pending']}) - All + Production(${fc['BOOT']}) + Debug(${fc['DEBUG']}) + Down(${fc['DOWN']}) + Never Booted(${fc['neverboot']}) + Pending Reply(${fc['pending']}) + All @@ -40,8 +41,8 @@ from time import mktime - ${node.loginbase} - + ${node.loginbase} + diff --git a/web/MonitorWeb/monitorweb/templates/nodeview.kid b/web/MonitorWeb/monitorweb/templates/nodeview.kid index f1acbae..354761c 100644 --- a/web/MonitorWeb/monitorweb/templates/nodeview.kid +++ b/web/MonitorWeb/monitorweb/templates/nodeview.kid @@ -26,16 +26,16 @@ from links import * - + ${node.loginbase} - + ${node.hostname} - ${node.pcu_short_status} + ${node.pcu_short_status} ${node.pcu_short_status} diff --git a/web/MonitorWeb/monitorweb/templates/pculist.kid b/web/MonitorWeb/monitorweb/templates/pculist.kid index 510218e..99ad41a 100644 --- a/web/MonitorWeb/monitorweb/templates/pculist.kid +++ b/web/MonitorWeb/monitorweb/templates/pculist.kid @@ -13,11 +13,11 @@ from links import * - - - - - + + + + + @@ -31,8 +31,8 @@ from links import * - - + + @@ -40,16 +40,30 @@ from links import * - - + + - + diff --git a/web/MonitorWeb/monitorweb/templates/pcuview.kid b/web/MonitorWeb/monitorweb/templates/pcuview.kid index 013714e..4eed424 100644 --- 
a/web/MonitorWeb/monitorweb/templates/pcuview.kid +++ b/web/MonitorWeb/monitorweb/templates/pcuview.kid @@ -3,6 +3,7 @@ layout_params['page_title'] = "Monitor PCU View" from monitor.util import diff_time from monitor import config +from time import mktime from pcucontrol.reboot import pcu_name, model_to_object from links import * ?> @@ -27,34 +28,115 @@ from links import * - + - - - + + - - - + + +
Ok(${fc['ok']})Misconfigured(${fc['Not_Run']})Offline(${fc['NetDown']})Runtime Error(${fc['pending']})AllOk(${fc['ok']})Misconfigured(${fc['Not_Run']})Offline(${fc['NetDown']})Runtime Error(${fc['pending']})All
PCU Name Missing Fields DNS StatusPort StatusTest ResultsPort StatusTest Results Model Nodes
${node.loginbase} - ${pcu_name(node.plc_pcu_stats)}${node.loginbase} + + 80 + +
- ${node.loginbase} + + ${pcu.loginbase} - - ${pcu_name(node.plc_pcu_stats)} + + ${pcu_name(pcu.plc_pcu_stats)} - 80
+

Convenience Calls

+ +
+ + + ssh -o PasswordAuthentication=yes -o PubkeyAuthentication=no + ${pcu.plc_pcu_stats['username']}@${pcu_name(pcu.plc_pcu_stats)} + + + telnet ${pcu_name(pcu.plc_pcu_stats)} + + + http://${pcu_name(pcu.plc_pcu_stats)} + + + https://${pcu_name(pcu.plc_pcu_stats)} +
+ /usr/share/monitor/racadm.py -r ${pcu.plc_pcu_stats['ip']} + -u ${pcu.plc_pcu_stats['username']} -p '${pcu.plc_pcu_stats['password']}' +
+ /usr/share/monitor/pcucontrol/models/hpilo/locfg.pl + -f /usr/share/monitor/pcucontrol/models/hpilo/iloxml/Reset_Server.xml + -s ${pcu_name(pcu.plc_pcu_stats)} + -u ${pcu.plc_pcu_stats['username']} + -p '${pcu.plc_pcu_stats['password']} ' | grep MESSAGE" +
+ + /usr/share/monitor/pcucontrol/models/intelamt/remoteControl -A + -verbose 'http://${pcu_name(pcu.plc_pcu_stats)}:16992/RemoteControlService' + -user admin -pass '${pcu.plc_pcu_stats['password']}' + +
+

Controls

- ... node list ... -
- form for reboot - + + + + + + + + + + + + + + + + + + + + + + + +
Hostnamelast_contactLast_checkedExternal ProbeInternal ProbeReboot
+ + ${node.hostname} + + +
+ + +
+
+ +
+ + +
+
+ +
+ + +
+
+
+ ${exceptions} +
+

Legend

diff --git a/web/MonitorWeb/monitorweb/templates/sitelist.kid b/web/MonitorWeb/monitorweb/templates/sitelist.kid index 299daa8..50b296e 100644 --- a/web/MonitorWeb/monitorweb/templates/sitelist.kid +++ b/web/MonitorWeb/monitorweb/templates/sitelist.kid @@ -1,6 +1,7 @@ - - - - - + + + + + @@ -33,7 +34,7 @@ layout_params['page_title'] = "Monitor Site List" - + diff --git a/web/MonitorWeb/monitorweb/templates/sitemenu.kid b/web/MonitorWeb/monitorweb/templates/sitemenu.kid index 73af304..4383b84 100644 --- a/web/MonitorWeb/monitorweb/templates/sitemenu.kid +++ b/web/MonitorWeb/monitorweb/templates/sitemenu.kid @@ -6,6 +6,10 @@ + + + + @@ -16,10 +20,11 @@
Compliant(${fc['good']})Down(${fc['down']})New Sites(${fc['new']})Disabled(${fc['pending']})All(${fc['all']})Compliant(${fc['good']})Down(${fc['down']})New Sites(${fc['new']})Disabled(${fc['pending']})All(${fc['all']})
${site.loginbase}${site.loginbase} ${site.slices_used}/${site.slices_total} ${site.nodes_up} / ${site.nodes_total}
- - - - + + + + + diff --git a/web/MonitorWeb/monitorweb/templates/siteview.kid b/web/MonitorWeb/monitorweb/templates/siteview.kid index 7b56393..039a2b7 100644 --- a/web/MonitorWeb/monitorweb/templates/siteview.kid +++ b/web/MonitorWeb/monitorweb/templates/siteview.kid @@ -22,7 +22,7 @@ from links import * - @@ -48,10 +48,11 @@ from links import * - + + ${node.pcu_short_status} diff --git a/zabbix/ext_boot_state b/zabbix/ext_boot_state index d72e124..657f208 100755 --- a/zabbix/ext_boot_state +++ b/zabbix/ext_boot_state @@ -35,8 +35,6 @@ def get_state_from_port(host): 'debug' : 3, 'boot' : 4, } - #print "--%s--" % line - #print "--%s--" % type(line) if line.strip() == "" or line.strip() == "Timeout": return mapping["down"] (p22, p80, p806) = line.split() @@ -51,4 +49,5 @@ def get_state_from_port(host): else: return mapping["unknown"] -print get_state_from_port(sys.argv[1]) +if __name__ == "__main__": + print get_state_from_port(sys.argv[1]) -- 2.43.0
SitesPCUsNodesActionsSitesPCUsNodesActions
+ ${site.loginbase}
your.host.org + your.host.org - ${node.pcu_short_status} ${node.pcu_short_status}