From 32e64e33bc81735e22024c5a44510848bb3c88df Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Tue, 28 Jul 2009 22:22:07 +0000 Subject: [PATCH] added firewall checks and notices added extra views for a simple and detailed view for sites added firewall_notice to emailTxt added extra fields for firewall, external_dns_status, as well as several other fields reserved for future use, such as traceroute, uptime and rpms. updated policy to act on firewalled nodes add dependency on 'nc' netcat for a more reliable nmap, port probing utility. reorganized scanapi.py, hopefully can generalize this further in the future. --- Monitor.spec | 1 + monitor/common.py | 9 +- monitor/database/info/findbad.py | 5 + monitor/database/info/history.py | 1 + monitor/scanapi.py | 389 +++++++++++------- monitor/wrapper/emailTxt.py | 23 ++ nodebad.py | 2 + policy.py | 17 +- upgrade/monitor-server-3.0-19.sql | 3 + web/MonitorWeb/monitorweb/controllers.py | 77 ++++ .../monitorweb/templates/detailview.kid | 293 +++++++++++++ .../monitorweb/templates/pcuview.kid | 7 +- .../monitorweb/templates/simpleview.kid | 182 ++++++++ .../monitorweb/templates/sitemenu.kid | 4 +- 14 files changed, 844 insertions(+), 169 deletions(-) create mode 100644 web/MonitorWeb/monitorweb/templates/detailview.kid create mode 100644 web/MonitorWeb/monitorweb/templates/simpleview.kid diff --git a/Monitor.spec b/Monitor.spec index 85ba2eb..230e6de 100644 --- a/Monitor.spec +++ b/Monitor.spec @@ -62,6 +62,7 @@ Requires: perl-libwww-perl Requires: perl-IO-Socket-SSL Requires: MySQL-python Requires: nmap +Requires: nc Requires: rt3 Requires: plewww-plekit diff --git a/monitor/common.py b/monitor/common.py index da174d8..6fca571 100644 --- a/monitor/common.py +++ b/monitor/common.py @@ -215,14 +215,19 @@ def get_nodeset(config): return l_nodes -def email_exception(content=None): +def email_exception(content=None, title=None): import config from monitor.model import Message import traceback msg=traceback.format_exc() if 
content: msg = content + "\n" + msg - m=Message("exception running monitor", msg, False) + + full_title = "exception running monitor" + if title: + full_title = "exception running monitor %s" % title + + m=Message(full_title, msg, False) m.send([config.cc_email]) return diff --git a/monitor/database/info/findbad.py b/monitor/database/info/findbad.py index dbe0eca..0cfb965 100644 --- a/monitor/database/info/findbad.py +++ b/monitor/database/info/findbad.py @@ -40,6 +40,9 @@ class FindbadNodeRecord(Entity): nm_status = Field(String,default=None) fs_status = Field(String,default=None) dns_status = Field(String,default=None) + external_dns_status = Field(Boolean,default=True) + uptime = Field(String,default=None) + rpms = Field(String,default=None) princeton_comon_dir = Field(Boolean,default=False) princeton_comon_running = Field(Boolean,default=False) princeton_comon_procs = Field(Int,default=None) @@ -50,9 +53,11 @@ class FindbadNodeRecord(Entity): plc_pcuid = Field(Int,default=None) comon_stats = Field(PickleType,default=None) port_status = Field(PickleType,default=None) + firewall = Field(Boolean,default=False) ssh_portused = Field(Int,default=22) ssh_status = Field(Boolean,default=False) ssh_error = Field(String,default=None) # set if ssh_access == False + traceroute = Field(String,default=None) ping_status = Field(Boolean,default=False) # INFERRED diff --git a/monitor/database/info/history.py b/monitor/database/info/history.py index ec6f5ac..7190248 100644 --- a/monitor/database/info/history.py +++ b/monitor/database/info/history.py @@ -16,6 +16,7 @@ class HistoryNodeRecord(Entity): last_changed = Field(DateTime,default=datetime.now) status = Field(String,default="unknown") haspcu = Field(Boolean,default=False) + firewall = Field(Boolean,default=False) plc_nodeid = Field(Int,default=1) acts_as_versioned(ignore=['last_changed', 'last_checked']) diff --git a/monitor/scanapi.py b/monitor/scanapi.py index af7fcd4..22e3e74 100644 --- a/monitor/scanapi.py +++ 
b/monitor/scanapi.py @@ -167,138 +167,107 @@ class ScanNodeInternal(ScanInterface): syncclass = None primarykey = 'hostname' + def collectPorts(self, nodename, port_list=[22,80,806]): + values = {} + for port in port_list: + ret = os.system("nc -w 5 -z %s %s > /dev/null" % (nodename, port) ) + if ret == 0: + values[str(port)] = "open" + else: + values[str(port)] = "closed" + return {'port_status' : values } + def collectNMAP(self, nodename, cohash): #### RUN NMAP ############################### + # NOTE: run the same command three times and take the best of three + # runs. NMAP can drop packets, and especially so when it runs many + # commands at once. values = {} nmap = command.CMD() print "nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename - (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename) + (oval1,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename) + (oval2,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename) + (oval3,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename) # NOTE: an empty / error value for oval, will still work. 
- (values['port_status'], continue_probe) = nmap_port_status(oval) + values['port_status'] = {} + (o1,continue_probe) = nmap_port_status(oval1) + (o2,continue_probe) = nmap_port_status(oval2) + (o3,continue_probe) = nmap_port_status(oval3) + for p in ['22', '80', '806']: + l = [ o1[p], o2[p], o3[p] ] + if len(filter(lambda x: x == 'open', l)) > 1: + values['port_status'][p] = 'open' + else: + values['port_status'][p] = o1[p] - values['date_checked'] = datetime.now() - + print values['port_status'] return (nodename, values) - def collectInternal(self, nodename, cohash): - ### RUN PING ###################### + def collectPING(self, nodename, cohash): + values = {} ping = command.CMD() (oval,errval) = ping.run_noexcept("ping -c 1 -q %s | grep rtt" % nodename) - try: - values = {} + values = {} + if oval == "": + # An error occurred + values['ping_status'] = False + else: + values['ping_status'] = True - if oval == "": - # An error occurred - values['ping_status'] = False - else: - values['ping_status'] = True + return values - try: - for port in [22, 806]: - ssh = command.SSH('root', nodename, port) - - (oval, errval) = ssh.run_noexcept2(""" <<\EOF - echo "{" - echo ' "kernel_version":"'`uname -a`'",' - echo ' "bmlog":"'`ls /tmp/bm.log`'",' - echo ' "bootcd_version":"'`cat /mnt/cdrom/bootme/ID`'",' - echo ' "nm_status":"'`ps ax | grep nm.py | grep -v grep`'",' - echo ' "dns_status":"'`host boot.planet-lab.org 2>&1`'",' - echo ' "princeton_comon_dir":"'`ls -d /vservers/princeton_comon`'",' - echo ' "uptime":"'`uptime`'",' - - ID=`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'` - echo ' "princeton_comon_running":"'`ls -d /proc/virtual/$ID`'",' - echo ' "princeton_comon_procs":"'`vps ax | grep $ID | grep -v grep | wc -l`'",' - echo ' "fs_status":"'`grep proc /proc/mounts | grep ro, ; if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 20 touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then timeout.pl 20 touch /vservers/monitor.log 2>&1 ; fi 
; fi`'",' - echo ' "rpm_version":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 30 rpm -q NodeManager ; fi`'",' - echo ' "rpm_versions":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 45 rpm -q -a ; fi`'",' - echo "}" -EOF """) - - values['ssh_error'] = errval - if len(oval) > 0: - #print "OVAL: %s" % oval - values.update(eval(oval)) - values['ssh_portused'] = port - break - else: - values.update({'kernel_version': "", 'bmlog' : "", 'bootcd_version' : '', - 'nm_status' : '', - 'fs_status' : '', - 'uptime' : '', - 'dns_status' : '', - 'rpm_version' : '', - 'rpm_versions' : '', - 'princeton_comon_dir' : "", - 'princeton_comon_running' : "", - 'princeton_comon_procs' : "", 'ssh_portused' : None}) - except: - print traceback.print_exc() - sys.exit(1) + def collectTRACEROUTE(self, nodename, cohash): + values = {} + trace = command.CMD() + (oval,errval) = trace.run_noexcept("traceroute %s" % nodename) - values['fs_status'] = "" - print "ALLVERSIONS: %s %s" % (nodename, values['rpm_versions']) + values['traceroute'] = oval - print "RPMVERSION: %s %s" % (nodename, values['rpm_version']) - print "UPTIME: %s %s" % (nodename, values['uptime']) - ### RUN SSH ###################### - b_getbootcd_id = True - - oval = values['kernel_version'] - if "2.6.17" in oval or "2.6.2" in oval: - values['ssh_status'] = True - values['observed_category'] = 'PROD' - if "bm.log" in values['bmlog']: - values['observed_status'] = 'DEBUG' - else: - values['observed_status'] = 'BOOT' - elif "2.6.12" in oval or "2.6.10" in oval: - values['ssh_status'] = True - values['observed_category'] = 'OLDPROD' - if "bm.log" in values['bmlog']: - values['observed_status'] = 'DEBUG' - else: - values['observed_status'] = 'BOOT' - - # NOTE: on 2.6.8 kernels, with 4.2 bootstrapfs, the chroot - # command fails. I have no idea why. 
- elif "2.4" in oval or "2.6.8" in oval: - b_getbootcd_id = False - values['ssh_status'] = True - values['observed_category'] = 'OLDBOOTCD' - values['observed_status'] = 'DEBUG' - elif oval != "": - values['ssh_status'] = True - values['observed_category'] = 'UNKNOWN' - if "bm.log" in values['bmlog']: - values['observed_status'] = 'DEBUG' - else: - values['observed_status'] = 'BOOT' - else: - # An error occurred. - b_getbootcd_id = False - values['ssh_status'] = False - values['observed_category'] = 'ERROR' - values['observed_status'] = 'DOWN' - val = errval.strip() - values['ssh_error'] = val - values['kernel_version'] = "" - - if b_getbootcd_id: - # try to get BootCD for all nodes that are not 2.4 nor inaccessible - oval = values['bootcd_version'] - if "BootCD" in oval: - values['bootcd_version'] = oval - if "v2" in oval and \ - ( nodename is not "planetlab1.cs.unc.edu" and \ - nodename is not "planetlab2.cs.unc.edu" ): - values['observed_category'] = 'OLDBOOTCD' + return values + + def collectSSH(self, nodename, cohash): + values = {} + try: + for port in [22, 806]: + ssh = command.SSH('root', nodename, port) + + (oval, errval) = ssh.run_noexcept2(""" <<\EOF + echo "{" + echo ' "kernel_version":"'`uname -a`'",' + echo ' "bmlog":"'`ls /tmp/bm.log`'",' + echo ' "bootcd_version":"'`cat /mnt/cdrom/bootme/ID`'",' + echo ' "nm_status":"'`ps ax | grep nm.py | grep -v grep`'",' + echo ' "dns_status":"'`host boot.planet-lab.org 2>&1`'",' + echo ' "princeton_comon_dir":"'`ls -d /vservers/princeton_comon`'",' + echo ' "uptime":"'`cat /proc/uptime`'",' + + ID=`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'` + echo ' "princeton_comon_running":"'`ls -d /proc/virtual/$ID`'",' + echo ' "princeton_comon_procs":"'`vps ax | grep $ID | grep -v grep | wc -l`'",' + echo ' "fs_status":"'`grep proc /proc/mounts | grep ro, ; if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 20 touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then timeout.pl 20 touch 
/vservers/monitor.log 2>&1 ; fi ; fi`'",' + echo ' "rpm_version":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 30 rpm -q NodeManager ; fi`'",' + echo ' "rpm_versions":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 45 rpm -q -a ; fi`'",' + echo "}" +EOF """) + + values['ssh_error'] = errval + if len(oval) > 0: + #print "OVAL: %s" % oval + values.update(eval(oval)) + values['ssh_portused'] = port + break else: - values['bootcd_version'] = "" - else: - values['bootcd_version'] = "" + values.update({'kernel_version': "", 'bmlog' : "", 'bootcd_version' : '', + 'nm_status' : '', + 'fs_status' : '', + 'uptime' : '', + 'dns_status' : '', + 'rpm_version' : '', + 'rpm_versions' : '', + 'princeton_comon_dir' : "", + 'princeton_comon_running' : "", + 'princeton_comon_procs' : "", 'ssh_portused' : None}) oval = values['nm_status'] if "nm.py" in oval: @@ -329,8 +298,144 @@ EOF """) values['princeton_comon_procs'] = int(oval) else: values['princeton_comon_procs'] = None + except: + print traceback.print_exc() + sys.exit(1) + + return values + + def collectPLC(self, nodename, cohash): + values = {} + ### GET PLC NODE ###################### + d_node = plccache.GetNodeByName(nodename) + values['plc_node_stats'] = d_node + + ### GET PLC PCU ###################### + site_id = -1 + d_pcu = None + if d_node and len(d_node['pcu_ids']) > 0: + d_pcu = d_node['pcu_ids'][0] + + site_id = d_node['site_id'] + + values['plc_pcuid'] = d_pcu + + ### GET PLC SITE ###################### + print "SITEID: %s" % site_id + d_site = plccache.GetSitesById([ site_id ])[0] + values['loginbase'] = d_site['login_base'] + values['plc_site_stats'] = d_site + + return values + + def evaluate(self, nodename, values): + # TODO: this section can probably be reduced to a policy statement + # using patterns and values collected so far. + # NOTE: A node is "DOWN" if + # * cannot ssh into it. + # * all ports are not open for a 'BOOT' node + # * dns for hostname does not exist. 
+ b_getbootcd_id = True + + oval = values['kernel_version'] + values['ssh_status'] = True + if "2.6.17" in oval or "2.6.2" in oval: + values['observed_category'] = 'PROD' + if "bm.log" in values['bmlog']: + values['observed_status'] = 'DEBUG' + else: + values['observed_status'] = 'BOOT' + elif "2.6.12" in oval or "2.6.10" in oval: + values['observed_category'] = 'OLDPROD' + if "bm.log" in values['bmlog']: + values['observed_status'] = 'DEBUG' + else: + values['observed_status'] = 'BOOT' + + # NOTE: on 2.6.8 kernels, with 4.2 bootstrapfs, the chroot + # command fails. I have no idea why. + elif "2.4" in oval or "2.6.8" in oval: + b_getbootcd_id = False + values['observed_category'] = 'OLDBOOTCD' + values['observed_status'] = 'DEBUG' + elif oval != "": + values['observed_category'] = 'UNKNOWN' + if "bm.log" in values['bmlog']: + values['observed_status'] = 'DEBUG' + else: + values['observed_status'] = 'BOOT' + else: + # An error occurred. + b_getbootcd_id = False + values['ssh_status'] = False + values['observed_category'] = 'ERROR' + values['observed_status'] = 'DOWN' + values['kernel_version'] = "" + + values['firewall'] = False + + # NOTE: A node is down if some of the public ports are not open + if values['observed_status'] == "BOOT": + # verify that all ports are open. Else, report node as down. 
+ if not ( values['port_status']['22'] == "open" and \ + values['port_status']['80'] == "open" and \ + values['port_status']['806'] == "open") : + #email_exception(nodename, "%s FILTERED HOST" % nodename) + values['observed_status'] = 'DOWN' + values['firewall'] = True + + #if not values['external_dns_status']: + # email_exception("%s DNS down" % nodename) + + if b_getbootcd_id: + # try to get BootCD for all nodes that are not 2.4 nor inaccessible + oval = values['bootcd_version'] + if "BootCD" in oval: + values['bootcd_version'] = oval + if "v2" in oval and \ + ( nodename is not "planetlab1.cs.unc.edu" and \ + nodename is not "planetlab2.cs.unc.edu" ): + values['observed_category'] = 'OLDBOOTCD' + else: + values['bootcd_version'] = "" + else: + values['bootcd_version'] = "" + + return values + + def collectDNS(self, nodename, cohash): + values = {} + try: + ipaddr = socket.gethostbyname(nodename) + # TODO: check that IP returned matches IP in plc db. + values['external_dns_status'] = True + except Exception, err: + values['external_dns_status'] = False + + return values + + def collectInternal(self, nodename, cohash): + try: + values = {} + + v = self.collectPING(nodename, cohash) + values.update(v) + + v = self.collectPorts(nodename) + values.update(v) + + v = self.collectSSH(nodename, cohash) + values.update(v) + + v = self.collectDNS(nodename, cohash) + values.update(v) + + v = self.collectTRACEROUTE(nodename, cohash) + values.update(v) + + v = self.collectPLC(nodename, cohash) + values.update(v) - if nodename in cohash: values['comon_stats'] = cohash[nodename] else: @@ -341,51 +446,23 @@ EOF """) 'cpuspeed' : "null", 'disksize' : 'null', 'memsize' : 'null'} - # include output value - ### GET PLC NODE ###################### - d_node = plccache.GetNodeByName(nodename) - values['plc_node_stats'] = d_node - - ##### NMAP ################### - (n, v) = self.collectNMAP(nodename, None) - values.update(v) - - ### GET PLC PCU ###################### - site_id = -1 - 
d_pcu = None - if d_node: - pcu = d_node['pcu_ids'] - if len(pcu) > 0: - d_pcu = pcu[0] - - site_id = d_node['site_id'] - values['plc_pcuid'] = d_pcu - - ### GET PLC SITE ###################### - plc_lock.acquire() - d_site = None - values['loginbase'] = "" - try: - d_site = plccache.GetSitesById([ site_id ])[0] - #d_site = plc.getSites({'site_id': site_id}, - # ['max_slices', 'slice_ids', 'node_ids', 'login_base'])[0] - values['loginbase'] = d_site['login_base'] - except: - traceback.print_exc() - plc_lock.release() + values['rpms'] = values['rpm_versions'] + print "ALLVERSIONS: %s %s" % (nodename, values['rpm_versions']) + print "RPMVERSION: %s %s" % (nodename, values['rpm_version']) + print "UPTIME: %s %s" % (nodename, values['uptime']) - values['plc_site_stats'] = d_site + values = self.evaluate(nodename, values) values['date_checked'] = datetime.now() + except: print traceback.print_exc() return (nodename, values) + def internalprobe(hostname): - #fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", - # if_new_set={'round' : 1}) - scannode = ScanNodeInternal() # fbsync.round) + scannode = ScanNodeInternal() try: (nodename, values) = scannode.collectInternal(hostname, {}) scannode.record(None, (nodename, values)) @@ -396,12 +473,10 @@ def internalprobe(hostname): return False def externalprobe(hostname): - #fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", - # if_new_set={'round' : 1}) - scannode = ScanNodeInternal() # fbsync.round) + scannode = ScanNodeInternal() try: - (nodename, values) = scannode.collectNMAP(hostname, {}) - scannode.record(None, (nodename, values)) + values = self.collectPorts(hostname) + scannode.record(None, (hostname, values)) session.flush() return True except: diff --git a/monitor/wrapper/emailTxt.py b/monitor/wrapper/emailTxt.py index bb03942..3afbe7b 100644 --- a/monitor/wrapper/emailTxt.py +++ b/monitor/wrapper/emailTxt.py @@ -118,6 +118,29 @@ This notice is simply to let you know that: appears stuck 
in a debug mode. To try to correct this, we're trying to rerun BootManager.py. If any action is needed from you, you will recieve additional notices. Thank you! + """) + firewall_notice=("""Host %(hostname)s blocked by a firewall""", + """ +This notice is simply to let you know that: + %(hostname)s + +has some ports that appear to be blocked, making the node unusable. While +some ports are open, to be a fully functional node, all ports need to be +accessible at all times. Please see the following for the list of +requirements for hosting a node: + + http://www.planet-lab.org/hosting + +The node will be considered 'DOWN' until the ports are unblocked. + +Please investigate, and let us know if there's anything we can do to help get +it back on-line. You can see more information about the current status of +this host here: + + http://%(monitor_hostname)s/monitor/pcuview?loginbase=%(loginbase)s + +Thank you very much for your help, + -- %(plc_name)s (%(support_email)s) """) down_notice=("""Host %(hostname)s is down""", """ diff --git a/nodebad.py b/nodebad.py index bd3ed0d..9ba6a32 100755 --- a/nodebad.py +++ b/nodebad.py @@ -54,6 +54,8 @@ def check_node_state(rec, node): else: node.haspcu = False + node.firewall = rec.firewall + # NOTE: 'DOWN' and 'DEBUG' are temporary states, so only need # 'translations' into the node.status state diff --git a/policy.py b/policy.py index 84bdb44..fe54863 100755 --- a/policy.py +++ b/policy.py @@ -119,12 +119,17 @@ def main(hostnames, sitenames): # sitehist.sendMessage('retry_bootman', hostname=host) if nodehist.status == 'down' and \ - changed_greaterthan(nodehist.last_changed, 2) and \ - not found_within(recent_actions, 'down_notice', 3.5): - # send down node notice - - sitehist.sendMessage('down_notice', hostname=host) - print "send message for host %s down" % host + changed_greaterthan(nodehist.last_changed, 2): + if not nodehist.firewall and not found_within(recent_actions, 'down_notice', 3.5): + # send down node notice + 
sitehist.sendMessage('down_notice', hostname=host) + print "send message for host %s down" % host + + if nodehist.firewall and not found_within(recent_actions, 'firewall_notice', 3.5): + # send firewall notice + email_exception(host, "firewall_notice") + sitehist.sendMessage('firewall_notice', hostname=host) + print "send message for host %s down" % host node_count = node_count + 1 print "time: ", time.strftime('%Y-%m-%d %H:%M:%S') diff --git a/upgrade/monitor-server-3.0-19.sql b/upgrade/monitor-server-3.0-19.sql index afcebf8..77e304a 100644 --- a/upgrade/monitor-server-3.0-19.sql +++ b/upgrade/monitor-server-3.0-19.sql @@ -16,3 +16,6 @@ ALTER TABLE findbadnoderecord_history ADD COLUMN uptime varchar DEFAULT NULL; ALTER TABLE findbadnoderecord ADD COLUMN traceroute varchar DEFAULT NULL; ALTER TABLE findbadnoderecord_history ADD COLUMN traceroute varchar DEFAULT NULL; +ALTER TABLE historynoderecord ADD COLUMN firewall boolean DEFAULT false; +ALTER TABLE historynoderecord_history ADD COLUMN firewall boolean DEFAULT false; + diff --git a/web/MonitorWeb/monitorweb/controllers.py b/web/MonitorWeb/monitorweb/controllers.py index 0c80fee..1ce95cc 100644 --- a/web/MonitorWeb/monitorweb/controllers.py +++ b/web/MonitorWeb/monitorweb/controllers.py @@ -87,6 +87,8 @@ def prep_pcu_for_display(pcu): except: agg.loginbase = "unknown" + agg.pcuhist = HistoryPCURecord.query.get(pcu.plc_pcuid) + agg.ports = format_ports(pcu.port_status, pcu.plc_pcu_stats['model']) agg.status = format_pcu_shortstatus(pcu) @@ -351,6 +353,81 @@ class Root(controllers.RootController, MonitorXmlrpcServer): raise RuntimeError("Unknown action given") return + @expose(template="monitorweb.templates.simpleview") + def simpleview(self, **data): + return self.pre_view(**data) + + @expose(template="monitorweb.templates.detailview") + def detailview(self, **data): + return self.pre_view(**data) + + def pre_view(self, **data): + session.flush(); session.clear() + + loginbase=None + hostname=None + pcuid=None +
since=20 + + exceptions = None + sitequery=[] + nodequery=[] + pcuquery=[] + actions=[] + + for key in data: + print key, data[key] + + if 'query' in data: + obj = data['query'] + if len(obj.split(".")) > 1: hostname = obj + else: loginbase=obj + + if 'loginbase' in data: + loginbase = data['loginbase'] + + if 'hostname' in data: + hostname = data['hostname'] + + if 'pcuid' in data: + try: pcuid = int(data['pcuid']) + except: pcuid = None + + if 'since' in data: + try: since = int(since) + except: since = 20 + + if pcuid: + print "pcuid: %s" % pcuid + pcu = FindbadPCURecord.get_latest_by(plc_pcuid=pcuid) + loginbase = PlcSite.query.get(pcu.plc_pcu_stats['site_id']).plc_site_stats['login_base'] + + if hostname: + node = FindbadNodeRecord.get_latest_by(hostname=hostname) + loginbase = PlcSite.query.get(node.plc_node_stats['site_id']).plc_site_stats['login_base'] + + if loginbase: + actions = ActionRecord.query.filter_by(loginbase=loginbase + ).filter(ActionRecord.date_created >= datetime.now() - timedelta(since) + ).order_by(ActionRecord.date_created.desc()) + actions = [ a for a in actions ] + sitequery = [HistorySiteRecord.by_loginbase(loginbase)] + # NOTE: because a single pcu may be assigned to multiple hosts, + # track unique pcus by their plc_pcuid, then turn dict into list + pcus = {} + for node in FindbadNodeRecord.query.filter_by(loginbase=loginbase): + # NOTE: reformat some fields. 
+ agg = prep_node_for_display(node) + nodequery += [agg] + if agg.pcu: + pcus[agg.pcu.pcu.plc_pcuid] = agg.pcu + + for pcuid_key in pcus: + pcuquery += [pcus[pcuid_key]] + + return dict(sitequery=sitequery, pcuquery=pcuquery, nodequery=nodequery, actions=actions, since=since, exceptions=exceptions) + + # TODO: add form validation @expose(template="monitorweb.templates.pcuview") @exception_handler(nodeaction_handler,"isinstance(tg_exceptions,RuntimeError)") diff --git a/web/MonitorWeb/monitorweb/templates/detailview.kid b/web/MonitorWeb/monitorweb/templates/detailview.kid new file mode 100644 index 0000000..5881d98 --- /dev/null +++ b/web/MonitorWeb/monitorweb/templates/detailview.kid @@ -0,0 +1,293 @@ + + + + +
+

Site Status

+ + + + + + + + + + + + + + + + + + + + + + + +
HistoryStatus SinceSite NameEnabledPenaltySlices/MaxNodes/Total
history + ${site.loginbase} + ${site.penalty_level}${site.slices_used}/${site.slices_total}${site.nodes_up} / ${site.nodes_total}
+

PCU Status

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
HistoryStatus SincePCU NameMissing FieldsDNS StatusPort StatusTest ResultsModelNodes
history + + ${pcu_name(agg.pcu.plc_pcu_stats)} + + + 80 +
+
+ There are no PCUs associated with this host. +
+ +

Nodes

+

+ There are no registered nodes for this site. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
History (scan)Status SinceHostnameDNSSSHlast_contact (cached)Last CheckedPort StatusFirewall
status + (scan) + + ${agg.node.hostname} + + 80 + + + + + + + + + +
+
+ ${exceptions} +
+
+ +

Actions Over the Last ${since} Days

+

+ There are no recent actions taken for this site. +

+ + + + + + + + + + + + + + + + + + + + + + + + +
DateAction taken onAction TypeMessage IDErrors
+ + ${act.hostname} + + + ${act.loginbase} + + ${act.message_id} + latest bm log + +
+ + + + +
+ + diff --git a/web/MonitorWeb/monitorweb/templates/pcuview.kid b/web/MonitorWeb/monitorweb/templates/pcuview.kid index 9390bb4..bd6fa58 100644 --- a/web/MonitorWeb/monitorweb/templates/pcuview.kid +++ b/web/MonitorWeb/monitorweb/templates/pcuview.kid @@ -76,7 +76,7 @@ from links import *
- There no PCUs associated with this host. + There are no PCUs associated with this host.
Legend: @@ -145,10 +145,11 @@ from links import * History (scan) Hostname + DNS last_contact (cached) last_checked Port Status - + Filter @@ -162,12 +163,14 @@ from links import * ${agg.node.hostname} + 80 + + + + ${act.message_id} + + latest bm log + + +

+				
+			
+		
+
+	
+	
+
+  
+ + diff --git a/web/MonitorWeb/monitorweb/templates/sitemenu.kid b/web/MonitorWeb/monitorweb/templates/sitemenu.kid index 0d0f5c6..d62be8d 100644 --- a/web/MonitorWeb/monitorweb/templates/sitemenu.kid +++ b/web/MonitorWeb/monitorweb/templates/sitemenu.kid @@ -25,7 +25,7 @@ - +
@@ -40,7 +40,7 @@
- +
-- 2.43.0