bug fix in summary template
author Stephen Soltesz <soltesz@cs.princeton.edu>
Mon, 29 Jun 2009 00:10:21 +0000 (00:10 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Mon, 29 Jun 2009 00:10:21 +0000 (00:10 +0000)
new access method for fbnode
removed fs_status check b/c it caused false negatives for down nodes that
weren't due to the check hanging.
added a new reboot check.

monitor/bootman.py
monitor/database/info/findbad.py
monitor/scanapi.py
policy.py
web/MonitorWeb/monitorweb/templates/summary.kid

index 8dda341..fdfadb2 100755 (executable)
@@ -488,6 +488,7 @@ class DebugInterface:
                for n in ["bminit-cfg-exception-nocfg-update-bootupdatefail-nonode-debug-done",
                                  "bminit-cfg-exception-update-bootupdatefail-nonode-debug-done",
                                  "bminit-cfg-exception-update-bootupdatefail-nonode-debug-validate-exception-done",
+                                 "bminit-cfg-exception-nocfg-update-bootupdatefail-nonode-debug-validate-exception-done",
                                  "bminit-cfg-auth-bootcheckfail-nonode-exception-update-bootupdatefail-nonode-debug-done",
                                ]:
                        sequences.update({n : "update_node_config_email"})
index 5e38aca..dbe0eca 100644 (file)
@@ -20,6 +20,10 @@ class FindbadNodeRecord(Entity):
        def get_latest_by(cls, **kwargs):
                return cls.query.filter_by(**kwargs).first()
 
+       @classmethod
+       def get_latest_by(cls, **kwargs):
+               return cls.query.filter_by(**kwargs).first()
+
        @classmethod
        def get_latest_n_by(cls, n=3, **kwargs):
                return cls.query.filter_by(**kwargs)
index f5c4f5f..5928c5a 100644 (file)
@@ -197,6 +197,7 @@ class ScanNodeInternal(ScanInterface):
                        try:
                                for port in [22, 806]: 
                                        ssh = command.SSH('root', nodename, port)
+                                       #echo '  "fs_status":"'`touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then touch /vservers/monitor.log 2>&1 ; fi ; grep proc /proc/mounts | grep ro,`'",'
 
                                        (oval, errval) = ssh.run_noexcept2(""" <<\EOF
                                                echo "{"
@@ -204,7 +205,6 @@ class ScanNodeInternal(ScanInterface):
                                                echo '  "bmlog":"'`ls /tmp/bm.log`'",'
                                                echo '  "bootcd_version":"'`cat /mnt/cdrom/bootme/ID`'",'
                                                echo '  "nm_status":"'`ps ax | grep nm.py | grep -v grep`'",'
-                                               echo '  "fs_status":"'`touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then touch /vservers/monitor.log 2>&1 ; fi ; grep proc /proc/mounts | grep ro,`'",'
                                                echo '  "dns_status":"'`host boot.planet-lab.org 2>&1`'",'
                                                echo '  "princeton_comon_dir":"'`ls -d /vservers/princeton_comon`'",'
 
@@ -236,6 +236,7 @@ EOF                         """)
                                print traceback.print_exc()
                                sys.exit(1)
 
+                       values['fs_status'] = ""
                        print "ALLVERSIONS: %s %s" % (nodename, values['rpm_versions'])
 
                        print "RPMVERSION: %s %s" % (nodename, values['rpm_version'])
index aafa7d5..f9605ae 100755 (executable)
--- a/policy.py
+++ b/policy.py
@@ -134,9 +134,11 @@ def main(hostnames, sitenames):
        for i,site in enumerate(sitenames):
                sitehist = SiteInterface.get_or_make(loginbase=site)
                siteblack = BlacklistRecord.get_by(loginbase=site)
+               skip_due_to_blacklist=False
 
                if siteblack and not siteblack.expired():
                        print "skipping %s due to blacklist.  will expire %s" % (site, siteblack.willExpire() )
+                       skip_due_to_blacklist=True
                        continue
 
                # TODO: make query only return records within a certin time range,
index e3f7d5f..1aadd7a 100644 (file)
@@ -28,7 +28,7 @@ from links import *
                                <span py:for="key in setorder + [s for s in set(sumdata[primarykey].keys())-set(setorder)]">
                                        <td bgcolor="lightgrey" valign="top" align="center">
                                                <a target="_blank" href="${link(plc_myops_uri() + '/monitor/node2', filter=key)}" py:if="primarykey == 'nodes'" py:content="sumdata[primarykey][key]"></a>
-                                               <div py:if="primarykey != 'nodes'" py:content="sumdata[primarykey][key]"></div>
+                                               <div py:if="primarykey != 'nodes' and key in sumdata[primarykey]" py:content="sumdata[primarykey][key]"></div>
                                                </td>
                                </span>
                        </tr>