From 035a846d8617889c01cae12bc6d64eb7c48b64bd Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Mon, 29 Jun 2009 00:10:21 +0000 Subject: [PATCH] bug fix in summary template new access method for fbnode removed fs_status check b/c it caused false negatives for down nodes that weren't due to the check hanging. added a new reboot check. --- monitor/bootman.py | 1 + monitor/database/info/findbad.py | 4 ++++ monitor/scanapi.py | 3 ++- policy.py | 2 ++ web/MonitorWeb/monitorweb/templates/summary.kid | 2 +- 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/monitor/bootman.py b/monitor/bootman.py index 8dda341..fdfadb2 100755 --- a/monitor/bootman.py +++ b/monitor/bootman.py @@ -488,6 +488,7 @@ class DebugInterface: for n in ["bminit-cfg-exception-nocfg-update-bootupdatefail-nonode-debug-done", "bminit-cfg-exception-update-bootupdatefail-nonode-debug-done", "bminit-cfg-exception-update-bootupdatefail-nonode-debug-validate-exception-done", + "bminit-cfg-exception-nocfg-update-bootupdatefail-nonode-debug-validate-exception-done", "bminit-cfg-auth-bootcheckfail-nonode-exception-update-bootupdatefail-nonode-debug-done", ]: sequences.update({n : "update_node_config_email"}) diff --git a/monitor/database/info/findbad.py b/monitor/database/info/findbad.py index 5e38aca..dbe0eca 100644 --- a/monitor/database/info/findbad.py +++ b/monitor/database/info/findbad.py @@ -20,6 +20,10 @@ class FindbadNodeRecord(Entity): def get_latest_by(cls, **kwargs): return cls.query.filter_by(**kwargs).first() + @classmethod + def get_latest_by(cls, **kwargs): + return cls.query.filter_by(**kwargs).first() + @classmethod def get_latest_n_by(cls, n=3, **kwargs): return cls.query.filter_by(**kwargs) diff --git a/monitor/scanapi.py b/monitor/scanapi.py index f5c4f5f..5928c5a 100644 --- a/monitor/scanapi.py +++ b/monitor/scanapi.py @@ -197,6 +197,7 @@ class ScanNodeInternal(ScanInterface): try: for port in [22, 806]: ssh = command.SSH('root', nodename, port) + #echo ' "fs_status":"'`touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then touch /vservers/monitor.log 2>&1 ; fi ; grep proc /proc/mounts | grep ro,`'",' (oval, errval) = ssh.run_noexcept2(""" <<\EOF echo "{" @@ -204,7 +205,6 @@ class ScanNodeInternal(ScanInterface): echo ' "bmlog":"'`ls /tmp/bm.log`'",' echo ' "bootcd_version":"'`cat /mnt/cdrom/bootme/ID`'",' echo ' "nm_status":"'`ps ax | grep nm.py | grep -v grep`'",' - echo ' "fs_status":"'`touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then touch /vservers/monitor.log 2>&1 ; fi ; grep proc /proc/mounts | grep ro,`'",' echo ' "dns_status":"'`host boot.planet-lab.org 2>&1`'",' echo ' "princeton_comon_dir":"'`ls -d /vservers/princeton_comon`'",' @@ -236,6 +236,7 @@ EOF """) print traceback.print_exc() sys.exit(1) + values['fs_status'] = "" print "ALLVERSIONS: %s %s" % (nodename, values['rpm_versions']) print "RPMVERSION: %s %s" % (nodename, values['rpm_version']) diff --git a/policy.py b/policy.py index aafa7d5..f9605ae 100755 --- a/policy.py +++ b/policy.py @@ -134,9 +134,11 @@ def main(hostnames, sitenames): for i,site in enumerate(sitenames): sitehist = SiteInterface.get_or_make(loginbase=site) siteblack = BlacklistRecord.get_by(loginbase=site) + skip_due_to_blacklist=False if siteblack and not siteblack.expired(): print "skipping %s due to blacklist. will expire %s" % (site, siteblack.willExpire() ) + skip_due_to_blacklist=True continue # TODO: make query only return records within a certin time range, diff --git a/web/MonitorWeb/monitorweb/templates/summary.kid b/web/MonitorWeb/monitorweb/templates/summary.kid index e3f7d5f..1aadd7a 100644 --- a/web/MonitorWeb/monitorweb/templates/summary.kid +++ b/web/MonitorWeb/monitorweb/templates/summary.kid @@ -28,7 +28,7 @@ from links import * -
+
-- 2.43.0