From cc412ced3e9ab20ca10c0fdce46bf4878f336977 Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Fri, 25 Jun 2010 21:17:43 +0000 Subject: [PATCH] add real checks for RebootNodeWithPCU. Report errors returned by API. Add notes_url to pcu service --- nagios/plc_hosts_to_nagios.py | 1 + nagios/plugins/checkpcu.py | 98 ++++++++++++++++++----------------- 2 files changed, 52 insertions(+), 47 deletions(-) diff --git a/nagios/plc_hosts_to_nagios.py b/nagios/plc_hosts_to_nagios.py index f45fa76..3b36ecd 100755 --- a/nagios/plc_hosts_to_nagios.py +++ b/nagios/plc_hosts_to_nagios.py @@ -299,6 +299,7 @@ for site in l_sites: s3 = Service(use="planetlab-service", host_name=hn_list, service_description="cPCU", + notes_url="http://www.planet-lab.org/db/sites/index.php?id=%s" % site['site_id'], display_name="cPCU", servicegroups="NET,PCU", notifications_enabled="1", diff --git a/nagios/plugins/checkpcu.py b/nagios/plugins/checkpcu.py index a7156a0..d276ab4 100755 --- a/nagios/plugins/checkpcu.py +++ b/nagios/plugins/checkpcu.py @@ -7,55 +7,59 @@ import os from monitor.wrapper import plc def argv_to_dict(argv): - """ - NOTE: very bare-bones, no error checking, will fail easily. - """ - d = {} - prev=None - for a in argv: - if "--" == a[0:2]: - prev = a[2:] - elif "-" == a[0:1]: - prev = a[1:] - else: - d[prev] = a - return d + """ + NOTE: very bare-bones, no error checking, will fail easily. 
+ """ + d = {} + prev=None + for a in argv: + if "--" == a[0:2]: + prev = a[2:] + elif "-" == a[0:1]: + prev = a[1:] + else: + d[prev] = a + return d def main(): - d = argv_to_dict(sys.argv[1:]) - - api = plc.api - if 'hostname' in d or 'H' in d: - try: - hostname = d['host'] - except: - hostname = d['H'] - else: - print "UNKNOWN: argument error" - sys.exit(3) - - try: - n = api.GetNodes(hostname)[0] - except: - print "UNKNOWN: API failure" - sys.exit(3) - - t1 = 0 - t2 = time.time() - - if False: - print "FAKE-OK: PCU test successful" - sys.exit(0) - elif False: - print "FAKE-WARNING: PCU configuration incomplete" - sys.exit(1) - else: - print "FAKE-CRITICAL: PCU test failed" - sys.exit(2) + d = argv_to_dict(sys.argv[1:]) + + api = plc.api + if 'hostname' in d or 'H' in d: + try: + hostname = d['host'] + except: + hostname = d['H'] + else: + print "UNKNOWN: Argument error" + sys.exit(3) + + try: + n = api.RebootNodeWithPCU(hostname, True) + except Exception, e: + if "No PCUs associated with Node" in str(e): + print "CRITICAL: Failure: %s" % str(e) + sys.exit(2) + else: + print "UNKNOWN: Failure: %s" % str(e) + sys.exit(3) + + t1 = 0 + t2 = time.time() + + if n == 0: + print "OK: PCU test successful" + sys.exit(0) + elif n != 0: + print "WARNING: PCU configuration incomplete: %s" % n + sys.exit(1) + else: + print "FAKE-CRITICAL: PCU test failed" + sys.exit(2) if __name__ == '__main__': - f = open("/tmp/checkpcu", 'a') - f.write("checkpcu %s %s\n" % (time.time(), " ".join(sys.argv[1:]))) - f.close() - main() + f = open("/tmp/checkpcu", 'a') + f.write("checkpcu %s %s\n" % (time.time(), " ".join(sys.argv[1:]))) + f.close() + main() -- 2.43.0