From: Stephen Soltesz <soltesz@cs.princeton.edu>
Date: Fri, 25 Jun 2010 21:17:43 +0000 (+0000)
Subject: add real checks for RebootNodeWithPCU.  Report errors returned by API
X-Git-Tag: monitor-3.1-1~19
X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=cc412ced3e9ab20ca10c0fdce46bf4878f336977;p=monitor.git

Replace the placeholder status checks in checkpcu.py with a real call to RebootNodeWithPCU, and report errors returned by the API.
Add notes_url to the cPCU service in plc_hosts_to_nagios.py.
---

diff --git a/nagios/plc_hosts_to_nagios.py b/nagios/plc_hosts_to_nagios.py
index f45fa76..3b36ecd 100755
--- a/nagios/plc_hosts_to_nagios.py
+++ b/nagios/plc_hosts_to_nagios.py
@@ -299,6 +299,7 @@ for site in l_sites:
 		s3 = Service(use="planetlab-service",
 					host_name=hn_list,
 					service_description="cPCU",
+					notes_url="http://www.planet-lab.org/db/sites/index.php?id=%s" % site['site_id'],
 					display_name="cPCU",
 					servicegroups="NET,PCU",
 					notifications_enabled="1",
diff --git a/nagios/plugins/checkpcu.py b/nagios/plugins/checkpcu.py
index a7156a0..d276ab4 100755
--- a/nagios/plugins/checkpcu.py
+++ b/nagios/plugins/checkpcu.py
@@ -7,55 +7,59 @@ import os
 from monitor.wrapper import plc
 
 def argv_to_dict(argv):
-	"""
-		NOTE: very bare-bones, no error checking, will fail easily.
-	"""
-	d = {}
-	prev=None
-	for a in argv:
-		if "--" == a[0:2]:
-			prev = a[2:]
-		elif "-" == a[0:1]:
-			prev = a[1:]
-		else:
-			d[prev] = a
-	return d
+    """
+        NOTE: very bare-bones, no error checking, will fail easily.
+    """
+    d = {}
+    prev=None
+    for a in argv:
+        if "--" == a[0:2]:
+            prev = a[2:]
+        elif "-" == a[0:1]:
+            prev = a[1:]
+        else:
+            d[prev] = a
+    return d
 
 def main():
-	d = argv_to_dict(sys.argv[1:])
-
-	api = plc.api
-	if 'hostname' in d or 'H' in d:
-		try:
-			hostname = d['host']
-		except:
-			hostname = d['H']
-	else:
-		print "UNKNOWN: argument error"
-		sys.exit(3)
-
-	try:
-		n = api.GetNodes(hostname)[0]
-	except:
-		print "UNKNOWN: API failure"
-		sys.exit(3)
-
-	t1 = 0
-	t2 = time.time()
-
-	if False:
-		print "FAKE-OK: PCU test successful"
-		sys.exit(0)
-	elif False:
-		print "FAKE-WARNING: PCU configuration incomplete"
-		sys.exit(1)
-	else:
-		print "FAKE-CRITICAL: PCU test failed"
-		sys.exit(2)
+    d = argv_to_dict(sys.argv[1:])
+
+    api = plc.api
+    if 'hostname' in d or 'H' in d:
+        try:
+            hostname = d['host']
+        except:
+            hostname = d['H']
+    else:
+        print "UNKNOWN: Argument error"
+        sys.exit(3)
+
+    try:
+        n = api.RebootNodeWithPCU(hostname, True)
+    except Exception, e:
+        if "No PCUs associated with Node" in str(e):
+            print "CRITICAL: Failure: %s" % str(e)
+            sys.exit(2)
+        else:
+            print "UNKNOWN: Failure: %s" % str(e)
+            sys.exit(3)
+
+    t1 = 0
+    t2 = time.time()
+
+    if n == 0:
+        print "OK: PCU test successful"
+        sys.exit(0)
+    elif n != 0:
+        print "WARNING: PCU configuration incomplete: %s" % n
+        sys.exit(1)
+    else:
+        print "FAKE-CRITICAL: PCU test failed"
+        sys.exit(2)
 
 
 if __name__ == '__main__':
-	f = open("/tmp/checkpcu", 'a')
-	f.write("checkpcu %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
-	f.close()
-	main()
+    f = open("/tmp/checkpcu", 'a')
+    f.write("checkpcu %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
+    f.close()
+    main()