add real checks for RebootNodeWithPCU. Report errors returned by API
author Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 25 Jun 2010 21:17:43 +0000 (21:17 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 25 Jun 2010 21:17:43 +0000 (21:17 +0000)
add notes_url to pcu service

nagios/plc_hosts_to_nagios.py
nagios/plugins/checkpcu.py

index f45fa76..3b36ecd 100755 (executable)
@@ -299,6 +299,7 @@ for site in l_sites:
                s3 = Service(use="planetlab-service",
                                        host_name=hn_list,
                                        service_description="cPCU",
+                                       notes_url="http://www.planet-lab.org/db/sites/index.php?id=%s" % site['site_id'],
                                        display_name="cPCU",
                                        servicegroups="NET,PCU",
                                        notifications_enabled="1",
index a7156a0..d276ab4 100755 (executable)
@@ -7,55 +7,59 @@ import os
 from monitor.wrapper import plc
 
 def argv_to_dict(argv):
-       """
-               NOTE: very bare-bones, no error checking, will fail easily.
-       """
-       d = {}
-       prev=None
-       for a in argv:
-               if "--" == a[0:2]:
-                       prev = a[2:]
-               elif "-" == a[0:1]:
-                       prev = a[1:]
-               else:
-                       d[prev] = a
-       return d
+    """
+        NOTE: very bare-bones, no error checking, will fail easily.
+    """
+    d = {}
+    prev=None
+    for a in argv:
+        if "--" == a[0:2]:
+            prev = a[2:]
+        elif "-" == a[0:1]:
+            prev = a[1:]
+        else:
+            d[prev] = a
+    return d
 
 def main():
-       d = argv_to_dict(sys.argv[1:])
-
-       api = plc.api
-       if 'hostname' in d or 'H' in d:
-               try:
-                       hostname = d['host']
-               except:
-                       hostname = d['H']
-       else:
-               print "UNKNOWN: argument error"
-               sys.exit(3)
-
-       try:
-               n = api.GetNodes(hostname)[0]
-       except:
-               print "UNKNOWN: API failure"
-               sys.exit(3)
-
-       t1 = 0
-       t2 = time.time()
-
-       if False:
-               print "FAKE-OK: PCU test successful"
-               sys.exit(0)
-       elif False:
-               print "FAKE-WARNING: PCU configuration incomplete"
-               sys.exit(1)
-       else:
-               print "FAKE-CRITICAL: PCU test failed"
-               sys.exit(2)
+    d = argv_to_dict(sys.argv[1:])
+
+    api = plc.api
+    if 'hostname' in d or 'H' in d:
+        try:
+            hostname = d['host']
+        except:
+            hostname = d['H']
+    else:
+        print "UNKNOWN: Argument error"
+        sys.exit(3)
+
+    try:
+        n = api.RebootNodeWithPCU(hostname, True)
+    except Exception, e:
+        if "No PCUs associated with Node" in str(e):
+            print "CRITICAL: Failure: %s" % str(e)
+            sys.exit(2)
+        else:
+            print "UNKNOWN: Failure: %s" % str(e)
+            sys.exit(3)
+
+    t1 = 0
+    t2 = time.time()
+
+    if n == 0:
+        print "OK: PCU test successful"
+        sys.exit(0)
+    elif n != 0:
+        print "WARNING: PCU configuration incomplete: %s" % n
+        sys.exit(1)
+    else:
+        print "FAKE-CRITICAL: PCU test failed"
+        sys.exit(2)
 
 
 if __name__ == '__main__':
-       f = open("/tmp/checkpcu", 'a')
-       f.write("checkpcu %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
-       f.close()
-       main()
+    f = open("/tmp/checkpcu", 'a')
+    f.write("checkpcu %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
+    f.close()
+    main()