add external commands as stubs for the nagios plugins
author Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 18 Jun 2010 21:19:44 +0000 (21:19 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 18 Jun 2010 21:19:44 +0000 (21:19 +0000)
commands/checkmode.py [new file with mode: 0755]
commands/checkpcu.py [new file with mode: 0755]
commands/escalation.py [new file with mode: 0755]
commands/mail.py [new file with mode: 0755]
commands/reboot.py
commands/repair.py [new file with mode: 0755]

diff --git a/commands/checkmode.py b/commands/checkmode.py
new file mode 100755 (executable)
index 0000000..2be4198
--- /dev/null
@@ -0,0 +1,66 @@
+#!/usr/bin/python
+
+import time
+import sys
+import os
+
+from monitor.wrapper import plc
+
+def argv_to_dict(argv):
+       """
+               Collect "-name value" / "--name value" pairs from argv into a dict
+               keyed by the name without its leading dash(es).
+
+               NOTE: no validation is done.  An option that is not followed by a
+               value is dropped, a later value overwrites an earlier one for the
+               same option, and a value seen before any option is stored under
+               the key None.
+       """
+       options = {}
+       name = None
+       for token in argv:
+               if not token.startswith("-"):
+                       # plain argument: it is the value of the most recent option
+                       options[name] = token
+               elif token.startswith("--"):
+                       name = token[2:]
+               else:
+                       name = token[1:]
+       return options
+
+def main():
+       """
+               Check that a node's boot_state matches its run_level and that its
+               last_contact is recent.
+
+               The node is named with "--hostname NAME" or "-H NAME".  One status
+               line is printed and the process exits with:
+                       0  OK        states match and last_contact is recent
+                       1  WARNING   states differ
+                       2  CRITICAL  last_contact is 30 hours old or more (or unset)
+                       3  UNKNOWN   no hostname given, or the API lookup failed
+       """
+       d = argv_to_dict(sys.argv[1:])
+
+       api = plc.api
+       # Read the same key that is tested.  The previous code tested for
+       # 'hostname' but read d['host'], so "--hostname NAME" on its own ended
+       # in an uncaught KeyError.
+       if 'hostname' in d:
+               hostname = d['hostname']
+       elif 'H' in d:
+               hostname = d['H']
+       else:
+               print "UNKNOWN: argument error"
+               sys.exit(3)
+
+       try:
+               n = api.GetNodes(hostname)[0]
+       except Exception:
+               # also reached when the result list is empty (IndexError)
+               print "UNKNOWN: API failure"
+               sys.exit(3)
+
+       # An unset last_contact counts as 0, which is always stale.
+       t1 = n['last_contact'] or 0
+       t2 = time.time()
+       # 30 hours, in seconds; last_contact is compared against time.time()
+       stale_after = 60*60*30
+
+       if t2-t1 >= stale_after:
+               print "CRITICAL: node last_contact is stale, assumed offline"
+               sys.exit(2)
+       if n['boot_state'] != n['run_level']:
+               print "WARNING: bootstate does not match runlevel"
+               sys.exit(1)
+       print "OK: bootstate matches runlevel and lastcontact is up to date"
+       sys.exit(0)
+
+
+if __name__ == '__main__':
+       # Append a timestamped record of this invocation for debugging.  The
+       # trace is best-effort: an unwritable /tmp/checkmode must not abort the
+       # check, because an uncaught exception exits with status 1, which Nagios
+       # reads as WARNING.
+       try:
+               f = open("/tmp/checkmode", 'a')
+               try:
+                       f.write("checkmode %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
+               finally:
+                       f.close()
+       except (IOError, OSError):
+               pass
+       main()
diff --git a/commands/checkpcu.py b/commands/checkpcu.py
new file mode 100755 (executable)
index 0000000..4524cd0
--- /dev/null
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+
+import time
+import sys
+import os
+
+from monitor.wrapper import plc
+
+def argv_to_dict(argv):
+       """
+               Turn an argument list into a dict: each token starting with "-" or
+               "--" names an option (dashes removed), and each other token becomes
+               the value of the option named most recently.
+
+               NOTE: nothing is validated.  Options without a value never appear
+               in the result, repeated values overwrite one another, and a value
+               that precedes every option is filed under the key None.
+       """
+       parsed = {}
+       current = None
+       for arg in argv:
+               if arg.startswith("--"):
+                       current = arg[2:]
+                       continue
+               if arg.startswith("-"):
+                       current = arg[1:]
+                       continue
+               parsed[current] = arg
+       return parsed
+
+def main():
+       """
+               Stub PCU check.  The node named by "--hostname NAME" or "-H NAME"
+               is looked up through the API, but no PCU test is run yet: once the
+               lookup succeeds the result is always FAKE-OK.
+
+               Exit status: 0 after a successful lookup, 3 (UNKNOWN) when no
+               hostname was given or the API lookup failed.
+       """
+       d = argv_to_dict(sys.argv[1:])
+
+       api = plc.api
+       # Read the same key that is tested.  The previous code tested for
+       # 'hostname' but read d['host'], so "--hostname NAME" on its own ended
+       # in an uncaught KeyError.
+       if 'hostname' in d:
+               hostname = d['hostname']
+       elif 'H' in d:
+               hostname = d['H']
+       else:
+               print "UNKNOWN: argument error"
+               sys.exit(3)
+
+       try:
+               # The record is not inspected yet; the call only confirms that the
+               # node can be fetched.
+               n = api.GetNodes(hostname)[0]
+       except Exception:
+               # also reached when the result list is empty (IndexError)
+               print "UNKNOWN: API failure"
+               sys.exit(3)
+
+       # Placeholder outcomes keyed by exit status.  Only status 0 is produced
+       # until a real PCU test chooses between them.
+       outcomes = {
+               0: "FAKE-OK: PCU test successful",
+               1: "FAKE-WARNING: PCU configuration incomplete",
+               2: "FAKE-CRITICAL: PCU test failed",
+       }
+       status = 0
+       print outcomes[status]
+       sys.exit(status)
+
+
+if __name__ == '__main__':
+       # Append a timestamped record of this invocation for debugging.  The
+       # trace is best-effort: an unwritable /tmp/checkpcu must not abort the
+       # check, because an uncaught exception exits with status 1, which Nagios
+       # reads as WARNING.
+       try:
+               f = open("/tmp/checkpcu", 'a')
+               try:
+                       f.write("checkpcu %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
+               finally:
+                       f.close()
+       except (IOError, OSError):
+               pass
+       main()
diff --git a/commands/escalation.py b/commands/escalation.py
new file mode 100755 (executable)
index 0000000..c4979b6
--- /dev/null
@@ -0,0 +1,10 @@
+#!/usr/bin/python
+
+import time
+import sys
+
+
+if __name__ == '__main__':
+       # Stub handler: append one line with the current time and the arguments
+       # received to /tmp/escalation.
+       record = "escalation %s %s\n" % (time.time(), " ".join(sys.argv[1:]))
+       log = open("/tmp/escalation", 'a')
+       log.write(record)
+       log.close()
diff --git a/commands/mail.py b/commands/mail.py
new file mode 100755 (executable)
index 0000000..84d8217
--- /dev/null
@@ -0,0 +1,30 @@
+#!/usr/bin/python
+
+import time
+import sys
+import os
+
+
+def argv_to_dict(argv):
+       """
+               Collect "--name value" pairs from argv into a dict keyed by the
+               name without its leading "--".
+
+               Only a token that STARTS with "--" is an option name.  The previous
+               test ('"--" in a') also matched "--" in the middle of a value, so
+               text such as "down -- no reply" was taken for an option.  Tokens
+               with a single leading dash are values.
+
+               NOTE: still very bare-bones.  An option with no value is dropped, a
+               later value overwrites an earlier one, and a value seen before any
+               option is stored under the key None.
+       """
+       d = {}
+       prev=None
+       for a in argv:
+               if a.startswith("--"):
+                       prev = a[2:]
+               else:
+                       d[prev] = a
+       return d
+
+if __name__ == '__main__':
+       # Imported here because only this entry point needs it.
+       import subprocess
+
+       # Append a timestamped record of this invocation for debugging.
+       f = open("/tmp/myopsmail", 'a')
+       f.write("mail %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
+       f.close()
+
+       # Every %(name)s below must have been passed as "--name value"; a
+       # missing one raises KeyError.
+       d = argv_to_dict(sys.argv[1:])
+       body = ("***** MyOpsNagios %(hostnotificationnumber)s *****\n\n"
+               "Notification Type: %(notificationtype)s\n"
+               "Host: %(hostname)s\n"
+               "State: %(hoststate)s\n"
+               "Address: %(hostaddress)s\n"
+               "Info: %(hostoutput)s\n\n"
+               "Date/Time: %(longdatetime)s\n") % d
+       subject = "** %(notificationtype)s Host Alert: %(hostname)s is %(hoststate)s **" % d
+
+       # Run /bin/mail directly with an argument list and feed the body on its
+       # stdin.  The previous code spliced these values into an os.system()
+       # string, so quotes, backticks or $(...) in e.g. the host output were
+       # interpreted by the shell.  Values are now sent literally (printf "%b"
+       # used to expand backslash escapes found inside them).
+       mailer = subprocess.Popen(["/bin/mail",
+                                  "-S", "replyto=monitor@planet-lab.org",
+                                  "-s", subject,
+                                  d['contactemail']],
+                                 stdin=subprocess.PIPE)
+       mailer.communicate(body)
+
+
diff --git a/commands/reboot.py b/commands/reboot.py
index d5ce318..4963900 100755 (executable)
@@ -32,7 +32,7 @@ def main():
                print err
 
 if __name__ == '__main__':
-       main()
+       #main()
        f = open("/tmp/rebootlog", 'a')
        f.write("reboot %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
        f.close()
diff --git a/commands/repair.py b/commands/repair.py
new file mode 100755 (executable)
index 0000000..0706b02
--- /dev/null
@@ -0,0 +1,10 @@
+#!/usr/bin/python
+
+import time
+import sys
+import os
+
+if __name__ == '__main__':
+       # Stub handler: append one line with the current time and the arguments
+       # received to /tmp/repair.
+       record = "repair %s %s\n" % (time.time(), " ".join(sys.argv[1:]))
+       log = open("/tmp/repair", 'a')
+       log.write(record)
+       log.close()