From: Stephen Soltesz Date: Tue, 29 Jun 2010 22:01:36 +0000 (+0000) Subject: add checkrt to indicate when a site has new or open tickets X-Git-Tag: monitor-3.1-1~16 X-Git-Url: http://git.onelab.eu/?p=monitor.git;a=commitdiff_plain;h=5f22fdfd8456bfcf0a050e1bb31b0bd0bf3c3d74 add checkrt to indicate when a site has new or open tickets; add checkescalation to infer the penalty applied to a site based on the state of its site and slices; add extra RT configuration fields to auth.py --- diff --git a/nagios/auth.py b/nagios/auth.py index 101299b..4755a8b 100644 --- a/nagios/auth.py +++ b/nagios/auth.py @@ -4,4 +4,11 @@ #auth = {'Username' : '', 'AuthMethod' : 'password', 'AuthString' : ''} auth = {'AuthMethod' : "anonymous"} plc = "https://boot.planet-lab.org/PLCAPI/" +www = "https://www.planet-lab.org" + +RTSERVER="https://rt.planet-lab.org/" +RTUSER="monitor" +RTPASSWD="" +RTDEBUG="0" + diff --git a/nagios/plugins/checkescalation.py b/nagios/plugins/checkescalation.py new file mode 100755 index 0000000..1ab1156 --- /dev/null +++ b/nagios/plugins/checkescalation.py @@ -0,0 +1,59 @@ +#!/usr/bin/python + +import time +import sys +import plc + +def argv_to_dict(argv): + """ + NOTE: very bare-bones, no error checking, will fail easily. + """ + d = {} + prev=None + for a in argv: + if "--" == a[0:2]: + prev = a[2:] + elif "-" == a[0:1]: + prev = a[1:] + else: + d[prev] = a + return d + +def main(): + d = argv_to_dict(sys.argv[1:]) + + site = None + if 'site' in d: + site = d['site'].replace('site-cluster-for-','') + else: + print "No site specified" + sys.exit(1) + + # define escalation by observed properties about the site. + # i.e. EXEMPT, level 0 (site enabled and slices ok), level 1 (site disabled), level 2 (slices) + if plc.isSiteExempt(site): + tags = plc.api.GetSiteTags({'login_base' : site, 'tagname' : 'exempt_site_until'}) + print "Exempt: %s is exempt until %s" % (site, tags[0]['value']) + sys.exit(0) + + extra_str = "" + + # are slices disabled? 
+ slices_enabled = plc.areSlicesEnabled(site) + if isinstance(slices_enabled, bool) and not slices_enabled: + print "Level >= 2: slices are disabled at %s" % (site) + sys.exit(0) + elif isinstance(slices_enabled, type(None)): + extra_str = "And, no slices." + + # Site is not exempt, so is it disabled? + if not plc.isSiteEnabled(site): + print "Level >= 1: site is disabled at %s. %s" % (site, extra_str) + sys.exit(0) + + print "Level 0: no policy applied to site %s" % (site) + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/nagios/plugins/checkrt.py b/nagios/plugins/checkrt.py new file mode 100755 index 0000000..befb1e3 --- /dev/null +++ b/nagios/plugins/checkrt.py @@ -0,0 +1,72 @@ +#!/usr/bin/python + +import os +import time +import sys +import auth + +def argv_to_dict(argv): + """ + NOTE: very bare-bones, no error checking, will fail easily. + """ + d = {} + prev=None + for a in argv: + if "--" == a[0:2]: + prev = a[2:] + elif "-" == a[0:1]: + prev = a[1:] + else: + d[prev] = a + return d + +def main(): + d = argv_to_dict(sys.argv[1:]) + + if 'pattern' in d or 'p' in d: + try: + pattern = d['pattern'] + except: + pattern = d['p'] + else: + print "UNKNOWN: Argument error" + sys.exit(3) + + + # TODO: check that RT is configured correctly + os.environ["RTSERVER"] = auth.RTSERVER + os.environ["RTUSER"] = auth.RTUSER + os.environ["RTPASSWD"] = auth.RTPASSWD + os.environ["RTDEBUG"] = auth.RTDEBUG + + # TODO: may need to add a timeout + # NOTE: RT3.8 + query = "Subject like '%%%s%%' and Queue='Monitor' and ( Status='new' or Status='open' )" % pattern + cmd = """rt ls -s -t ticket "%s" 2>&1 """ % query + cmd = cmd + """| grep -vi "no match" | wc -l """ + + out = os.popen(cmd, 'r') + open_tickets = out.read() + + try: + open_tickets_i = int(open_tickets) + except: + print "UNKNOWN: failed to convert %s to open ticket count" % open_tickets + sys.exit(3) + + if open_tickets_i == 0: + print "OK: no open tickets for site" + sys.exit(0) + elif open_tickets_i 
!= 0: + print "WARNING: %s open tickets" % open_tickets_i + sys.exit(1) + else: + print "FAKE-CRITICAL: RT check failed" + sys.exit(2) + + +if __name__ == '__main__': + f = open("/tmp/checkpcu", 'a') + f.write("checkpcu %s %s\n" % (time.time(), " ".join(sys.argv[1:]))) + f.close() + main()