#!/usr/bin/env python
#
# Provenance (this file was recovered from a gitweb "blobdiff_plain" page):
# X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=nagios%2Fplugin%2Fcomon_query.py;fp=nagios%2Fplugin%2Fcomon_query.py;h=9aa2685d76c07d88fa1a02d3136b59fac857a54e;hb=7ac26d99242db506fda7e7e88ed31ab8f0944748;hp=0000000000000000000000000000000000000000;hpb=80f8e07e75431f360ffbd0f3daee305cd87363c6;p=infrastructure.git
# diff --git a/nagios/plugin/comon_query.py b/nagios/plugin/comon_query.py
# new file mode 100755, index 0000000..9aa2685, @@ -0,0 +1,188 @@

#
# This module checks for a planetlab node by
# (*) connecting to the comon central query interface
# (*) retrieving the latest info for a given node
#

import sys
import socket
import re
import string
import urllib2

import nagios

# default server
SERVER='summer.cs.princeton.edu'


class NodeUnknownException(Exception):
    """Raised when the comon server reports that it does not know the node.

    This used to be a plain string ("NodeUnknownByComon").  String
    exceptions cannot be raised or caught on Python >= 2.6, so it is a
    real exception class now; 'raise NodeUnknownException' and
    'except NodeUnknownException' keep working unchanged.
    """

    def __str__(self):
        # keep the historical text when no explicit message was given
        return Exception.__str__(self) or "NodeUnknownByComon"


####################
def check (node,server=None):
    """Query comon about node and return a nagios status code.

    node   -- hostname of the node to check
    server -- comon server to query; defaults to SERVER when None

    Returns whatever interpret() computes from the node attributes, or
    nagios.UNKNOWN when comon does not know the node or when anything
    else goes wrong (the exception is printed in that case).
    """
    if server is None:
        server = SERVER

    try:
        # compute node IP number
        node_address = compute_address (node)
        # build URL, connect and return attributes dict
        node_dict = query_node (server,node_address)
        for key, value in node_dict.items():
            print("%s %s" % (key, value))
        # interpret
        return interpret (node_dict)

    except NodeUnknownException:
        return nagios.UNKNOWN
    except Exception:
        # sys.exc_info() rather than 'except Exception,e' / 'as e' so that
        # this parses on every python version
        e = sys.exc_info()[1]
        print("comon_query.check got exception %s" % (e,))
        return nagios.UNKNOWN

##########
# building blocks for the dotted-quad IPv4 regular expression
re_dec="([0-9]{1,3})"
re_ipsep=r"\."
re_ip=(re_dec+re_ipsep)*3+re_dec
ma_ip = re.compile (re_ip)

def compute_address (nodename):
    """Resolve nodename and return its IPv4 address as one integer.

    The four dotted-quad components a.b.c.d are combined as
    ((a*256 + b)*256 + c)*256 + d.

    Raises ValueError if the resolved address does not look like a
    dotted quad; socket errors from the lookup propagate unchanged.
    """
    ip=socket.gethostbyname(nodename)
    match=ma_ip.match(ip)
    if match is None:
        raise ValueError("unexpected address %r for node %s"%(ip,nodename))
    res=0
    for part in match.groups():
        res = (res*256)+int(part)
    return res

##########
URL_FORMAT="http://%s/status/tabulator.cgi"
ARGS_FORMAT="table=table_nodeviewshort&select='%s'"
FILTER_FORMAT="address==%d"
CSV_FORMAT="&format=formatcsv"

def filter_address (address):
    """Return the comon select expression matching an integer address."""
    return FILTER_FORMAT%address

def filter_node (nodename):
    """Return the comon select expression matching a node given by name."""
    return filter_address(compute_address(nodename))

def full_url (server, filter):
    """Return the tabulator URL on server for the given select expression."""
    return (URL_FORMAT%server
            + '?'
            + ARGS_FORMAT%filter)

def full_url_csv (server,filter):
    """Same as full_url, asking for CSV output."""
    return full_url(server,filter)+CSV_FORMAT


# mention field here means we'll parse it and keep it
# see query_node below
FIELDS_FOCUS={
    'resptime': 'float',
    'sshstatus':'int',
    'bootstate':'string',
    }

NOTHING_MATCHED='nothing matched select statement'
##########
def query_node (server,address):
    """Fetch the comon CSV row for address and return the fields of interest.

    server  -- comon server hostname
    address -- integer node address, as returned by compute_address

    The first line of the answer holds the column names, the second one
    the values.  Only the columns listed in FIELDS_FOCUS are kept, each
    converted according to its declared format.  Columns without a
    matching value (or the other way around) are ignored.

    Raises NodeUnknownException when comon answers that nothing matched.
    """
    url=full_url_csv(server,filter_address(address))
    req = urllib2.urlopen(url)
    try:
        # let's parse this manually
        header_line=req.readline()
        values_line=req.readline()
    finally:
        # do not leak the connection, whatever happens while reading
        req.close()

    headers=[name.strip() for name in header_line.split(',')]
    # handle the case where the node is unknown to comon
    if values_line.find(NOTHING_MATCHED) != -1:
        raise NodeUnknownException

    values=values_line.split(',')

    print("h %s" % (headers,))
    print("v %s" % (values,))

    converters={
        'float': float,
        'int': int,
        'string': lambda text: text.strip(),
        }
    result={}
    for key,val in zip(headers,values):
        convert=converters.get(FIELDS_FOCUS.get(key))
        if convert is not None:
            result[key]=convert(val)

    return result

##########
# function for decomposing a number along units
# tuple [n] must be a multiple of tuple[n+1]
# e.g.
# 1223456, (10000, 500, 10) -> (122, 6, 45)
# coz 1223456 = 122*10000 + 6*500 + 45*10 + 6
def split_number (n,tuple):
    """Decompose n along the units listed in tuple, largest unit first.

    For each unit in turn, the number of whole units that fit in what is
    left of n is recorded, and the remainder is carried over to the next
    unit.  The final remainder is dropped.

    Returns a tuple with one count per unit, e.g.
    split_number(1223456, (10000, 500, 10)) == (122, 6, 45)
    """
    result=()
    for base in tuple:
        # divmod keeps the count integral whatever '/' means
        # (true division would yield fractional counts)
        count,n=divmod(n,base)
        result+=(count,)
    return result

###
MINUTE=60
HOUR=60*MINUTE
DAY=HOUR*24
WEEK=DAY*7
MONTH=DAY*30

# from a delay in seconds, returns a human-readable string
def seconds_printable (seconds):
    """Return a human-readable rendering of a delay given in seconds.

    Only the most significant units are shown (at most three); anything
    below one minute is not displayed.
    """
    month,week,day,hour,minute = split_number(seconds,
                                              (MONTH,WEEK,DAY,HOUR,MINUTE))
    if month != 0:
        return "%d month(s), %d weeks, %d day(s)"%(month,week,day)
    elif week !=0:
        return "%d weeks, %d day(s), %d hour(s)"%(week,day,hour)
    elif day != 0:
        return "%d day(s), %d hour(s)"%(day,hour)
    elif hour != 0:
        return "%d hour(s), %d minute(s)"%(hour,minute)
    else:
        return "%d minute(s)"%(minute)

##########
def interpret (dict):
    """Turn the node attributes into a nagios status code.

    dict -- attributes as returned by query_node; 'sshstatus' is always
            read, 'resptime' only when sshstatus is 0

    A non-zero sshstatus is reported as a duration in seconds:
    nagios.KO from 10 minutes on, nagios.WARNING below that.
    Otherwise the response time decides: nagios.KO from 10.0,
    nagios.WARNING from 5.0, nagios.OK below.
    A one-line explanation is printed in every case but plain OK/KO
    thresholds are not otherwise reported.
    """
    # check sshstatus is null
    sshstatus=dict['sshstatus']
    if sshstatus != 0:
        print('No response to comon/ssh for %s'%seconds_printable(sshstatus))
        if sshstatus >= 10*MINUTE:
            return nagios.KO
        return nagios.WARNING

    # let's focus on resptime
    resptime = dict['resptime']
    print("Response time as measured by comon = %.2f s"%resptime)
    if resptime >= 10.0:
        return nagios.KO
    elif resptime >= 5.0:
        return nagios.WARNING
    else:
        return nagios.OK

#################### quick test
def usage():
    """Print a usage reminder and exit with status 1."""
    print("Usage comon_query.py node")
    sys.exit(1)

if __name__=='__main__':
    if len(sys.argv) != 2:
        usage()
    print('comon_query.check would return %d'%check(sys.argv[1]))