#!/usr/bin/env python
"""Nagios plugin that reads the output of the comon sensor of a node.

The sensor is contacted over a raw TCP connection (default port PORT), its
"Key: value" lines are parsed, the fields listed in FIELDS_FOCUS are
converted to numbers, and a nagios status is derived from them.
"""
import re
import nagios
import socket
import signal
import string  # no longer used here; kept so the module namespace is unchanged

import sys

# default port number
PORT = 3121
# default timeout
TIMEOUT = 10


# exception raised when timeout occurs
class TimeoutException(Exception):
    """Raised by alarm_handler when the SIGALRM timer expires.

    This used to be a plain string; string exceptions cannot be raised on
    Python >= 2.6, so the timeout path ended in a TypeError instead.
    """


#################### comon-based acquisition
def check(hostname, timeout=None):
    """Query the comon sensor on *hostname* and return a nagios status.

    hostname -- node to connect to
    timeout  -- connection timeout in seconds; TIMEOUT when None

    Returns whatever interpret() returns, or nagios.UNKNOWN when the
    connection timed out.
    """
    if timeout is None:
        timeout = TIMEOUT

    try:
        # connect to comon and read data
        page = read_data(hostname, timeout)
        # parse lines and store in a dictionary
        fields = parse_data(page)
        # keep only relevant stuff and refine parsing
        fields = filter(fields)
        # make decision
        return interpret(fields)

    except TimeoutException:
        print("While connecting to comon sensor : timeout expired %d s"
              % timeout)
        ### XXX - in some cases this is a KO, but in general
        # maybe comon does not run on these nodes
        return nagios.UNKNOWN


### implement timeout as an alarm signal
def alarm_handler(s, closure):
    """Signal handler: turn SIGALRM into a TimeoutException.

    s       -- signal number
    closure -- second handler argument supplied by the signal module (unused)
    """
    if s == signal.SIGALRM:
        raise TimeoutException()
    else:
        print("unexpected signal %s in alarm_handler" % s)


###
# dont use httplib nor urllib2
# the server side replies its data even before you send a GET request
# with urllib2 you basically get a 'Connection reset by peer' error
def read_data(hostname, timeout, port=None):
    """Connect to the sensor and return its output as a list of lines.

    hostname -- node to connect to
    timeout  -- seconds allowed for the connection to be established
    port     -- TCP port; PORT when None

    Returns the newline-terminated lines received, without their '\\n'.
    Data after the last newline is discarded. Raises TimeoutException when
    the connection is not established within *timeout* seconds.

    NOTE: the alarm only guards connect(); reading is not bounded by
    *timeout*.
    """
    if port is None:
        port = PORT

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        signal.signal(signal.SIGALRM, alarm_handler)
        signal.alarm(timeout)
        try:
            s.connect((hostname, port))
        finally:
            # always disarm, otherwise a failed connect leaves a pending
            # SIGALRM that fires later in unrelated code
            signal.alarm(0)

        chunks = []
        while True:
            chunk = s.recv(4096)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        s.close()

    data = b"".join(chunks)
    if not isinstance(data, str):
        # Python 3: recv() yields bytes; latin-1 maps every byte 1:1
        data = data.decode("latin-1")
    # the element after the last '\n' is an unterminated line (or empty)
    return data.split("\n")[:-1]


### pattern for interpreting sensor output
re_line = "^([a-zA-Z0-9_]+): (.*)$"
ma_line = re.compile(re_line)


def parse_data(page):
    """Turn a list of 'Key: value' lines into a {key: value} dictionary.

    Lines that do not match re_line are ignored; when a key appears more
    than once the last occurrence wins.
    """
    parsed = {}
    for line in page:
        matched = ma_line.match(line)
        if matched:
            key, val = matched.groups()
            parsed[key] = val
    return parsed


# fields of interest, with the format used to convert their value
FIELDS_FOCUS = {
    'Loads': 'floats',
    'VMStat': 'ints',
    'CPUUse': 'ints',
    'MemInfo': 'floats',
    'Date': 'floats',
    'DfDot': 'percent-floats',
    'LastSsh': 'ints',
    }


def filter(dict):
    """Keep the FIELDS_FOCUS entries of *dict*, converted to numbers.

    dict -- mapping of field name to raw string, as built by parse_data

    Values are split on whitespace and converted according to the format
    declared in FIELDS_FOCUS ('percent-floats' drops the '%' of the first
    item). A field holding a single number is stored as that number,
    otherwise as a list. Fields with an empty value are left out.
    Raises ValueError when an item cannot be converted.
    """
    filtered = {}
    for key, raw in dict.items():
        kind = FIELDS_FOCUS.get(key)
        if kind is None:
            continue
        # split() tolerates repeated, leading and trailing blanks
        items = raw.split()
        if not items:
            continue
        if kind == 'ints':
            values = [int(item) for item in items]
        elif kind == 'floats':
            values = [float(item) for item in items]
        elif kind == 'percent-floats':
            items[0] = items[0].replace('%', '')
            values = [float(item) for item in items]
        else:
            raise ValueError("unknown format %r for field %r" % (kind, key))
        # simpler access to single-fields
        if len(values) == 1:
            filtered[key] = values[0]
        else:
            filtered[key] = values
    return filtered


def interpret(dict):
    """Derive a nagios status from the filtered sensor fields.

    dict -- mapping built by filter()

    Prints the difference between the 'Date' and 'LastSsh' fields and
    returns nagios.OK. Returns nagios.UNKNOWN when either field is missing.
    """
    status = nagios.OK

    ### check ssh status
    if 'Date' not in dict or 'LastSsh' not in dict:
        print("comon sensor output lacks Date and/or LastSsh")
        return nagios.UNKNOWN
    ssh_delay = dict['Date'] - dict['LastSsh']
    print(ssh_delay)

    ###
    return status


###
def usage():
    """Print the command-line synopsis and exit with status 1."""
    print("Usage comon_sensor.py node timeout")
    sys.exit(1)


if __name__ == '__main__':
    if len(sys.argv) != 3:
        usage()
    # NOTE(review): the status returned by check() is not used as the
    # process exit code -- confirm whether the caller expects it to be.
    check(sys.argv[1], int(sys.argv[2]))