#!/usr/bin/env python
#
# This module checks for a planetlab node by
# (*) connecting to the comon central query interface
# (*) retrieving the latest info for a given node
#
import sys
import socket
import re
import string
import urllib2

import nagios

# default server
SERVER = 'summer.cs.princeton.edu'


class NodeUnknownException(Exception):
    """Raised by query_node when comon answers that nothing matched the node.

    This used to be a plain string; string exceptions cannot be raised any
    more (TypeError since Python 2.6), so it is now a real exception class.
    """


####################
def check(node, server=None):
    """Query comon about `node` and return a nagios status code.

    node   -- hostname of the node to check
    server -- comon server to query; defaults to SERVER when None

    Returns whatever interpret() computes from the node attributes, or
    nagios.UNKNOWN when the node is unknown to comon or when any other
    error occurs (that error is printed, never propagated).
    """
    if server is None:
        server = SERVER
    try:
        # compute node IP number
        node_address = compute_address(node)
        # build URL, connect and return attributes dict
        node_dict = query_node(server, node_address)
        for key in node_dict:
            print("%s %s" % (key, node_dict[key]))
        # interpret
        return interpret(node_dict)
    except NodeUnknownException:
        return nagios.UNKNOWN
    except Exception:
        # sys.exc_info() instead of "except Exception, e" so that this
        # parses on every python version
        e = sys.exc_info()[1]
        print("comon_query.check got exception %s" % (e,))
        return nagios.UNKNOWN


##########
re_dec = r"([0-9]{1,3})"
re_ipsep = r"\."
re_ip = (re_dec + re_ipsep) * 3 + re_dec
ma_ip = re.compile(re_ip)


def compute_address(nodename):
    """Resolve `nodename` and return its IPv4 address as a single integer.

    The four dotted-quad fields a.b.c.d are folded, left to right, into
    ((a*256 + b)*256 + c)*256 + d.

    Raises ValueError if the resolved address is not a dotted quad;
    errors from socket.gethostbyname propagate unchanged.
    """
    ip = socket.gethostbyname(nodename)
    match = ma_ip.match(ip)
    if match is None:
        raise ValueError("cannot parse IPv4 address %r for node %r"
                         % (ip, nodename))
    res = 0
    for field in match.groups():
        res = (res * 256) + int(field)
    return res


##########
URL_FORMAT = "http://%s/status/tabulator.cgi"
ARGS_FORMAT = "table=table_nodeviewshort&select='%s'"
FILTER_FORMAT = "address==%d"
CSV_FORMAT = "&format=formatcsv"


def filter_address(address):
    """Return the comon select expression matching an integer address."""
    return FILTER_FORMAT % address


def filter_node(nodename):
    """Return the comon select expression matching a node given by name."""
    return filter_address(compute_address(nodename))


def full_url(server, filter):
    """Return the tabulator URL on `server` for the select expression `filter`."""
    return (URL_FORMAT % server
            + '?'
            + ARGS_FORMAT % filter)


def full_url_csv(server, filter):
    """Same as full_url, with the CSV output format requested."""
    return full_url(server, filter) + CSV_FORMAT


# mention field here means we'll parse it and keep it
# see query_node below
FIELDS_FOCUS = {
    'resptime': 'float',
    'sshstatus': 'int',
    'bootstate': 'string',
}

NOTHING_MATCHED = 'nothing matched select statement'


##########
def query_node(server, address):
    """Fetch the comon CSV record for `address` and return the focus fields.

    server  -- comon server name, as used by full_url_csv
    address -- integer node address, as returned by compute_address

    Only the first two lines of the reply are read: a comma-separated
    header line and one line of values. Returns a dictionary holding the
    columns named in FIELDS_FOCUS, converted to the type declared there.
    Columns that have no corresponding value are ignored.

    Raises NodeUnknownException when comon reports that nothing matched;
    urllib2 errors and float()/int() conversion errors propagate.
    """
    url = full_url_csv(server, filter_address(address))
    req = urllib2.urlopen(url)
    try:
        # let's parse this manually
        header_line = req.readline()
        values_line = req.readline()
    finally:
        # always release the connection, even if reading fails
        req.close()

    # handle the case where the node is unknown to comon
    if values_line.find(NOTHING_MATCHED) != -1:
        raise NodeUnknownException("address %s is unknown to comon"
                                   % (address,))

    headers = [header.strip() for header in header_line.split(',')]
    values = values_line.split(',')
    print('h %s' % (headers,))
    print('v %s' % (values,))

    attributes = {}
    for key, val in zip(headers, values):
        if key not in FIELDS_FOCUS:
            continue
        format = FIELDS_FOCUS[key]
        if format == 'float':
            attributes[key] = float(val)
        elif format == 'int':
            attributes[key] = int(val)
        elif format == 'string':
            attributes[key] = val.strip()
    return attributes


##########
# function for decomposing a number along units
# tuple [n] must be a multiple of tuple[n+1]
# e.g.
# 1223456, (10000, 500, 10) -> (122, 6, 45)
# coz 1223456 = 122*10000 + 6*500 + 45*10 + 6
def split_number(n, tuple):
    """Decompose `n` along the successive units listed in `tuple`.

    n     -- the number to decompose
    tuple -- the units, largest first

    Returns a tuple with one count per unit: each count is the floored
    quotient of what is left of `n` by that unit, the remainder being
    carried over to the next unit. Whatever is left after the last unit
    is dropped.
    """
    result = ()
    for base in tuple:
        # divmod floors explicitly; a bare "/" only floors for python2 ints
        count, n = divmod(n, base)
        result += (count,)
    return result


###
MINUTE = 60
HOUR = 60 * MINUTE
DAY = HOUR * 24
WEEK = DAY * 7
MONTH = DAY * 30


# from a delay in seconds, returns a human-readable string
def seconds_printable(seconds):
    """Return a human-readable rendering of a delay given in seconds.

    The delay is split into months (counted as 30 days), weeks, days,
    hours and minutes. Output starts at the largest non-zero unit and
    shows at most three units; leftover seconds are not shown.
    """
    month, week, day, hour, minute = split_number(
        seconds, (MONTH, WEEK, DAY, HOUR, MINUTE))
    if month != 0:
        return "%d month(s), %d weeks, %d day(s)" % (month, week, day)
    elif week != 0:
        return "%d weeks, %d day(s), %d hour(s)" % (week, day, hour)
    elif day != 0:
        return "%d day(s), %d hour(s)" % (day, hour)
    elif hour != 0:
        return "%d hour(s), %d minute(s)" % (hour, minute)
    else:
        return "%d minute(s)" % (minute)


##########
def interpret(dict):
    """Turn the attributes fetched from comon into a nagios status code.

    dict -- attributes dictionary; 'sshstatus' is always read, 'resptime'
            only when 'sshstatus' is 0. A missing key raises KeyError.

    A non-zero sshstatus is reported as a duration (presumably the number
    of seconds since comon last reached the node over ssh -- confirm
    against comon): nagios.KO from 10 minutes on, nagios.WARNING below.
    Otherwise the response time decides: nagios.KO from 10.0 s,
    nagios.WARNING from 5.0 s, nagios.OK below that.
    """
    # check sshstatus is null
    sshstatus = dict['sshstatus']
    if sshstatus != 0:
        print('No response to comon/ssh for %s' % seconds_printable(sshstatus))
        if sshstatus >= 10 * MINUTE:
            return nagios.KO
        return nagios.WARNING

    # let's focus on resptime
    resptime = dict['resptime']
    print("Response time as measured by comon = %.2f s" % resptime)
    if resptime >= 10.0:
        return nagios.KO
    elif resptime >= 5.0:
        return nagios.WARNING
    return nagios.OK


#################### quick test
def usage():
    """Print the command-line usage and exit with status 1."""
    print("Usage comon_query.py node")
    sys.exit(1)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        usage()
    print('comon_query.check would return %d' % check(sys.argv[1]))
    # print 'get >%s<',seconds_printable(int(sys.argv[1]))