creation : creates nagios config from a set of sites
[infrastructure.git] / nagios / configurator / comon_query.py
diff --git a/nagios/configurator/comon_query.py b/nagios/configurator/comon_query.py
new file mode 100755 (executable)
index 0000000..9aa2685
--- /dev/null
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+
+#
+# This module checks for a planetlab node by
+# (*) connecting to the comon central query interface
+# (*) retrieving the latest info for a given node
+#
+
+import sys
+import socket
+import re
+import string
+import urllib2
+
+import nagios
+
+# default comon server, queried when the caller does not name one
+SERVER='summer.cs.princeton.edu'
+
+# Raised by query_node when comon answers that nothing matched the node.
+# This is an exception class rather than a plain string: string exceptions
+# cannot be raised any more as of python 2.6 (the raise statement itself
+# fails with a TypeError).
+class NodeUnknownException (Exception):
+    pass
+
+####################
+def check (node,server=None):
+    """Return the nagios status of a planetlab node, as seen by comon.
+
+    node   -- hostname of the node; its IP address is computed from it
+    server -- hostname of the comon server to query; defaults to SERVER
+
+    The attributes fetched from comon are printed on stdout, then handed
+    over to interpret(), whose result is returned.
+    nagios.UNKNOWN is returned when comon does not know the node, and also
+    when any other exception occurs; in the latter case the exception is
+    printed on stdout rather than propagated.
+    """
+
+    # identity test: the reliable way to detect that no server was given
+    if server is None:
+        server = SERVER
+
+    try:
+        # compute node IP number
+        node_address = compute_address (node)
+        # build URL, connect and return attributes dict
+        node_dict = query_node (server,node_address)
+        for key in node_dict.keys():
+            print key,node_dict[key]
+        # interpret
+        return interpret (node_dict)
+
+    except NodeUnknownException:
+        return nagios.UNKNOWN
+    except Exception,e:
+        print "comon_query.check got exception",e
+        return nagios.UNKNOWN
+
+##########
+# regular expression for a dotted IPv4 address, with one group per byte
+re_dec="([0-9]{1,3})"
+re_ipsep="\."
+re_ip=re_ipsep.join([re_dec]*4)
+ma_ip = re.compile (re_ip)
+
+def compute_address (nodename):
+    """Resolve nodename and return its IPv4 address as a single integer.
+
+    The 4 bytes are combined most significant first, i.e. a.b.c.d
+    yields ((a*256+b)*256+c)*256+d.
+    """
+    dotted=socket.gethostbyname(nodename)
+    address=0
+    for byte in ma_ip.match(dotted).groups():
+        address = (address*256)+int(byte)
+    return address
+
+##########
+# building blocks of the URL sent to the comon query interface
+URL_FORMAT="http://%s/status/tabulator.cgi"
+ARGS_FORMAT="table=table_nodeviewshort&select='%s'"
+FILTER_FORMAT="address==%d"
+CSV_FORMAT="&format=formatcsv"
+
+def filter_address (address):
+    """Return the select expression that matches an integer address."""
+    select = FILTER_FORMAT % address
+    return select
+
+def filter_node (nodename):
+    """Return the select expression that matches a node given by name."""
+    address = compute_address(nodename)
+    return filter_address(address)
+
+def full_url (server, filter):
+    """Return the URL that queries server with the given select expression."""
+    location = URL_FORMAT % server
+    query = ARGS_FORMAT % filter
+    return location + '?' + query
+
+def full_url_csv (server,filter):
+    """Same as full_url, with the csv format argument appended."""
+    plain_url = full_url(server,filter)
+    return plain_url + CSV_FORMAT
+
+
+
+# mention field here means we'll parse it and keep it
+# the value tells query_node how to convert the field:
+# 'float', 'int', or 'string' (stripped of surrounding whitespace)
+FIELDS_FOCUS={
+    'resptime': 'float',
+    'sshstatus':'int',
+    'bootstate':'string',
+    }
+
+# text found in the answer when the node is unknown to comon
+NOTHING_MATCHED='nothing matched select statement'
+##########
+def query_node (server,address):
+    """Fetch from comon the attributes of the node with the given address.
+
+    server  -- hostname of the comon server
+    address -- IP address of the node, as an integer
+
+    The answer is read as 2 comma-separated lines: field names first,
+    then the matching values; both are printed on stdout.
+    Returns a dictionary restricted to the fields listed in FIELDS_FOCUS,
+    each value being converted as specified there.  Names and values are
+    paired in order, a field that has no counterpart is left out.
+
+    Raises NodeUnknownException when the answer says nothing matched.
+    """
+    url=full_url_csv(server,filter_address(address))
+    req = urllib2.urlopen(url)
+    # only 2 lines are needed; release the connection whatever happens
+    try:
+        header_line=req.readline()
+        value_line=req.readline()
+    finally:
+        req.close()
+
+    # let's parse this manually
+    headers=[ header.strip() for header in header_line.split(',') ]
+    # handle the case where the node is unknown to comon
+    if value_line.find(NOTHING_MATCHED) != -1:
+        raise NodeUnknownException
+
+    values=value_line.split(',')
+
+    print 'h',headers
+    print 'v', values
+
+    attributes={}
+    for (key,val) in zip(headers,values):
+        # fields that FIELDS_FOCUS does not mention are ignored
+        if key not in FIELDS_FOCUS:
+            continue
+        format = FIELDS_FOCUS[key]
+        if format == 'float':
+            attributes[key]=float(val)
+        elif format == 'int':
+            attributes[key]=int(val)
+        elif format == 'string':
+            attributes[key]=string.strip(val)
+
+    return attributes
+
+##########
+# function for decomposing a number along units
+# units are applied in the order given, each one to what the previous
+# ones left over, so they are expected to go from largest to smallest
+# e.g.
+#   1223456, (10000, 500, 10)  -> (122, 6, 45)
+# coz 1223456 = 122*10000 + 6*500 + 45*10 + 6
+# the final remainder (6 in this example) is not returned
+def split_number (n,tuple):
+    result=()
+    for base in tuple:
+        # divmod always floors, so whole counts come out whatever the
+        # type of n and the division mode in effect
+        count,n = divmod(n,base)
+        result+=(count,)
+    return result
+
+###
+# durations expressed in seconds; a month is counted as 30 days
+MINUTE=60
+HOUR=MINUTE*60
+DAY=24*HOUR
+WEEK=7*DAY
+MONTH=30*DAY
+
+def seconds_printable (seconds):
+    """Return a human-readable string for a delay expressed in seconds.
+
+    The delay is split into months, weeks, days, hours and minutes;
+    the string starts at the largest non-null unit and shows at most
+    3 units.  Delays below one hour are shown in minutes only.
+    """
+    units=(MONTH,WEEK,DAY,HOUR,MINUTE)
+    (months,weeks,days,hours,minutes) = split_number(seconds,units)
+    if months:
+        return "%d month(s), %d weeks, %d day(s)"%(months,weeks,days)
+    if weeks:
+        return "%d weeks, %d day(s), %d hour(s)"%(weeks,days,hours)
+    if days:
+        return "%d day(s), %d hour(s)"%(days,hours)
+    if hours:
+        return "%d hour(s), %d minute(s)"%(hours,minutes)
+    return "%d minute(s)"%(minutes)
+
+##########
+def interpret (dict):
+    """Turn the attributes of a node into a nagios status.
+
+    dict -- must hold 'sshstatus'; 'resptime' is read as well when
+            sshstatus is null
+
+    A non-null sshstatus is reported on stdout as a delay without
+    response: nagios.KO from 10 minutes on, nagios.WARNING below that.
+    Otherwise the response time is reported and decides: nagios.KO
+    from 10 s on, nagios.WARNING from 5 s on, nagios.OK below that.
+    """
+    silence=dict['sshstatus']
+    if silence != 0:
+        print 'No response to comon/ssh for %s'%seconds_printable(silence)
+        if silence >= 10*MINUTE:
+            return nagios.KO
+        return nagios.WARNING
+
+    # ssh is fine, let's focus on resptime
+    delay = dict['resptime']
+    print "Response time as measured by comon = %.2f s"%delay
+    if delay >= 10.0:
+        return nagios.KO
+    if delay >= 5.0:
+        return nagios.WARNING
+    return nagios.OK
+
+#################### quick test
+def usage():
+    """Print a one-line usage message on stdout and exit with status 1."""
+    sys.stdout.write("Usage comon_query.py node\n")
+    raise SystemExit(1)
+
+# quick test: expects exactly one argument, the name of the node to check
+if __name__=='__main__':
+    arguments=sys.argv[1:]
+    if len(arguments) != 1:
+        usage()
+    (node,)=arguments
+    status=check(node)
+    print 'comon_query.check would return %d'%status
+#     print 'get >%s<',seconds_printable(int(sys.argv[1]))