f37 -> f39
[infrastructure.git] / nagios / configurator / comon_query.py
1 #!/usr/bin/env python
2
3 #
4 # This module checks for a planetlab node by
5 # (*) connecting to the comon central query interface
6 # (*) retrieving the latest info for a given node
7 #
8
9 import sys
10 import socket
11 import re
12 import string
13 import urllib2
14
15 import nagios
16
# default server
SERVER = 'summer.cs.princeton.edu'


# Used to be the bare string "NodeUnknownByComon".  String exceptions can no
# longer be raised on Python 2.6 and later, so this is a real exception
# class; "raise NodeUnknownException" and "except NodeUnknownException:"
# keep working unchanged.
class NodeUnknownException(Exception):
    """Raised when comon answers that it has no record of the node."""

    def __init__(self, message="NodeUnknownByComon"):
        # keep the historical tag as the default message
        Exception.__init__(self, message)
21
22 ####################
def check(node, server=None):
    """Query comon about `node` and return a nagios status code.

    node   -- hostname of the node to check
    server -- comon server to query; falls back to the module-level SERVER

    Every attribute retrieved for the node is printed, then the result of
    interpret() on those attributes is returned.  nagios.UNKNOWN is returned
    when comon does not know the node, or when any other exception occurs
    (in which case the exception is printed as well).
    """
    if server is None:
        server = SERVER

    try:
        # the comon query selects the node by its address as an integer
        node_address = compute_address(node)
        # build URL, connect and return attributes dict
        node_dict = query_node(server, node_address)
        for key in node_dict:
            print("%s %s" % (key, node_dict[key]))
        # interpret
        return interpret(node_dict)

    except NodeUnknownException:
        return nagios.UNKNOWN
    except Exception:
        # sys.exc_info() instead of "except Exception, e": that binding
        # syntax is rejected by newer interpreters, and "as e" by older ones
        error = sys.exc_info()[1]
        print("comon_query.check got exception %s" % (error,))
        return nagios.UNKNOWN
45
46 ##########
# raw strings: "\." in a plain literal is an invalid escape sequence
re_dec = r"([0-9]{1,3})"
re_ipsep = r"\."
re_ip = (re_dec + re_ipsep) * 3 + re_dec
ma_ip = re.compile(re_ip)


def compute_address(nodename):
    """Resolve `nodename` and return its IPv4 address as a single integer.

    The dotted quad a.b.c.d returned by socket.gethostbyname() is folded
    into ((a*256 + b)*256 + c)*256 + d.

    Errors raised by socket.gethostbyname() propagate to the caller;
    ValueError is raised if the resolved address is not a dotted quad.
    """
    ip = socket.gethostbyname(nodename)
    matched = ma_ip.match(ip)
    if matched is None:
        raise ValueError("not a dotted-quad IPv4 address: %r" % (ip,))
    # a real list (not map()) so the code does not depend on map() being eager
    octets = [int(group) for group in matched.groups()]
    address = 0
    for octet in octets:
        address = address * 256 + octet
    return address
59
60 ##########
# building blocks of the comon query URL
URL_FORMAT = "http://%s/status/tabulator.cgi"
ARGS_FORMAT = "table=table_nodeviewshort&select='%s'"
FILTER_FORMAT = "address==%d"
CSV_FORMAT = "&format=formatcsv"


def filter_address(address):
    """Return the select expression matching the integer `address`."""
    expression = FILTER_FORMAT % address
    return expression


def filter_node(nodename):
    """Return the select expression matching the address of `nodename`."""
    address = compute_address(nodename)
    return filter_address(address)


def full_url(server, filter):
    """Return the tabulator URL on `server` restricted by `filter`."""
    location = URL_FORMAT % server
    query = ARGS_FORMAT % filter
    return "?".join((location, query))


def full_url_csv(server, filter):
    """Same as full_url(), with the CSV output format requested."""
    return "".join((full_url(server, filter), CSV_FORMAT))
78
79
# mention field here means we'll parse it and keep it
# see _parse_fields below
FIELDS_FOCUS = {
    'resptime': 'float',
    'sshstatus': 'int',
    'bootstate': 'string',
    }

NOTHING_MATCHED = 'nothing matched select statement'


##########
def _parse_fields(headers, values):
    """Convert and keep the columns named in FIELDS_FOCUS, drop the others."""
    parsed = {}
    # zip() pairs each column with its value and stops at the shorter list
    for key, val in zip(headers, values):
        kind = FIELDS_FOCUS.get(key)
        if kind == 'float':
            parsed[key] = float(val)
        elif kind == 'int':
            parsed[key] = int(val)
        elif kind == 'string':
            parsed[key] = val.strip()
    return parsed


def query_node(server, address):
    """Fetch the comon CSV record for `address` and return its parsed fields.

    server  -- host name of the comon server
    address -- node address as an integer (see compute_address)

    Reads the header line and the first data line of the reply, prints both
    once split, and returns a dictionary restricted to the FIELDS_FOCUS
    columns, each converted to its declared type.

    Raises NodeUnknownException when the data line carries the
    NOTHING_MATCHED marker.  Errors from urllib2 and from the float()/int()
    conversions propagate to the caller.
    """
    url = full_url_csv(server, filter_address(address))
    req = urllib2.urlopen(url)
    try:
        # let's parse this manually: only the first two lines are needed
        header_line = req.readline()
        values_line = req.readline()
    finally:
        # release the connection even if reading fails
        req.close()

    headers = [name.strip() for name in header_line.split(',')]
    # handle the case where the node is unknown to comon
    if values_line.find(NOTHING_MATCHED) != -1:
        raise NodeUnknownException

    values = values_line.split(',')

    print("h %s" % (headers,))
    print("v %s" % (values,))

    return _parse_fields(headers, values)
121
122 ##########
# function for decomposing a number along units
# units are meant to be listed from the largest to the smallest
# e.g.
#   1223456, (10000, 500, 10)  -> (122, 6, 45)
# coz 1223456 = 122*10000 + 6*500 + 45*10 + 6
def split_number(n, tuple):
    """Return how many whole multiples of each unit fit into `n`.

    n     -- the number to decompose
    tuple -- the sequence of units

    Each unit is applied to what the previous units left over.  The result
    is a tuple with one count per unit, in the same order; whatever remains
    after the last unit is dropped.
    """
    result = ()
    for base in tuple:
        # divmod() floors explicitly; "/" only does so for Python 2 integers
        count, n = divmod(n, base)
        result += (count,)
    return result
135
136 ###
# durations, in seconds
MINUTE = 60
HOUR = 60 * MINUTE
DAY = HOUR * 24
WEEK = DAY * 7
MONTH = DAY * 30


# from a delay in seconds, returns a human-readable string
def seconds_printable(seconds):
    """Describe `seconds` using its largest non-zero unit and the next ones.

    The delay is split into months (30 days), weeks, days, hours and
    minutes; leftover seconds are not reported.
    """
    units = (MONTH, WEEK, DAY, HOUR, MINUTE)
    month, week, day, hour, minute = split_number(seconds, units)

    # start from the coarsest unit that is actually present
    if month != 0:
        return "%d month(s), %d weeks, %d day(s)" % (month, week, day)
    if week != 0:
        return "%d weeks, %d day(s), %d hour(s)" % (week, day, hour)
    if day != 0:
        return "%d day(s), %d hour(s)" % (day, hour)
    if hour != 0:
        return "%d hour(s), %d minute(s)" % (hour, minute)
    return "%d minute(s)" % (minute)
157
158 ##########
def interpret(dict):
    """Turn the attributes of a node into a nagios status code.

    dict -- attributes of the node; 'sshstatus' is always read, 'resptime'
            only when 'sshstatus' is 0

    A non-zero sshstatus is reported as a delay in seconds: nagios.KO from
    ten minutes on, nagios.WARNING below.  Otherwise the response time
    decides: nagios.KO from 10.0, nagios.WARNING from 5.0, else nagios.OK.
    A one-line explanation is printed in both cases.
    """
    sshstatus = dict['sshstatus']

    if sshstatus == 0:
        # ssh is fine, let's focus on resptime
        resptime = dict['resptime']
        print("Response time as measured by comon = %.2f s" % resptime)
        if resptime >= 10.0:
            return nagios.KO
        if resptime >= 5.0:
            return nagios.WARNING
        return nagios.OK

    # sshstatus is not null
    print('No response to comon/ssh for %s' % seconds_printable(sshstatus))
    if sshstatus >= 10 * MINUTE:
        return nagios.KO
    return nagios.WARNING
178
179 #################### quick test
def usage():
    """Print the command-line synopsis and exit with status 1."""
    print("Usage comon_query.py node")
    sys.exit(1)


if __name__ == '__main__':
    # exactly one argument is expected: the node to check
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        usage()
    status = check(arguments[0])
    print('comon_query.check would return %d' % status)
#     print 'get >%s<',seconds_printable(int(sys.argv[1]))