changes for 3.0
[monitor.git] / comon.py
1 #
2 # Copyright (c) 2004  The Trustees of Princeton University (Trustees).
3 #
4 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
5 #
6 # $Id: comon.py,v 1.7 2007/07/03 19:59:02 soltesz Exp $
7 #
8 # Get CoMon data, unsorted, in CSV, and create a huge hash.
9 #
10
11
12 import urllib2
13 import httplib
14 import time
15 import Queue 
16 import logging
17 import pickle
18 from threading import *
19 #httplib.HTTPConnection.debuglevel = 1  
20
# Logger registered under the name "monitor"; the functions and the Comon
# class in this file send their debug/info messages through it.
logger = logging.getLogger("monitor")

# Time between comon refresh
# NOTE(review): presumably seconds; nothing in this file reads the value -- confirm.
COSLEEP=1200

# CoMon
# Base URL of the CoMon tabulator CGI (node view table).  Code in this file
# appends "&format=formatcsv" and, for bucket queries, "&select='<expr>'".
COMONURL = "http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeview"
28
29 # node type:
30 # null == <not in DB?>
31 #        0 == 
32 #        1 == Prod
33 #        2 == alpha
34 #        3 == beta
35
36 # boot state:
37 #       0 == new
38 #       1 == boot
39 #       2 == dbg
40 #       3 == rins
41 #       4 == ins
42
43 def _tohash(rawdata):
44         # First line Comon returns is list of keys with respect to index
45         try:
46                 keys = rawdata.readline().rstrip().split(", ")
47                 l_host = []
48                 hash = {}
49                 i_ignored = 0
50                 for line in rawdata.readlines():
51                         l_host = line.rstrip().split(", ")              # split the line on ', '
52                         hostname = l_host[0]
53                         hash[hostname] = {}
54                         for i in range(1,len(keys)):
55                                 hash[hostname][keys[i]]=l_host[i]
56
57         except Exception, err:
58                 logger.debug("No hosts retrieved")      
59                 return {} 
60         return hash
61
def comonget(url):
        """Download url and return the response parsed by _tohash().

        url -- complete URL to request.

        Returns the dictionary built by _tohash() from the response, or {}
        when the request raises urllib2.URLError.  The URL and any URL error
        are printed to standard output.  The response is closed once it has
        been parsed.
        """
        # Local import: only needed to fetch the active exception with syntax
        # that is valid on every Python version this file may run under.
        import sys

        print("Getting: %s" % url)
        try:
                coserv = urllib2.Request(url)
                coserv.add_header('User-Agent', 'PL_Monitor +http://monitor.planet-lab.org/')
                opener = urllib2.build_opener()
                # Initial web get from summer.cs in CSV
                rawdata = opener.open(coserv)
        except urllib2.URLError:
                err = sys.exc_info()[1]
                # Report the URL that was actually requested, not the base constant.
                print("Attempting %s" % url)
                print("URL error (%s)" % (err,))
                logger.debug("No hosts retrieved")
                return {}

        try:
                return _tohash(rawdata)
        finally:
                # Release the connection even if parsing fails.
                rawdata.close()
76
77
class Comon(Thread):
        """
        Thread that downloads CoMon node data and queues the problem nodes.

        cdb is the comon database (dictionary)
        all buckets is a queue of all problem nodes. This gets sent to rt to find
        tickets open for host.

        Attributes set by __init__:
          codata           -- the cdb dictionary; updatedb() merges fresh data into it
          d_allplc_nodes   -- hostnames to keep, or None to keep every host
          accept_all_nodes -- True when d_allplc_nodes was not supplied
          q_allbuckets     -- queue that push() and run() write to
          updated          -- time.time() of construction / of the last updatedb()
          comon_buckets    -- {bucket name: CoMon select expression}
        updatebuckets() additionally stores one attribute per bucket name that
        holds the list of hostnames matching that bucket.
        """
        def __init__(self, cdb=None, d_allplc_nodes=None, q_allbuckets=None):
                """Store the shared structures; no network access happens here.

                cdb            -- dictionary updated in place with node data
                                  (a fresh one is created when omitted)
                d_allplc_nodes -- container of hostnames to track; when omitted
                                  every host returned by CoMon is accepted
                q_allbuckets   -- object with a put() method that receives the
                                  per-node dictionaries and the final "None"
                """
                Thread.__init__(self)

                if cdb is None:
                        cdb = {}

                # Without a node list there is nothing to filter against.
                self.accept_all_nodes = d_allplc_nodes is None  # TODO :get from plc.

                self.codata = cdb
                self.d_allplc_nodes = d_allplc_nodes
                self.updated = time.time()
                self.q_allbuckets = q_allbuckets
                # Only the "dbg" bucket is active.  Other select expressions
                # that have been used with CoMon, kept for reference:
                self.comon_buckets = {
                        #"down" : "resptime==0&&keyok==null",
                        #"ssh": "sshstatus > 2h",
                        #"clock_drift": "drift > 1m",
                        #"dns": "dns1udp>80 && dns2udp>80",
                        #"filerw": "filerw > 0",
                        #"all" : ""
                        "dbg" : "keyok==0",
                        }

        def __tohash(self,rawdata):
                """Parse a CoMon CSV response, keeping only the tracked hosts.

                The first line of rawdata lists the column names separated by
                ", "; each later line is one host whose first field is the
                hostname.  Hosts are kept when accept_all_nodes is set or the
                hostname is in d_allplc_nodes; the others are counted as
                ignored.  Blank or truncated rows of kept hosts are skipped.

                rawdata -- file-like object with readline()/readlines(), or
                           None when the download failed.

                Returns {hostname: {column_name: value}}, or {} when rawdata
                is None or cannot be read.
                """
                try:
                        # First line Comon returns is list of keys with respect to index
                        keys = rawdata.readline().rstrip().split(", ")
                        rows = rawdata.readlines()
                except Exception:
                        # rawdata is None (failed download) or the read failed.
                        logger.debug("No hosts retrieved")
                        return {}

                table = {}
                i_ignored = 0
                i_malformed = 0
                for line in rows:
                        fields = line.rstrip().split(", ")              # split the line on ', '
                        hostname = fields[0]
                        if not (self.accept_all_nodes or hostname in self.d_allplc_nodes):
                                i_ignored += 1
                                continue
                        if len(fields) < len(keys):
                                # Skip the bad row instead of discarding every
                                # host parsed so far.
                                i_malformed += 1
                                continue
                        # Column 0 is the hostname; pair the rest by index.
                        table[hostname] = dict(zip(keys[1:], fields[1:]))

                print("Retrieved %s hosts" % len(table))
                print("Ignoring %d hosts" % i_ignored)

                logger.debug("Retrieved %s hosts" % len(table))
                logger.debug("Ignoring %d hosts" % i_ignored)
                if i_malformed:
                        logger.debug("Skipped %d malformed rows" % i_malformed)
                return table

        # Update individual buckets.  Hostnames only.
        def updatebuckets(self):
                """Query CoMon once per bucket and store the matching hostnames.

                For each entry of comon_buckets the select expression is
                appended to COMONURL and the resulting hostnames are stored as
                a list in an attribute named after the bucket.
                """
                for (bucket, select) in self.comon_buckets.items():
                        logger.debug("COMON:  Updating bucket %s" % bucket)
                        hosts = self.coget(COMONURL + "&format=formatcsv&select='" + select + "'")
                        setattr(self, bucket, list(hosts.keys()))

        # Update ALL node information
        def updatedb(self):
                """Download the full node table and merge it into codata."""
                # Get time of update
                self.updated = time.time()
                # Make a Hash, put in self.
                self.codata.update(self.coget(COMONURL + "&format=formatcsv"))

        def coget(self,url):
                """Download url and return the response parsed by __tohash().

                Returns {} when the request raises urllib2.URLError.  The URL
                and any URL error are printed to standard output.  The response
                is closed once it has been parsed.
                """
                # Local import: only needed to fetch the active exception with
                # syntax valid on every Python version this file may run under.
                import sys

                print("Getting: %s" % url)
                try:
                        coserv = urllib2.Request(url)
                        coserv.add_header('User-Agent',
                                'PL_Monitor +http://monitor.planet-lab.org/')
                        opener = urllib2.build_opener()
                        # Initial web get from summer.cs in CSV
                        rawdata = opener.open(coserv)
                except urllib2.URLError:
                        err = sys.exc_info()[1]
                        # Report the URL actually requested, not the base constant.
                        print("Attempting %s" % url)
                        print("URL error (%s)" % (err,))
                        logger.debug("No hosts retrieved")
                        return {}

                try:
                        return self.__tohash(rawdata)
                finally:
                        # Release the connection even if parsing fails.
                        rawdata.close()

        # Push nodes that are bad (in *a* bucket) into q(q_allbuckets)
        def push(self):
                """Queue one dictionary per (bucket, host) pair on q_allbuckets.

                Reads the per-bucket host lists stored by updatebuckets(), so
                that method must have run first.
                """
                print("calling Comon.push()")
                for bucket in self.comon_buckets.keys():
                        for host in getattr(self, bucket):
                                diag_node = {
                                        'nodename': host,
                                        'message': None,
                                        'bucket': [bucket],
                                        'stage': "",
                                        'args': None,
                                        'info': None,
                                        'time': time.time(),
                                        }
                                self.q_allbuckets.put(diag_node)

        def run(self):
                """Thread body: refresh all data, refresh the buckets, queue bad nodes.

                The string "None" is always queued last, even when one of the
                steps raises, so that a reader of q_allbuckets waiting for the
                end marker is not left blocked.
                """
                try:
                        self.updatedb()
                        self.updatebuckets()
                        self.push()
                finally:
                        # insert signal that this is the final host
                        self.q_allbuckets.put("None")

        def __repr__(self):
                """Return a short text description (repr() must return a string)."""
                return "<Comon hosts=%d updated=%s>" % (len(self.codata), self.updated)
218
def main():
        """Manual check: fetch CoMon data and print nodes in debug state.

        Sends the "monitor" logger's output to the console, runs one Comon
        thread, waits up to ten seconds for it, and prints every host whose
        bootstate is "2" and whose keyok is "0".
        """
        logger.setLevel(logging.DEBUG)
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(ch)

        t = Queue.Queue()
        cdb = {}
        # The queue must be passed by keyword: the second positional parameter
        # of Comon is d_allplc_nodes, not q_allbuckets.
        a = Comon(cdb, q_allbuckets=t)
        a.start()

        # Give the fetch at most ten seconds, but continue as soon as it ends.
        a.join(10)

        # Iterate over a snapshot in case the thread is still filling cdb.
        for host in list(cdb.keys()):
                node = cdb[host]
                # A bootstate of "null" implies the node may not be in the PL
                # DB; comparing against "2" already excludes it.
                if node['bootstate'] == "2" and node['keyok'] == "0":
                        print("%-40s \t Bootstate %s nodetype %s kernver %s keyok %s" % (
                                host, node['bootstate'], node['nodetype'],
                                node['kernver'], node['keyok']))
# Script entry point: run main() and leave immediately on Ctrl-C.
if __name__ == "__main__":
        import os
        try:
                main()
        except KeyboardInterrupt:
                print("Killed.  Exitting.")
                logger.info('Monitor Killed')
                # os._exit() ends the process at once, without waiting for
                # any thread that is still running.
                os._exit(0)