2 # Copyright (c) 2004 The Trustees of Princeton University (Trustees).
4 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
6 # $Id: comon.py,v 1.7 2007/07/03 19:59:02 soltesz Exp $
8 # Get CoMon data, unsorted, in CSV, and create a huge hash.
18 from threading import *
19 #httplib.HTTPConnection.debuglevel = 1
21 logger = logging.getLogger("monitor")
23 # Time between comon refresh
27 COMONURL = "http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeview"
30 # null == <not in DB?>
44 # The first line CoMon returns lists the keys (column names), ordered by column index
46 keys = rawdata.readline().rstrip().split(", ")
50 for line in rawdata.readlines():
51 l_host = line.rstrip().split(", ") # split the line on ', '
54 for i in range(1,len(keys)):
55 hash[hostname][keys[i]]=l_host[i]
57 except Exception, err:
58 logger.debug("No hosts retrieved")
64 print "Getting: %s" % url
66 coserv = urllib2.Request(url)
67 coserv.add_header('User-Agent', 'PL_Monitor +http://monitor.planet-lab.org/')
68 opener = urllib2.build_opener()
69 # Initial web get from summer.cs in CSV
70 rawdata = opener.open(coserv)
71 except urllib2.URLError, (err):
72 print "Attempting %s" %COMONURL
73 print "URL error (%s)" % (err)
75 return _tohash(rawdata)
80 cdb is the comon database (a dictionary).
81 q_allbuckets is a queue of all problem nodes. It gets sent to rt to find
82 the tickets open for each host.
84 def __init__(self, cdb=None, d_allplc_nodes=None, q_allbuckets=None):
86 self.accept_all_nodes = False
90 if d_allplc_nodes == None:
91 self.accept_all_nodes = True # TODO :get from plc.
94 self.d_allplc_nodes = d_allplc_nodes
95 self.updated = time.time()
96 self.q_allbuckets = q_allbuckets
97 #self.comon_buckets = {"down" : "resptime%20==%200%20&&%20keyok==null",
98 # "ssh": "sshstatus%20%3E%202h",
99 # "clock_drift": "drift%20%3E%201m",
100 # "dns": "dns1udp%20%3E%2080%20&&%20dns2udp%20%3E%2080",
101 # "filerw": "filerw%3E0",
102 # "dbg" : "keyok==0"}
103 self.comon_buckets = {
104 #"down" : "resptime==0&&keyok==null",
105 #"ssh": "sshstatus > 2h",
106 #"clock_drift": "drift > 1m",
107 #"dns": "dns1udp>80 && dns2udp>80",
108 #"filerw": "filerw > 0",
112 Thread.__init__(self)
114 def __tohash(self,rawdata):
115 # The first line CoMon returns lists the keys (column names), ordered by column index
116 keys = rawdata.readline().rstrip().split(", ")
121 for line in rawdata.readlines():
122 l_host = line.rstrip().split(", ") # split the line on ', '
125 if self.accept_all_nodes:
128 if hostname in self.d_allplc_nodes: # then we'll track it
133 for i in range(1,len(keys)):
134 hash[hostname][keys[i]]=l_host[i]
138 print "Retrieved %s hosts" % len(hash.keys())
139 print "Ignoring %d hosts" % i_ignored
141 logger.debug("Retrieved %s hosts" % len(hash.keys()))
142 logger.debug("Ignoring %d hosts" % i_ignored)
143 except Exception, err:
144 logger.debug("No hosts retrieved")
148 # Update individual buckets. Hostnames only.
def updatebuckets(self):
    """Refresh the host list stored for every configured bucket.

    For each (bucket name, select expression) pair in self.comon_buckets,
    CoMon is queried through self.coget() with the expression passed as the
    ``select`` filter.  The keys of the mapping that comes back are stored
    on this object as an attribute named after the bucket.
    """
    for name, selector in self.comon_buckets.items():
        logger.debug("COMON: Updating bucket %s" % name)
        # Build the filtered CSV query; the expression is wrapped in single
        # quotes exactly as given, with no extra escaping.
        query = "".join((COMONURL, "&format=formatcsv&select='", selector, "'"))
        matches = self.coget(query)
        setattr(self, name, matches.keys())
155 # Update ALL node information
158 self.updated = time.time()
159 # Make a Hash, put in self.
160 self.codata.update(self.coget(COMONURL + "&format=formatcsv"))
164 print "Getting: %s" % url
166 coserv = urllib2.Request(url)
167 coserv.add_header('User-Agent',
168 'PL_Monitor +http://monitor.planet-lab.org/')
169 opener = urllib2.build_opener()
170 # Initial web get from summer.cs in CSV
171 rawdata = opener.open(coserv)
172 except urllib2.URLError, (err):
173 print "Attempting %s" %COMONURL
174 print "URL error (%s)" % (err)
176 return self.__tohash(rawdata)
178 # Push nodes that are bad (in *a* bucket) into q(q_allbuckets)
180 #buckets_per_node = []
181 #for bucket in self.comon.comon_buckets.keys():
182 # if (hostname in getattr(self.comon, bucket)):
183 # buckets_per_node.append(bucket)
185 #loginbase = self.plcdb_hn2lb[hostname] # plc.siteId(node)
187 #if not loginbase in self.sickdb:
188 # self.sickdb[loginbase] = [{hostname: buckets_per_node}]
190 # self.sickdb[loginbase].append({hostname: buckets_per_node})
193 print "calling Comon.push()"
194 for bucket in self.comon_buckets.keys():
195 #print "bucket: %s" % bucket
196 for host in getattr(self,bucket):
198 diag_node['nodename'] = host
199 diag_node['message'] = None
200 diag_node['bucket'] = [bucket]
201 diag_node['stage'] = ""
202 #diag_node['ticket_id'] = ""
203 diag_node['args'] = None
204 diag_node['info'] = None
205 diag_node['time'] = time.time()
206 #print "host: %s" % host
207 self.q_allbuckets.put(diag_node)
213 # insert signal that this is the final host
214 self.q_allbuckets.put("None")
220 logger.setLevel(logging.DEBUG)
221 ch = logging.StreamHandler()
222 ch.setLevel(logging.DEBUG)
223 formatter = logging.Formatter('%(message)s')
224 ch.setFormatter(formatter)
225 logger.addHandler(ch)
231 #for i in a.comon_buckets: print "%s : %s" % ( i, a.comon_buckets[i])
235 #for i in a.down: print i
239 for host in cdb.keys():
240 #if cdb[host]['keyok'] == "0":
241 # null implies that it may not be in PL DB.
242 if cdb[host]['bootstate'] != "null" and \
243 cdb[host]['bootstate'] == "2" and \
244 cdb[host]['keyok'] == "0":
245 print("%-40s \t Bootstate %s nodetype %s kernver %s keyok %s" % (
246 host, cdb[host]['bootstate'], cdb[host]['nodetype'],
247 cdb[host]['kernver'], cdb[host]['keyok']))
249 # print("key mismatch at: %s" % host)
250 #print a.codata['michelangelo.ani.univie.ac.at']
254 #print a.coget(COMONURL + "&format=formatcsv&select='" + a.comon_buckets['filerw'])
257 if __name__ == '__main__':
261 except KeyboardInterrupt:
262 print "Killed. Exitting."
263 logger.info('Monitor Killed')