Uses CoMon's ability to find 'upness' to email. Changed queueing between threads...
[monitor.git] / comon.py
1 #
2 # Copyright (c) 2004  The Trustees of Princeton University (Trustees).
3 #
4 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
5 #
6 # $Id: $
7 #
8 # Get CoMon data, unsorted, in CSV, and create a huge hash.
9 #
10
11
12 import urllib2
13 import httplib
14 import time
15 import Queue 
16 import logging
17 from threading import *
18 #httplib.HTTPConnection.debuglevel = 1  
19
# Logger shared by this module; it is registered under the name "monitor".
logger = logging.getLogger("monitor")

# Seconds the Comon thread sleeps between two refreshes (20 minutes).
COSLEEP = 20 * 60

# Base URL of the CoMon tabulator CGI (node-view table).  Format and select
# parameters are appended to it by the Comon class.
COMONURL = (
    "http://summer.cs.princeton.edu/status/tabulator.cgi"
    "?table=table_nodeview"
)
27
28
class Comon(Thread):
    """
    Thread that periodically downloads node status from CoMon.

    cdb is the comon database (dictionary).  It is updated in place with
    one entry per host ({hostname: {column: value}}) on every refresh.
    allbuckets is a queue of all problem nodes. This gets sent to rt to find
    tickets open for host.

    For every entry of self.comonbkts an attribute with the bucket's name
    (self.down, self.ssh, ...) holds the list of hostnames returned by that
    bucket's CoMon query.  The lists are empty until the first refresh.
    """

    def __init__(self, cdb, allbuckets):
        Thread.__init__(self)
        self.codata = cdb
        self.updated = time.time()
        self.allbuckets = allbuckets
        # Bucket name -> URL-encoded CoMon "select" expression.
        self.comonbkts = {"down": "resptime%20==%200%20&&%20keyok==null",
                          "ssh": "sshstatus%20%3E%202h",
                          "clock_drift": "drift%20%3E%201m",
                          "dns": "dns1udp%20%3E%2080%20&&%20dns2udp%20%3E%2080",
                          "disk": "resptime%20%3E%200%20&&%20gbfree%20%3C%205",
                          "filerw": "filerw%3E0",
                          "dbg": "keyok==0"}
        # Create every bucket attribute up front so that self.<bucket> and
        # push() work even before the first refresh has completed.
        for bkt in self.comonbkts:
            setattr(self, bkt, [])

    def __tohash(self, rawdata):
        """
        Convert a CoMon CSV response into {hostname: {column: value}}.

        rawdata is a file-like object, or None when the download failed.
        Its first line lists the column names; each following line holds
        one host, hostname first.  Fields are separated by ", ".

        Returns {} when rawdata is None or cannot be read.  Rows that have
        fewer fields than the header, or an empty hostname, are skipped so
        that one bad line does not throw away every other host.
        """
        if rawdata is None:
            logger.debug("No hosts retrieved")
            return {}
        try:
            # First line Comon returns is list of keys with respect to index
            keys = rawdata.readline().rstrip().split(", ")
            lines = rawdata.readlines()
        except Exception:
            # Best effort: a response that breaks while being read is
            # treated like an empty one.
            logger.debug("No hosts retrieved")
            return {}

        hosts = {}
        for line in lines:
            fields = line.rstrip().split(", ")
            if len(fields) < len(keys) or not fields[0]:
                logger.debug("Skipping malformed CoMon row: %r", line)
                continue
            # Column 0 is the hostname; the remaining columns become the
            # per-host dictionary.  Surplus fields are ignored.
            hosts[fields[0]] = dict(zip(keys[1:], fields[1:]))
        logger.debug("Retrieved %s hosts", len(hosts))
        return hosts

    # Update individual buckets.  Hostnames only.
    def updatebkts(self):
        """Re-run every bucket query and store the matching hostnames."""
        for (bkt, query) in self.comonbkts.items():
            url = COMONURL + "&format=formatcsv&select='" + query + "'"
            setattr(self, bkt, list(self.coget(url)))

    # Update ALL node information
    def updatedb(self):
        """Download the full node table and merge it into self.codata."""
        # Get time of update
        self.updated = time.time()
        # Make a Hash, put in self.
        self.codata.update(self.coget(COMONURL + "&format=formatcsv"))

    def coget(self, url):
        """
        Download url and return its CSV body as {hostname: {column: value}}.

        Returns {} when the request fails with urllib2.URLError.
        """
        logger.debug("Trying - " + url)
        coserv = urllib2.Request(url)
        coserv.add_header('User-Agent',
                          'PL_Monitor +http://monitor.planet-lab.org/')
        try:
            # Initial web get from summer.cs in CSV
            rawdata = urllib2.build_opener().open(coserv)
        except urllib2.URLError:
            # Report the URL that was actually requested, not the base URL.
            logger.exception("URL error while fetching %s", url)
            return {}
        try:
            return self.__tohash(rawdata)
        finally:
            # Always release the HTTP connection, even if parsing fails.
            rawdata.close()

    # Push nodes that are bad (in *a* bucket) into q(allbuckets)
    def push(self):
        """Put every hostname of every bucket on the allbuckets queue."""
        for bucket in self.comonbkts:
            for host in getattr(self, bucket):
                self.allbuckets.put(host)

    def run(self):
        """Thread body: refresh database and buckets, push, sleep, repeat."""
        while True:
            try:
                self.updatedb()
                self.updatebkts()
                self.push()
            except Exception:
                # Top-level boundary of the thread: log and keep polling
                # instead of letting one bad cycle end monitoring for good.
                logger.exception("CoMon refresh failed")
            time.sleep(COSLEEP)

    def __repr__(self):
        # __repr__ must return a string; returning self makes repr() raise.
        return "<Comon hosts=%d updated=%s>" % (len(self.codata),
                                                time.ctime(self.updated))
110
def main():
    """
    Manual smoke test for the Comon thread.

    Sends the "monitor" logger's debug output to the console, starts a
    Comon thread, prints the bucket queries and the current "down" bucket,
    then lists every host whose keyok column is "0".  Ends the process with
    os._exit(0) because the Comon thread loops forever and would otherwise
    keep the interpreter alive.
    """
    # Imported here so that main() also works when this module is imported;
    # the module only imports os inside its __main__ guard.
    import os

    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(ch)

    t = Queue.Queue()
    cdb = {}
    a = Comon(cdb, t)
    print(a.comonbkts)
    a.start()

    time.sleep(5)
    # The bucket may not have been fetched yet after five seconds; fall
    # back to an empty list instead of failing with AttributeError.
    print(getattr(a, "down", []))

    time.sleep(5)
    # Iterate over a snapshot: the Comon thread may update cdb meanwhile.
    for host, info in list(cdb.items()):
        if info['keyok'] == "0":
            print("%s \t Bootstate %s nodetype %s kernver %s keyok %s" % (
                host, info['bootstate'], info['nodetype'],
                info['kernver'], info['keyok']))

    os._exit(0)
if __name__ == "__main__":
    # Script entry point.  os is imported here, at module level, and is
    # used by the interrupt handler below.
    import os
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C: report it, then leave immediately without waiting for
        # the still-running Comon thread.
        print("Killed.  Exitting.")
        logger.info("Monitor Killed")
        os._exit(0)