* Sets nodes to reboot, uses PCU if available. Defaults to POD/email (with site...
[monitor.git] / comon.py
1 #
2 # Copyright (c) 2004  The Trustees of Princeton University (Trustees).
3 #
4 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
5 #
6 # $Id: $
7 #
8 # Get CoMon data, unsorted, in CSV, and create a huge hash.
9 #
10
11
12 import urllib2
13 import httplib
14 import time
15 import Queue 
16 import logging
17 from threading import *
18 #httplib.HTTPConnection.debuglevel = 1  
19
# Module logger, registered under the name "monitor".
logger = logging.getLogger("monitor")

# Seconds to sleep between CoMon refreshes (20 minutes).
COSLEEP = 20 * 60

# CoMon tabulator endpoint; callers append further query parameters.
COMONURL = (
    "http://summer.cs.princeton.edu/status/tabulator.cgi"
    "?table=table_nodeview"
)
27
28
class Comon(Thread):
    """
    Thread that periodically pulls CoMon data into local structures.

    cdb is the comon database (dictionary).  It is updated in place with
    one entry per CSV row, keyed by the row's first field; each value is a
    dictionary of the remaining fields keyed by the CSV header names.
    allbuckets is a queue of all problem nodes.  This gets sent to rt to
    find tickets open for host.

    For every entry of comonbkts the matching hostnames are stored on the
    instance as a list attribute named after the bucket (self.down,
    self.ssh, ...).  The lists start empty and are replaced on every call
    to updatebkts().
    """

    def __init__(self, cdb, allbuckets):
        Thread.__init__(self)
        self.codata = cdb
        self.updated = time.time()
        self.allbuckets = allbuckets
        # Bucket name -> URL-encoded CoMon "select" expression.
        self.comonbkts = {
            "down": "resptime%20==%200%20&&%20keyok==null",
            "ssh": "sshstatus%20%3E%202h",
            "clock_drift": "drift%20%3E%201m",
            "dns": "dns1udp%20%3E%2080%20&&%20dns2udp%20%3E%2080",
            "filerw": "filerw%3E0",
            "dbg": "keyok==0",
        }
        # Start every bucket empty so push() and outside readers do not hit
        # AttributeError before the first updatebkts() has completed.
        for bkt in self.comonbkts:
            setattr(self, bkt, [])

    def __tohash(self, rawdata):
        """
        Convert a CoMon CSV response into {first_field: {key: value}}.

        rawdata is a file-like object, or None when nothing was fetched.
        Its first line lists the keys with respect to field index; every
        following line is one row.  Fields are separated by ", ".

        Rows with fewer fields than the header (including blank lines) are
        skipped instead of discarding the whole result.  Returns {} when
        rawdata is None or cannot be read.
        """
        if rawdata is None:
            logger.debug("No hosts retrieved")
            return {}
        try:
            header = rawdata.readline()
            rows = rawdata.readlines()
        except Exception:
            # Deliberately broad: a failed read is treated as "no data"
            # so that it cannot take down the polling thread.
            logger.debug("No hosts retrieved", exc_info=True)
            return {}

        keys = header.rstrip().split(", ")
        hosts = {}
        for line in rows:
            fields = line.rstrip().split(", ")
            if len(fields) < len(keys):
                # Only report rows that actually carried some content.
                if line.strip():
                    logger.debug("COMON:  Skipping malformed row %r", line)
                continue
            # zip() stops at the shorter sequence, so surplus trailing
            # fields are ignored.
            hosts[fields[0]] = dict(zip(keys[1:], fields[1:]))
        logger.debug("Retrieved %s hosts", len(hosts))
        return hosts

    # Update individual buckets.  Hostnames only.
    def updatebkts(self):
        """Refresh each bucket attribute with the hostnames CoMon selects."""
        for (bkt, select) in self.comonbkts.items():
            logger.debug("COMON:  Updating bucket %s", bkt)
            query = COMONURL + "&format=formatcsv&select='" + select + "'"
            setattr(self, bkt, list(self.coget(query)))

    # Update ALL node information
    def updatedb(self):
        """Record the refresh time and merge the full node table into codata."""
        # Get time of update
        self.updated = time.time()
        # Make a hash, merge it into the shared dictionary.
        self.codata.update(self.coget(COMONURL + "&format=formatcsv"))

    def coget(self, url):
        """
        Fetch url and return it parsed by __tohash().

        Returns {} when the request raises urllib2.URLError; the failure is
        logged together with the URL that was actually requested.
        """
        try:
            coserv = urllib2.Request(url)
            coserv.add_header('User-Agent',
                              'PL_Monitor +http://monitor.planet-lab.org/')
            opener = urllib2.build_opener()
            # Initial web get from summer.cs in CSV
            rawdata = opener.open(coserv)
        except urllib2.URLError:
            # exc_info=True records the error without needing to bind it.
            logger.error("COMON:  URL error while attempting %s", url,
                         exc_info=True)
            return {}
        try:
            return self.__tohash(rawdata)
        finally:
            # Always release the HTTP response, even if parsing fails.
            rawdata.close()

    # Push nodes that are bad (in *a* bucket) into q(allbuckets)
    def push(self):
        """Put every hostname from every bucket onto the allbuckets queue."""
        for bucket in self.comonbkts:
            for host in getattr(self, bucket):
                self.allbuckets.put(host)

    def run(self):
        """Thread body: refresh everything, push, sleep COSLEEP, repeat."""
        while True:
            self.updatedb()
            self.updatebkts()
            self.push()
            time.sleep(COSLEEP)

    def __repr__(self):
        # __repr__ must return a string; returning self makes repr() raise
        # TypeError.
        return "<Comon hosts=%d updated=%s>" % (len(self.codata), self.updated)
109
def main():
    """
    Manual smoke test for the Comon thread.

    Sends "monitor" logger output to a stream handler at DEBUG level,
    starts a Comon thread, prints its "down" bucket after five seconds and,
    after five more, prints one line for every host in the database whose
    'keyok' field is "0".  Finishes by calling os._exit(0), which ends the
    process while the Comon thread is still running.
    """
    # os is otherwise imported only inside the __main__ guard; importing it
    # here keeps main() usable when called from another module.
    import os

    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    t = Queue.Queue()
    cdb = {}
    a = Comon(cdb, t)
    print(a.comonbkts)
    a.start()

    time.sleep(5)
    # The worker may not have filled the bucket yet; fall back to an empty
    # list rather than raising AttributeError.
    print(getattr(a, "down", []))

    time.sleep(5)
    # Iterate over a snapshot: the Comon thread updates cdb concurrently.
    for host, info in list(cdb.items()):
        if info.get('keyok') == "0":
            print("%s \t Bootstate %s nodetype %s kernver %s keyok %s" % (
                host,
                info.get('bootstate'),
                info.get('nodetype'),
                info.get('kernver'),
                info.get('keyok'),
            ))

    os._exit(0)
if __name__ == '__main__':
    # Imported here at module level so that main() can also reach os.
    import os

    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C: report it on stdout and in the log, then exit at once
        # with os._exit, which does not wait for the Comon thread.
        print("Killed.  Exitting.")
        logger.info('Monitor Killed')
        os._exit(0)