add email_exception() to all except: statements.
[monitor.git] / nodecommon.py
1
2 import struct
3 import reboot
4 import time
5 import util.file
6 import plc
7 from datetime import datetime 
8 from monitor import database
9 from unified_model import PersistFlags
# ANSI terminal colour sequences used to highlight console output.
#
# The escape character is the single byte 0x1b.  It is written as a literal
# rather than struct.pack('i', 27): packing a native int produces four bytes
# (ESC plus three NUL padding bytes, ordered by host endianness), which put
# NULs into every coloured string and, on Python 3, yields bytes that cannot
# be concatenated with the str suffixes below.
esc = "\x1b"
RED = esc + "[1;31m"
GREEN = esc + "[1;32m"
YELLOW = esc + "[1;33m"
BLUE = esc + "[1;34m"
LIGHTBLUE = esc + "[1;36m"
# Sequence appended after coloured text by the helper functions.
NORMAL = esc + "[0;39m"
17
def red(str):
    """Return ``str`` prefixed with RED and suffixed with NORMAL."""
    painted = RED + str
    return painted + NORMAL
20
def yellow(str):
    """Return ``str`` prefixed with YELLOW and suffixed with NORMAL."""
    painted = YELLOW + str
    return painted + NORMAL
23
def green(str):
    """Return ``str`` prefixed with GREEN and suffixed with NORMAL."""
    painted = GREEN + str
    return painted + NORMAL
26
def lightblue(str):
    """Return ``str`` prefixed with LIGHTBLUE and suffixed with NORMAL."""
    painted = LIGHTBLUE + str
    return painted + NORMAL
29
def blue(str):
    """Return ``str`` prefixed with BLUE and suffixed with NORMAL."""
    painted = BLUE + str
    return painted + NORMAL
32
def get_current_state(fbnode):
    """Return the lower-cased 'state' entry of ``fbnode``.

    When ``fbnode`` has no 'state' key the result is "none".  The value
    "debug" is reported as 'dbg ' (with a trailing space).
    """
    current = (fbnode['state'] if 'state' in fbnode else "none").lower()
    return 'dbg ' if current == "debug" else current
41
def color_pcu_state(fbnode):
    """Return the node's PCU label, tagged with a status suffix when known.

    Despite the name, no colour codes are added; the result is plain text.

    * If ``fbnode`` has no non-empty ``fbnode['plcnode']['pcu_ids']`` list,
      return ``fbnode['pcu']`` unchanged, or 'NOPCU' when there is no 'pcu'
      key.
    * Otherwise look up the first PCU id with ``reboot.get_pcu_values``.  A
      ``None`` result returns ``fbnode['pcu']`` unchanged.
    * Otherwise append a four-character status to ``fbnode['pcu']``:
      "OK  " when the 'reboot' value is 0 or "0", "DOWN" when it is
      "NetDown" or "Not_Run", and "BAD " for anything else, including a
      missing 'reboot' entry.
    """
    has_pcu_id = ('plcnode' in fbnode
                  and 'pcu_ids' in fbnode['plcnode']
                  and len(fbnode['plcnode']['pcu_ids']) > 0)
    if not has_pcu_id:
        if 'pcu' not in fbnode:
            return 'NOPCU'
        return fbnode['pcu']

    values = reboot.get_pcu_values(fbnode['plcnode']['pcu_ids'][0])
    if values is None:
        return fbnode['pcu']

    if 'reboot' not in values:
        return fbnode['pcu'] + "BAD "

    rb = values['reboot']
    if rb == 0 or rb == "0":
        suffix = "OK  "
    elif rb == "NetDown" or rb == "Not_Run":
        suffix = "DOWN"
    else:
        suffix = "BAD "
    return fbnode['pcu'] + suffix
69
def color_boot_state(l):
    """Return the boot-state label ``l`` wrapped in its display colour.

    "dbg" and "dbg " are shown as a yellow "debg", "disable" as a red
    "dsbl"; "diag", "down", "boot" and "rins" keep their text and are
    coloured light blue, red, green and blue respectively.  Any other value
    is returned unchanged.
    """
    if l == "dbg" or l == "dbg ":
        return yellow("debg")
    if l == "diag":
        return lightblue(l)
    if l == "disable":
        return red("dsbl")
    if l == "down":
        return red(l)
    if l == "boot":
        return green(l)
    if l == "rins":
        return blue(l)
    return l
80
def diff_time(timestamp, abstime=True):
    """Format an elapsed time as a short "<n> <unit> ago" string.

    timestamp -- seconds since the epoch when ``abstime`` is true, otherwise
                 an already-computed duration in seconds.  ``None`` yields
                 "unknown".
    abstime   -- when true the duration is ``time.time() - timestamp``.

    The unit is chosen by magnitude: sec (< 1 minute), min (< 1 hour),
    hrs (< 1 day), days (< 14 days), wks (<= 30 days), mnths (> 30 days,
    using 30-day months).  All units are rounded up except months, which
    are truncated.
    """
    import math

    if timestamp is None:
        return "unknown"

    # Only read the clock when the caller passed an absolute timestamp.
    if abstime:
        diff = time.time() - timestamp
    else:
        diff = timestamp

    minute = 60
    hour = 60 * minute
    day = 24 * hour
    week = 7 * day
    month = 30 * day  # approximate month length

    # (exclusive upper bound, seconds per unit, unit label).  Days are used
    # all the way up to two weeks before switching to weeks.
    for limit, unit, label in (
        (minute, 1, "sec"),
        (hour, minute, "min"),
        (day, hour, "hrs"),
        (2 * week, day, "days"),
    ):
        if diff < limit:
            return "%s %s ago" % (int(math.ceil(diff / unit)), label)

    if diff <= month:
        return "%s wks ago" % int(math.ceil(diff / week))
    if diff > month:
        return "%s mnths ago" % int(diff / month)
    # Reached only when no comparison holds (e.g. NaN).
    return ""
111
def getvalue(fb, path):
    """Look up a nested value by a "/"-separated key path.

    ``path`` is split on "/" and each component is used in turn as a key
    into the current container, starting from ``fb``.  Returns the value
    reached, or ``None`` as soon as a component cannot be followed: the key
    is absent, or the current value is a leaf that cannot be searched or
    indexed by the key (for example a number or a string).
    """
    values = fb
    for index in path.split("/"):
        try:
            # Membership is tested first so that containers which create
            # entries on access (e.g. defaultdict) are not modified.
            if index not in values:
                return None
            values = values[index]
        except (TypeError, KeyError, IndexError):
            # The path continues past a non-container value.
            return None
    return values
121
def nodegroup_display(node, fb, conf=None):
    """Fill in display fields on ``node`` and return its one-line summary.

    node -- dict describing the node; must already hold 'hostname',
            'boot_state', 'key' and 'last_contact'.  It is updated in place
            with 'current', 'kernel', 'pcu', 'lastupdate', 'lc' and 'uptime'.
    fb   -- dict whose ``fb['nodes'][hostname]['values']`` holds the recorded
            values for each host ('kernel', 'pcu', 'comonstats', and
            optionally 'state').
    conf -- optional object; unless ``conf.nocolor`` is true, 'boot_state'
            and 'current' are passed through color_boot_state().

    Returns the formatted line, or "" when ``fb`` has no record for the host
    or its recorded values are an empty list.
    """
    hostname = node['hostname']

    if hostname not in fb['nodes']:
        # No record for this host: there is nothing to display.  The original
        # code set 'current' here and then raised KeyError on the next lookup.
        node['current'] = 'none'
        return ""

    values = fb['nodes'][hostname]['values']
    node['current'] = get_current_state(values)
    if values == []:
        return ""

    # Keep the third whitespace-separated field of the kernel string when
    # there is one, and blank anything that does not mention '2.6'.
    kernel = values['kernel']
    fields = kernel.split()
    if len(fields) >= 3:
        node['kernel'] = fields[2]
    else:
        node['kernel'] = kernel
    if '2.6' not in node['kernel']:
        node['kernel'] = ""

    if conf and not conf.nocolor:
        node['boot_state'] = color_boot_state(node['boot_state'])
        node['current'] = color_boot_state(node['current'])

    node['pcu'] = values['pcu']
    node['lastupdate'] = diff_time(node['last_contact'])

    pf = PersistFlags(hostname, 1, db='node_persistflags')
    try:
        node['lc'] = diff_time(pf.last_changed)
    except Exception:
        # Best effort: a missing or unusable last_changed is shown as "err".
        node['lc'] = "err"

    # The uptime value "null" is displayed as-is; anything else is formatted
    # as a duration.
    ut = values['comonstats']['uptime']
    if ut != "null":
        ut = diff_time(float(ut), False)
    node['uptime'] = ut

    return "%(hostname)-42s %(boot_state)8s %(current)5s %(pcu)6s %(key)10.10s... %(kernel)35.35s %(lastupdate)12s, %(lc)s, %(uptime)s" % node
156
def datetime_fromstr(str):
    """Parse a date string into a ``datetime``.

    Accepted forms:
      * "YYYY-MM-DD"
      * "YYYY-MM-DD-HH:MM"
      * "MM/DD/YYYY" (used for any string that contains no '-')

    Raises ValueError when the string matches none of these formats.
    """
    if '-' in str:
        try:
            tup = time.strptime(str, "%Y-%m-%d")
        except ValueError:
            # Not a bare date; retry with the hour:minute suffix.
            tup = time.strptime(str, "%Y-%m-%d-%H:%M")
    else:
        tup = time.strptime(str, "%m/%d/%Y")
    return datetime.fromtimestamp(time.mktime(tup))
169
def get_nodeset(config):
    """
        Resolve the node-selection options on ``config`` to hostnames.

        Exactly one of config.nodelist (file of hostnames), config.node
        (single hostname), config.nodegroup or config.site is honoured, in
        that order; with none set, every node in the cached "l_plcnodes"
        list is used.  If config.nodeselect is set, the result is instead
        whatever node_select() returns for the "findbad" data.
    """
    api = plc.getAuthAPI()
    l_nodes = database.dbLoad("l_plcnodes")

    if config.nodelist:
        wanted = util.file.getListFromFile(config.nodelist)
        l_nodes = [n for n in l_nodes if n['hostname'] in wanted]
    elif config.node:
        wanted = [config.node]
        l_nodes = [n for n in l_nodes if n['hostname'] in wanted]
    elif config.nodegroup:
        groups = api.GetNodeGroups({'name' : config.nodegroup})
        l_nodes = api.GetNodes(groups[0]['node_ids'], ['hostname'])
    elif config.site:
        sites = api.GetSites(config.site)
        l_nodes = api.GetNodes(sites[0]['node_ids'], ['hostname'])

    hostnames = [node['hostname'] for node in l_nodes]

    # The nodeselect query runs last and replaces the list built above, so
    # the hostname filtering in the earlier branches is not affected by it.
    # NOTE(review): node_select is not imported in the visible part of this
    # file -- confirm it is in scope.
    if config.nodeselect:
        fb = database.dbLoad("findbad")
        hostnames = node_select(config.nodeselect, fb['nodes'].keys(), fb)

    return hostnames
200         
def email_exception(content=None):
    """Mail the traceback of the exception currently being handled.

    The message body is ``traceback.format_exc()``, preceded by ``content``
    and a newline when ``content`` is given.  It is sent with the subject
    "exception running monitor" to ``config.cc_email``.
    """
    import config
    from unified_model import Message
    import traceback

    report = traceback.format_exc()
    if content:
        report = content + "\n" + report
    Message("exception running monitor", report, False).send([config.cc_email])