X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=nodecommon.py;h=2250492ad1e4eb9dcd1acffc50fa090679514457;hb=refs%2Fheads%2F1.0;hp=0f3d0fb3a5889023ea00858c1b9d61ca1387111f;hpb=f38420ff4bc9fb114db5f62b01cc8990dc0a0af2;p=monitor.git diff --git a/nodecommon.py b/nodecommon.py index 0f3d0fb..2250492 100644 --- a/nodecommon.py +++ b/nodecommon.py @@ -1,11 +1,18 @@ import struct import reboot +import time +import util.file +import plc +from datetime import datetime +from monitor import database +from unified_model import PersistFlags esc = struct.pack('i', 27) RED = esc + "[1;31m" GREEN = esc + "[1;32m" YELLOW = esc + "[1;33m" BLUE = esc + "[1;34m" +LIGHTBLUE = esc + "[1;36m" NORMAL = esc + "[0;39m" def red(str): @@ -17,6 +24,9 @@ def yellow(str): def green(str): return GREEN + str + NORMAL +def lightblue(str): + return LIGHTBLUE + str + NORMAL + def blue(str): return BLUE + str + NORMAL @@ -30,19 +40,22 @@ def get_current_state(fbnode): return l def color_pcu_state(fbnode): - import plc if 'plcnode' in fbnode and 'pcu_ids' in fbnode['plcnode'] and len(fbnode['plcnode']['pcu_ids']) > 0 : values = reboot.get_pcu_values(fbnode['plcnode']['pcu_ids'][0]) if values == None: return fbnode['pcu'] else: - return fbnode['pcu'] + if 'pcu' not in fbnode: + return 'NOPCU' + else: + return fbnode['pcu'] if 'reboot' in values: rb = values['reboot'] if rb == 0 or rb == "0": return fbnode['pcu'] + "OK " + #return fbnode['pcu'] + "OK " #return green(fbnode['pcu']) elif "NetDown" == rb or "Not_Run" == rb: return fbnode['pcu'] + "DOWN" @@ -55,42 +68,59 @@ def color_pcu_state(fbnode): return fbnode['pcu'] + "BAD " def color_boot_state(l): - if l == "dbg": return yellow("dbg ") - elif l == "dbg ": return yellow(l) + if l == "dbg": return yellow("debg") + elif l == "dbg ": return yellow("debg") + elif l == "diag": return lightblue(l) + elif l == "disable": return red("dsbl") elif l == "down": return red(l) elif l == "boot": return green(l) elif l == "rins": return blue(l) + elif l == "reinstall": return blue(l) else: return l -def diff_time(timestamp): +def diff_time(timestamp, abstime=True): + import math now = time.time() if timestamp == None: return "unknown" - diff = now - timestamp + if abstime: + diff = now - timestamp + else: + diff = timestamp # return the number of seconds as a difference from current time. t_str = "" if diff < 60: # sec in min. - t = diff // 1 - t_str = "%s sec ago" % t + t = diff / 1 + t_str = "%s sec ago" % int(math.ceil(t)) elif diff < 60*60: # sec in hour - t = diff // (60) - t_str = "%s min ago" % int(t) + t = diff / (60) + t_str = "%s min ago" % int(math.ceil(t)) elif diff < 60*60*24: # sec in day - t = diff // (60*60) - t_str = "%s hrs ago" % int(t) - elif diff < 60*60*24*7: # sec in week - t = diff // (60*60*24) - t_str = "%s days ago" % int(t) - elif diff < 60*60*24*30: # approx sec in month - t = diff // (60*60*24*7) - t_str = "%s wks ago" % int(t) + t = diff / (60*60) + t_str = "%s hrs ago" % int(math.ceil(t)) + elif diff < 60*60*24*14: # sec in week + t = diff / (60*60*24) + t_str = "%s days ago" % int(math.ceil(t)) + elif diff <= 60*60*24*30: # approx sec in month + t = diff / (60*60*24*7) + t_str = "%s wks ago" % int(math.ceil(t)) elif diff > 60*60*24*30: # approx sec in month - t = diff // (60*60*24*7*30) + t = diff / (60*60*24*30) t_str = "%s mnths ago" % int(t) return t_str -def nodegroup_display(node, fb): +def getvalue(fb, path): + indexes = path.split("/") + values = fb + for index in indexes: + if index in values: + values = values[index] + else: + return None + return values + +def nodegroup_display(node, fb, conf=None): if node['hostname'] in fb['nodes']: node['current'] = get_current_state(fb['nodes'][node['hostname']]['values']) else: @@ -106,40 +136,76 @@ def nodegroup_display(node, fb): node['kernel'] = fb['nodes'][node['hostname']]['values']['kernel'] if '2.6' not in node['kernel']: node['kernel'] = "" - node['boot_state'] = color_boot_state(node['boot_state']) - node['current'] = color_boot_state(node['current']) + if conf and not conf.nocolor: + node['boot_state'] = color_boot_state(node['boot_state']) + node['current'] = color_boot_state(node['current']) #node['boot_state'] = node['boot_state'] #node['current'] = node['current'] node['pcu'] = fb['nodes'][node['hostname']]['values']['pcu'] node['lastupdate'] = diff_time(node['last_contact']) - - return "%(hostname)-38s %(boot_state)5s %(current)5s %(pcu)6s %(key)45s %(kernel)32s %(lastupdate)12s " % node - -from model import * -import soltesz - -def node_end_record(node): - act_all = soltesz.dbLoad("act_all") - if node not in act_all: - del act_all - return False - - if len(act_all[node]) == 0: - del act_all - return False - - a = Action(node, act_all[node][0]) - a.delField('rt') - a.delField('found_rt_ticket') - a.delField('second-mail-at-oneweek') - a.delField('second-mail-at-twoweeks') - a.delField('first-found') - rec = a.get() - rec['action'] = ["close_rt"] - rec['category'] = "UNKNOWN" - rec['stage'] = "monitor-end-record" - rec['time'] = time.time() - 7*60*60*24 - act_all[node].insert(0,rec) - soltesz.dbDump("act_all", act_all) - del act_all - return True + pf = PersistFlags(node['hostname'], 1, db='node_persistflags') + try: + node['lc'] = diff_time(pf.last_changed) + except: + node['lc'] = "err" + ut = fb['nodes'][node['hostname']]['values']['comonstats']['uptime'] + if ut != "null": + ut = diff_time(float(fb['nodes'][node['hostname']]['values']['comonstats']['uptime']), False) + node['uptime'] = ut + + return "%(hostname)-42s %(boot_state)8s %(current)5s %(pcu)6s %(key)10.10s... %(kernel)35.35s %(lastupdate)12s, %(lc)s, %(uptime)s" % node + +def datetime_fromstr(str): + if '-' in str: + try: + tup = time.strptime(str, "%Y-%m-%d") + except: + tup = time.strptime(str, "%Y-%m-%d-%H:%M") + elif '/' in str: + tup = time.strptime(str, "%m/%d/%Y") + else: + tup = time.strptime(str, "%m/%d/%Y") + ret = datetime.fromtimestamp(time.mktime(tup)) + return ret + +def get_nodeset(config): + """ + Given the config values passed in, return the set of hostnames that it + evaluates to. + """ + api = plc.getAuthAPI() + l_nodes = database.dbLoad("l_plcnodes") + + if config.nodelist: + f_nodes = util.file.getListFromFile(config.nodelist) + l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes) + elif config.node: + f_nodes = [config.node] + l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes) + elif config.nodegroup: + ng = api.GetNodeGroups({'groupname' : config.nodegroup}) + l_nodes = api.GetNodes(ng[0]['node_ids'], ['hostname']) + elif config.site: + site = api.GetSites(config.site) + l_nodes = api.GetNodes(site[0]['node_ids'], ['hostname']) + + l_nodes = [node['hostname'] for node in l_nodes] + + # perform this query after the above options, so that the filter above + # does not break. + if config.nodeselect: + fb = database.dbLoad("findbad") + l_nodes = node_select(config.nodeselect, fb['nodes'].keys(), fb) + + return l_nodes + +def email_exception(content=None): + import config + from unified_model import Message + import traceback + msg=traceback.format_exc() + if content: + msg = content + "\n" + msg + m=Message("exception running monitor", msg, False) + m.send([config.cc_email]) + return