7 from datetime import datetime,timedelta
9 from nodequery import verify,query_to_dict,node_select
11 from monitor.common import *
13 from monitor import config
14 from monitor.wrapper import plc,plccache
15 from monitor.const import MINUP
16 from monitor.database.info.model import FindbadNodeRecord, HistoryNodeRecord
17 from monitor.database.dborm import mon_session as session
19 from monitor.model import *
# Module-level PLC API handle obtained from plc.getAuthAPI().
21 api = plc.getAuthAPI()
# NOTE(review): the embedded line numbers jump from 21 to 30, and the three
# statements below call checkAndRecordState(), which is only defined further
# down in this file.  Presumably they are the body of a function whose `def`
# line is missing from this text -- confirm against version control.
#
# Take the node list held by plccache and the node set selected by `config`,
# then pass both to checkAndRecordState().
30 l_plcnodes = plccache.l_nodes
31 l_nodes = get_nodeset(config)
33 checkAndRecordState(l_nodes, l_plcnodes)
def check_node_state(rec, node):
    """Update a node's history status from its latest findbad observation.

    Arguments:
    rec  -- observation record; ``rec.observed_status`` and
            ``rec.plc_node_stats`` (a mapping with 'boot_state' and
            'last_contact' keys, or a false value) are read.
    node -- history record; ``node.status`` and ``node.last_changed`` are
            read and modified in place.

    Returns None.  Every status change is also reported on stdout.
    """
    node_state = rec.observed_status
    if rec.plc_node_stats:
        boot_state = rec.plc_node_stats['boot_state']
        last_contact = rec.plc_node_stats['last_contact']
    else:
        boot_state = "unknown"
        # No PLC stats means no recorded contact either.  Binding the name
        # here keeps the "extreme cases" test at the bottom from raising
        # NameError.
        last_contact = None

    # NOTE: 'DOWN' and 'DEBUG' are temporary states, so only need
    #       'translations' into the node.status state
    #       'BOOT' is a permanent state, but we want it to have a bit of
    #       hysteresis (less than 0.5 days)

    #################################################################
    # "Translate" the findbad states into nodebad status.

    # NOTE(review): this test spells the boot state 'disable' while the
    # "extreme cases" test below spells it 'disabled'.  Both are left as
    # found; confirm which spelling(s) PLC actually reports.
    if (node_state == 'DOWN'
            and node.status not in ('offline', 'down')
            and boot_state != 'disable'):
        print("changed status from %s to offline" % node.status)
        node.status = 'offline'
        node.last_changed = datetime.now()

    if node_state == 'DEBUG' and node.status != 'monitordebug':
        print("changed status from %s to monitordebug" % (node.status))
        node.status = "monitordebug"
        node.last_changed = datetime.now()

    if node_state == 'BOOT' and node.status not in ('online', 'good'):
        print("changed status from %s to online" % node.status)
        node.status = 'online'
        node.last_changed = datetime.now()

    #################################################################
    # Switch temporary hysteresis states into their 'firm' states.
    #
    # NOTE(review): the status assignments in the four branches below were
    # restored to match the message each branch prints; verify them against
    # the deployed copy.  The thresholds passed to changed_greaterthan() are
    # presumably in days (see the hysteresis note above) -- confirm.

    if node.status == 'online' and changed_greaterthan(node.last_changed, 0.5):
        print("changed status from %s to good" % node.status)
        node.status = 'good'
        # NOTE: do not reset last_changed, or you lose how long it's been up.

    if node.status == 'offline' and changed_greaterthan(node.last_changed, 2):
        print("changed status from %s to down" % node.status)
        node.status = 'down'
        # NOTE: do not reset last_changed, or you lose how long it's been down.

    if node.status == 'monitordebug' and changed_greaterthan(node.last_changed, 14):
        print("changed status from %s to down" % node.status)
        node.status = 'down'
        # NOTE: do not reset last_changed, or you lose how long it's been down.

    # extreme cases of offline nodes
    if ((boot_state == 'disabled' or last_contact is None)
            and changed_greaterthan(node.last_changed, 2 * 30)
            and node.status != 'down'):
        print("changed status from %s to down" % node.status)
        node.status = 'down'
        node.last_changed = datetime.now()
# For every hostname in l_nodes: fetch (or create) its HistoryNodeRecord,
# stamp last_checked, look up the latest FindbadNodeRecord, run
# check_node_state() on the pair and print a one-line summary.
#
#   l_nodes    -- iterable of hostnames.
#   l_plcnodes -- accepted but not referenced in the lines visible here.
#
# NOTE(review): this text has lost its indentation and, judging by the gaps
# in the embedded line numbers, several statements: there is no try/except
# around the lookup that the "COULD NOT FIND" message belongs to, no test in
# front of the "none object" message, and `count` is never assigned in the
# visible lines.  Restore those from version control before running.
97 def checkAndRecordState(l_nodes, l_plcnodes):
100 for nodename in l_nodes:
# Presumably if_new_set supplies the initial column values when the row has
# to be created: status 'offline', last_changed = now -- confirm against
# findby_or_create().
102 nodehist = HistoryNodeRecord.findby_or_create(hostname=nodename,
103 if_new_set={'status' : 'offline',
104 'last_changed' : datetime.now()})
105 nodehist.last_checked = datetime.now()
108 # Find the most recent record
109 noderec = FindbadNodeRecord.get_latest_by(hostname=nodename)
111 print "COULD NOT FIND %s" % nodename
# NOTE(review): traceback.print_exc() writes the traceback itself and returns
# None, so wrapping it in `print` additionally emits the text "None".  No
# import of `traceback` is visible in this file -- confirm it is in scope.
113 print traceback.print_exc()
117 print "none object for %s"% nodename
# Apply the observation to the history record (updates status/last_changed).
120 check_node_state(noderec, nodehist)
# Summary line: counter, hostname, current status, and the time since
# last_changed as formatted by diff_time().
123 print "%d %35s %s since(%s)" % (count, nodename, nodehist.status, diff_time(time.mktime(nodehist.last_changed.timetuple())))
125 # NOTE: this commits all pending operations to the DB. Do not remove, or
126 # replace with another operation that also commits all pending ops, such
127 # as session.commit() or flush() or something
129 print HistoryNodeRecord.query.count()
# Script entry point: build the command-line parser, parse the arguments into
# `config`, and report any exception raised by the run.
#
# NOTE(review): the embedded line numbers jump from 138 to 142, so the `try:`
# and the call it guards are missing from this text; the `except` below has
# nothing to attach to as written.  Restore from version control.
133 if __name__ == '__main__':
134 from monitor import parser as parsermodule
# Start from the 'nodesets' option group and preset its node-selection
# options to "nothing selected".
135 parser = parsermodule.getParser(['nodesets'])
136 parser.set_defaults(filename=None, node=None, nodeselect=False, nodegroup=None, cachenodes=False)
# Extend the same parser with the 'defaults' option group.
137 parser = parsermodule.getParser(['defaults'], parser)
# Rebinds the module-level name `config` (imported from `monitor` at the top
# of the file) to the parsed arguments.
138 config = parsermodule.parse_args(parser)
# `except Exception, err` is Python 2-only syntax.
142 except Exception, err:
# NOTE(review): traceback.print_exc() returns None, so this `print` also emits
# the text "None"; no import of `traceback` is visible in this file.
144 print traceback.print_exc()
145 print "Exception: %s" % err