+# NOTE(review): `round` shadows the builtin round() at module scope; nothing in
+# the code visible here reads it -- confirm it is still needed.
round = 1
+# Running tally of processed nodes; incremented by checkAndRecordState().
count = 0
+def main():
+ """Run main2() with the module-level ``config`` object."""
+ main2(config)
-def main(config):
+def main2(config):
+    # Collect plccache.l_nodes and the result of get_nodeset(config), then
+    # hand both to checkAndRecordState().  That call's return value is
+    # discarded.
l_plcnodes = plccache.l_nodes
l_nodes = get_nodeset(config)
checkAndRecordState(l_nodes, l_plcnodes)
+# Node states assigned by check_node_state(): 'offline', 'online', 'good', 'down'.
+
+def check_node_state(rec, node):
+    """Update node.status / node.last_changed from the observation in rec.
+
+    rec is only read: rec.observed_status and, when rec.plc_node_stats is
+    truthy, its 'boot_state' and 'last_contact' entries.  node is modified
+    in place; nothing is returned.
+
+    The checks are independent ``if`` statements evaluated in order, so a
+    status assigned by one check is visible to the checks that follow it.
+
+    NOTE(review): the numeric arguments passed to changed_lessthan() and
+    changed_greaterthan() are presumably days -- confirm against those
+    helpers.
+    """
+    node_state = rec.observed_status
+    if rec.plc_node_stats:
+        boot_state = rec.plc_node_stats['boot_state']
+        last_contact = rec.plc_node_stats['last_contact']
+    else:
+        # No PLC stats at all: treated below like a node never contacted.
+        boot_state = "unknown"
+        last_contact = None
+
+    # Observed DOWN while recorded as up: mark offline and restart the clock.
+    if node_state == 'DOWN' and node.status in ('online', 'good'):
+        print "changed status from %s to offline" % node.status
+        node.status = 'offline'
+        node.last_changed = datetime.now()
+
+    # Observed BOOT while not yet recorded as online.
+    # NOTE(review): because of the changed_lessthan() guard, a record whose
+    # last_changed is presumably older than 0.5 keeps its current status even
+    # when the node is observed in BOOT -- confirm this is intended.
+    if node_state == 'BOOT' and changed_lessthan(node.last_changed, 0.5) and node.status != 'online':
+        print "changed status from %s to online" % node.status
+        node.status = 'online'
+        node.last_changed = datetime.now()
+
+    if node.status == 'online' and changed_greaterthan(node.last_changed, 0.5):
+        #send thank you notice, or on-line notice.
+        print "changed status from %s to good" % node.status
+        node.status = 'good'
+        # NOTE: do not reset last_changed, or you lose how long it's been up.
+
+    # TODO: not implemented yet --
+    #   offline for more than 1 (and pcu.status == 'good'): attempt reboots;
+    #   offline for more than 1.5 (and node.has_pcu): send PCU failure message.
+
+    if node.status == 'offline' and changed_greaterthan(node.last_changed, 2):
+        print "changed status from %s to down" % node.status
+        # send down node notice
+        node.status = 'down'
+        node.last_changed = datetime.now()
+
+    # Disabled or never-contacted nodes go down once their status has been
+    # unchanged for longer than 2*30, whatever was observed.
+    if (boot_state == 'disabled' or last_contact is None) and \
+            changed_greaterthan(node.last_changed, 2*30) and \
+            node.status != 'down':
+        print "changed status from %s to down" % node.status
+        node.status = 'down'
+        node.last_changed = datetime.now()
+
def checkAndRecordState(l_nodes, l_plcnodes):
+    """Refresh the HistoryNodeRecord of every hostname in l_nodes.
+
+    For each hostname: fetch the history record (created with status
+    'offline' via if_new_set when missing), stamp last_checked, look up
+    the newest FindbadNodeRecord, pass both to check_node_state(), and
+    print a summary line.  Hostnames whose lookup raises, or that have no
+    findbad record at all, are reported and skipped.  Pending changes are
+    flushed once after the loop.
+
+    l_plcnodes is kept in the signature but is no longer consulted.
+    Increments the module-level ``count``.  Returns True.
+    """
    global count
    for nodename in l_nodes:
-        d_node = None
-        for node in l_plcnodes:
-            if node['hostname'] == nodename:
-                d_node = node
-                break
-        if not d_node:
-            continue
-        pf = HistoryNodeRecord.findby_or_create(hostname=nodename)
-        pf.last_checked = datetime.now()
+        nodehist = HistoryNodeRecord.findby_or_create(hostname=nodename,
+                                if_new_set={'status' : 'offline',
+                                            'last_changed' : datetime.now()})
+        nodehist.last_checked = datetime.now()
        try:
            # Find the most recent record
            noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==nodename).order_by(FindbadNodeRecord.date_checked.desc()).first()
-            #print "NODEREC: ", noderec.date_checked
-        except:
+        except Exception:
            print "COULD NOT FIND %s" % nodename
            import traceback
            print "none object for %s"% nodename
            continue
-        node_state = noderec.observed_status
-        if noderec.plc_node_stats:
-            boot_state = noderec.plc_node_stats['boot_state']
-        else:
-            boot_state = "unknown"
-
-        if node_state == "BOOT":
-            if pf.status != "good":
-                pf.last_changed = datetime.now()
-            pf.status = "good"
-        elif node_state == "DEBUG":
-            if pf.status != boot_state:
-                pf.last_changed = datetime.now()
-            pf.status = boot_state
-        else:
-            if pf.status != "down":
-                pf.last_changed = datetime.now()
-            pf.status = "down"
+        if noderec is None:
+            # first() yields None instead of raising when no row matches;
+            # without this guard check_node_state() would fail on
+            # rec.observed_status and abort the whole run before the flush.
+            print "none object for %s" % nodename
+            continue
+        check_node_state(noderec, nodehist)
        count += 1
-        print "%d %35s %s since(%s)" % (count, nodename, pf.status, diff_time(time.mktime(pf.last_changed.timetuple())))
+        print "%d %35s %s since(%s)" % (count, nodename, nodehist.status, diff_time(time.mktime(nodehist.last_changed.timetuple())))
    # NOTE: this commits all pending operations to the DB. Do not remove, or
    # replace with another operations that also commits all pending ops, such
    # as session.commit() or flush() or something
-    print HistoryNodeRecord.query.count()
    session.flush()
+    print HistoryNodeRecord.query.count()
    return True
+# Script entry: build config via parsermodule.parse_args(parser), then run
+# main2(); an Exception raised by main2() is reported through its traceback.
config = parsermodule.parse_args(parser)
try:
-    main(config)
+    main2(config)
except Exception, err:
    import traceback
-    print traceback.print_exc()
+    # print_exc() writes the traceback itself and returns None, so wrapping
+    # it in ``print`` only appended a stray "None" line.
+    traceback.print_exc()