X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=nodebad.py;h=9ba6a32b2e49c49e8cae2ebe005d89817d16b807;hb=32e64e33bc81735e22024c5a44510848bb3c88df;hp=c3aae39619d18335e985f1d62f2da2584d053ce5;hpb=6a452e8ece2ca8a47105c128eaebc38507bc76c5;p=monitor.git diff --git a/nodebad.py b/nodebad.py index c3aae39..9ba6a32 100755 --- a/nodebad.py +++ b/nodebad.py @@ -41,18 +41,22 @@ def check_node_state(rec, node): print rec.plc_node_stats boot_state = rec.plc_node_stats['boot_state'] last_contact = rec.plc_node_stats['last_contact'] + node.plc_nodeid = rec.plc_node_stats['node_id'] else: boot_state = "unknown" last_contact = None if boot_state == 'disable': boot_state = 'disabled' - if boot_state == 'diag': boot_state = 'diagnose' + if boot_state == 'diag' or boot_state == 'diagnose': boot_state = 'safeboot' if len(rec.plc_node_stats['pcu_ids']) > 0: node.haspcu = True else: node.haspcu = False + node.firewall = rec.firewall + + # NOTE: 'DOWN' and 'DEBUG' are temporary states, so only need # 'translations' into the node.status state # 'BOOT' is a permanent state, but we want it to have a bit of @@ -61,15 +65,36 @@ def check_node_state(rec, node): ################################################################# # "Initialize" the findbad states into nodebad status if they are not already set - if node_state == 'DOWN' and ( node.status != 'offline' and node.status != 'down' ) and boot_state != 'disabled' : - print "changed status from %s to offline" % node.status - node.status = 'offline' - node.last_changed = datetime.now() + if node_state == 'DOWN': + if boot_state == 'disabled' and changed_lessthan(node.last_changed, 60) and \ + node.status != 'disabled': + # NOTE: if changed less than 2 months, then we can allow this. + # otherwise, apply 'down' status after greater than 2 months (below). + + print "changed status from %s to %s" % (node.status, boot_state) + node.status = boot_state + node.last_changed = datetime.now() + + if node.status not in ['offline', 'down', 'disabled']: + print "changed status from %s to offline" % node.status + node.status = 'offline' + node.last_changed = datetime.now() + + + #if node_state == 'DOWN' and node.status not in ['offline', 'down', 'disabled']: + # if boot_state != 'disabled': + # print "changed status from %s to offline" % node.status + # node.status = 'offline' + # node.last_changed = datetime.now() + # else: + # print "changed status from %s to %s" % (node.status, boot_state) + # node.status = boot_state + # node.last_changed = datetime.now() if node_state == 'DEBUG' and node.status != 'monitordebug' and \ node.status != 'disabled' and \ - node.status != 'diagnose': - if boot_state != 'disabled' and boot_state != 'diagnose': + node.status != 'safeboot': + if boot_state != 'disabled' and boot_state != 'safeboot': print "changed status from %s to monitordebug" % (node.status) node.status = "monitordebug" @@ -89,7 +114,7 @@ def check_node_state(rec, node): # online -> good after half a day # offline -> down after two days # monitordebug -> down after 30 days - # diagnose -> monitordebug after 60 days + # safeboot -> monitordebug after 60 days # disabled -> down after 60 days if node.status == 'online' and changed_greaterthan(node.last_changed, 0.5): @@ -107,7 +132,7 @@ def check_node_state(rec, node): node.status = 'down' # NOTE: do not reset last_changed, or you lose how long it's been down. - if node.status == 'diagnose' and changed_greaterthan(node.last_changed, 60): + if node.status == 'safeboot' and changed_greaterthan(node.last_changed, 60): print "changed status from %s to down" % node.status # NOTE: change an admin mode back into monitordebug after two months. node.status = 'monitordebug'