# if it is offline and HAS a PCU, then try to use it.
if nodehist.haspcu and nodehist.status in ['offline', 'down'] and \
changed_greaterthan(nodehist.last_changed,1.0) and \
+ not nodehist.firewall and \
not found_between(recent_actions, 'try_reboot', 3.5, 1):
sitehist.attemptReboot(host)
# will be false for a day after the above condition is satisfied
if nodehist.haspcu and nodehist.status in ['offline', 'down'] and \
changed_greaterthan(nodehist.last_changed,1.5) and \
+ not nodehist.firewall and \
found_between(recent_actions, 'try_reboot', 3.5, 1) and \
not found_within(recent_actions, 'pcufailed_notice', 3.5):
sitehist.sendMessage('pcufailed_notice', hostname=host)
print "send message for host %s PCU Failure" % host
- if nodehist.status == 'monitordebug' and \
+ if nodehist.status == 'failboot' and \
changed_greaterthan(nodehist.last_changed, 1) and \
not found_between(recent_actions, 'bootmanager_restore', 0.5, 0):
# send down node notice
# sitehist.sendMessage('retry_bootman', hostname=host)
if nodehist.status == 'down' and \
- changed_greaterthan(nodehist.last_changed, 2) and \
- not found_within(recent_actions, 'down_notice', 3.5):
- # send down node notice
-
- sitehist.sendMessage('down_notice', hostname=host)
- print "send message for host %s down" % host
+ changed_greaterthan(nodehist.last_changed, 2):
+ if not nodehist.firewall and not found_within(recent_actions, 'down_notice', 3.5):
+ # send down node notice
+ sitehist.sendMessage('down_notice', hostname=host)
+ print "send message for host %s down" % host
+
+ if nodehist.firewall and not found_within(recent_actions, 'firewall_notice', 3.5):
+ # send firewall notice
+ #email_exception(host, "firewall_notice")
+ sitehist.sendMessage('firewall_notice', hostname=host)
+ print "send message for host %s down" % host
node_count = node_count + 1
print "time: ", time.strftime('%Y-%m-%d %H:%M:%S')
if siteblack and not siteblack.expired():
print "skipping %s due to blacklist. will expire %s" % (site, siteblack.willExpire() )
skip_due_to_blacklist=True
+ sitehist.clearPenalty()
+ sitehist.applyPenalty()
continue
# TODO: make query only return records within a certain time range,