X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=policy.py;h=84bdb445a17e40086f1f07ff4f4eca6ea4f149ce;hb=cb01884d466b6250c1f7351e3b68c7c8a3699c62;hp=11ec4a7e5b74e57ac85c7abbef68ea1ecdcdc4d1;hpb=5772ce036b96297a23f834ea34ce4466ef4d522c;p=monitor.git diff --git a/policy.py b/policy.py index 11ec4a7..84bdb44 100755 --- a/policy.py +++ b/policy.py @@ -76,17 +76,17 @@ def main(hostnames, sitenames): # NOTE: there is a narrow window in which this command must be # evaluated, otherwise the notice will not go out. # this is not ideal. - sitehist.sendMessage('online_notice', hostname=host, viart=False) + sitehist.sendMessage('online_notice', hostname=host, viart=False, saveact=True) print "send message for host %s online" % host # if a node is offline and doesn't have a PCU, remind the user that they should have one. - if not nodehist.haspcu and nodehist.status in ['offline', 'down'] and \ - changed_greaterthan(nodehist.last_changed,1.0) and \ - not found_within(recent_actions, 'pcumissing_notice', 7.0): - - sitehist.sendMessage('pcumissing_notice', hostname=host) - print "send message for host %s pcumissing_notice" % host + #if not nodehist.haspcu and nodehist.status in ['offline', 'down'] and \ + # changed_greaterthan(nodehist.last_changed,1.0) and \ + # not found_within(recent_actions, 'pcumissing_notice', 7.0): + # + # sitehist.sendMessage('pcumissing_notice', hostname=host) + # print "send message for host %s pcumissing_notice" % host # if it is offline and HAS a PCU, then try to use it. if nodehist.haspcu and nodehist.status in ['offline', 'down'] and \ @@ -134,9 +134,13 @@ def main(hostnames, sitenames): for i,site in enumerate(sitenames): sitehist = SiteInterface.get_or_make(loginbase=site) siteblack = BlacklistRecord.get_by(loginbase=site) + skip_due_to_blacklist=False if siteblack and not siteblack.expired(): print "skipping %s due to blacklist. will expire %s" % (site, siteblack.willExpire() ) + skip_due_to_blacklist=True + sitehist.clearPenalty() + sitehist.applyPenalty() continue # TODO: make query only return records within a certin time range, @@ -144,33 +148,7 @@ def main(hostnames, sitenames): recent_actions = sitehist.getRecentActions(loginbase=site) print "%s %s %s" % (i, sitehist.db.loginbase, sitehist.db.status) - if sitehist.db.status == 'down': - if not found_within(recent_actions, 'pause_penalty', 30) and \ - not found_within(recent_actions, 'increase_penalty', 7) and \ - changed_greaterthan(sitehist.db.last_changed, 7): - - # TODO: catch errors - sitehist.increasePenalty() - #sitehist.applyPenalty() - sitehist.sendMessage('increase_penalty') - - print "send message for site %s penalty increase" % site - - if sitehist.db.status == 'good': - # clear penalty - # NOTE: because 'all clear' should have an indefinite status, we - # have a boolean value rather than a 'recent action' - if sitehist.db.penalty_applied: - # send message that penalties are cleared. - - sitehist.clearPenalty() - #sitehist.applyPenalty() - sitehist.sendMessage('clear_penalty') - sitehist.closeTicket() - - print "send message for site %s penalty cleared" % site - # find all ticket ids for site ( could be on the site record? ) # determine if there are penalties within the last 30 days? # if so, add a 'pause_penalty' action. if sitehist.db.message_id != 0 and sitehist.db.message_status == 'open' and \ @@ -178,6 +156,34 @@ def main(hostnames, sitenames): # pause escalation print "Pausing penalties for %s" % site sitehist.pausePenalty() + else: + + if sitehist.db.status == 'down': + if not found_within(recent_actions, 'pause_penalty', 30) and \ + not found_within(recent_actions, 'increase_penalty', 7) and \ + changed_greaterthan(sitehist.db.last_changed, 7): + + # TODO: catch errors + sitehist.increasePenalty() + sitehist.applyPenalty() + sitehist.sendMessage('increase_penalty') + + print "send message for site %s penalty increase" % site + + if sitehist.db.status == 'good': + # clear penalty + # NOTE: because 'all clear' should have an indefinite status, we + # have a boolean value rather than a 'recent action' + if sitehist.db.penalty_applied: + # send message that penalties are cleared. + + sitehist.clearPenalty() + sitehist.applyPenalty() + sitehist.sendMessage('clear_penalty') + sitehist.closeTicket() + + print "send message for site %s penalty cleared" % site + site_count = site_count + 1 @@ -251,6 +257,6 @@ if __name__ == "__main__": session.flush() sys.exit(0) except: - #email_exception() + email_exception() print traceback.print_exc(); print "fail all..."