changes for 3.0
[monitor.git] / clean_policy.py
index a14016e..2dd737b 100644 (file)
@@ -2,7 +2,6 @@ import config
 import database 
 import time
 import mailer
-from unified_model import cmpCategoryVal
 import sys
 import emailTxt
 import string
@@ -84,6 +83,7 @@ class MonitorMergeDiagnoseSendEscellate:
                fbnode['log'] = None
                fbnode['time'] = time.time()
                fbnode['email'] = TECH
+               fbnode['action-level'] = 0
                fbnode['action'] = ['noop']
                fbnode['date_created'] = time.time()
 
@@ -92,8 +92,10 @@ class MonitorMergeDiagnoseSendEscellate:
                        actnode.update(fbnode)
                        actnode['ticket_id'] = ""
                        actnode['prev_category'] = "ERROR" 
+                       actnode['prev_state'] = "DOWN" 
                else:
                        actnode['prev_category']= actnode['category']
+                       actnode['prev_state']   = actnode['state']
                        actnode['comonstats']   = fbnode['comonstats']
                        actnode['category']             = fbnode['category']
                        actnode['state']                = fbnode['state']
@@ -115,6 +117,10 @@ class MonitorMergeDiagnoseSendEscellate:
                actnode= self.getActionRecord()
                actrec = self.mergeRecord(fbnode, actnode)
                record = Record(self.hostname, actrec)
+               #print record
+               #print actrec
+               #print record.data['time']
+               #print time.time() - record.data['time']
                diag   = self.diagnose(record)
                if self.act and diag is not None:
                        self.action(record,diag)
@@ -137,6 +143,9 @@ class MonitorMergeDiagnoseSendEscellate:
                        if 'resolved' in ticket['Status']:
                                diag.setFlag('RTEndRecord')
 
+               # NOTE: assign a default message_series here to avoid the errors seen for
+               # planetlab1.ias.csusb.edu, which seems to have an out-of-date node config
+               record.data['message_series'] = emailTxt.mailtxt.newdown
                # NOTE: take category, and prepare action
                category = record.getCategory()
                if category == "error":
@@ -171,7 +180,7 @@ class MonitorMergeDiagnoseSendEscellate:
                print "diagnose: checkStageAndTime Returned Valid Record"
                site = PersistFlags(self.loginbase, 1, db='site_persistflags')
 
-               if site.status != "good":
+               if "good" not in site.status: #  != "good":
                        print "diagnose: Setting site %s for 'squeeze'" % self.loginbase
                        diag.setFlag('Squeeze')
                else:
@@ -191,11 +200,12 @@ class MonitorMergeDiagnoseSendEscellate:
                #print record.data['stage']
                #print "improvement" in record.data['stage']
                #print self.getSendEmailFlag(record)
-               if self.getSendEmailFlag(record) or "monitor-end-record" in record.data['stage']: 
+               print "%s %s DAYS DOWN" % ( self.hostname, Record.getDaysDown(record.data) )
+               if ( self.getSendEmailFlag(record) and Record.getDaysDown(record.data) >= 2 ) or \
+                       "monitor-end-record" in record.data['stage']:
                        print "action: getting message"
                        message = record.getMessage(record.data['ticket_id'])
                        if message:
-                               #message.reset()
                                print "action: sending email"
                                message.send(record.getContacts())
                                #print "DEBUG NOT SENDING MESSAGE WHEN I SHOULD BE!!!!!"
@@ -206,10 +216,14 @@ class MonitorMergeDiagnoseSendEscellate:
                                        print "action: setting record ticket_id"
                                        record.data['ticket_id'] = message.rt.ticket_id
 
-                       if (record.data['takeaction'] and diag.getFlag('Squeeze') ) or diag.getFlag('BackOff'):
-                               print "action: taking action"
-                               record.takeAction()
+                       if ( record.data['takeaction'] and diag.getFlag('Squeeze') ): 
+                               print "action: taking squeeze action"
+                               record.takeAction(record.data['action-level'])
                                diag.resetFlag('Squeeze')
+                               diag.save()
+                       if diag.getFlag('BackOff'):
+                               print "action: taking backoff action"
+                               record.takeAction(0)
                                diag.resetFlag('BackOff')
                                diag.save()
 
@@ -235,7 +249,7 @@ class MonitorMergeDiagnoseSendEscellate:
                                        diag.save()
 
                else:
-                       print "NOT sending email : %s %s" % (config.mail, record.data['rt'])
+                       print "NOT sending email : %s" % config.mail
 
                return
 
@@ -306,6 +320,7 @@ class MonitorMergeDiagnoseSendEscellate:
                        record.data['message'] = record.data['message_series'][0]
                        record.data['stage'] = 'stage_actinoneweek'
                        record.data['save-act-all'] = True
+                       record.data['action-level'] = 0
 
                elif 'reboot_node' in record.data['stage']:
                        record.data['email'] = TECH
@@ -314,6 +329,7 @@ class MonitorMergeDiagnoseSendEscellate:
                        record.data['stage'] = 'stage_actinoneweek'
                        record.data['takeaction'] = False
                        record.data['save-act-all'] = False
+                       record.data['action-level'] = 0
                        
                elif 'improvement' in record.data['stage']:
                        print "checkStageAndTime: backing off of %s" % self.hostname
@@ -322,6 +338,7 @@ class MonitorMergeDiagnoseSendEscellate:
                        record.data['message'] = record.data['message_series'][0]
                        record.data['stage'] = 'monitor-end-record'
                        record.data['save-act-all'] = True
+                       record.data['action-level'] = 0
 
                elif 'actinoneweek' in record.data['stage']:
                        if delta >= 7 * SPERDAY: 
@@ -333,6 +350,7 @@ class MonitorMergeDiagnoseSendEscellate:
                                record.data['time'] = current_time              # reset clock for waitforever
                                record.data['takeaction'] = True
                                record.data['save-act-all'] = True
+                               record.data['action-level'] = 1
                        elif delta >= 3* SPERDAY and not 'second-mail-at-oneweek' in record.data:
                                print "checkStageAndTime: second message in one week"
                                record.data['email'] = TECH 
@@ -341,11 +359,13 @@ class MonitorMergeDiagnoseSendEscellate:
                                record.data['second-mail-at-oneweek'] = True
                                record.data['takeaction'] = False
                                record.data['save-act-all'] = True
+                               record.data['action-level'] = 0
                        else:
                                record.data['message'] = None
                                record.data['action'] = ['waitforoneweekaction' ]
                                record.data['takeaction'] = False
                                record.data['save-act-all'] = False
+                               record.data['action-level'] = 0
                                print "checkStageAndTime: ignoring this record for: %s" % self.hostname
                                #return None                    # don't send if there's no action
 
@@ -359,6 +379,7 @@ class MonitorMergeDiagnoseSendEscellate:
                                record.data['time'] = current_time              # reset clock for waitforever
                                record.data['takeaction'] = True
                                record.data['save-act-all'] = True
+                               record.data['action-level'] = 2
                        elif delta >= 3* SPERDAY and not 'second-mail-at-twoweeks' in record.data:
                                print "checkStageAndTime: second message in one week for stage two"
                                record.data['email'] = TECH | PI
@@ -367,12 +388,14 @@ class MonitorMergeDiagnoseSendEscellate:
                                record.data['second-mail-at-twoweeks'] = True
                                record.data['takeaction'] = False
                                record.data['save-act-all'] = True
+                               record.data['action-level'] = 1
                        else:
                                record.data['message'] = None
                                record.data['takeaction'] = False
                                record.data['action'] = ['waitfortwoweeksaction']
                                record.data['save-act-all'] = False
                                print "checkStageAndTime: second message in one week for stage two"
+                               record.data['action-level'] = 1
                                #return None                    # don't send if there's no action
 
                elif 'ticket_waitforever' in record.data['stage']:
@@ -385,18 +408,21 @@ class MonitorMergeDiagnoseSendEscellate:
                                record.data['message'] = None
                                record.data['time'] = current_time
                                record.data['save-act-all'] = True
+                               record.data['action-level'] = 2
                        else:
                                if delta >= 7*SPERDAY:
                                        record.data['action'] = ['ticket_waitforever']
                                        record.data['message'] = None
                                        record.data['time'] = current_time              # reset clock
                                        record.data['save-act-all'] = True
+                                       record.data['action-level'] = 2
                                else:
                                        record.data['action'] = ['ticket_waitforever']
                                        record.data['message'] = None
                                        record.data['takeaction'] = False
                                        record.data['save-act-all'] = False
-                                       return None
+                                       record.data['action-level'] = 2
+                                       #return None
 
                elif 'waitforever' in record.data['stage']:
                        # more than 3 days since last action
@@ -408,11 +434,13 @@ class MonitorMergeDiagnoseSendEscellate:
                                record.data['message'] = record.data['message_series'][2]
                                record.data['time'] = current_time              # reset clock
                                record.data['save-act-all'] = True
+                               record.data['action-level'] = 2
                        else:
                                record.data['action'] = ['waitforever']
                                record.data['message'] = None
                                record.data['takeaction'] = False
                                record.data['save-act-all'] = False
+                               record.data['action-level'] = 2
                                #return None                    # don't send if there's no action
 
                else: