Tagging module Monitor - Monitor-2.0-10
[monitor.git] / sitebad.py
index 5a2f3be..6c09c1c 100755 (executable)
@@ -9,7 +9,7 @@ from datetime import datetime,timedelta
 from monitor import database
 from monitor import parser as parsermodule
 from monitor import config
-from monitor.database.info.model import HistorySiteRecord, HistoryNodeRecord, session
+from monitor.database.info.model import HistorySiteRecord, HistoryNodeRecord, session, BlacklistRecord
 from monitor.wrapper import plc, plccache
 from monitor.const import MINUP
 
@@ -28,6 +28,8 @@ def main2(config):
 
        if config.site:
                l_sites = [config.site]
+       elif config.node:
+               l_sites = [plccache.plcdb_hn2lb[config.node]]
        elif config.sitelist:
                site_list = config.sitelist.split(',')
                l_sites = site_list
@@ -37,50 +39,56 @@ def main2(config):
        checkAndRecordState(l_sites, l_plcsites)
 
 def getnodesup(nodelist):
+       # NOTE: assume that a blacklisted node is fine; since we're told to
+       #               ignore it, no policy actions should be taken for it.
        up = 0
        for node in nodelist:
                try:
+                       # NOTE: adding a condition for nodehist.haspcu would include pcus
+                       #               in the calculation
                        nodehist = HistoryNodeRecord.findby_or_create(hostname=node['hostname'])
-                       if nodehist is not None and nodehist.status == "good":
+                       nodebl   = BlacklistRecord.get_by(hostname=node['hostname'])
+                       if (nodehist is not None and nodehist.status != 'down') or \
+                               (nodebl is not None and not nodebl.expired()):
                                up = up + 1
                except:
                        import traceback
+                       email_exception(node['hostname'])
                        print traceback.print_exc()
        return up
 
 def check_site_state(rec, sitehist):
 
-       if sitehist.new and sitehist.status != 'new':
+       if sitehist.new and sitehist.status not in ['new', 'online', 'good']:
                sitehist.status = 'new'
+               sitehist.penalty_applied = True         # because new sites are disabled by default, i.e. have a penalty.
                sitehist.last_changed = datetime.now()
 
-       if not sitehist.new:
+       if sitehist.nodes_up >= MINUP:
 
-               if sitehist.nodes_up >= MINUP:
+               if sitehist.status != 'online' and sitehist.status != 'good':
+                       sitehist.last_changed = datetime.now()
 
-                       if sitehist.status != 'online' and sitehist.status != 'good':
-                               sitehist.last_changed = datetime.now()
+               if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'online':
+                       print "changed status from %s to online" % sitehist.status
+                       sitehist.status = 'online'
 
-                       if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'online':
-                               print "changed status from %s to online" % sitehist.status
-                               sitehist.status = 'online'
+               if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'good':
+                       print "changed status from %s to good" % sitehist.status
+                       sitehist.status = 'good'
 
-                       if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'good':
-                               print "changed status from %s to good" % sitehist.status
-                               sitehist.status = 'good'
+       elif not sitehist.new:
        
-               else: # sitehist.nodes_up < MINUP:
+               if sitehist.status != 'offline' and sitehist.status != 'down':
+                       sitehist.last_changed = datetime.now()
 
-                       if sitehist.status != 'offline' and sitehist.status != 'down':
-                               sitehist.last_changed = datetime.now()
+               if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'offline':
+                       print "changed status from %s to offline" % sitehist.status
+                       sitehist.status = 'offline'
 
-                       if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'offline':
-                               print "changed status from %s to offline" % sitehist.status
-                               sitehist.status = 'offline'
-
-                       if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'down':
-                               print "changed status from %s to down" % sitehist.status
-                               sitehist.status = 'down'
+               if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'down':
+                       print "changed status from %s to down" % sitehist.status
+                       sitehist.status = 'down'
 
 def checkAndRecordState(l_sites, l_plcsites):
        count = 0
@@ -118,7 +126,7 @@ def checkAndRecordState(l_sites, l_plcsites):
                        check_site_state(d_site, sitehist)
 
                        count += 1
-                       print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, sitehist.slices_used, 
+                       print "%d %15s slices(%2s) nodes(%2s) notdown(%2s) %s" % (count, sitename, sitehist.slices_used, 
                                                                                        sitehist.nodes_total, sitehist.nodes_up, sitehist.status)
                        sitehist.flush()