merge from 2.0 branch
[monitor.git] / sitebad.py
index eccaa28..6c09c1c 100755 (executable)
@@ -4,145 +4,155 @@ import os
 import sys
 import string
 import time
+from datetime import datetime,timedelta
 
+from monitor import database
+from monitor import parser as parsermodule
+from monitor import config
+from monitor.database.info.model import HistorySiteRecord, HistoryNodeRecord, session, BlacklistRecord
+from monitor.wrapper import plc, plccache
+from monitor.const import MINUP
 
-import soltesz
-import comon
-import threadpool
-import syncplcdb
+from monitor.common import *
 from nodequery import verify,query_to_dict,node_select
+from monitor.model import *
 
-import plc
-import auth
-api = plc.PLC(auth.auth, auth.plc)
-from unified_model import *
-from monitor_policy import MINUP
+api = plc.getAuthAPI()
+def main():
+       main2(config)
 
-round = 1
-externalState = {'round': round, 'sites': {}}
-count = 0
+def main2(config):
 
-def main(config):
-       global externalState
-       externalState = soltesz.if_cached_else(1, config.dbname, lambda : externalState) 
-       if config.increment:
-               # update global round number to force refreshes across all nodes
-               externalState['round'] += 1
-
-       l_nodes = syncplcdb.create_plcdb()
-       l_plcsites = soltesz.dbLoad("l_plcsites")
+       l_nodes = plccache.l_nodes
+       l_plcsites = plccache.l_sites
 
        if config.site:
                l_sites = [config.site]
+       elif config.node:
+               l_sites = [plccache.plcdb_hn2lb[config.node]]
+       elif config.sitelist:
+               site_list = config.sitelist.split(',')
+               l_sites = site_list
        else:
                l_sites = [site['login_base'] for site in l_plcsites]
        
        checkAndRecordState(l_sites, l_plcsites)
 
-def checkAndRecordState(l_sites, l_plcsites):
-       global externalState
-       global count
-       global_round = externalState['round']
+def getnodesup(nodelist):
+       # NOTE: assume that a blacklisted node is fine; since we're told to
+       #               ignore it, no policy actions should be taken for it.
+       up = 0
+       for node in nodelist:
+               try:
+                       # NOTE: adding a condition for nodehist.haspcu would include pcus
+                       #               in the calculation
+                       nodehist = HistoryNodeRecord.findby_or_create(hostname=node['hostname'])
+                       nodebl   = BlacklistRecord.get_by(hostname=node['hostname'])
+                       if (nodehist is not None and nodehist.status != 'down') or \
+                               (nodebl is not None and not nodebl.expired()):
+                               up = up + 1
+               except:
+                       import traceback
+                       email_exception(node['hostname'])
+                       print traceback.print_exc()
+       return up
 
-       for sitename in l_sites:
-               if sitename not in externalState['sites']:
-                       externalState['sites'][sitename] = {'round': 0, 'values': []}
-
-               site_round   = externalState['sites'][sitename]['round']
-               if site_round < global_round:
-                       # do work
-                       values = collectStatusAndState(sitename, l_plcsites)
-                       global_round = externalState['round']
-                       externalState['sites'][sitename]['values'] = values
-                       externalState['sites'][sitename]['round'] = global_round
-               else:
-                       count += 1
+def check_site_state(rec, sitehist):
 
-               if count % 20 == 0:
-                       soltesz.dbDump(config.dbname, externalState)
+       if sitehist.new and sitehist.status not in ['new', 'online', 'good']:
+               sitehist.status = 'new'
+               sitehist.penalty_applied = True         # because new sites are disabled by default, i.e. have a penalty.
+               sitehist.last_changed = datetime.now()
 
-       soltesz.dbDump(config.dbname, externalState)
+       if sitehist.nodes_up >= MINUP:
 
-fb = soltesz.dbLoad('findbad')
-lb2hn = soltesz.dbLoad("plcdb_lb2hn")
+               if sitehist.status != 'online' and sitehist.status != 'good':
+                       sitehist.last_changed = datetime.now()
 
-def getnodesup(nodelist):
-       up = 0
-       for node in nodelist:
-               if node['hostname'] in fb['nodes'].keys():
-                       try:
-                               if fb['nodes'][node['hostname']]['values']['state'] == "BOOT":
-                                       up = up + 1
-                       except:
-                               pass
-       return up
+               if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'online':
+                       print "changed status from %s to online" % sitehist.status
+                       sitehist.status = 'online'
 
-def collectStatusAndState(sitename, l_plcsites):
-       global count
-
-       d_site = None
-       for site in l_plcsites:
-               if site['login_base'] == sitename:
-                       d_site = site
-                       break
-       if not d_site:
-               return None
-
-       if sitename in lb2hn:
-               pf = PersistFlags(sitename, 1, db='site_persistflags')
-
-               if not pf.checkattr('last_changed'):
-                       pf.last_changed = time.time()
-               
-               pf.last_checked = time.time()
-               pf.nodes_total = len(lb2hn[sitename])
-               pf.slices_used = len(d_site['slice_ids'])
-               pf.nodes_up = getnodesup(lb2hn[sitename])
-               if not pf.checkattr('status'):
-                       pf.status = "unknown"
-
-               if pf.nodes_up >= MINUP:
-                       if pf.status != "good": pf.last_changed = time.time()
-                       pf.status = "good"
-               else:
-                       if pf.status != "down": pf.last_changed = time.time()
-                       pf.status = "down"
-
-               count += 1
-               print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, pf.slices_used, 
-                                                                               pf.nodes_total, pf.nodes_up, pf.status)
-               # updated by other modules
-               #pf.enabled = 
-               #pf.suspended = 
-
-               pf.save()
+               if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'good':
+                       print "changed status from %s to good" % sitehist.status
+                       sitehist.status = 'good'
+
+       elif not sitehist.new:
+       
+               if sitehist.status != 'offline' and sitehist.status != 'down':
+                       sitehist.last_changed = datetime.now()
+
+               if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'offline':
+                       print "changed status from %s to offline" % sitehist.status
+                       sitehist.status = 'offline'
+
+               if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'down':
+                       print "changed status from %s to down" % sitehist.status
+                       sitehist.status = 'down'
+
+def checkAndRecordState(l_sites, l_plcsites):
+       count = 0
+       lb2hn = plccache.plcdb_lb2hn
+       for sitename in l_sites:
+               d_site = None
+               for site in l_plcsites:
+                       if site['login_base'] == sitename:
+                               d_site = site
+                               break
+               if not d_site:
+                       continue
+
+               if sitename in lb2hn:
+                       sitehist = HistorySiteRecord.findby_or_create(loginbase=sitename,
+                                                                                               if_new_set={'status' : 'unknown', 
+                                                                                                                       'last_changed' : datetime.now(),
+                                                                                                                       'message_id': 0,
+                                                                                                                       'penalty_level' : 0})
+                       sitehist.last_checked = datetime.now()
+
+                       sitehist.slices_total = d_site['max_slices']
+                       sitehist.slices_used = len(d_site['slice_ids'])
+                       sitehist.nodes_total = len(lb2hn[sitename])
+                       if sitehist.message_id != 0:
+                               rtstatus = mailer.getTicketStatus(sitehist.message_id)
+                               sitehist.message_status = rtstatus['Status']
+                               sitehist.message_queue = rtstatus['Queue']
+                               sitehist.message_created = datetime.fromtimestamp(rtstatus['Created'])
+
+                       sitehist.nodes_up = getnodesup(lb2hn[sitename])
+                       sitehist.new = changed_lessthan(datetime.fromtimestamp(d_site['date_created']), 30) # created < 30 days ago
+                       sitehist.enabled = d_site['enabled']
+
+                       check_site_state(d_site, sitehist)
+
+                       count += 1
+                       print "%d %15s slices(%2s) nodes(%2s) notdown(%2s) %s" % (count, sitename, sitehist.slices_used, 
+                                                                                       sitehist.nodes_total, sitehist.nodes_up, sitehist.status)
+                       sitehist.flush()
+
+       print HistorySiteRecord.query.count()
+       session.flush()
 
        return True
 
 if __name__ == '__main__':
-       from config import config
-       from optparse import OptionParser
-       parser = OptionParser()
-       parser.set_defaults(filename=None, node=None, site=None, nodeselect=False, nodegroup=None, 
-                                               increment=False, dbname="sitebad", cachenodes=False)
+       from monitor import parser as parsermodule
+
+       parser = parsermodule.getParser()
+       parser.set_defaults(filename=None, node=None, site=None, 
+                                               nodeselect=False, nodegroup=None, cachenodes=False)
+
        parser.add_option("", "--site", dest="site", metavar="login_base", 
                                                help="Provide a single site to operate on")
-       parser.add_option("", "--sitelist", dest="sitelist", metavar="file.list", 
-                                               help="Provide a list of files to operate on")
+       parser.add_option("", "--sitelist", dest="sitelist", 
+                                               help="Provide a list of sites separated by ','")
 
-       parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
-                                               help="Specify the name of the database to which the information is saved")
-       parser.add_option("-i", "--increment", action="store_true", dest="increment", 
-                                               help="Increment round number to force refresh or retry")
-       config = config(parser)
-       config.parse_args()
+       config = parsermodule.parse_args(parser)
 
        try:
-               main(config)
+               main2(config)
        except Exception, err:
                import traceback
                print traceback.print_exc()
                print "Exception: %s" % err
-               print "Saving data... exitting."
-               soltesz.dbDump(config.dbname, externalState)
                sys.exit(0)