changes for 3.0
[monitor.git] / sitebad.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7
8
9 import database
10 import comon
11 import threadpool
12 import syncplcdb
13 from nodequery import verify,query_to_dict,node_select
14
15 import plc
16 api = plc.getAuthAPI()
17 from unified_model import *
18 from const import MINUP
19
# Module-level state shared by main() and checkAndRecordState().
#
# round: initial value of the global round counter.  NOTE: this name shadows
#        the builtin round() for the rest of the module.
# externalState: default state, used as the fallback passed to
#        database.if_cached_else() in main() and written out with
#        database.dbDump().
#        'round' is the global round number; 'sites' maps a site login_base
#        to {'round': <last round the site was checked in>, 'values': ...}.
# count: running number of sites reported so far; printed as the line prefix
#        and used to trigger a state dump whenever count % 20 == 0.
round = 1
externalState = {'round': round, 'sites': {}}
count = 0
23
def main(config):
    """Load the cached site state and check the selected sites.

    config is the parsed option object; this function reads config.dbname,
    config.increment and config.site from it.

    The module-level externalState is replaced by whatever
    database.if_cached_else() returns for config.dbname (the current
    module-level value is offered as the fallback through the lambda).
    When config.increment is set the global round number is bumped, which
    makes every site look stale to checkAndRecordState().
    """
    global externalState
    externalState = database.if_cached_else(1, config.dbname,
                                            lambda: externalState)

    # A higher global round forces a refresh of every site.
    if config.increment:
        externalState['round'] += 1

    # The return value is not used here; the call is kept for its effects
    # (presumably it refreshes the cached PLC data loaded just below --
    # TODO confirm in syncplcdb).
    plc_nodes = syncplcdb.create_plcdb()
    l_plcsites = database.dbLoad("l_plcsites")

    # Either the single site given on the command line, or every known site.
    if not config.site:
        l_sites = [plcsite['login_base'] for plcsite in l_plcsites]
    else:
        l_sites = [config.site]

    checkAndRecordState(l_sites, l_plcsites)
40
def checkAndRecordState(l_sites, l_plcsites):
    """Refresh every stale site in l_sites and persist the overall state.

    l_sites    -- list of site login_base names to process.
    l_plcsites -- list of PLC site records, handed to
                  collectStatusAndState() for the lookup of each site.

    A site is stale when the round stored for it in externalState is lower
    than the global round.  Stale sites are re-collected and stamped with
    the global round; up-to-date sites are only reported ("noinc") from
    their stored PersistFlags.  The state is dumped to config.dbname
    whenever the running count is a multiple of 20, and once more at the
    end.  Reads the module-level config, so that must be set beforehand.
    """
    global externalState
    global count

    current_round = externalState['round']
    site_states = externalState['sites']

    for sitename in l_sites:
        # First time this site is seen: round 0 guarantees a refresh.
        entry = site_states.setdefault(sitename, {'round': 0, 'values': []})

        if entry['round'] >= current_round:
            # Already checked in this round: report the stored flags only.
            pf = PersistFlags(sitename, 1, db=config.dbpfname)
            print("%d noinc %15s slices(%2s) nodes(%2s) up(%2s) %s" % (
                count, sitename, pf.slices_used,
                pf.nodes_total, pf.nodes_up, pf.status))
            count += 1
        else:
            # Stale: collect fresh data and stamp it with the global round.
            collected = collectStatusAndState(sitename, l_plcsites)
            current_round = externalState['round']
            entry['values'] = collected
            entry['round'] = current_round

        # Periodic checkpoint so an interrupted run loses little work.
        if count % 20 == 0:
            database.dbDump(config.dbname, externalState)

    database.dbDump(config.dbname, externalState)
67
# Both databases are loaded once, at import time.
#
# fb: the 'findbad' database; getnodesup() reads
#     fb['nodes'][hostname]['values']['state'] from it.
# lb2hn: keyed by site login_base; each value is a list of node records
#     (getnodesup() reads node['hostname'] from every element).
fb = database.dbLoad('findbad')
lb2hn = database.dbLoad("plcdb_lb2hn")
70
def getnodesup(nodelist):
    """Return how many nodes in nodelist are recorded in state "BOOT".

    nodelist -- iterable of node records; each must provide node['hostname'].

    A node counts as up when the module-level findbad database has
    fb['nodes'][hostname]['values']['state'] == "BOOT".  Nodes that are
    unknown to findbad, or whose record is incomplete, are not counted.
    """
    # Look the table up once and test membership on the mapping itself;
    # calling .keys() per node would build a fresh list on every iteration.
    known_nodes = fb['nodes']

    up = 0
    for node in nodelist:
        hostname = node['hostname']
        if hostname not in known_nodes:
            continue
        try:
            state = known_nodes[hostname]['values']['state']
        except (KeyError, TypeError):
            # Incomplete record (missing key, or 'values' is None):
            # treat the node as not up, but let any other error surface.
            continue
        if state == "BOOT":
            up += 1
    return up
81
def collectStatusAndState(sitename, l_plcsites):
    """Recompute and save the persistent flags of one site.

    sitename   -- login_base of the site to check.
    l_plcsites -- list of PLC site records; the one whose 'login_base'
                  equals sitename supplies 'slice_ids'.

    Returns None when l_plcsites has no record for sitename, True otherwise.
    When the site has no entry in lb2hn nothing is recorded, but True is
    still returned.

    For a recorded site the PersistFlags object gets last_checked,
    nodes_total, slices_used, nodes_up and status ("good" when at least
    MINUP nodes are up, "down" otherwise); last_changed is reset whenever
    the status flips.  One progress line is printed and the module-level
    count is incremented.  Reads the module-level config (config.dbpfname).
    """
    global count

    # Locate the PLC record of this site.
    d_site = None
    for candidate in l_plcsites:
        if candidate['login_base'] == sitename:
            d_site = candidate
            break
    if not d_site:
        return None

    # No node list for this site: nothing to record.
    if sitename not in lb2hn:
        return True

    pf = PersistFlags(sitename, 1, db=config.dbpfname)

    if not pf.checkattr('last_changed'):
        pf.last_changed = time.time()

    pf.last_checked = time.time()
    site_nodes = lb2hn[sitename]
    pf.nodes_total = len(site_nodes)
    pf.slices_used = len(d_site['slice_ids'])
    pf.nodes_up = getnodesup(site_nodes)
    if not pf.checkattr('status'):
        pf.status = "unknown"

    # Decide the new status first, then stamp last_changed only on a flip.
    if pf.nodes_up >= MINUP:
        new_status = "good"
    else:
        new_status = "down"
    if pf.status != new_status:
        pf.last_changed = time.time()
    pf.status = new_status

    count += 1
    print("%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (
        count, sitename, pf.slices_used,
        pf.nodes_total, pf.nodes_up, pf.status))

    # pf.enabled and pf.suspended are updated by other modules.

    pf.save()
    return True
123
if __name__ == '__main__':
    # Script entry point: build the option parser, parse the command line
    # into the module-level config (read by checkAndRecordState() and
    # collectStatusAndState()), then run main().
    import parser as parsermodule

    parser = parsermodule.getParser()
    parser.set_defaults(filename=None, node=None, site=None, nodeselect=False,
                        nodegroup=None, increment=False, dbname="sitebad",
                        dbpfname="site_persistflags", cachenodes=False)
    parser.add_option("", "--site", dest="site", metavar="login_base",
                      help="Provide a single site to operate on")
    parser.add_option("", "--sitelist", dest="sitelist", metavar="file.list",
                      help="Provide a list of files to operate on")

    parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
                      help="Specify the name of the database to which the information is saved")
    parser.add_option("", "--dbpfname", dest="dbpfname", metavar="FILE",
                      help="Specify the persistflags db name")
    parser.add_option("-i", "--increment", action="store_true", dest="increment",
                      help="Increment round number to force refresh or retry")
    config = parsermodule.parse_args(parser)

    try:
        main(config)
    except Exception:
        # sys.exc_info() instead of "except Exception, err" so the handler
        # parses on every Python version.
        err = sys.exc_info()[1]
        import traceback
        # print_exc() writes the traceback itself and returns None; printing
        # its return value only added a stray "None" line to the output.
        traceback.print_exc()
        from nodecommon import email_exception
        email_exception()
        print("Exception: %s" % err)
        print("Saving data... exitting.")
        # Keep whatever was collected before the failure.
        database.dbDump(config.dbname, externalState)
        # NOTE(review): exit status 0 after a failure is kept from the
        # original -- confirm that callers rely on it.
        sys.exit(0)