www/printbadnodes.py
[monitor.git] / sitebad.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7
8
9 import database
10 import comon
11 import threadpool
12 import syncplcdb
13 from nodequery import verify,query_to_dict,node_select
14
15 import plc
16 api = plc.getAuthAPI()
17 from unified_model import *
18 from const import MINUP
19
# Initial global round number.  checkAndRecordState() re-collects a site
# whenever the site's stored round is lower than externalState['round'].
# NOTE(review): this name shadows the built-in round() for the rest of the
# module.
round = 1
# In-memory state that is dumped to / restored from the database:
#   'round' -- the global round number
#   'sites' -- maps a site's login_base to {'round': ..., 'values': ...}
externalState = {'round': round, 'sites': {}}
# Running tally incremented by checkAndRecordState() and
# collectStatusAndState(); it numbers the printed status lines and paces the
# periodic dbDump calls.
count = 0
23
def main(config):
        """Load the saved state, pick the sites to examine and check them.

        config is the parsed option object; this function reads its
        dbname, increment and site attributes.
        """
        global externalState

        # presumably returns the cached copy stored under config.dbname when
        # one exists, else the lambda's value -- confirm in the database module
        externalState = database.if_cached_else(1, config.dbname, lambda: externalState)

        if config.increment:
                # A higher global round makes every site's stored round stale,
                # which forces a refresh of all of them.
                externalState['round'] += 1

        # The return value was never used here; the call is kept because it
        # presumably refreshes the data loaded below -- TODO confirm.
        syncplcdb.create_plcdb()
        plc_sites = database.dbLoad("l_plcsites")

        # A single --site restricts the run; otherwise every known site is used.
        selected = [config.site] if config.site else [
                entry['login_base'] for entry in plc_sites]

        checkAndRecordState(selected, plc_sites)
40
def checkAndRecordState(l_sites, l_plcsites, dbname=None):
        """Re-collect every stale site in l_sites and persist externalState.

        A site is stale when its stored round is lower than the global round
        in externalState['round'].  Stale sites are refreshed through
        collectStatusAndState(); up-to-date sites are only counted.

        l_sites    -- list of site login_base strings to examine
        l_plcsites -- list of site records, passed through to
                      collectStatusAndState()
        dbname     -- name handed to database.dbDump(); when None the
                      module-level config.dbname is used, which is what this
                      function always did before the parameter existed
        """
        global externalState
        global count

        if dbname is None:
                # Fallback to the config object that the __main__ section
                # binds at module level.
                dbname = config.dbname

        last_dumped = None
        for sitename in l_sites:
                # Sites seen for the first time start at round 0, so they are
                # always stale.
                site_state = externalState['sites'].setdefault(
                                sitename, {'round': 0, 'values': []})

                if site_state['round'] < externalState['round']:
                        values = collectStatusAndState(sitename, l_plcsites)
                        site_state['values'] = values
                        site_state['round'] = externalState['round']
                else:
                        count += 1

                # Checkpoint at every multiple of 20, but only once per value
                # of count: count does not advance on every iteration, and
                # re-dumping on each pass while it sits on a multiple of 20
                # rewrote the database needlessly.
                if count % 20 == 0 and count != last_dumped:
                        database.dbDump(dbname, externalState)
                        last_dumped = count

        # Final dump so the last (partial) batch is always saved.
        database.dbDump(dbname, externalState)
64
# Loaded once at import time and shared by the functions below.
# fb: getnodesup() reads it as fb['nodes'][hostname]['values']['state'].
fb = database.dbLoad('findbad')
# lb2hn: indexed by site login_base; each value is the site's list of node
# records (getnodesup() reads their 'hostname' key).
lb2hn = database.dbLoad("plcdb_lb2hn")
67
def getnodesup(nodelist):
        """Return how many nodes in nodelist are recorded as "BOOT" in fb.

        nodelist -- iterable of node records, each with a 'hostname' key

        A node whose hostname is missing from fb['nodes'], or whose record
        has no usable ['values']['state'], is counted as not up.
        """
        known = fb['nodes']
        up = 0
        for node in nodelist:
                hostname = node['hostname']
                try:
                        # Direct indexing replaces the former
                        # "in fb['nodes'].keys()" test, which scanned a list
                        # on Python 2.
                        state = known[hostname]['values']['state']
                except (KeyError, TypeError):
                        # Unknown host, or a record without values/state.
                        # Only these are ignored; the old bare except also
                        # swallowed KeyboardInterrupt and real errors.
                        continue
                if state == "BOOT":
                        up += 1
        return up
78
def collectStatusAndState(sitename, l_plcsites):
        """Update and save the persistent status flags of one site.

        sitename   -- login_base of the site to update
        l_plcsites -- list of site records; the first one whose 'login_base'
                      equals sitename supplies the 'slice_ids' that are counted

        Returns None when no record matches sitename, otherwise True.  When
        sitename is absent from lb2hn nothing is updated or printed, but True
        is still returned.
        """
        global count

        # Locate the site's record; the else branch runs only when the loop
        # finished without a match.
        for d_site in l_plcsites:
                if d_site['login_base'] == sitename:
                        break
        else:
                return None
        if not d_site:
                return None

        if sitename not in lb2hn:
                return True

        site_nodes = lb2hn[sitename]
        pf = PersistFlags(sitename, 1, db='site_persistflags')

        # First time this site is seen: start the change clock now.
        if not pf.checkattr('last_changed'):
                pf.last_changed = time.time()

        pf.last_checked = time.time()
        pf.nodes_total = len(site_nodes)
        pf.slices_used = len(d_site['slice_ids'])
        pf.nodes_up = getnodesup(site_nodes)
        if not pf.checkattr('status'):
                pf.status = "unknown"

        # A site is "good" once at least MINUP of its nodes are up; any
        # change of status restarts the change clock.
        new_status = "good" if pf.nodes_up >= MINUP else "down"
        if pf.status != new_status:
                pf.last_changed = time.time()
        pf.status = new_status

        count += 1
        summary = (count, sitename, pf.slices_used,
                   pf.nodes_total, pf.nodes_up, pf.status)
        print("%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % summary)

        # The enabled / suspended flags are updated by other modules and are
        # deliberately not set here.
        pf.save()

        return True
120
if __name__ == '__main__':
        # Script entry point: build the option parser, parse the command line
        # and run main().  The resulting config object is bound at module
        # level, where checkAndRecordState() also reads config.dbname.
        import parser as parsermodule

        parser = parsermodule.getParser()
        parser.set_defaults(filename=None, node=None, site=None, nodeselect=False, nodegroup=None,
                                                increment=False, dbname="sitebad", cachenodes=False)
        parser.add_option("", "--site", dest="site", metavar="login_base",
                                                help="Provide a single site to operate on")
        parser.add_option("", "--sitelist", dest="sitelist", metavar="file.list",
                                                help="Provide a list of files to operate on")

        parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
                                                help="Specify the name of the database to which the information is saved")
        parser.add_option("-i", "--increment", action="store_true", dest="increment",
                                                help="Increment round number to force refresh or retry")
        config = parsermodule.parse_args(parser)

        try:
                main(config)
        except Exception as err:
                import traceback
                # print_exc() writes the traceback itself and returns None;
                # wrapping it in a print statement emitted a stray "None".
                traceback.print_exc()
                print("Exception: %s" % err)
                print("Saving data... exitting.")
                # Save whatever was collected before the failure.
                database.dbDump(config.dbname, externalState)
                # NOTE(review): the exit status is 0 even though the run
                # failed -- confirm that callers rely on this before changing.
                sys.exit(0)