move clean_policy.py into monitor package
[monitor.git] / nodebad.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7 from datetime import datetime,timedelta
8
9 from nodequery import verify,query_to_dict,node_select
10
11 from nodecommon import *
12
13 from monitor import config
14 from monitor.wrapper import plc,plccache
15 from monitor.const import MINUP
16 from monitor.database import  FindbadNodeRecord, HistoryNodeRecord
17
18 from monitor.model import *
19
# Authenticated PLC API handle, created once when the module is loaded.
api = plc.getAuthAPI()

# Module-level counters.  `count` is incremented by checkAndRecordState() for
# each node it reports and is printed as the row number; `round` is not
# referenced anywhere else in this file (note that it shadows the builtin).
round, count = 1, 0
24
def main(config):
        """Resolve the requested node set and record the state of each node.

        config -- parsed option object; handed unchanged to get_nodeset().
        """
        # Grab the cached PLC node list first, then resolve the node selection,
        # so both values are read in the same order as they are passed on.
        plc_node_list = plccache.l_nodes
        selected_hostnames = get_nodeset(config)

        checkAndRecordState(selected_hostnames, plc_node_list)
31
def checkAndRecordState(l_nodes, l_plcnodes):
        """Refresh the HistoryNodeRecord status of every known node in l_nodes.

        For each hostname that also appears in l_plcnodes, the most recent
        FindbadNodeRecord is looked up and mapped to a history status:

            observed_status "BOOT"   -> "good"
            observed_status "DEBUG"  -> the PLC boot_state ("unknown" if the
                                        record carries no plc_node_stats)
            anything else            -> "down"

        last_checked is always updated; last_changed is only updated when the
        status actually changes.  One summary line is printed per node.

        l_nodes    -- iterable of hostnames to process
        l_plcnodes -- iterable of mappings, each with a 'hostname' key

        Hostnames missing from l_plcnodes are skipped silently.  Hostnames
        without any FindbadNodeRecord are reported and skipped.

        Returns True.
        """
        global count

        # Only membership in the PLC node list matters, so collect the
        # hostnames once instead of rescanning the list for every node.
        plc_hostnames = set(node['hostname'] for node in l_plcnodes)

        for nodename in l_nodes:
                if nodename not in plc_hostnames:
                        continue

                pf = HistoryNodeRecord.findby_or_create(hostname=nodename)
                pf.last_checked = datetime.now()

                try:
                        # Find the most recent record
                        noderec = FindbadNodeRecord.query.filter(
                                        FindbadNodeRecord.hostname == nodename
                                ).order_by(
                                        FindbadNodeRecord.date_checked.desc()
                                ).first()
                except Exception:
                        print("COULD NOT FIND %s" % nodename)
                        import traceback
                        # print_exc() writes the traceback itself and returns
                        # None, so its result must not be printed.
                        traceback.print_exc()
                        continue

                # first() yields None rather than raising when no row matches.
                if noderec is None:
                        print("COULD NOT FIND %s" % nodename)
                        continue

                node_state = noderec.observed_status
                if noderec.plc_node_stats:
                        boot_state = noderec.plc_node_stats['boot_state']
                else:
                        boot_state = "unknown"

                # Translate the observed state into the status to be recorded.
                if node_state == "BOOT":
                        new_status = "good"
                elif node_state == "DEBUG":
                        new_status = boot_state
                else:
                        new_status = "down"

                # Only touch last_changed on a real transition.
                if pf.status != new_status:
                        pf.last_changed = datetime.now()
                        pf.status = new_status

                count += 1
                since = diff_time(time.mktime(pf.last_changed.timetuple()))
                print("%d %35s %s since(%s)" % (count, nodename, pf.status, since))

        # NOTE: this query commits all pending operations to the DB.  Do not
        # remove it unless it is replaced with another operation that also
        # commits all pending ops, such as session.commit() or flush().
        print(HistoryNodeRecord.query.count())

        return True
85
if __name__ == '__main__':
        # Build the option parser from the 'nodesets' and 'defaults' option
        # groups, then parse the command line into the config object.
        from monitor import parser as parsermodule
        parser = parsermodule.getParser(['nodesets'])
        parser.set_defaults(filename=None, node=None, nodeselect=False, nodegroup=None, cachenodes=False)
        parser = parsermodule.getParser(['defaults'], parser)
        config = parsermodule.parse_args(parser)

        try:
                main(config)
        except Exception:
                import traceback
                # print_exc() writes the traceback itself and returns None, so
                # printing its result only added a stray "None" line.
                traceback.print_exc()
                # sys.exc_info() is used instead of binding the exception in
                # the except clause so the syntax is valid on every Python.
                print("Exception: %s" % sys.exc_info()[1])
                # NOTE(review): the script exits with status 0 even on failure;
                # kept as-is because callers may rely on it -- confirm whether
                # a non-zero status would be more appropriate.
                sys.exit(0)