Merge branch 'master' of git://git.planet-lab.org/monitor
[monitor.git] / commands / pcubad.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7 import sets
8 from datetime import datetime,timedelta
9
10 from monitor import database
11 from monitor import reboot
12 from monitor import parser as parsermodule
13 from monitor import config
14 from monitor.database.info.model import HistoryPCURecord, FindbadPCURecord
15 from monitor.database.dborm import mon_session as session
16 from monitor.wrapper import plc,plccache
17 from monitor.const import MINUP
18
19 from monitor.common import *
20 from monitor.query import verify,query_to_dict,node_select
21 from monitor.model import *
22
# PLC API handle created once at import time (presumably authenticated, going
# by the getter's name -- confirm in monitor.wrapper.plc).  Nothing in the
# visible part of this file references it.
api = plc.getAuthAPI()
24
def main():
        """Run the PCU status update for callers that import this module.

        Delegates to main2() with whatever the module-level name ``config`` is
        bound to: the ``monitor.config`` module after a plain import, or the
        parsed command-line options once the script block has rebound it.
        """
        main2(config)
27
def main2(config):
        """Select the PCUs to examine and update their recorded status.

        The selection is driven by the first of these attributes of ``config``
        that is set:

          * ``config.site`` -- every PCU attached to a node of that site
          * ``config.node`` -- the PCUs attached to that node
          * ``config.pcu``  -- a PCU whose hostname or IP contains that string
          * otherwise       -- every PCU in the PLC cache

        The resulting list of pcu ids is handed to checkAndRecordState().
        Exits the process with status 1 when the requested site, node or PCU
        cannot be found.
        """
        l_plcpcus = plccache.l_pcus

        l_pcus = None
        if config.site is not None:
                site = plccache.GetSitesByName([config.site])
                if not site:
                        # An unknown site used to surface as an IndexError on
                        # site[0]; report it the same way as an unknown PCU.
                        # (Assumes the cache returns an empty result for an
                        # unknown name -- if it raises instead, this guard is
                        # simply never reached.)
                        print("ERROR: could not find site %s" % config.site)
                        sys.exit(1)
                l_nodes = plccache.GetNodesByIds(site[0]['node_ids'])
                pcus = []
                for node in l_nodes:
                        pcus.extend(node['pcu_ids'])
                # Several nodes can share one PCU: drop the duplicates.
                l_pcus = list(set(pcus))

        elif config.node:
                node = plccache.GetNodeByName(config.node)
                if not node:
                        # Same guard as for sites: fail with a readable message
                        # rather than an error from indexing a missing node.
                        print("ERROR: could not find node %s" % config.node)
                        sys.exit(1)
                # Drop duplicate ids.
                l_pcus = list(set(node['pcu_ids']))

        elif config.pcu:
                # Substring match against hostname and IP.  There is no break,
                # so when several PCUs match, the last one in the cache wins.
                for pcu in l_plcpcus:
                        if ( pcu['hostname'] is not None and config.pcu in pcu['hostname'] ) or \
                           ( pcu['ip'] is not None and config.pcu in pcu['ip'] ):
                                l_pcus = [pcu['pcu_id']]
                if not l_pcus:
                        print("ERROR: could not find pcu %s" % config.pcu)
                        sys.exit(1)
        else:
                l_pcus = [pcu['pcu_id'] for pcu in l_plcpcus]

        checkAndRecordState(l_pcus, l_plcpcus)
60
# Module-level alias for plccache.plcdb_hn2lb (presumably a hostname ->
# login-base map, going by the name -- TODO confirm).  Not referenced in the
# visible part of this file.
hn2lb = plccache.plcdb_hn2lb
62
def check_pcu_state(rec, pcu):
        """Bring a PCU history record in line with the latest probe result.

        rec -- object whose ``reboot_trial_status`` holds the latest probe
               result; 0 (or "0") counts as success, anything else as failure.
        pcu -- history record; its ``status`` and ``last_changed`` attributes
               are updated in place.

        A failed probe moves any status other than 'offline'/'down' to
        'offline'; a successful probe moves any status other than
        'online'/'good' to 'online'.  Both restart the ``last_changed`` clock.
        Afterwards 'online' is promoted to 'good' and 'offline' to 'down' once
        changed_greaterthan() reports that the state has been held beyond the
        threshold (0.5 and 2 respectively -- presumably days; confirm against
        monitor.common).
        """
        probe_ok = rec.reboot_trial_status in (0, "0")

        # Record a flip between the "up" and "down" families of states.
        if probe_ok:
                if pcu.status not in ('online', 'good'):
                        print("changed status from %s to online" % pcu.status)
                        pcu.status = 'online'
                        pcu.last_changed = datetime.now()
        elif pcu.status not in ('offline', 'down'):
                print("changed status from %s to offline" % pcu.status)
                pcu.status = 'offline'
                pcu.last_changed = datetime.now()

        # Promote a state that has been held long enough.  last_changed is
        # deliberately left alone so it keeps measuring the time since the
        # PCU last flipped between up and down.
        if pcu.status == 'online':
                if changed_greaterthan(pcu.last_changed, 0.5):
                        # send thank you notice, or on-line notice.
                        print("changed status from %s to good" % pcu.status)
                        pcu.status = 'good'
        elif pcu.status == 'offline':
                if changed_greaterthan(pcu.last_changed, 2):
                        # send down pcu notice
                        print("changed status from %s to down" % pcu.status)
                        pcu.status = 'down'
91
92 #       if pcu.status in [ 'offline', 'down' ] and changed_greaterthan(pcu.last_changed, 2*30):
93 #               print "changed status from %s to down" % pcu.status
94 #               pcu.status = 'down'
95 #               pcu.last_changed = datetime.now()
96
def checkAndRecordState(l_pcus, l_plcpcus):
        """Update the history record of every listed PCU and flush to the DB.

        l_pcus    -- iterable of PLC pcu ids to process.
        l_plcpcus -- list of PCU dicts from the PLC cache; each must carry a
                     'pcu_id' key.  Ids in l_pcus with no matching dict are
                     skipped silently.

        For each PCU the HistoryPCURecord is fetched (or created as 'offline'),
        its last_checked stamp is refreshed, and check_pcu_state() applies the
        FindbadPCURecord result to it.  PCUs without a FindbadPCURecord are
        reported and skipped.  Always returns True.
        """
        # Index the cache once instead of rescanning it for every id.
        # setdefault keeps the first dict seen for an id, matching the
        # first-match behaviour of a linear scan.
        pcu_by_id = {}
        for pcu in l_plcpcus:
                pcu_by_id.setdefault(pcu['pcu_id'], pcu)

        count = 0
        for pcu_id in l_pcus:

                d_pcu = pcu_by_id.get(pcu_id)
                if not d_pcu:
                        continue

                pcuhist = HistoryPCURecord.findby_or_create(plc_pcuid=d_pcu['pcu_id'],
                                                if_new_set={'status' : 'offline',
                                                            'last_changed' : datetime.now()})
                pcuhist.last_checked = datetime.now()

                try:
                        # NOTE(review): intended to be the most recent record,
                        # but the query has no explicit ordering, so first()
                        # returns whichever row the database yields first.
                        pcurec = FindbadPCURecord.query.filter(FindbadPCURecord.plc_pcuid==pcu_id).first()
                except Exception:
                        # Catch Exception rather than everything, so Ctrl-C and
                        # sys.exit() are not swallowed on interpreters where
                        # they no longer derive from Exception.
                        print("COULD NOT FIND FB record for %s" % reboot.pcu_name(d_pcu))
                        import traceback
                        email_exception()
                        # print_exc() writes the traceback itself and returns
                        # None; printing its result only added a stray "None".
                        traceback.print_exc()
                        # don't have the info to create a new entry right now, so continue.
                        continue

                if not pcurec:
                        print("none object for pcu %s"% reboot.pcu_name(d_pcu))
                        continue

                check_pcu_state(pcurec, pcuhist)

                count += 1
                print("%d %35s %s since(%s)" % (count, reboot.pcu_name(d_pcu), pcuhist.status,
                                                diff_time(time.mktime(pcuhist.last_changed.timetuple()))))

        # NOTE: this commits all pending operations to the DB.  Do not remove
        # it unless it is replaced by another operation that also commits all
        # pending ops, such as session.commit().
        session.flush()
        print(HistoryPCURecord.query.count())

        return True
141
if __name__ == '__main__':
        # Script entry point: build the option parser, parse the command line
        # and run the update for the selected PCUs.
        parser = parsermodule.getParser()
        parser.set_defaults(filename=None, pcu=None, node=None, site=None,
                            pcuselect=False, pcugroup=None, cachepcus=False)
        parser.add_option("", "--pcu", dest="pcu", metavar="hostname",
                          help="Provide a single pcu to operate on")
        parser.add_option("", "--site", dest="site", metavar="sitename",
                          help="Provide a single sitename to operate on")
        parser.add_option("", "--node", dest="node", metavar="nodename",
                          help="Provide a single node to operate on")
        # NOTE(review): --pculist is accepted but never read in the visible
        # part of this file.
        parser.add_option("", "--pculist", dest="pculist", metavar="file.list",
                          help="Provide a list of files to operate on")

        # Rebinds the module-level name `config` (originally the monitor.config
        # module) to the parsed options, which is what main() picks up too.
        config = parsermodule.parse_args(parser)

        try:
                main2(config)
        except Exception:
                import traceback
                # sys.exc_info() instead of "except Exception, err" so the
                # handler parses on every Python version.
                err = sys.exc_info()[1]
                traceback.print_exc()
                print("Exception: %s" % err)
                # Exit non-zero so cron jobs and wrapper scripts can tell the
                # run failed; this used to exit 0, reporting success.
                sys.exit(1)