can more visually check tracker consistency
[infrastructure.git] / scripts / manage-infrastructure.py
1 #!/usr/bin/python
2
3 import os.path
4 import re
5 import subprocess
6 from optparse import OptionParser
7
class BuildBoxes:
    """Probe or reset the build and test boxes through ssh.

    By default the boxes are only probed: build processes and uptime on
    build boxes, the vserver list on plc boxes, the qemu processes on qemu
    boxes, and the contents of the tracker files kept on the testmaster.
    With --reboot the selected boxes are rebooted and the selected tracker
    files are removed instead.  With --dry-run the commands are printed
    rather than executed (tracker inspection is the exception: it always
    reads the file).

    The code sticks to constructs that behave the same under Python 2 and
    Python 3 (single-argument print calls, text-mode subprocess output).
    """

    # everything in the onelab.eu domain
    domain = 'onelab.eu'
    testmaster = 'testmaster'
    build_boxes = ["mirror", "liquid", "reed", "velvet"]
    plc_boxes = ["testplc"]
    qemu_boxes = \
        ["testqemu%d" % i for i in range(1, 4)] + \
        ["testqemu32-%d" % i for i in range(1, 6)]
    test_boxes = plc_boxes + qemu_boxes

    def __init__(self):
        # dummy defaults; main() overwrites them from the command line,
        # and also sets self.options, which most methods rely on
        self.boxes = []
        self.do_tracker_qemus = False
        self.do_tracker_plcs = False

    def fqdn(self, box):
        """Return the short hostname `box` qualified with the class domain."""
        return "%s.%s" % (box, self.domain)

    @staticmethod
    def root(box):
        """Return the ssh target for logging into `box` as root."""
        return "root@%s" % box

    def header(self, message):
        """Print `message` behind a separator so sections stand out."""
        print("=============== %s" % (message,))

    def run(self, argv, message, trash_err=False):
        """Run `argv`, or only print it when --dry-run is set.

        message   -- header printed before the command runs; skipped when
                     empty/None and never printed in dry-run mode
        trash_err -- when true, the command's stderr goes to the null device
        """
        if self.options.dry_run:
            print("DRY_RUN: %s" % " ".join(argv))
            return
        if message:
            self.header(message)
        if not trash_err:
            subprocess.call(argv)
            return
        null = open(os.devnull, 'w')
        try:
            subprocess.call(argv, stderr=null)
        finally:
            # always release the descriptor, even if the command cannot start
            null.close()

    def backquote(self, argv, trash_err=False):
        """Run `argv` and return its standard output as text.

        This ignores --dry-run: the command is always executed.
        trash_err -- when true, the command's stderr goes to the null device
        """
        if not trash_err:
            # universal_newlines makes the output text on Python 3 as well
            process = subprocess.Popen(argv, stdout=subprocess.PIPE,
                                       universal_newlines=True)
            return process.communicate()[0]
        null = open(os.devnull, 'w')
        try:
            process = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=null,
                                       universal_newlines=True)
            return process.communicate()[0]
        finally:
            null.close()

    def reboot(self, box):
        """Reboot `box` (a fully qualified hostname) through ssh."""
        command = ['ssh', self.root(box), 'shutdown', '-r', 'now']
        self.run(command, "Rebooting %s" % box)

    @staticmethod
    def _plc_of_tracker(tracker):
        """Return the plc name from a 'hostname@buildname' tracker line.

        The plc name is whatever follows the last '-' of buildname.
        Raises ValueError when the line does not have that shape.
        """
        _hostname, buildname = tracker.split('@')
        _left, plcname = buildname.rsplit('-', 1)
        return plcname

    @staticmethod
    def _node_of_tracker(tracker):
        """Return the short node name from a 'hostname@buildname@nodename' line.

        Raises ValueError when the line does not have exactly three fields.
        """
        _hostname, _buildname, nodename = tracker.split('@')
        return nodename.split('.')[0]

    def _handle_tracker(self, filename, label_of):
        """Remove tracker file `filename` on the testmaster, or list it when probing.

        label_of -- callable mapping one tracker line to the name shown in
                    the margin; it signals an unparsable line with ValueError
        """
        box = self.fqdn(self.testmaster)
        if not self.options.probe:
            command = ['ssh', self.root(box), "rm", "-rf", filename]
            self.run(command, "Cleaning up %s on %s" % (filename, box))
            return
        self.header("++++++++++ Inspecting %s on %s" % (filename, box))
        read_command = ['ssh', self.root(box), "cat", filename]
        trackers = self.backquote(read_command)
        for tracker in trackers.split('\n'):
            if not tracker:
                continue
            tracker = tracker.strip()
            try:
                label = self.margin_outline(label_of(tracker))
            except ValueError:
                # unparsable line: still show it, with an empty margin
                label = self.margin("")
            print("%s %s" % (label, tracker))

    def handle_tracker_plcs(self):
        """Clean up or inspect the 'tracker-plcs' file on the testmaster."""
        self._handle_tracker("tracker-plcs", self._plc_of_tracker)

    def handle_tracker_qemus(self):
        """Clean up or inspect the 'tracker-qemus' file on the testmaster."""
        self._handle_tracker("tracker-qemus", self._node_of_tracker)

    def handle_build_box(self, box):
        """Reboot build box `box`, or when probing report its build processes."""
        if not self.options.probe:
            self.reboot(box)
            return
        uptime_command = ['ssh', self.root(box), 'uptime']
        pgrep_command = ['ssh', self.root(box), 'pgrep', 'build']
        if self.options.dry_run:
            # a dry run must not contact the box: just show both commands
            self.run(uptime_command, None)
            self.run(pgrep_command, None)
            return
        uptime = self.backquote(uptime_command, True).strip()
        pids = self.backquote(pgrep_command, True)
        if not pids:
            self.header('No build process on %s (%s)' % (box, uptime))
            return
        command = ['ssh', self.root(box), 'ps', '-o', 'pid,command'] + \
            [pid for pid in pids.split("\n") if pid]
        self.run(command, "Active build processes on %s (%s)" % (box, uptime), True)

    vplc_matcher = re.compile(".*(vplc[0-9]+$)")

    def vplcname(self, vservername):
        """Return the trailing 'vplcNN' part of `vservername`, or "" if absent."""
        match = self.vplc_matcher.match(vservername)
        # group(1) is the captured string; groups() would be a 1-tuple
        if match:
            return match.group(1)
        return ""

    margin_format = "%-14s"

    def margin(self, string):
        """Return `string` left-justified in the fixed-width margin column."""
        return self.margin_format % string

    def outline(self, string):
        """Return `string` framed as '== string =='."""
        return '== %s ==' % string

    def margin_outline(self, string):
        """Return the outlined `string`, padded to the margin width."""
        return self.margin(self.outline(string))

    def handle_plc_box(self, box):
        """Reboot plc box `box`, or when probing list its vservers.

        The listing is the output of vserver-stat, each line tagged with
        the vplc name and the full vserver name found from the context
        files under /etc/vservers.
        """
        if not self.options.probe:
            self.reboot(box)
            return
        command = ['ssh', self.root(box), 'vserver-stat']
        if self.options.dry_run:
            self.run(command, "Active vservers on %s" % box)
            return
        # try to find fullname (vserver_stat truncates to a ridiculously short name)
        self.header("vserver map on %s" % box)
        try:
            # fetch the contexts for all vservers on that box
            map_command = ['ssh', self.root(box), 'grep', '.',
                           '/etc/vservers/*/context', '/dev/null']
            context_map = self.backquote(map_command)
            # at this point we have a set of lines like
            # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
            ctx_dict = {}
            for map_line in context_map.split("\n"):
                if not map_line:
                    continue
                path, xid = map_line.split(':')
                ctx_dict[xid] = os.path.basename(os.path.dirname(path))
            # at this point ctx_dict maps context id to vservername

            # build the whole report first, so that a failure half-way
            # does not leave partial output in front of the fallback
            report = []
            vserver_stat = self.backquote(command)
            for vserver_line in vserver_stat.split("\n"):
                if not vserver_line:
                    continue
                context = vserver_line.split()[0]
                if context == "CTX":
                    # column titles line
                    report.append("%s %s" % (self.margin(""), vserver_line))
                    continue
                longname = ctx_dict[context]
                report.append("%s %s [=%s]" % (
                    self.margin_outline(self.vplcname(longname)),
                    vserver_line, longname))
        except (ValueError, KeyError, IndexError):
            # unexpected line shape or unknown context id
            self.run(command, "Fine-grained method failed - fallback to plain vserver-stat")
            return
        for report_line in report:
            print(report_line)

    vnode_matcher = re.compile(".*(vnode[0-9]+)")

    def vnodename(self, ps_line):
        """Return the last 'vnodeNN' found in `ps_line`, or "" if there is none."""
        match = self.vnode_matcher.match(ps_line)
        # group(1) is the captured string; groups() would be a 1-tuple
        if match:
            return match.group(1)
        return ""

    def handle_qemu_box(self, box):
        """Reboot qemu box `box`, or when probing list its qemu processes."""
        if not self.options.probe:
            self.reboot(box)
            return
        command = ['ssh', self.root(box), 'pgrep', 'qemu']
        if self.options.dry_run:
            self.run(command, None)
            return
        pids = self.backquote(command)
        if not pids:
            self.header('No qemu process on %s' % box)
            return
        self.header("Active qemu processes on %s" % box)
        command = ['ssh', self.root(box), 'ps', '-o', 'pid,command'] + \
            [pid for pid in pids.split("\n") if pid]
        for ps_line in self.backquote(command).split("\n"):
            # skip blank lines and the ps column titles
            if not ps_line or ps_line.find('PID') >= 0:
                continue
            print("%s %s" % (self.margin_outline(self.vnodename(ps_line)), ps_line))

    def handle_box(self, box, type):
        """Handle `box` (short hostname) if it belongs to category `type`.

        type is one of "build", "plc" or "qemu".  A box listed in neither
        qemu_boxes nor plc_boxes is treated as a build box.
        """
        if box in self.qemu_boxes:
            if type == "qemu":
                self.handle_qemu_box(self.fqdn(box))
        elif box in self.plc_boxes:
            if type == "plc":
                self.handle_plc_box(self.fqdn(box))
        elif type == "build":
            self.handle_build_box(self.fqdn(box))

    def main(self):
        """Parse the command line, select boxes and trackers, and handle them."""
        usage = """%prog [options] [hostname..(s)]
Default is to act on test boxes only (with trackers clean)"""
        parser = OptionParser(usage=usage)
        parser.add_option("-n", "--dry-run", action="store_true", dest="dry_run", default=False,
                          help="Dry run")
        parser.add_option("-r", "--reboot", action="store_false", dest="probe", default=True,
                          help="Actually reset/reboot stuff instead of just probing it")
        # no need for -p = probe, as this is the default
        parser.add_option("-p", "--plc", action="store_true", dest="plc_only", default=False,
                          help="Acts on the plc box only")

        parser.add_option("-a", "--all", action="store_true", dest="all_boxes", default=False,
                          help="Acts on build and test boxes")
        parser.add_option("-b", "--build", action="store_true", dest="build_only", default=False,
                          help="Acts on build boxes only")
        parser.add_option("-q", "--qemu", action="store_true", dest="qemu_only", default=False,
                          help="Only acts on the qemu boxes")
        parser.add_option("-t", "--trackers", action="store_true", dest="trackers_only", default=False,
                          help="Only wipes trackers")

        (self.options, args) = parser.parse_args()

        # use given hostnames if provided
        if args:
            self.boxes = args
            # if hostnames are specified, let's stay on the safe side and don't reset trackers
            self.do_tracker_plcs = False
            self.do_tracker_qemus = False
        elif self.options.all_boxes:
            self.boxes = self.test_boxes + self.build_boxes
            self.do_tracker_plcs = True
            self.do_tracker_qemus = True
        elif self.options.build_only:
            self.boxes = self.build_boxes
            self.do_tracker_plcs = False
            self.do_tracker_qemus = False
        elif self.options.qemu_only:
            self.boxes = self.qemu_boxes
            self.do_tracker_plcs = False
            self.do_tracker_qemus = True
        elif self.options.plc_only:
            self.boxes = self.plc_boxes
            self.do_tracker_plcs = True
            self.do_tracker_qemus = False
        elif self.options.trackers_only:
            self.boxes = []
            self.do_tracker_plcs = True
            self.do_tracker_qemus = True
        # default
        else:
            self.boxes = self.test_boxes
            self.do_tracker_plcs = True
            self.do_tracker_qemus = True

        # ALL OTHERS
        for box in self.boxes:
            self.handle_box(box, "build")
        # PLCS
        if self.do_tracker_plcs:
            self.handle_tracker_plcs()
        for box in self.boxes:
            self.handle_box(box, "plc")
        # QEMU
        if self.do_tracker_qemus:
            self.handle_tracker_qemus()
        for box in self.boxes:
            self.handle_box(box, "qemu")
259
if __name__ == "__main__":
    # script entry point: build the manager and let it parse the command line
    manager = BuildBoxes()
    manager.main()