f9983ece8f6c5f826bbe4a032458863a9ce726bd
[infrastructure.git] / scripts / manage-infrastructure.py
1 #!/usr/bin/python
2
3 import os.path
4 import re
5 import subprocess
6 from optparse import OptionParser
7
class BuildBoxes:
    """Probe or reboot the build and test boxes of the onelab.eu testbed.

    Every remote action is performed through ``ssh root@<box> ...``.
    By default boxes are only probed; with ``-r/--reboot`` they are
    rebooted and the ``tracker*`` paths on the test master are removed.

    ``self.options`` is set by :meth:`main`; the ``handle_*`` methods,
    :meth:`run` and friends rely on it being present.
    """

    # everything in the onelab.eu domain
    domain = 'onelab.eu'
    testmaster = 'testmaster'
    build_boxes = ["mirror", "liquid", "reed", "velvet"]
    plc_boxes = ["testplc"]
    qemu_boxes = \
        ["testqemu%d" % i for i in range(1, 4)] + \
        ["testqemu32-%d" % i for i in range(1, 6)]
    test_boxes = plc_boxes + qemu_boxes

    # a vserver name that ends in vplc<digits>
    vplc_matcher = re.compile(".*(vplc[0-9]+$)")
    # a ps line that mentions vnode<digits>
    vnode_matcher = re.compile(".*(vnode[0-9]+)")

    def __init__(self):
        # dummy defaults, overwritten by main()
        self.boxes = []
        self.do_tracker = False

    def fqdn(self, box):
        """Return *box* qualified with the class domain.

        A name that already ends in ``.<domain>`` is returned unchanged, so
        that fully qualified names do not get the domain appended twice.
        """
        suffix = "." + self.domain
        if box.endswith(suffix):
            return box
        return "%s%s" % (box, suffix)

    def _shortname(self, box):
        """Return *box* without a trailing ``.<domain>``, if it has one."""
        suffix = "." + self.domain
        if box.endswith(suffix):
            return box[:-len(suffix)]
        return box

    @staticmethod
    def root(box):
        """Return the ssh target for logging into *box* as root."""
        return "root@%s" % box

    def header(self, message):
        """Print *message* behind a banner of equal signs."""
        print("=============== %s" % message)

    def run(self, argv, message, trash_err=False):
        """Run *argv*, or only print it when in dry-run mode.

        argv      -- the command, as a list of words
        message   -- printed with header() before a real run; skipped if empty
        trash_err -- when true, the command's stderr is sent to the null device
        """
        if self.options.dry_run:
            print("DRY_RUN: " + " ".join(argv))
            return
        if message:
            self.header(message)
        if not trash_err:
            subprocess.call(argv)
        else:
            # the context manager closes the null device once the command is done
            with open(os.devnull, 'w') as null:
                subprocess.call(argv, stderr=null)

    def backquote(self, argv, trash_err=False):
        """Run *argv* and return what it wrote on stdout, as text.

        This always runs the command, even in dry-run mode; callers are
        expected to check the dry-run option themselves.
        When *trash_err* is true, stderr is sent to the null device.
        """
        if not trash_err:
            return self._stdout_of(argv, None)
        with open(os.devnull, 'w') as null:
            return self._stdout_of(argv, null)

    @staticmethod
    def _stdout_of(argv, stderr):
        """Spawn *argv* with the given stderr and return its stdout as text."""
        # universal_newlines makes Popen hand back text rather than bytes
        process = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=stderr,
                                   universal_newlines=True)
        return process.communicate()[0]

    def reboot(self, box):
        """Reboot *box* through ssh (honours dry-run via run())."""
        command = ['ssh', self.root(box), 'shutdown', '-r', 'now']
        self.run(command, "Rebooting %s" % box)

    def handle_trackers(self):
        """Show (probe mode) or remove (reboot mode) tracker* on the test master."""
        box = self.fqdn(self.testmaster)
        if self.options.probe:
            command = ['ssh', self.root(box), "head", "-100", "tracker*"]
            self.run(command, "Inspecting trackers on %s" % box)
        else:
            command = ['ssh', self.root(box), "rm", "-rf", "tracker*"]
            self.run(command, "Cleaning up trackers on %s" % box)

    def _ps_command(self, box, pids):
        """Return the ssh command that lists the processes named in *pids*.

        *pids* is the raw output of pgrep, one pid per line.
        """
        return ['ssh', self.root(box), 'ps', '-o', 'pid,command'] + pids.split()

    def handle_build_box(self, box):
        """Reboot build box *box*, or in probe mode list its build processes."""
        if not self.options.probe:
            self.reboot(box)
            return
        uptime_command = ['ssh', self.root(box), 'uptime']
        pgrep_command = ['ssh', self.root(box), 'pgrep', 'build']
        if self.options.dry_run:
            # a dry run must not touch the box: only show what would be run
            self.run(uptime_command, None)
            self.run(pgrep_command, None)
            return
        uptime = self.backquote(uptime_command, True).strip()
        pids = self.backquote(pgrep_command, True)
        if not pids:
            self.header('No build process on %s (%s)' % (box, uptime))
        else:
            self.run(self._ps_command(box, pids),
                     "Active build processes on %s (%s)" % (box, uptime), True)

    def vplcname(self, vservername):
        """Return the trailing ``vplc<digits>`` of *vservername*, or ""."""
        match = self.vplc_matcher.match(vservername)
        if match:
            return match.group(1)
        return ""

    def _vserver_report(self, box, command):
        """Return the lines of vserver-stat on *box*, annotated with full names.

        *command* is the ssh command that runs vserver-stat.  Any failure
        (unexpected line format, unknown context id, ...) raises, so that the
        caller can fall back to the plain output.
        """
        # fetch the contexts for all vservers on that box
        map_command = ['ssh', self.root(box), 'grep', '.',
                       '/etc/vservers/*/context', '/dev/null']
        context_map = self.backquote(map_command)
        # at this point we have a set of lines like
        # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
        ctx_dict = {}
        for map_line in context_map.split("\n"):
            if not map_line:
                continue
            # split on the last colon only, the context id comes last
            path, xid = map_line.rsplit(':', 1)
            ctx_dict[xid.strip()] = os.path.basename(os.path.dirname(path))
        # at this point ctx_dict maps context id to vservername

        report = []
        vserver_stat = self.backquote(command)
        for vserver_line in vserver_stat.split("\n"):
            if not vserver_line.strip():
                continue
            context = vserver_line.split()[0]
            if context == "CTX":
                # column titles: keep as is
                report.append(vserver_line)
                continue
            longname = ctx_dict[context]
            plcname = self.vplcname(longname)
            if plcname:
                report.append("== %s ==" % plcname)
            report.append("%s [=%s]" % (vserver_line, longname))
        return report

    def handle_plc_box(self, box):
        """Reboot plc box *box*, or in probe mode list its vservers."""
        if not self.options.probe:
            self.reboot(box)
            return
        command = ['ssh', self.root(box), 'vserver-stat']
        if self.options.dry_run:
            self.run(command, "Active vservers on %s" % box)
            return
        # try to find fullname (vserver_stat truncates to a ridiculously short name)
        self.header("vserver map on %s" % box)
        try:
            report = self._vserver_report(box, command)
        except Exception:
            # Exception rather than a bare except, so that Ctrl-C still interrupts
            self.run(command, "Fine-grained method failed - fallback to plain vserver-stat")
        else:
            # printed only once complete, so a failure leaves no partial listing
            for line in report:
                print(line)

    def vnodename(self, ps_line):
        """Return the ``vnode<digits>`` found in *ps_line*, or ""."""
        match = self.vnode_matcher.match(ps_line)
        if match:
            return match.group(1)
        return ""

    def handle_qemu_box(self, box):
        """Reboot qemu box *box*, or in probe mode list its qemu processes."""
        if not self.options.probe:
            self.reboot(box)
            return
        command = ['ssh', self.root(box), 'pgrep', 'qemu']
        if self.options.dry_run:
            self.run(command, None)
            return
        pids = self.backquote(command)
        if not pids:
            self.header('No qemu process on %s' % box)
            return
        self.header("Active qemu processes on %s" % box)
        ps_lines = self.backquote(self._ps_command(box, pids)).split("\n")
        for ps_line in ps_lines:
            # skip empty lines and the ps column titles
            if not ps_line or ps_line.find('PID') >= 0:
                continue
            node = self.vnodename(ps_line)
            if node:
                print("== %s ==" % node)
            print(ps_line)

    def handle_box(self, box):
        """Dispatch *box* to the qemu, plc or build handler.

        *box* may be a short name or a name qualified with the class domain;
        anything that is neither a qemu nor a plc box is handled as a build box.
        """
        name = self._shortname(box)
        if name in self.qemu_boxes:
            self.handle_qemu_box(self.fqdn(box))
        elif name in self.plc_boxes:
            self.handle_plc_box(self.fqdn(box))
        else:
            self.handle_build_box(self.fqdn(box))

    def main(self, argv=None):
        """Parse the command line and act on the selected boxes.

        argv -- list of arguments to parse; when None, optparse falls back
                to the arguments of the running script.
        """
        usage = """%prog [options] [hostname..(s)]
Default is to act on test boxes only (with trackers clean)"""
        parser = OptionParser(usage=usage)
        parser.add_option("-n", "--dry-run", action="store_true", dest="dry_run", default=False,
                          help="Dry run")
        parser.add_option("-r", "--reboot", action="store_false", dest="probe", default=True,
                          help="Actually reset/reboot stuff instead of just probing it")
        # no need for -p = probe, as this is the default
        parser.add_option("-p", "--plc", action="store_true", dest="plc_only", default=False,
                          help="Acts on the plc box only")

        parser.add_option("-a", "--all", action="store_true", dest="all_boxes", default=False,
                          help="Acts on build and test boxes")
        parser.add_option("-b", "--build", action="store_true", dest="build_only", default=False,
                          help="Acts on build boxes only")
        parser.add_option("-q", "--qemu", action="store_true", dest="qemu_only", default=False,
                          help="Only acts on the qemu boxes")
        parser.add_option("-t", "--trackers", action="store_true", dest="trackers_only", default=False,
                          help="Only wipes trackers")

        (self.options, args) = parser.parse_args(argv)

        # use given hostnames if provided
        if args:
            self.boxes = args
            # if hostnames are specified, let's stay on the safe side and don't reset trackers
            self.do_tracker = False
        elif self.options.all_boxes:
            self.boxes = self.test_boxes + self.build_boxes
            self.do_tracker = True
        elif self.options.build_only:
            self.boxes = self.build_boxes
            self.do_tracker = False
        elif self.options.qemu_only:
            self.boxes = self.qemu_boxes
            self.do_tracker = False
        elif self.options.plc_only:
            self.boxes = self.plc_boxes
            self.do_tracker = False
        elif self.options.trackers_only:
            self.boxes = []
            self.do_tracker = True
        # default
        else:
            self.boxes = self.test_boxes
            self.do_tracker = True

        if self.do_tracker:
            self.handle_trackers()
        for box in self.boxes:
            self.handle_box(box)
215
216
if __name__ == "__main__":
    # script entry point: build the manager, then let it parse the
    # command line and act on the selected boxes
    manager = BuildBoxes()
    manager.main()