6 from optparse import OptionParser
# DNS domain appended to the short box names below when building
# fully-qualified host names
domain = 'pl.sophia.inria.fr'
# short names of the boxes probed for build processes
build_boxes = [ "devel", "liquid", "reed", "velvet", ]
# short names of the boxes probed for vservers
plc_boxes = [ "testplc" ]
# the box that runs LocalTestResources.py and holds the tracker files
testmaster = 'testmaster'
testmaster_boxes = [ testmaster ]
# cache the list of qemu boxes in ~/.qemu-boxes
# this can be refreshed by running -c
def cache_file (self):
    """Return the path of the qemu-boxes cache file, with ``~`` expanded."""
    location = "~/.qemu-boxes"
    return os.path.expanduser(location)
def load_cache (self):
    """Load the cached list of qemu boxes and rebuild the list of test boxes.

    When the cache file exists, its whitespace-separated contents replace
    self.qemu_boxes; otherwise self.qemu_boxes is left as it is.
    self.test_boxes is always recomputed as plc boxes followed by qemu boxes.
    """
    cache = self.cache_file()
    if os.path.isfile(cache):
        # close the file explicitly instead of relying on garbage collection
        with open(cache) as stream:
            self.qemu_boxes = stream.read().split()
    self.test_boxes = self.plc_boxes + self.qemu_boxes
# run LocalTestResources on testmaster
def refresh_cache (self):
    """Rebuild the qemu-boxes cache from the output of LocalTestResources.py.

    The script is run on the testmaster box over ssh (its stderr is trashed);
    its output is split on whitespace, the "." + domain suffix is removed from
    each name, and the result is stored in self.qemu_boxes and written, space
    separated on a single line, to the cache file. The new file contents are
    then printed.
    """
    # keep the command output: the names below are parsed from it
    retrieved = self.backquote_ssh(self.fqdn(self.testmaster),
                                   ['LocalTestResources.py'], trash_err=True)
    # use self.domain, the same attribute used to build the fqdn
    suffix = "." + self.domain
    self.qemu_boxes = [name.replace(suffix, "").strip() for name in retrieved.split()]
    cache = self.cache_file()
    with open(cache, 'w') as out:
        out.write(' '.join(self.qemu_boxes) + '\n')
    print("New contents of %s:" % cache)
    # read the file back so what is shown is what actually got written;
    # the file ends with a newline, which print adds back
    with open(cache) as written:
        print(written.read().rstrip('\n'))
43 self.do_tracker_qemus = False
44 self.do_tracker_plcs = False
48 return "%s.%s"%(box,self.domain)
50 ssh_command=['ssh','-o','ConnectTimeout=3']
52 def root (box): return "root@%s"%box
56 return Infrastructure.ssh_command + [ Infrastructure.root(box) ]
def header (self, message):
    """Print message after a row of '=' signs so it stands out in the output."""
    print("%s %s" % ("===============", message))
62 def run (self,argv,message, trash_err=False):
63 if self.options.dry_run:
68 if message: self.header(message)
70 return subprocess.call(argv)
72 return subprocess.call(argv,stderr=file('/dev/null','w'))
def run_ssh (self, box, argv, message, trash_err=False):
    """Run argv on box over ssh and return the status reported by self.run.

    box:       host to reach; it is turned into an ssh command line by self.ssh
    argv:      remote command, as a list of words
    message:   passed through to self.run
    trash_err: passed through to self.run

    A warning is printed only when the status is non-zero.
    """
    result = self.run(self.ssh(box) + argv, message, trash_err)
    # only complain about actual failures
    if result != 0:
        print("WARNING: failed to run %s on %s" % (" ".join(argv), box))
    # hand the status back: handle_disk returns it to its own caller
    return result
def backquote (self, argv, trash_err=False):
    """Run argv locally and return everything it wrote on its standard output.

    argv:      command to run, as a list of words
    trash_err: when true the command's stderr is sent to the null device,
               otherwise the command inherits this process's stderr
    """
    if not trash_err:
        return subprocess.Popen(argv, stdout=subprocess.PIPE).communicate()[0]
    # close the null device once the command is done instead of leaking it
    with open(os.devnull, 'w') as devnull:
        return subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=devnull).communicate()[0]
def backquote_ssh (self, box, argv, trash_err=False):
    """Run argv on box over ssh and return its standard output.

    The ssh link is probed first by running ``hostname`` on box. When the
    probe yields nothing, the box is reported as unreachable and an empty
    string is returned, so callers that split or strip the result still work.
    """
    # first probe the ssh link
    hostname = self.backquote(self.ssh(box) + ["hostname"], trash_err=True)
    if not hostname:
        print("%s unreachable" % self.root(box))
        return ""
    return self.backquote(['ssh', self.root(box)] + argv, trash_err)
def reboot (self, box):
    """Restart box by running ``shutdown -r now`` on it as root over ssh."""
    target = self.root(box)
    banner = "Rebooting %s" % box
    self.run(['ssh', target] + ['shutdown', '-r', 'now'], banner)
99 def handle_starting (self):
100 box = self.fqdn (self.testmaster)
102 if not self.options.probe:
103 command=["rm","-rf",filename]
104 self.run_ssh(box,command,"Cleaning up %s on %s"%(filename,box))
106 read_command = ["cat",filename]
107 self.run_ssh(box,read_command,"++++++++++ Inspecting %s on %s"%(filename,box))
109 def handle_tracker_plcs (self):
110 box = self.fqdn (self.testmaster)
111 filename="tracker-plcs"
112 if not self.options.probe:
113 command=["rm","-rf",filename]
114 self.run_ssh(box,command,"Cleaning up %s on %s"%(filename,box))
116 self.header ("++++++++++ Inspecting %s on %s"%(filename,box))
117 read_command = ["cat",filename]
118 trackers=self.backquote_ssh(box,read_command)
119 for tracker in trackers.split('\n'):
120 if not tracker: continue
122 tracker=tracker.strip()
123 (hostname,buildname,plcname)=tracker.split('@')
124 print self.margin_outline(plcname),tracker
126 print self.margin(""),tracker
128 def handle_tracker_qemus (self):
129 box = self.fqdn (self.testmaster)
130 filename="tracker-qemus"
131 if not self.options.probe:
132 command=["rm","-rf",filename]
133 self.run_ssh(box,command,"Cleaning up %s on %s"%(filename,box))
135 self.header ("++++++++++ Inspecting %s on %s"%(filename,box))
136 read_command = ["cat",filename]
137 trackers=self.backquote_ssh(box,read_command)
138 for tracker in trackers.split('\n'):
139 if not tracker: continue
141 tracker=tracker.strip()
142 [hostname,buildname,nodename]=tracker.split('@')
143 nodename=nodename.split('.')[0]
144 print self.margin_outline(nodename),tracker
146 print self.margin(""),tracker
148 def handle_build_box (self,box):
149 if not self.options.probe:
153 uptime=self.backquote_ssh(box,command,True).strip()
155 command=['pgrep','build']
156 if self.options.dry_run:
157 self.run_ssh(box,command,None)
159 pids=self.backquote_ssh(box,command,True)
161 self.header ('No build process on %s (%s)'%(box,uptime))
163 command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
164 self.run_ssh(box,command,"Active build processes on %s (%s)"%(box,uptime),True)
166 # this one is more accurate as it locates processes in the vservers as well
167 # but it's so sloooowww
168 def handle_build_box_deep (self,box):
169 if not self.options.probe:
173 uptime=self.backquote_ssh(box,command,True).strip()
176 if self.options.dry_run:
177 self.run_ssh(box,command,None)
179 # simulate grep vbuild
180 vps_lines=[ line for line in self.backquote_ssh(box,command,True).split("\n")
181 if line.find('vbuild') >= 0]
182 pids=[ line.split()[0] for line in vps_lines ]
184 self.header ('No build process on %s (%s)'%(box,uptime))
186 command=['vps','-o','pid,command'] + pids
187 self.run_ssh(box,command,"Active build processes on %s (%s)"%(box,uptime),True)
# captures a trailing vplcNN token, e.g. 'vplc03' at the end of a vserver name
vplc_matcher = re.compile(r".*(vplc[0-9]+$)")
def vplcname (self, vservername):
    """Return the vplcNN token that ends vservername.

    The captured text is returned as a plain string (group 1 of the match),
    not as the tuple of all groups.
    """
    match = self.vplc_matcher.match(vservername)
    if match: return match.group(1)
# left-justified column, 14 characters wide, used in front of report lines
margin_format="%-14s"

def margin (self, string):
    """Format string with margin_format (left-justified, padded to 14 columns)."""
    padded = self.margin_format % string
    return padded

def outline (self, string):
    """Wrap string between '== ' and ' =='."""
    framed = '== %s ==' % string
    return framed

def margin_outline (self, string):
    """Outline string, then format the result as a margin."""
    framed = self.outline(string)
    return self.margin(framed)
201 def handle_plc_box (self,box):
202 # initial approach was to first scan vserver-stat, but it's not needed
203 if not self.options.probe:
204 # # remove mark for all running servers to avoid resurrection
206 # bash="; ".join( [ "rm -f /etc/vservers/%s/apps/init/mark"%vs for vs in vserver_names ] )
207 # stop_command=['bash','-c',"'" + bash + "'"]
208 # self.run_ssh(box,stop_command,"Removing mark on running vservers on %s"%box)
209 # just trash all marks
210 stop_command=['rm','-rf','/etc/vservers/*/apps/init/mark']
211 self.run_ssh(box,stop_command,"Removing all vserver marks on %s"%box)
212 if not self.options.soft:
215 self.run_ssh(box,['service','util-vserver','stop'],"Stopping all running vservers")
217 # even for rebooting we need to scan vserver-stat to stop the vservers properly
219 command=['vserver-stat']
220 if self.options.dry_run:
221 self.run_ssh(box,command,"Active vservers on %s"%box)
222 # try to find fullname (vserver_stat truncates to a ridiculously short name)
223 self.header ("vserver map on %s"%box)
224 # fetch the contexts for all vservers on that box
225 map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
226 context_map=self.backquote_ssh (box,map_command)
227 # at this point we have a set of lines like
228 # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
230 for map_line in context_map.split("\n"):
231 if not map_line: continue
232 [path,xid] = map_line.split(':')
233 ctx_dict[xid]=os.path.basename(os.path.dirname(path))
234 # at this point ctx_id maps context id to vservername
236 vserver_stat = self.backquote_ssh (box,command)
237 for vserver_line in vserver_stat.split("\n"):
238 if not vserver_line: continue
239 context=vserver_line.split()[0]
241 print self.margin(""),vserver_line
243 longname=ctx_dict[context]
244 vserver_names.append(longname)
245 print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
# captures a vnodeNN token anywhere in a line of ps output
vnode_matcher = re.compile(r".*(vnode[0-9]+)")
def vnodename (self, ps_line):
    """Return the vnodeNN token found in ps_line.

    The captured text is returned as a plain string (group 1 of the match),
    not as the tuple of all groups. Because the leading ``.*`` is greedy, the
    last vnodeNN occurrence on the line is the one captured.
    """
    match = self.vnode_matcher.match(ps_line)
    if match: return match.group(1)
253 def handle_qemu_box (self,box):
254 if not self.options.probe:
255 if not self.options.soft:
258 self.run_ssh(box,['pkill','qemu'],"Killing qemu instances")
261 modules=self.backquote_ssh(box,command).split('\n')
262 kqemu_msg='*NO kqemu/kmv_intel MODULE LOADED*'
263 for module in modules:
264 if module.find('kqemu')==0:
265 kqemu_msg='kqemu module loaded'
# kvm might be loaded without kvm_intel (we don't have AMD)
267 elif module.find('kvm_intel')==0:
268 kqemu_msg='kvm_intel module loaded'
270 command=['pgrep','qemu']
271 if self.options.dry_run:
272 self.run_ssh(box,command,None)
274 pids=self.backquote_ssh(box,command)
276 self.header ('No qemu process on %s (%s)'%(box,kqemu_msg))
278 self.header ("Active qemu processes on %s (%s)"%(box,kqemu_msg))
279 command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
280 ps_lines = self.backquote_ssh (box,command).split("\n")
281 for ps_line in ps_lines:
282 if not ps_line or ps_line.find('PID') >=0 : continue
283 print self.margin_outline(self.vnodename(ps_line)), ps_line
# the output of ps -o pid,command gives us <pid> bash <buildname>/run_log
286 def testmaster_buildname (self, ps_line):
287 chunks=ps_line.split()
289 [buildname,command]=path.split('/')
292 def handle_testmaster_box (self, box):
293 if not self.options.probe:
296 command=['pgrep','run_log']
297 if self.options.dry_run:
298 self.run_ssh(box,command,None)
300 pids=self.backquote_ssh(box,command)
302 self.header ('No run_log process on %s'%box)
304 self.header ("Active run_log processes on %s"%(box))
305 command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
306 ps_lines = self.backquote_ssh (box,command).split("\n")
307 for ps_line in ps_lines:
308 if not ps_line or ps_line.find('PID') >=0 : continue
309 print self.margin_outline(self.testmaster_buildname(ps_line)), ps_line
312 def handle_box(self,box,type):
313 if box in self.qemu_boxes:
314 if type=="qemu": self.handle_qemu_box(self.fqdn(box))
315 elif box in self.plc_boxes:
316 if type=="plc": self.handle_plc_box(self.fqdn(box))
317 elif box in self.testmaster_boxes:
318 if type=='testmaster': self.handle_testmaster_box(self.fqdn(box))
320 if self.options.deep:
321 self.handle_build_box_deep(self.fqdn(box))
323 self.handle_build_box(self.fqdn(box))
def handle_disk (self, box):
    """Show the disk usage of box by running ``df -h`` on it; returns run_ssh's result."""
    df_command = ["df", "-h"]
    title = "Disk space on %s" % box
    return self.run_ssh(box, df_command, title)
330 usage="""%prog [options] [hostname..(s)]
331 Default is to act on test boxes only"""
332 parser = OptionParser (usage=usage)
333 parser.add_option ("-n","--dry-run",action="store_true",dest="dry_run",default=False,
335 parser.add_option ("-r","--reboot", action="store_false",dest="probe",default=True,
336 help="Actually reset/reboot stuff instead of just probing it")
337 parser.add_option ("-s","--soft",action="store_true",dest="soft",default=False,
338 help="Soft reset instead of hard reboot of the boxes")
339 # no need for -p = probe, as this is the default
340 parser.add_option ("-p","--plc", action="store_true",dest="plc_only",default=False,
341 help="Acts on the plc box only")
343 parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False,
344 help="on build boxes, shows vbuild processes in vservers as well; signif. slower")
346 parser.add_option ("-a","--all",action="store_true",dest="all_boxes",default=False,
347 help="Acts on build and test boxes")
348 parser.add_option ("-b","--build",action="store_true",dest="build_only",default=False,
349 help="Acts on build boxes only")
350 parser.add_option ("-q","--qemu",action="store_true",dest="qemu_only",default=False,
351 help="Only acts on the qemu boxes")
352 parser.add_option ("-t","--trackers",action="store_true",dest="trackers_only",default=False,
353 help="Only wipes trackers")
354 parser.add_option ("-m","--master",action="store_true",dest="testmaster_only",default=False,
355 help="Display the testmaster status")
356 parser.add_option ("-d","--disk",action="store_true",dest="show_disk",default=False,
357 help="Only inspects disk status")
358 parser.add_option ("-c","--refresh-cache",action="store_true",dest="refresh_cache", default=False,
359 help="Refresh cached list of qemu boxes at testmaster - implies -q")
361 (self.options,args) = parser.parse_args()
364 if self.options.refresh_cache:
365 self.options.qemu_only=True
368 # use given hostnames if provided
371 # if hostnames are specified, let's stay on the safe side and don't reset trackers
372 self.do_tracker_plcs = False
373 self.do_tracker_qemus = False
374 elif self.options.all_boxes:
375 self.boxes=self.test_boxes + self.build_boxes + self.testmaster_boxes
376 self.do_tracker_plcs = True
377 self.do_tracker_qemus = True
378 elif self.options.build_only:
379 self.boxes=self.build_boxes
380 self.do_tracker_plcs = False
381 self.do_tracker_qemus = False
382 elif self.options.qemu_only:
383 self.boxes=self.qemu_boxes
384 self.do_tracker_plcs = False
385 self.do_tracker_qemus = True
386 elif self.options.plc_only:
387 self.boxes=self.plc_boxes
388 self.do_tracker_plcs = True
389 self.do_tracker_qemus = False
390 elif self.options.testmaster_only:
391 self.boxes=self.testmaster_boxes
392 self.do_tracker_plcs = False
393 self.do_tracker_qemus = False
394 elif self.options.trackers_only:
396 self.do_tracker_plcs = True
397 self.do_tracker_qemus = True
400 self.boxes = self.test_boxes
401 self.do_tracker_plcs = True
402 self.do_tracker_qemus = True
404 if self.options.show_disk:
405 for box in self.boxes: self.handle_disk(box)
409 if self.do_tracker_plcs:
410 self.handle_tracker_plcs ()
411 self.handle_starting ()
412 for box in self.boxes: self.handle_box (box,"plc")
414 if self.do_tracker_qemus:self.handle_tracker_qemus ()
415 for box in self.boxes: self.handle_box (box,"qemu")
417 for box in self.boxes: self.handle_box (box,"build")
419 for box in self.boxes: self.handle_box (box,"testmaster")
# when run as a script: build an Infrastructure and hand control to its main()
if __name__ == "__main__":
    Infrastructure().main()