X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=scripts%2Fmanage-infrastructure.py;h=c6251d686cdcfd64dfc431b4840c85568135035c;hb=8b801f9d78d78b7df690695d36c32be7d359498e;hp=3312d929c3260605b7c1344bdc53ff2c2c1fa5fb;hpb=e388c6a9e97ef537c76cc3a3d537358a1f949b92;p=infrastructure.git diff --git a/scripts/manage-infrastructure.py b/scripts/manage-infrastructure.py index 3312d92..c6251d6 100755 --- a/scripts/manage-infrastructure.py +++ b/scripts/manage-infrastructure.py @@ -9,21 +9,40 @@ class BuildBoxes: # everything in the onelab.eu domain domain = 'pl.sophia.inria.fr' - testmaster = 'testmaster' - build_boxes = [ "mirror", "liquid", "reed", "velvet", ] + build_boxes = [ "devel", "liquid", "reed", "velvet", ] plc_boxes = [ "testplc" ] - # qemu32-5 is officially dead - qemu_boxes = \ - [ "qemu64-%d"%i for i in range (1,4) ] + \ - [ "qemu32-%d"%i for i in range (1,5) ] - test_boxes = plc_boxes + qemu_boxes + testmaster = 'testmaster' testmaster_boxes = [ testmaster ] + # cache the list of qemu boxes in ~/.qemu-boxes + # this can be refreshed by running -c + qemu_boxes=[] + + def cache_file (self): return os.path.expanduser("~/.qemu-boxes") + + def load_cache (self): + cache=self.cache_file() + if os.path.isfile(cache): + self.qemu_boxes=file(cache).read().split() + self.test_boxes = self.plc_boxes + self.qemu_boxes + + # run LocalTestResources on testmaster + def refresh_cache (self): + retrieved= \ + self.backquote_ssh(self.fqdn(self.testmaster),['LocalTestResources.py'],trash_err=True) + remove="."+BuildBoxes.domain + retrieved = [ x.replace(remove,"").strip() for x in retrieved.split()] + self.qemu_boxes = retrieved + cache=self.cache_file() + file(cache,'w').write(' '.join(self.qemu_boxes)+'\n') + print "New contents of %s:"%cache + print file(cache).read(), def __init__ (self): # dummy defaults self.boxes = [] self.do_tracker_qemus = False self.do_tracker_plcs = False + self.load_cache() def fqdn (self, box): return "%s.%s"%(box,self.domain) @@ -171,39 +190,50 @@ class BuildBoxes: def margin_outline (self, string): return self.margin(self.outline(string)) def handle_plc_box (self,box): +# initial approach was to first scan vserver-stat, but it's not needed if not self.options.probe: - self.reboot(box) - else: - command=['vserver-stat'] - if self.options.dry_run: - self.run_ssh(box,command,"Active vservers on %s"%box) +# # remove mark for all running servers to avoid resurrection +# if vserver_names: +# bash="; ".join( [ "rm -f /etc/vservers/%s/apps/init/mark"%vs for vs in vserver_names ] ) +# stop_command=['bash','-c',"'" + bash + "'"] +# self.run_ssh(box,stop_command,"Removing mark on running vservers on %s"%box) + # just trash all marks + stop_command=['rm','-rf','/etc/vservers/*/apps/init/mark'] + self.run_ssh(box,stop_command,"Removing all vserver marks on %s"%box) + if not self.options.soft: + self.reboot(box) else: - # try to find fullname (vserver_stat truncates to a ridiculously short name) - try: - self.header ("vserver map on %s"%box) - # fetch the contexts for all vservers on that box - map_command=['grep','.','/etc/vservers/*/context','/dev/null',] - context_map=self.backquote_ssh (box,map_command) - # at this point we have a set of lines like - # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144 - ctx_dict={} - for map_line in context_map.split("\n"): - if not map_line: continue - [path,xid] = map_line.split(':') - ctx_dict[xid]=os.path.basename(os.path.dirname(path)) - # at this point ctx_id maps context id to vservername - - vserver_stat = self.backquote_ssh (box,command) - for vserver_line in vserver_stat.split("\n"): - if not vserver_line: continue - context=vserver_line.split()[0] - if context=="CTX": - print self.margin(""),vserver_line - continue - longname=ctx_dict[context] - print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals() - except: - self.run_ssh(box,command,"Fine-grained method failed - fallback to plain vserver-stat") + self.run_ssh(box,['service','util-vserver','stop'],"Stopping all running vservers") + return + # even for rebooting we need to scan vserver-stat to stop the vservers properly + vserver_names=[] + command=['vserver-stat'] + if self.options.dry_run: + self.run_ssh(box,command,"Active vservers on %s"%box) + # try to find fullname (vserver_stat truncates to a ridiculously short name) + self.header ("vserver map on %s"%box) + # fetch the contexts for all vservers on that box + map_command=['grep','.','/etc/vservers/*/context','/dev/null',] + context_map=self.backquote_ssh (box,map_command) + # at this point we have a set of lines like + # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144 + ctx_dict={} + for map_line in context_map.split("\n"): + if not map_line: continue + [path,xid] = map_line.split(':') + ctx_dict[xid]=os.path.basename(os.path.dirname(path)) + # at this point ctx_id maps context id to vservername + + vserver_stat = self.backquote_ssh (box,command) + for vserver_line in vserver_stat.split("\n"): + if not vserver_line: continue + context=vserver_line.split()[0] + if context=="CTX": + print self.margin(""),vserver_line + continue + longname=ctx_dict[context] + vserver_names.append(longname) + print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals() vnode_matcher = re.compile(".*(vnode[0-9]+)") def vnodename (self, ps_line): @@ -213,14 +243,20 @@ class BuildBoxes: def handle_qemu_box (self,box): if not self.options.probe: - self.reboot(box) + if not self.options.soft: + self.reboot(box) + else: + self.run_ssh(box,['pkill','qemu'],"Killing qemu instances") else: command=['lsmod'] modules=self.backquote_ssh(box,command).split('\n') - kqemu_msg='*NO kqemu MODULE LOADED*' + kqemu_msg='*NO kqemu/kmv_intel MODULE LOADED*' for module in modules: if module.find('kqemu')==0: - kqemu_msg='kqemu OK' + kqemu_msg='kqemu module loaded' + # kvm might be loaded without vkm_intel (we dont have AMD) + elif module.find('kvm_intel')==0: + kqemu_msg='kvm_intel module loaded' command=['pgrep','qemu'] if self.options.dry_run: @@ -228,7 +264,7 @@ class BuildBoxes: else: pids=self.backquote_ssh(box,command) if not pids: - self.header ('No qemu process on %s'%box) + self.header ('No qemu process on %s (%s)'%(box,kqemu_msg)) else: self.header ("Active qemu processes on %s (%s)"%(box,kqemu_msg)) command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] @@ -289,16 +325,19 @@ Default is to act on test boxes only""" help="Dry run") parser.add_option ("-r","--reboot", action="store_false",dest="probe",default=True, help="Actually reset/reboot stuff instead of just probing it") + parser.add_option ("-s","--soft",action="store_true",dest="soft",default=False, + help="Soft reset instead of hard reboot of the boxes") # no need for -p = probe, as this is the default parser.add_option ("-p","--plc", action="store_true",dest="plc_only",default=False, help="Acts on the plc box only") + parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False, + help="on build boxes, shows vbuild processes in vservers as well; signif. slower") + parser.add_option ("-a","--all",action="store_true",dest="all_boxes",default=False, help="Acts on build and test boxes") parser.add_option ("-b","--build",action="store_true",dest="build_only",default=False, help="Acts on build boxes only") - parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False, - help="on build boxes, shows vbuild processes in vservers as well; signif. slower") parser.add_option ("-q","--qemu",action="store_true",dest="qemu_only",default=False, help="Only acts on the qemu boxes") parser.add_option ("-t","--trackers",action="store_true",dest="trackers_only",default=False, @@ -307,9 +346,16 @@ Default is to act on test boxes only""" help="Display the testmaster status") parser.add_option ("-d","--disk",action="store_true",dest="show_disk",default=False, help="Only inspects disk status") + parser.add_option ("-c","--refresh-cache",action="store_true",dest="refresh_cache", default=False, + help="Refresh cached list of qemu boxes at testmaster - implies -q") (self.options,args) = parser.parse_args() + # -c implies -q + if self.options.refresh_cache: + self.options.qemu_only=True + self.refresh_cache() + # use given hostnames if provided if args: self.boxes=args