X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=system%2FSubstrate.py;h=fc5125f59b7845fe2ab58680a69b0ac037c3c02b;hb=e85fe8ae48538f2cec761ef1a47f8a374dc1526c;hp=e660b4784acab55d86c224379d0ddc2dc9f14345;hpb=4752a573406d72f89495e6e6b6fadd0ac02e0aab;p=tests.git diff --git a/system/Substrate.py b/system/Substrate.py index e660b47..fc5125f 100644 --- a/system/Substrate.py +++ b/system/Substrate.py @@ -4,6 +4,8 @@ # # #################### history # +# see also Substrate.readme +# # This is a complete rewrite of TestResources/Tracker/Pool # we don't use trackers anymore and just probe/sense the running # boxes to figure out where we are @@ -31,6 +33,17 @@ # . and their admissible load (max # of myplcs) # . the pool of DNS-names and IP-addresses available for nodes # +# #################### implem. note +# +# this model relies on 'sensing' the substrate, +# i.e. probing all the boxes for their running instances of vservers and qemu +# this is how we get rid of tracker inconsistencies +# however there is a 'black hole' between the time where a given address is +# allocated and when it actually gets used/pingable +# this is why we still need a shared knowledge among running tests +# in a file named /root/starting +# this is connected to the Pool class +# # #################### import os.path, sys @@ -46,17 +59,50 @@ import utils from TestSsh import TestSsh from TestMapper import TestMapper +# too painful to propagate this cleanly +verbose=None + def header (message,banner=True): if not message: return if banner: print "===============", print message sys.stdout.flush() -def timestamp_sort(o1,o2): - if not o1.timestamp: return -1 - elif not o2.timestamp: return 1 - else: return o2.timestamp-o1.timestamp +def timestamp_sort(o1,o2): return o1.timestamp-o2.timestamp + +def short_hostname (hostname): + return hostname.split('.')[0] +#################### +# the place were other test instances tell about their not-yet-started +# instances, that go undetected through sensing +class Starting: + + location='/root/starting' + def __init__ (self): + self.tuples=[] + + def load (self): + try: self.tuples=[line.strip().split('@') + for line in file(Starting.location).readlines()] + except: self.tuples=[] + + def vnames (self) : + self.load() + return [ x for (x,_) in self.tuples ] + + def add (self, vname, bname): + if not vname in self.vnames(): + file(Starting.location,'a').write("%s@%s\n"%(vname,bname)) + + def delete_vname (self, vname): + self.load() + if vname in self.vnames(): + f=file(Starting.location,'w') + for (v,b) in self.tuples: + if v != vname: f.write("%s@%s\n"%(v,b)) + f.close() + #################### # pool class # allows to pick an available IP among a pool @@ -78,12 +124,21 @@ class PoolItem: def __init__ (self,hostname,userdata): self.hostname=hostname self.userdata=userdata - # slot holds 'busy' or 'free' or 'fake' or None + # slot holds 'busy' or 'free' or 'mine' or 'starting' or None + # 'mine' is for our own stuff, 'starting' from the concurrent tests self.status=None self.ip=None def line(self): return "Pooled %s (%s) -> %s"%(self.hostname,self.userdata, self.status) + + def char (self): + if self.status==None: return '?' + elif self.status=='busy': return '+' + elif self.status=='free': return '-' + elif self.status=='mine': return 'M' + elif self.status=='starting': return 'S' + def get_ip(self): if self.ip: return self.ip ip=socket.gethostbyname(self.hostname) @@ -92,45 +147,97 @@ class PoolItem: class Pool: - def __init__ (self, tuples,message): - self.pool= [ PoolItem (h,u) for (h,u) in tuples ] + def __init__ (self, tuples,message, substrate): + self.pool_items= [ PoolItem (hostname,userdata) for (hostname,userdata) in tuples ] self.message=message - self._sensed=False + # where to send notifications upon load_starting + self.substrate=substrate - def sense (self): - if self._sensed: return - print 'Checking IP pool',self.message, - for item in self.pool: - if self.check_ping (item.hostname): item.status='busy' - else: item.status='free' - self._sensed=True - print 'Done' + def list (self, verbose=False): + for i in self.pool_items: print i.line() + + def line (self): + line=self.message + for i in self.pool_items: line += ' ' + i.char() + return line - def list (self): - for i in self.pool: print i.line() + def _item (self, hostname): + for i in self.pool_items: + if i.hostname==hostname: return i + raise Exception ("Could not locate hostname %s in pool %s"%(hostname,self.message)) - def retrieve_userdata (self, hostname): - for i in self.pool: - if i.hostname==hostname: return i.userdata - return None + def retrieve_userdata (self, hostname): + return self._item(hostname).userdata def get_ip (self, hostname): - # use cached if in pool - for i in self.pool: - if i.hostname==hostname: return i.get_ip() - # otherwise just ask dns again - return socket.gethostbyname(hostname) + try: return self._item(hostname).get_ip() + except: return socket.gethostbyname(hostname) + + def set_mine (self, hostname): + try: + self._item(hostname).status='mine' + except: + print 'WARNING: host %s not found in IP pool %s'%(hostname,self.message) def next_free (self): - for i in self.pool: - if i.status in ['busy','fake']: continue - i.status='fake' - return (i.hostname,i.userdata) - raise Exception,"No IP address available in pool %s"%self.message + for i in self.pool_items: + if i.status == 'free': + i.status='mine' + return (i.hostname,i.userdata) + return None + + #################### + # we have a starting instance of our own + def add_starting (self, vname, bname): + Starting().add(vname,bname) + for i in self.pool_items: + if i.hostname==vname: i.status='mine' + + # load the starting instances from the common file + # remember that might be ours + # return the list of (vname,bname) that are not ours + def load_starting (self): + starting=Starting() + starting.load() + new_tuples=[] + for (v,b) in starting.tuples: + for i in self.pool_items: + if i.hostname==v and i.status=='free': + i.status='starting' + new_tuples.append( (v,b,) ) + return new_tuples + + def release_my_starting (self): + for i in self.pool_items: + if i.status=='mine': + Starting().delete_vname (i.hostname) + i.status=None -# OS-dependent ping option (support for macos, for convenience) + + ########## + def _sense (self): + for item in self.pool_items: + if item.status is not None: + print item.char(), + continue + if self.check_ping (item.hostname): + item.status='busy' + print '*', + else: + item.status='free' + print '.', + + def sense (self): + print 'Sensing IP pool',self.message, + self._sense() + print 'Done' + for (vname,bname) in self.load_starting(): + self.substrate.add_starting_dummy (bname, vname) + print 'After starting: IP pool' + print self.line() + # OS-dependent ping option (support for macos, for convenience) ping_timeout_option = None -# checks whether a given hostname/ip responds to ping + # returns True when a given hostname/ip responds to ping def check_ping (self,hostname): if not Pool.ping_timeout_option: (status,osname) = commands.getstatusoutput("uname -s") @@ -143,19 +250,87 @@ class Pool: command="ping -c 1 %s 1 %s"%(Pool.ping_timeout_option,hostname) (status,output) = commands.getstatusoutput(command) - if status==0: print '+', - else: print '-', return status == 0 #################### class Box: def __init__ (self,hostname): self.hostname=hostname - def simple_hostname (self): - return self.hostname.split('.')[0] + self._probed=None + def shortname (self): + return short_hostname(self.hostname) def test_ssh (self): return TestSsh(self.hostname,username='root',unknown_host=False) - def reboot (self): - self.test_ssh().run("shutdown -r now",message="Rebooting %s"%self.hostname) + def reboot (self, options): + self.test_ssh().run("shutdown -r now",message="Rebooting %s"%self.hostname, + dry_run=options.dry_run) + + def hostname_fedora (self,virt=None): + result = "%s {"%self.hostname + if virt: result += "%s-"%virt + result += "%s"%self.fedora() + # too painful to propagate this cleanly + global verbose + if verbose: + result += "-%s" % self.uname() + result += "}" + return result + + separator = "===composite===" + + # probe the ssh link + # take this chance to gather useful stuff + def probe (self): + # try it only once + if self._probed is not None: return self._probed + composite_command = [ ] + composite_command += [ "hostname" ] + composite_command += [ ";" , "echo", Box.separator , ";" ] + composite_command += [ "uptime" ] + composite_command += [ ";" , "echo", Box.separator , ";" ] + composite_command += [ "uname", "-r"] + composite_command += [ ";" , "echo", Box.separator , ";" ] + composite_command += [ "cat" , "/etc/fedora-release" ] + + # due to colons and all, this is going wrong on the local box (typically testmaster) + # I am reluctant to change TestSsh as it might break all over the place, so + if self.test_ssh().is_local(): + probe_argv = [ "bash", "-c", " ".join (composite_command) ] + else: + probe_argv=self.test_ssh().actual_argv(composite_command) + composite=self.backquote ( probe_argv, trash_err=True ) + self._hostname = self._uptime = self._uname = self._fedora = "** Unknown **" + if not composite: + print "root@%s unreachable"%self.hostname + self._probed='' + else: + try: + pieces = composite.split(Box.separator) + pieces = [ x.strip() for x in pieces ] + [self._hostname, self._uptime, self._uname, self._fedora] = pieces + # customize + self._uptime = ', '.join([ x.strip() for x in self._uptime.split(',')[2:]]) + self._fedora = self._fedora.replace("Fedora release ","f").split(" ")[0] + except: + import traceback + print 'BEG issue with pieces',pieces + traceback.print_exc() + print 'END issue with pieces',pieces + self._probed=self._hostname + return self._probed + + # use argv=['bash','-c',"the command line"] + def uptime(self): + self.probe() + if hasattr(self,'_uptime') and self._uptime: return self._uptime + return '*unprobed* uptime' + def uname(self): + self.probe() + if hasattr(self,'_uname') and self._uname: return self._uname + return '*unprobed* uname' + def fedora(self): + self.probe() + if hasattr(self,'_fedora') and self._fedora: return self._fedora + return '*unprobed* fedora' def run(self,argv,message=None,trash_err=False,dry_run=False): if dry_run: @@ -169,28 +344,26 @@ class Box: else: return subprocess.call(argv,stderr=file('/dev/null','w')) - def run_ssh (self, argv, message, trash_err=False): + def run_ssh (self, argv, message, trash_err=False, dry_run=False): ssh_argv = self.test_ssh().actual_argv(argv) - result=self.run (ssh_argv, message, trash_err) + result=self.run (ssh_argv, message, trash_err, dry_run=dry_run) if result!=0: print "WARNING: failed to run %s on %s"%(" ".join(argv),self.hostname) return result def backquote (self, argv, trash_err=False): + # print 'running backquote',argv if not trash_err: - return subprocess.Popen(argv,stdout=subprocess.PIPE).communicate()[0] + result= subprocess.Popen(argv,stdout=subprocess.PIPE).communicate()[0] else: - return subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0] + result= subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0] + return result + # if you have any shell-expanded arguments like * + # and if there's any chance the command is adressed to the local host def backquote_ssh (self, argv, trash_err=False): - # first probe the ssh link - probe_argv=self.test_ssh().actual_argv(['hostname']) - hostname=self.backquote ( probe_argv, trash_err=True ) - if not hostname: - print "root@%s unreachable"%self.hostname - return '' - else: - return self.backquote( self.test_ssh().actual_argv(argv), trash_err) + if not self.probe(): return '' + return self.backquote( self.test_ssh().actual_argv(argv), trash_err) ############################################################ class BuildInstance: @@ -217,56 +390,108 @@ class BuildBox (Box): return self.build_instances.append(BuildInstance(buildname, pid, self)) - def list(self): + def list(self, verbose=False): if not self.build_instances: - header ('No build process on %s (%s)'%(self.hostname,self.uptime())) + header ('No build process on %s (%s)'%(self.hostname_fedora(),self.uptime())) else: - header ("Builds on %s (%s)"%(self.hostname,self.uptime())) + header ("Builds on %s (%s)"%(self.hostname_fedora(),self.uptime())) for b in self.build_instances: header (b.line(),banner=False) - def uptime(self): - if hasattr(self,'_uptime') and self._uptime: return self._uptime - return '*undef* uptime' + def reboot (self, options): + if not options.soft: + Box.reboot(self,options) + else: + self.soft_reboot (options) + +build_matcher=re.compile("\s*(?P[0-9]+).*-[bo]\s+(?P[^\s]+)(\s|\Z)") +build_matcher_initvm=re.compile("\s*(?P[0-9]+).*initvm.*\s+(?P[^\s]+)\s*\Z") + +class BuildVsBox (BuildBox): + def soft_reboot (self, options): + command=['pkill','vbuild'] + self.run_ssh(command,"Terminating vbuild processes",dry_run=options.dry_run) # inspect box and find currently running builds - matcher=re.compile("\s*(?P[0-9]+).*-[bo]\s+(?P[^\s]+)(\s|\Z)") - def sense(self,reboot=False,verbose=True): - if reboot: - self.reboot(box) - return - print 'b', - command=['uptime'] - self._uptime=self.backquote_ssh(command,trash_err=True).strip() - if not self._uptime: self._uptime='unreachable' - pids=self.backquote_ssh(['pgrep','build'],trash_err=True) + def sense(self, options): + print 'vb', + pids=self.backquote_ssh(['pgrep','vbuild'],trash_err=True) if not pids: return command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] ps_lines=self.backquote_ssh (command).split('\n') for line in ps_lines: if not line.strip() or line.find('PID')>=0: continue - m=BuildBox.matcher.match(line) - if m: self.add_build (m.group('buildname'),m.group('pid')) - else: header('command %r returned line that failed to match'%command) + m=build_matcher.match(line) + if m: + date=time.strftime('%Y-%m-%d',time.localtime(time.time())) + buildname=m.group('buildname').replace('@DATE@',date) + self.add_build (buildname,m.group('pid')) + continue + m=build_matcher_initvm.match(line) + if m: + # buildname is expansed here + self.add_build (buildname,m.group('pid')) + continue + header('BuildVsBox.sense: command %r returned line that failed to match'%command) + header(">>%s<<"%line) + +class BuildLxcBox (BuildBox): + def soft_reboot (self, options): + command=['pkill','lbuild'] + self.run_ssh(command,"Terminating vbuild processes",dry_run=options.dry_run) + # inspect box and find currently running builds + def sense(self, options): + print 'xb' + pids=self.backquote_ssh(['pgrep','lbuild'],trash_err=True) + if not pids: return + command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] + ps_lines=self.backquote_ssh (command).split('\n') + for line in ps_lines: + if not line.strip() or line.find('PID')>=0: continue + m=build_matcher.match(line) + if m: + date=time.strftime('%Y-%m-%d',time.localtime(time.time())) + buildname=m.group('buildname').replace('@DATE@',date) + self.add_build (buildname,m.group('pid')) + continue + m=build_matcher_initvm.match(line) + if m: + # buildname is expansed here + self.add_build (buildname,m.group('pid')) + continue + header('BuildLxcBox.sense: command %r returned line that failed to match'%command) + header(">>%s<<"%line) + ############################################################ class PlcInstance: - def __init__ (self, vservername, ctxid, plcbox): - self.vservername=vservername - self.ctxid=ctxid + def __init__ (self, plcbox): self.plc_box=plcbox # unknown yet - self.timestamp=None - + self.timestamp=0 + def set_timestamp (self,timestamp): self.timestamp=timestamp def set_now (self): self.timestamp=int(time.time()) def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp)) +class PlcVsInstance (PlcInstance): + def __init__ (self, plcbox, vservername, ctxid): + PlcInstance.__init__(self,plcbox) + self.vservername=vservername + self.ctxid=ctxid + + def vplcname (self): + return self.vservername.split('-')[-1] + def buildname (self): + return self.vservername.rsplit('-',2)[0] + def line (self): - msg="== %s == (ctx=%s)"%(self.vservername,self.ctxid) + msg="== %s =="%(self.vplcname()) + msg += " [=%s]"%self.vservername + if self.ctxid==0: msg+=" not (yet?) running" + else: msg+=" (ctx=%s)"%self.ctxid if self.timestamp: msg += " @ %s"%self.pretty_timestamp() else: msg += " *unknown timestamp*" - if self.ctxid==0: msg+=" not (yet?) running" return msg def kill (self): @@ -274,66 +499,94 @@ class PlcInstance: self.plc_box.run_ssh(['vserver',self.vservername,'stop'],msg) self.plc_box.forget(self) +class PlcLxcInstance (PlcInstance): + # does lxc have a context id of any kind ? + def __init__ (self, plcbox, lxcname, pid): + PlcInstance.__init__(self, plcbox) + self.lxcname = lxcname + self.pid = pid + + def vplcname (self): + return self.lxcname.split('-')[-1] + def buildname (self): + return self.lxcname.rsplit('-',2)[0] + + def line (self): + msg="== %s =="%(self.vplcname()) + msg += " [=%s]"%self.lxcname + if self.pid==-1: msg+=" not (yet?) running" + else: msg+=" (pid=%s)"%self.pid + if self.timestamp: msg += " @ %s"%self.pretty_timestamp() + else: msg += " *unknown timestamp*" + return msg + + def kill (self): + command="rsync lxc-driver.sh %s:/root"%self.plc_box.hostname + commands.getstatusoutput(command) + msg="lxc container stopping %s on %s"%(self.lxcname,self.plc_box.hostname) + self.plc_box.run_ssh(['/root/lxc-driver.sh','-c','stop_lxc','-n',self.lxcname],msg) + self.plc_box.forget(self) + +########## class PlcBox (Box): def __init__ (self, hostname, max_plcs): Box.__init__(self,hostname) self.plc_instances=[] self.max_plcs=max_plcs - def add_vserver (self,vservername,ctxid): - for plc in self.plc_instances: - if plc.vservername==vservername: - header("WARNING, duplicate myplc %s running on %s"%\ - (vservername,self.hostname),banner=False) - return - self.plc_instances.append(PlcInstance(vservername,ctxid,self)) - + def free_slots (self): + return self.max_plcs - len(self.plc_instances) + + # fill one slot even though this one is not started yet + def add_dummy (self, plcname): + dummy=PlcVsInstance(self,'dummy_'+plcname,0) + dummy.set_now() + self.plc_instances.append(dummy) + def forget (self, plc_instance): self.plc_instances.remove(plc_instance) - # fill one slot even though this one is not started yet - def add_fake (self, plcname): - fake=PlcInstance('fake_'+plcname,0,self) - fake.set_now() - self.plc_instances.append(fake) + def reboot (self, options): + if not options.soft: + Box.reboot(self,options) + else: + self.soft_reboot (options) - def line(self): - msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_plcs,self.free_spots(),self.uname()) - return msg - - def list(self): + def list(self, verbose=False): if not self.plc_instances: - header ('No vserver running on %s'%(self.line())) + header ('No plc running on %s'%(self.line())) else: header ("Active plc VMs on %s"%self.line()) + self.plc_instances.sort(timestamp_sort) for p in self.plc_instances: header (p.line(),banner=False) - def free_spots (self): - return self.max_plcs - len(self.plc_instances) - - def uname(self): - if hasattr(self,'_uname') and self._uname: return self._uname - return '*undef* uname' +# we do not this at INRIA any more +class PlcVsBox (PlcBox): + def add_vserver (self,vservername,ctxid): + for plc in self.plc_instances: + if plc.vservername==vservername: + header("WARNING, duplicate myplc %s running on %s"%\ + (vservername,self.hostname),banner=False) + return + self.plc_instances.append(PlcVsInstance(self,vservername,ctxid)) + + def line(self): + msg="%s [max=%d,free=%d] (%s)"%(self.hostname_fedora(virt="vs"), self.max_plcs,self.free_slots(),self.uptime()) + return msg + def plc_instance_by_vservername (self, vservername): for p in self.plc_instances: if p.vservername==vservername: return p return None - def sense (self, reboot=False, soft=False): - if reboot: - # remove mark for all running servers to avoid resurrection - stop_command=['rm','-rf','/etc/vservers/*/apps/init/mark'] - self.run_ssh(stop_command,"Removing all vserver marks on %s"%self.hostname) - if not soft: - self.reboot() - return - else: - self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers") - return - print 'p', - self._uname=self.backquote_ssh(['uname','-r']).strip() + def soft_reboot (self, options): + self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers on %s"%(self.hostname,), + dry_run=options.dry_run) + + def sense (self, options): + print 'vp', # try to find fullname (vserver_stat truncates to a ridiculously short name) # fetch the contexts for all vservers on that box map_command=['grep','.','/etc/vservers/*/context','/dev/null',] @@ -353,31 +606,88 @@ class PlcBox (Box): if not vserver_line: continue context=vserver_line.split()[0] if context=="CTX": continue - longname=ctx_dict[context] - self.add_vserver(longname,context) -# print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals() + try: + longname=ctx_dict[context] + self.add_vserver(longname,context) + except: + print 'WARNING: found ctx %s in vserver_stat but was unable to figure a corresp. vserver'%context - # scan timestamps + # scan timestamps + running_vsnames = [ i.vservername for i in self.plc_instances ] command= ['grep','.'] - command += ['/vservers/%s/timestamp'%b for b in ctx_dict.values()] + command += ['/vservers/%s.timestamp'%vs for vs in running_vsnames] command += ['/dev/null'] ts_lines=self.backquote_ssh(command,trash_err=True).split('\n') for ts_line in ts_lines: if not ts_line.strip(): continue - # expect /vservers//timestamp: + # expect /vservers/.timestamp: try: - (_,__,vservername,tail)=ts_line.split('/') - (_,timestamp)=tail.split(':') + (ts_file,timestamp)=ts_line.split(':') + ts_file=os.path.basename(ts_file) + (vservername,_)=os.path.splitext(ts_file) timestamp=int(timestamp) - q=self.plc_instance_by_vservername(vservername) - if not q: - print 'WARNING unattached plc instance',ts_line + p=self.plc_instance_by_vservername(vservername) + if not p: + print 'WARNING zombie plc',self.hostname,ts_line + print '... was expecting',vservername,'in',[i.vservername for i in self.plc_instances] continue - q.set_timestamp(timestamp) + p.set_timestamp(timestamp) except: print 'WARNING, could not parse ts line',ts_line +class PlcLxcBox (PlcBox): + def add_lxc (self,lxcname,pid): + for plc in self.plc_instances: + if plc.lxcname==lxcname: + header("WARNING, duplicate myplc %s running on %s"%\ + (lxcname,self.hostname),banner=False) + return + self.plc_instances.append(PlcLxcInstance(self,lxcname,pid)) + + + # a line describing the box + def line(self): + return "%s [max=%d,free=%d] (%s)"%(self.hostname_fedora(virt="lxc"), + self.max_plcs,self.free_slots(), + self.uptime(), + ) + + def plc_instance_by_lxcname (self, lxcname): + for p in self.plc_instances: + if p.lxcname==lxcname: return p + return None + + # essentially shutdown all running containers + def soft_reboot (self, options): + command="rsync lxc-driver.sh %s:/root"%self.hostname + commands.getstatusoutput(command) + self.run_ssh(['/root/lxc-driver.sh','-c','stop_all'],"Stopping all running lxc containers on %s"%(self.hostname,), + dry_run=options.dry_run) + + + # sense is expected to fill self.plc_instances with PlcLxcInstance's + # to describe the currently running VM's + def sense (self, options): + print "xp", + command="rsync lxc-driver.sh %s:/root"%self.hostname + commands.getstatusoutput(command) + command=['/root/lxc-driver.sh','-c','sense_all'] + lxc_stat = self.backquote_ssh (command) + for lxc_line in lxc_stat.split("\n"): + if not lxc_line: continue + lxcname=lxc_line.split(";")[0] + pid=lxc_line.split(";")[1] + timestamp=lxc_line.split(";")[2] + self.add_lxc(lxcname,pid) + try: timestamp=int(timestamp) + except: timestamp=0 + p=self.plc_instance_by_lxcname(lxcname) + if not p: + print 'WARNING zombie plc',self.hostname,lxcname + print '... was expecting',lxcname,'in',[i.lxcname for i in self.plc_instances] + continue + p.set_timestamp(timestamp) ############################################################ class QemuInstance: @@ -387,7 +697,7 @@ class QemuInstance: self.qemu_box=qemubox # not known yet self.buildname=None - self.timestamp=None + self.timestamp=0 def set_buildname (self,buildname): self.buildname=buildname def set_timestamp (self,timestamp): self.timestamp=timestamp @@ -395,17 +705,18 @@ class QemuInstance: def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp)) def line (self): - msg = "== %s == (pid=%s)"%(self.nodename,self.pid) - if self.buildname: msg += " <--> %s"%self.buildname - else: msg += " *unknown build*" + msg = "== %s =="%(short_hostname(self.nodename)) + msg += " [=%s]"%self.buildname + if self.pid: msg += " (pid=%s)"%self.pid + else: msg += " not (yet?) running" if self.timestamp: msg += " @ %s"%self.pretty_timestamp() else: msg += " *unknown timestamp*" - if self.pid: msg += "pid=%s"%self.pid - else: msg += " not (yet?) running" return msg def kill(self): - if self.pid==0: print "cannot kill qemu %s with pid==0"%self.nodename + if self.pid==0: + print "cannot kill qemu %s with pid==0"%self.nodename + return msg="Killing qemu %s with pid=%s on box %s"%(self.nodename,self.pid,self.qemu_box.hostname) self.qemu_box.run_ssh(['kill',"%s"%self.pid],msg) self.qemu_box.forget(self) @@ -429,24 +740,26 @@ class QemuBox (Box): self.qemu_instances.remove(qemu_instance) # fill one slot even though this one is not started yet - def add_fake (self, nodename): - fake=QemuInstance('fake_'+nodename,0,self) - fake.set_now() - self.qemu_instances.append(fake) + def add_dummy (self, nodename): + dummy=QemuInstance('dummy_'+nodename,0,self) + dummy.set_now() + self.qemu_instances.append(dummy) def line (self): - msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_qemus,self.free_spots(),self.driver()) - return msg + return "%s [max=%d,free=%d] (%s) %s"%( + self.hostname_fedora(virt="qemu"), self.max_qemus,self.free_slots(), + self.uptime(),self.driver()) - def list(self): + def list(self, verbose=False): if not self.qemu_instances: - header ('No qemu process on %s'%(self.line())) + header ('No qemu on %s'%(self.line())) else: - header ("Active qemu processes on %s"%(self.line())) + header ("Qemus on %s"%(self.line())) + self.qemu_instances.sort(timestamp_sort) for q in self.qemu_instances: header (q.line(),banner=False) - def free_spots (self): + def free_slots (self): return self.max_qemus - len(self.qemu_instances) def driver(self): @@ -464,23 +777,24 @@ class QemuBox (Box): return q return None + def reboot (self, options): + if not options.soft: + Box.reboot(self,options) + else: + self.run_ssh(['pkill','qemu'],"Killing qemu instances", + dry_run=options.dry_run) + matcher=re.compile("\s*(?P[0-9]+).*-cdrom\s+(?P[^\s]+)\.iso") - def sense(self, reboot=False, soft=False): - if reboot: - if not soft: - self.reboot() - else: - self.run_ssh(box,['pkill','qemu'],"Killing qemu instances") - return - print 'q', + def sense(self, options): + print 'qn', modules=self.backquote_ssh(['lsmod']).split('\n') - self._driver='*NO kqemu/kmv_intel MODULE LOADED*' + self._driver='*NO kqemu/kvm_intel MODULE LOADED*' for module in modules: if module.find('kqemu')==0: self._driver='kqemu module loaded' - # kvm might be loaded without vkm_intel (we dont have AMD) + # kvm might be loaded without kvm_intel (we dont have AMD) elif module.find('kvm_intel')==0: - self._driver='kvm_intel module loaded' + self._driver='kvm_intel OK' ########## find out running pids pids=self.backquote_ssh(['pgrep','qemu']) if not pids: return @@ -489,17 +803,20 @@ class QemuBox (Box): for line in ps_lines: if not line.strip() or line.find('PID') >=0 : continue m=QemuBox.matcher.match(line) - if m: self.add_node (m.group('nodename'),m.group('pid')) - else: header('command %r returned line that failed to match'%command) + if m: + self.add_node (m.group('nodename'),m.group('pid')) + continue + header('QemuBox.sense: command %r returned line that failed to match'%command) + header(">>%s<<"%line) ########## retrieve alive instances and map to build live_builds=[] - command=['grep','.','*/*/qemu.pid','/dev/null'] + command=['grep','.','/vservers/*/*/qemu.pid','/dev/null'] pid_lines=self.backquote_ssh(command,trash_err=True).split('\n') for pid_line in pid_lines: if not pid_line.strip(): continue # expect //qemu.pid:pid try: - (buildname,nodename,tail)=pid_line.split('/') + (_,__,buildname,nodename,tail)=pid_line.split('/') (_,pid)=tail.split(':') q=self.qemu_instance_by_pid (pid) if not q: continue @@ -507,128 +824,370 @@ class QemuBox (Box): live_builds.append(buildname) except: print 'WARNING, could not parse pid line',pid_line # retrieve timestamps + if not live_builds: return command= ['grep','.'] - command += ['%s/*/timestamp'%b for b in live_builds] + command += ['/vservers/%s/*/timestamp'%b for b in live_builds] command += ['/dev/null'] ts_lines=self.backquote_ssh(command,trash_err=True).split('\n') for ts_line in ts_lines: if not ts_line.strip(): continue # expect //timestamp: try: - (buildname,nodename,tail)=ts_line.split('/') + (_,__,buildname,nodename,tail)=ts_line.split('/') nodename=nodename.replace('qemu-','') (_,timestamp)=tail.split(':') timestamp=int(timestamp) q=self.qemu_instance_by_nodename_buildname(nodename,buildname) if not q: - print 'WARNING unattached qemu instance',ts_line,nodename,buildname + print 'WARNING zombie qemu',self.hostname,ts_line + print '... was expecting (',short_hostname(nodename),buildname,') in',\ + [ (short_hostname(i.nodename),i.buildname) for i in self.qemu_instances ] continue q.set_timestamp(timestamp) except: print 'WARNING, could not parse ts line',ts_line +#################### +class TestInstance: + def __init__ (self, buildname, pid=0): + self.pids=[] + if pid!=0: self.pid.append(pid) + self.buildname=buildname + # latest trace line + self.trace='' + # has a KO test + self.broken_steps=[] + self.timestamp = 0 + + def set_timestamp (self,timestamp): self.timestamp=timestamp + def set_now (self): self.timestamp=int(time.time()) + def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp)) + + def is_running (self): return len(self.pids) != 0 + + def add_pid (self,pid): + self.pids.append(pid) + def set_broken (self, plcindex, step): + self.broken_steps.append ( (plcindex, step,) ) + + def line (self): + double='==' + if self.pids: double='*'+double[1] + if self.broken_steps: double=double[0]+'B' + msg = " %s %s =="%(double,self.buildname) + if not self.pids: pass + elif len(self.pids)==1: msg += " (pid=%s)"%self.pids[0] + else: msg += " !!!pids=%s!!!"%self.pids + msg += " @%s"%self.pretty_timestamp() + if self.broken_steps: + # sometimes we have an empty plcindex + msg += " [BROKEN=" + " ".join( [ "%s@%s"%(s,i) if i else s for (i,s) in self.broken_steps ] ) + "]" + return msg + +class TestBox (Box): + def __init__ (self,hostname): + Box.__init__(self,hostname) + self.starting_ips=[] + self.test_instances=[] + + def reboot (self, options): + # can't reboot a vserver VM + self.run_ssh (['pkill','run_log'],"Terminating current runs", + dry_run=options.dry_run) + self.run_ssh (['rm','-f',Starting.location],"Cleaning %s"%Starting.location, + dry_run=options.dry_run) + + def get_test (self, buildname): + for i in self.test_instances: + if i.buildname==buildname: return i + + # we scan ALL remaining test results, even the ones not running + def add_timestamp (self, buildname, timestamp): + i=self.get_test(buildname) + if i: + i.set_timestamp(timestamp) + else: + i=TestInstance(buildname,0) + i.set_timestamp(timestamp) + self.test_instances.append(i) + + def add_running_test (self, pid, buildname): + i=self.get_test(buildname) + if not i: + self.test_instances.append (TestInstance (buildname,pid)) + return + if i.pids: + print "WARNING: 2 concurrent tests run on same build %s"%buildname + i.add_pid (pid) + + def add_broken (self, buildname, plcindex, step): + i=self.get_test(buildname) + if not i: + i=TestInstance(buildname) + self.test_instances.append(i) + i.set_broken(plcindex, step) + + matcher_proc=re.compile (".*/proc/(?P[0-9]+)/cwd.*/root/(?P[^/]+)$") + matcher_grep=re.compile ("/root/(?P[^/]+)/logs/trace.*:TRACE:\s*(?P[0-9]+).*step=(?P\S+).*") + matcher_grep_missing=re.compile ("grep: /root/(?P[^/]+)/logs/trace: No such file or directory") + def sense (self, options): + print 'tm', + self.starting_ips=[x for x in self.backquote_ssh(['cat',Starting.location], trash_err=True).strip().split('\n') if x] + + # scan timestamps on all tests + # this is likely to not invoke ssh so we need to be a bit smarter to get * expanded + # xxx would make sense above too + command=['bash','-c',"grep . /root/*/timestamp /dev/null"] + ts_lines=self.backquote_ssh(command,trash_err=True).split('\n') + for ts_line in ts_lines: + if not ts_line.strip(): continue + # expect /root//timestamp: + try: + (ts_file,timestamp)=ts_line.split(':') + ts_file=os.path.dirname(ts_file) + buildname=os.path.basename(ts_file) + timestamp=int(timestamp) + t=self.add_timestamp(buildname,timestamp) + except: print 'WARNING, could not parse ts line',ts_line + + # let's try to be robust here -- tests that fail very early like e.g. + # "Cannot make space for a PLC instance: vplc IP pool exhausted", that occurs as part of provision + # will result in a 'trace' symlink to an inexisting 'trace-<>.txt' because no step has gone through + # simple 'trace' sohuld exist though as it is created by run_log + command=['bash','-c',"grep KO /root/*/logs/trace /dev/null 2>&1" ] + trace_lines=self.backquote_ssh (command).split('\n') + for line in trace_lines: + if not line.strip(): continue + m=TestBox.matcher_grep_missing.match(line) + if m: + buildname=m.group('buildname') + self.add_broken(buildname,'','NO STEP DONE') + continue + m=TestBox.matcher_grep.match(line) + if m: + buildname=m.group('buildname') + plcindex=m.group('plcindex') + step=m.group('step') + self.add_broken(buildname,plcindex, step) + continue + header("TestBox.sense: command %r returned line that failed to match\n%s"%(command,line)) + header(">>%s<<"%line) + + pids = self.backquote_ssh (['pgrep','run_log'],trash_err=True) + if not pids: return + command=['ls','-ld'] + ["/proc/%s/cwd"%pid for pid in pids.split("\n") if pid] + ps_lines=self.backquote_ssh (command).split('\n') + for line in ps_lines: + if not line.strip(): continue + m=TestBox.matcher_proc.match(line) + if m: + pid=m.group('pid') + buildname=m.group('buildname') + self.add_running_test(pid, buildname) + continue + header("TestBox.sense: command %r returned line that failed to match\n%s"%(command,line)) + header(">>%s<<"%line) + + + def line (self): + return self.hostname_fedora() + + def list (self, verbose=False): + # verbose shows all tests + if verbose: + instances = self.test_instances + msg="tests" + else: + instances = [ i for i in self.test_instances if i.is_running() ] + msg="running tests" + + if not instances: + header ("No %s on %s"%(msg,self.line())) + else: + header ("%s on %s"%(msg,self.line())) + instances.sort(timestamp_sort) + for i in instances: print i.line() + # show 'starting' regardless of verbose + if self.starting_ips: + header ("Starting IP addresses on %s"%self.line()) + self.starting_ips.sort() + for starting in self.starting_ips: print starting + else: + header ("Empty 'starting' on %s"%self.line()) + ############################################################ class Options: pass class Substrate: - def test (self): - self.sense() - - def __init__ (self): + def __init__ (self, plcs_on_vs=True, plcs_on_lxc=False): self.options=Options() self.options.dry_run=False self.options.verbose=False - self.options.probe=True - self.options.soft=True - self.build_boxes = [ BuildBox(h) for h in self.build_boxes_spec() ] - self.plc_boxes = [ PlcBox (h,m) for (h,m) in self.plc_boxes_spec ()] + self.options.reboot=False + self.options.soft=False + self.test_box = TestBox (self.test_box_spec()) + self.build_vs_boxes = [ BuildVsBox(h) for h in self.build_vs_boxes_spec() ] + self.build_lxc_boxes = [ BuildLxcBox(h) for h in self.build_lxc_boxes_spec() ] + self.plc_vs_boxes = [ PlcVsBox (h,m) for (h,m) in self.plc_vs_boxes_spec ()] + self.plc_lxc_boxes = [ PlcLxcBox (h,m) for (h,m) in self.plc_lxc_boxes_spec ()] self.qemu_boxes = [ QemuBox (h,m) for (h,m) in self.qemu_boxes_spec ()] - self.all_boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes self._sensed=False - self.vplc_pool = Pool (self.vplc_ips(),"for vplcs") - self.vnode_pool = Pool (self.vnode_ips(),"for vnodes") - - self.vnode_pool.list() - + self.vplc_pool = Pool (self.vplc_ips(),"for vplcs",self) + self.vnode_pool = Pool (self.vnode_ips(),"for vnodes",self) + + self.rescope (plcs_on_vs=plcs_on_vs, plcs_on_lxc=plcs_on_lxc) + + # which plc boxes are we interested in ? + def rescope (self, plcs_on_vs, plcs_on_lxc): + self.build_boxes = self.build_vs_boxes + self.build_lxc_boxes + self.plc_boxes=[] + if plcs_on_vs: self.plc_boxes += self.plc_vs_boxes + if plcs_on_lxc: self.plc_boxes += self.plc_lxc_boxes + self.default_boxes = self.plc_boxes + self.qemu_boxes + self.all_boxes = self.build_boxes + [ self.test_box ] + self.plc_boxes + self.qemu_boxes + + def summary_line (self): + msg = "[" + msg += " %d vp"%len(self.plc_vs_boxes) + msg += " %d xp"%len(self.plc_lxc_boxes) + msg += " %d tried plc boxes"%len(self.plc_boxes) + msg += "]" + return msg -# def build_box_names (self): -# return [ h for h in self.build_boxes_spec() ] -# def plc_boxes (self): -# return [ h for (h,m) in self.plc_boxes_spec() ] -# def qemu_boxes (self): -# return [ h for (h,m) in self.qemu_boxes_spec() ] + def fqdn (self, hostname): + if hostname.find('.')<0: return "%s.%s"%(hostname,self.domain()) + return hostname + # return True if actual sensing takes place def sense (self,force=False): - if self._sensed and not force: return + if self._sensed and not force: return False print 'Sensing local substrate...', - for b in self.all_boxes: b.sense() + for b in self.default_boxes: b.sense(self.options) print 'Done' self._sensed=True + return True + + def list (self, verbose=False): + for b in self.default_boxes: + b.list() + + def add_dummy_plc (self, plc_boxname, plcname): + for pb in self.plc_boxes: + if pb.hostname==plc_boxname: + pb.add_dummy(plcname) + return True + def add_dummy_qemu (self, qemu_boxname, qemuname): + for qb in self.qemu_boxes: + if qb.hostname==qemu_boxname: + qb.add_dummy(qemuname) + return True + + def add_starting_dummy (self, bname, vname): + return self.add_dummy_plc (bname, vname) or self.add_dummy_qemu (bname, vname) ########## def provision (self,plcs,options): try: - self.sense() - self.list_all() # attach each plc to a plc box and an IP address plcs = [ self.provision_plc (plc,options) for plc in plcs ] # attach each node/qemu to a qemu box with an IP address plcs = [ self.provision_qemus (plc,options) for plc in plcs ] # update the SFA spec accordingly plcs = [ self.localize_sfa_rspec(plc,options) for plc in plcs ] + self.list() return plcs except Exception, e: print '* Could not provision this test on current substrate','--',e,'--','exiting' traceback.print_exc() sys.exit(1) + # it is expected that a couple of options like ips_bplc and ips_vplc + # are set or unset together + @staticmethod + def check_options (x,y): + if not x and not y: return True + return len(x)==len(y) + # find an available plc box (or make space) # and a free IP address (using options if present) def provision_plc (self, plc, options): - #### we need to find one plc box that still has a slot - plc_box=None - max_free=0 - # use the box that has max free spots for load balancing - for pb in self.plc_boxes: - free=pb.free_spots() - if free>max_free: - plc_box=pb - max_free=free - # everything is already used - if not plc_box: - # find the oldest of all our instances - all_plc_instances=reduce(lambda x, y: x+y, - [ pb.plc_instances for pb in self.plc_boxes ], - []) - all_plc_instances.sort(timestamp_sort) - plc_instance_to_kill=all_plc_instances[0] - plc_box=plc_instance_to_kill.plc_box - plc_instance_to_kill.kill() - print 'killed oldest = %s on %s'%(plc_instance_to_kill.line(), - plc_instance_to_kill.plc_box.hostname) - - utils.header( 'plc %s -> box %s'%(plc['name'],plc_box.line())) - plc_box.add_fake(plc['name']) - #### OK we have a box to run in, let's find an IP address - # look in options + + assert Substrate.check_options (options.ips_bplc, options.ips_vplc) + + #### let's find an IP address for that plc + # look in options if options.ips_vplc: + # this is a rerun + # we don't check anything here, + # it is the caller's responsability to cleanup and make sure this makes sense + plc_boxname = options.ips_bplc.pop() vplc_hostname=options.ips_vplc.pop() else: + if self.sense(): self.list() + plc_boxname=None + vplc_hostname=None + # try to find an available IP self.vplc_pool.sense() - (vplc_hostname,unused)=self.vplc_pool.next_free() + couple=self.vplc_pool.next_free() + if couple: + (vplc_hostname,unused)=couple + #### we need to find one plc box that still has a slot + max_free=0 + # use the box that has max free spots for load balancing + for pb in self.plc_boxes: + free=pb.free_slots() + if free>max_free: + plc_boxname=pb.hostname + max_free=free + # if there's no available slot in the plc_boxes, or we need a free IP address + # make space by killing the oldest running instance + if not plc_boxname or not vplc_hostname: + # find the oldest of all our instances + all_plc_instances=reduce(lambda x, y: x+y, + [ pb.plc_instances for pb in self.plc_boxes ], + []) + all_plc_instances.sort(timestamp_sort) + try: + plc_instance_to_kill=all_plc_instances[0] + except: + msg="" + if not plc_boxname: msg += " PLC boxes are full" + if not vplc_hostname: msg += " vplc IP pool exhausted" + msg += " %s"%self.summary_line() + raise Exception,"Cannot make space for a PLC instance:"+msg + freed_plc_boxname=plc_instance_to_kill.plc_box.hostname + freed_vplc_hostname=plc_instance_to_kill.vplcname() + message='killing oldest plc instance = %s on %s'%(plc_instance_to_kill.line(), + freed_plc_boxname) + plc_instance_to_kill.kill() + # use this new plcbox if that was the problem + if not plc_boxname: + plc_boxname=freed_plc_boxname + # ditto for the IP address + if not vplc_hostname: + vplc_hostname=freed_vplc_hostname + # record in pool as mine + self.vplc_pool.set_mine(vplc_hostname) + + # + self.add_dummy_plc(plc_boxname,plc['name']) vplc_ip = self.vplc_pool.get_ip(vplc_hostname) + self.vplc_pool.add_starting(vplc_hostname, plc_boxname) #### compute a helpful vserver name # remove domain in hostname - vplc_simple = vplc_hostname.split('.')[0] - vservername = "%s-%d-%s" % (options.buildname,plc['index'],vplc_simple) - plc_name = "%s_%s"%(plc['name'],vplc_simple) + vplc_short = short_hostname(vplc_hostname) + vservername = "%s-%d-%s" % (options.buildname,plc['index'],vplc_short) + plc_name = "%s_%s"%(plc['name'],vplc_short) + + utils.header( 'PROVISION plc %s in box %s at IP %s as %s'%\ + (plc['name'],plc_boxname,vplc_hostname,vservername)) #### apply in the plc_spec # # informative # label=options.personality.replace("linux","") - mapper = {'plc': [ ('*' , {'hostname':plc_box.hostname, + mapper = {'plc': [ ('*' , {'host_box':plc_boxname, # 'name':'%s-'+label, 'name': plc_name, 'vservername':vservername, @@ -642,63 +1201,87 @@ class Substrate: } ) ] } - utils.header("Attaching %s on IP %s in vserver %s"%(plc['name'],vplc_hostname,vservername)) + # mappers only work on a list of plcs return TestMapper([plc],options).map(mapper)[0] ########## def provision_qemus (self, plc, options): + + assert Substrate.check_options (options.ips_bnode, options.ips_vnode) + test_mapper = TestMapper ([plc], options) nodenames = test_mapper.node_names() maps=[] for nodename in nodenames: - #### similarly we want to find a qemu box that can host us - qemu_box=None - max_free=0 - # use the box that has max free spots for load balancing - for qb in self.qemu_boxes: - free=qb.free_spots() - if free>max_free: - qemu_box=qb - max_free=free - # everything is already used - if not qemu_box: - # find the oldest of all our instances - all_qemu_instances=reduce(lambda x, y: x+y, - [ qb.qemu_instances for qb in self.qemu_boxes ], - []) - all_qemu_instances.sort(timestamp_sort) - qemu_instance_to_kill=all_qemu_instances[0] - qemu_box=qemu_instance_to_kill.qemu_box - qemu_instance_to_kill.kill() - print 'killed oldest = %s on %s'%(qemu_instance_to_kill.line(), - qemu_instance_to_kill.qemu_box.hostname) - - utils.header( 'node %s -> qemu box %s'%(nodename,qemu_box.line())) - qemu_box.add_fake(nodename) - #### OK we have a box to run in, let's find an IP address - # look in options + if options.ips_vnode: - qemu_hostname=options.ips_vnode.pop() - mac=self.vnode_pool.retrieve_userdata(qemu_hostname) - print 'case 1 hostname',qemu_hostname,'mac',mac + # as above, it's a rerun, take it for granted + qemu_boxname=options.ips_bnode.pop() + vnode_hostname=options.ips_vnode.pop() else: + if self.sense(): self.list() + qemu_boxname=None + vnode_hostname=None + # try to find an available IP self.vnode_pool.sense() - (qemu_hostname,mac)=self.vnode_pool.next_free() - print 'case 2 hostname',qemu_hostname,'mac',mac - ip=self.vnode_pool.get_ip (qemu_hostname) - utils.header("Attaching %s on IP %s MAC %s"%(plc['name'],qemu_hostname,mac)) - - if qemu_hostname.find('.')<0: - qemu_hostname += "."+self.domain() - nodemap={'host_box':qemu_box.hostname, - 'node_fields:hostname':qemu_hostname, + couple=self.vnode_pool.next_free() + if couple: + (vnode_hostname,unused)=couple + # find a physical box + max_free=0 + # use the box that has max free spots for load balancing + for qb in self.qemu_boxes: + free=qb.free_slots() + if free>max_free: + qemu_boxname=qb.hostname + max_free=free + # if we miss the box or the IP, kill the oldest instance + if not qemu_boxname or not vnode_hostname: + # find the oldest of all our instances + all_qemu_instances=reduce(lambda x, y: x+y, + [ qb.qemu_instances for qb in self.qemu_boxes ], + []) + all_qemu_instances.sort(timestamp_sort) + try: + qemu_instance_to_kill=all_qemu_instances[0] + except: + msg="" + if not qemu_boxname: msg += " QEMU boxes are full" + if not vnode_hostname: msg += " vnode IP pool exhausted" + msg += " %s"%self.summary_line() + raise Exception,"Cannot make space for a QEMU instance:"+msg + freed_qemu_boxname=qemu_instance_to_kill.qemu_box.hostname + freed_vnode_hostname=short_hostname(qemu_instance_to_kill.nodename) + # kill it + message='killing oldest qemu node = %s on %s'%(qemu_instance_to_kill.line(), + freed_qemu_boxname) + qemu_instance_to_kill.kill() + # use these freed resources where needed + if not qemu_boxname: + qemu_boxname=freed_qemu_boxname + if not vnode_hostname: + vnode_hostname=freed_vnode_hostname + self.vnode_pool.set_mine(vnode_hostname) + + self.add_dummy_qemu (qemu_boxname,vnode_hostname) + mac=self.vnode_pool.retrieve_userdata(vnode_hostname) + ip=self.vnode_pool.get_ip (vnode_hostname) + self.vnode_pool.add_starting(vnode_hostname,qemu_boxname) + + vnode_fqdn = self.fqdn(vnode_hostname) + nodemap={'host_box':qemu_boxname, + 'node_fields:hostname':vnode_fqdn, 'interface_fields:ip':ip, + 'ipaddress_fields:ip_addr':ip, 'interface_fields:mac':mac, } nodemap.update(self.network_settings()) maps.append ( (nodename, nodemap) ) + utils.header("PROVISION node %s in box %s at IP %s with MAC %s"%\ + (nodename,qemu_boxname,vnode_hostname,mac)) + return test_mapper.map({'node':maps})[0] def localize_sfa_rspec (self,plc,options): @@ -706,37 +1289,82 @@ class Substrate: plc['sfa']['SFA_REGISTRY_HOST'] = plc['PLC_DB_HOST'] plc['sfa']['SFA_AGGREGATE_HOST'] = plc['PLC_DB_HOST'] plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST'] - plc['sfa']['SFA_PLC_DB_HOST'] = plc['PLC_DB_HOST'] + plc['sfa']['SFA_DB_HOST'] = plc['PLC_DB_HOST'] plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/' - for site in plc['sites']: - for node in site['nodes']: - plc['sfa']['sfa_slice_rspec']['part4'] = node['node_fields']['hostname'] return plc + #################### release: + def release (self,options): + self.vplc_pool.release_my_starting() + self.vnode_pool.release_my_starting() + pass + #################### show results for interactive mode - def list_all (self): - self.sense() - for b in self.all_boxes: b.list() - - def get_box (self,box): - for b in self.build_boxes + self.plc_boxes + self.qemu_boxes: - if b.simple_hostname()==box: - return b - print "Could not find box %s"%box + def get_box (self,boxname): + for b in self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box] : + if b.shortname()==boxname: return b + try: + if b.shortname()==boxname.split('.')[0]: return b + except: pass + print "Could not find box %s"%boxname return None - def list_box(self,box): - b=self.get_box(box) - if not b: return - b.sense() - b.list() - - # can be run as a utility to manage the local infrastructure + def list_boxes(self,box_or_names): + print 'Sensing', + for box in box_or_names: + if not isinstance(box,Box): box=self.get_box(box) + if not box: continue + box.sense(self.options) + print 'Done' + for box in box_or_names: + if not isinstance(box,Box): box=self.get_box(box) + if not box: continue + box.list(self.options.verbose) + + def reboot_boxes(self,box_or_names): + for box in box_or_names: + if not isinstance(box,Box): box=self.get_box(box) + if not box: continue + box.reboot(self.options) + + #################### + # can be run as a utility to probe/display/manage the local infrastructure def main (self): parser=OptionParser() - (options,args)=parser.parse_args() - if not args: - self.list_all() - else: - for box in args: - self.list_box(box) + parser.add_option ('-r',"--reboot",action='store_true',dest='reboot',default=False, + help='reboot mode (use shutdown -r)') + parser.add_option ('-s',"--soft",action='store_true',dest='soft',default=False, + help='soft mode for reboot (vserver stop or kill qemus)') + parser.add_option ('-t',"--testbox",action='store_true',dest='testbox',default=False, + help='add test box') + parser.add_option ('-b',"--build",action='store_true',dest='builds',default=False, + help='add build boxes') + parser.add_option ('-p',"--plc",action='store_true',dest='plcs',default=False, + help='add plc boxes') + parser.add_option ('-q',"--qemu",action='store_true',dest='qemus',default=False, + help='add qemu boxes') + parser.add_option ('-a',"--all",action='store_true',dest='all',default=False, + help='address all known boxes, like -b -t -p -q') + parser.add_option ('-v',"--verbose",action='store_true',dest='verbose',default=False, + help='verbose mode') + parser.add_option ('-n',"--dry_run",action='store_true',dest='dry_run',default=False, + help='dry run mode') + (self.options,args)=parser.parse_args() + + self.rescope (plcs_on_vs=True, plcs_on_lxc=True) + + boxes=args + if self.options.testbox: boxes += [self.test_box] + if self.options.builds: boxes += self.build_boxes + if self.options.plcs: boxes += self.plc_boxes + if self.options.qemus: boxes += self.qemu_boxes + if self.options.all: boxes += self.all_boxes + + global verbose + verbose=self.options.verbose + # default scope is -b -p -q -t + if not boxes: + boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box] + + if self.options.reboot: self.reboot_boxes (boxes) + else: self.list_boxes (boxes)