X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=system%2FSubstrate.py;h=066fbc35ce632b2413f80d9e5b87f7b8e42effca;hb=8974b09984fdcae42a0b5fa674091d6dfdb1c8f6;hp=0f89b055f24c04cd80d1e3508c5ab05f81ee31e3;hpb=2018172ed04f8cd645b877dd0ef38768ad1030b7;p=tests.git diff --git a/system/Substrate.py b/system/Substrate.py index 0f89b05..066fbc3 100644 --- a/system/Substrate.py +++ b/system/Substrate.py @@ -233,7 +233,7 @@ class Pool: print 'Done' for (vname,bname) in self.load_starting(): self.substrate.add_starting_dummy (bname, vname) - print 'After starting: IP pool' + print "After having loaded 'starting': IP pool" print self.line() # OS-dependent ping option (support for macos, for convenience) ping_timeout_option = None @@ -267,7 +267,7 @@ class Box: def hostname_fedora (self,virt=None): result = "%s {"%self.hostname if virt: result += "%s-"%virt - result += "%s"%self.fedora() + result += "%s %s"%(self.fedora(),self.memory()) # too painful to propagate this cleanly global verbose if verbose: @@ -290,6 +290,8 @@ class Box: composite_command += [ "uname", "-r"] composite_command += [ ";" , "echo", Box.separator , ";" ] composite_command += [ "cat" , "/etc/fedora-release" ] + composite_command += [ ";" , "echo", Box.separator , ";" ] + composite_command += [ "grep", "MemTotal", "/proc/meminfo" ] # due to colons and all, this is going wrong on the local box (typically testmaster) # I am reluctant to change TestSsh as it might break all over the place, so @@ -298,7 +300,7 @@ class Box: else: probe_argv=self.test_ssh().actual_argv(composite_command) composite=self.backquote ( probe_argv, trash_err=True ) - self._hostname = self._uptime = self._uname = self._fedora = "** Unknown **" + self._hostname = self._uptime = self._uname = self._fedora = self._memory = "** Unknown **" if not composite: print "root@%s unreachable"%self.hostname self._probed='' @@ -306,10 +308,15 @@ class Box: try: pieces = composite.split(Box.separator) pieces = [ x.strip() for x in pieces ] - [self._hostname, self._uptime, self._uname, self._fedora] = pieces + # get raw data + [hostname, uptime, uname, fedora, memory] = pieces # customize - self._uptime = ', '.join([ x.strip() for x in self._uptime.split(',')[2:]]) - self._fedora = self._fedora.replace("Fedora release ","f").split(" ")[0] + self._hostname = hostname + self._uptime = ', '.join([ x.strip() for x in uptime.split(',')[2:]]).replace("load average","load") + self._uname = uname + self._fedora = fedora.replace("Fedora release ","f").split(" ")[0] + # translate into Mb + self._memory = int(memory.split()[1])/(1024) except: import traceback print 'BEG issue with pieces',pieces @@ -331,6 +338,10 @@ class Box: self.probe() if hasattr(self,'_fedora') and self._fedora: return self._fedora return '*unprobed* fedora' + def memory(self): + self.probe() + if hasattr(self,'_memory') and self._memory: return "%s Mb"%self._memory + return '*unprobed* memory' def run(self,argv,message=None,trash_err=False,dry_run=False): if dry_run: @@ -407,34 +418,6 @@ class BuildBox (Box): build_matcher=re.compile("\s*(?P[0-9]+).*-[bo]\s+(?P[^\s]+)(\s|\Z)") build_matcher_initvm=re.compile("\s*(?P[0-9]+).*initvm.*\s+(?P[^\s]+)\s*\Z") -class BuildVsBox (BuildBox): - def soft_reboot (self, options): - command=['pkill','vbuild'] - self.run_ssh(command,"Terminating vbuild processes",dry_run=options.dry_run) - - # inspect box and find currently running builds - def sense(self, options): - print 'vb', - pids=self.backquote_ssh(['pgrep','vbuild'],trash_err=True) - if not pids: return - command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] - ps_lines=self.backquote_ssh (command).split('\n') - for line in ps_lines: - if not line.strip() or line.find('PID')>=0: continue - m=build_matcher.match(line) - if m: - date=time.strftime('%Y-%m-%d',time.localtime(time.time())) - buildname=m.group('buildname').replace('@DATE@',date) - self.add_build (buildname,m.group('pid')) - continue - m=build_matcher_initvm.match(line) - if m: - # buildname is expansed here - self.add_build (buildname,m.group('pid')) - continue - header('BuildVsBox.sense: command %r returned line that failed to match'%command) - header(">>%s<<"%line) - class BuildLxcBox (BuildBox): def soft_reboot (self, options): command=['pkill','lbuild'] @@ -474,31 +457,6 @@ class PlcInstance: def set_now (self): self.timestamp=int(time.time()) def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp)) -class PlcVsInstance (PlcInstance): - def __init__ (self, plcbox, vservername, ctxid): - PlcInstance.__init__(self,plcbox) - self.vservername=vservername - self.ctxid=ctxid - - def vplcname (self): - return self.vservername.split('-')[-1] - def buildname (self): - return self.vservername.rsplit('-',2)[0] - - def line (self): - msg="== %s =="%(self.vplcname()) - msg += " [=%s]"%self.vservername - if self.ctxid==0: msg+=" not (yet?) running" - else: msg+=" (ctx=%s)"%self.ctxid - if self.timestamp: msg += " @ %s"%self.pretty_timestamp() - else: msg += " *unknown timestamp*" - return msg - - def kill (self): - msg="vserver stopping %s on %s"%(self.vservername,self.plc_box.hostname) - self.plc_box.run_ssh(['vserver',self.vservername,'stop'],msg) - self.plc_box.forget(self) - class PlcLxcInstance (PlcInstance): # does lxc have a context id of any kind ? def __init__ (self, plcbox, lxcname, pid): @@ -539,7 +497,7 @@ class PlcBox (Box): # fill one slot even though this one is not started yet def add_dummy (self, plcname): - dummy=PlcVsInstance(self,'dummy_'+plcname,0) + dummy=PlcLxcInstance(self,'dummy_'+plcname,0) dummy.set_now() self.plc_instances.append(dummy) @@ -561,80 +519,7 @@ class PlcBox (Box): for p in self.plc_instances: header (p.line(),banner=False) -# we do not this at INRIA any more -class PlcVsBox (PlcBox): - - def add_vserver (self,vservername,ctxid): - for plc in self.plc_instances: - if plc.vservername==vservername: - header("WARNING, duplicate myplc %s running on %s"%\ - (vservername,self.hostname),banner=False) - return - self.plc_instances.append(PlcVsInstance(self,vservername,ctxid)) - - def line(self): - msg="%s [max=%d,free=%d] (%s)"%(self.hostname_fedora(virt="vs"), self.max_plcs,self.free_slots(),self.uptime()) - return msg - - def plc_instance_by_vservername (self, vservername): - for p in self.plc_instances: - if p.vservername==vservername: return p - return None - - def soft_reboot (self, options): - self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers on %s"%(self.hostname,), - dry_run=options.dry_run) - - def sense (self, options): - print 'vp', - # try to find fullname (vserver_stat truncates to a ridiculously short name) - # fetch the contexts for all vservers on that box - map_command=['grep','.','/etc/vservers/*/context','/dev/null',] - context_map=self.backquote_ssh (map_command) - # at this point we have a set of lines like - # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144 - ctx_dict={} - for map_line in context_map.split("\n"): - if not map_line: continue - [path,xid] = map_line.split(':') - ctx_dict[xid]=os.path.basename(os.path.dirname(path)) - # at this point ctx_id maps context id to vservername - - command=['vserver-stat'] - vserver_stat = self.backquote_ssh (command) - for vserver_line in vserver_stat.split("\n"): - if not vserver_line: continue - context=vserver_line.split()[0] - if context=="CTX": continue - try: - longname=ctx_dict[context] - self.add_vserver(longname,context) - except: - print 'WARNING: found ctx %s in vserver_stat but was unable to figure a corresp. vserver'%context - - # scan timestamps - running_vsnames = [ i.vservername for i in self.plc_instances ] - command= ['grep','.'] - command += ['/vservers/%s.timestamp'%vs for vs in running_vsnames] - command += ['/dev/null'] - ts_lines=self.backquote_ssh(command,trash_err=True).split('\n') - for ts_line in ts_lines: - if not ts_line.strip(): continue - # expect /vservers/.timestamp: - try: - (ts_file,timestamp)=ts_line.split(':') - ts_file=os.path.basename(ts_file) - (vservername,_)=os.path.splitext(ts_file) - timestamp=int(timestamp) - p=self.plc_instance_by_vservername(vservername) - if not p: - print 'WARNING zombie plc',self.hostname,ts_line - print '... was expecting',vservername,'in',[i.vservername for i in self.plc_instances] - continue - p.set_timestamp(timestamp) - except: print 'WARNING, could not parse ts line',ts_line - - +## we do not this at INRIA any more class PlcLxcBox (PlcBox): def add_lxc (self,lxcname,pid): @@ -736,6 +621,9 @@ class QemuBox (Box): return self.qemu_instances.append(QemuInstance(nodename,pid,self)) + def node_names (self): + return [ qi.nodename for qi in self.qemu_instances ] + def forget (self, qemu_instance): self.qemu_instances.remove(qemu_instance) @@ -890,9 +778,9 @@ class TestInstance: else: msg += " !!!pids=%s!!!"%self.pids msg += " @%s"%self.pretty_timestamp() if letter2 != '=': - msg = 'BROKEN' if letter2 == 'B' else 'WARNING' + msg2 = ( ' BROKEN' if letter2 == 'B' else ' WARNING' ) # sometimes we have an empty plcindex - msg += " [%s="%msg + " ".join( [ "%s@%s"%(s,i) if i else s for (i,s) in self.broken_steps ] ) + "]" + msg += " [%s="%msg2 + " ".join( [ "%s@%s"%(s,i) if i else s for (i,s) in self.broken_steps ] ) + "]" return msg class TestBox (Box): @@ -1195,12 +1083,14 @@ class Substrate: 'name': plc_name, 'vservername':vservername, 'vserverip':vplc_ip, - 'PLC_DB_HOST':vplc_hostname, - 'PLC_API_HOST':vplc_hostname, - 'PLC_BOOT_HOST':vplc_hostname, - 'PLC_WWW_HOST':vplc_hostname, - 'PLC_NET_DNS1' : self.network_settings() [ 'interface_fields:dns1' ], - 'PLC_NET_DNS2' : self.network_settings() [ 'interface_fields:dns2' ], +# 'settings': { + 'settings:PLC_DB_HOST':vplc_hostname, + 'settings:PLC_API_HOST':vplc_hostname, + 'settings:PLC_BOOT_HOST':vplc_hostname, + 'settings:PLC_WWW_HOST':vplc_hostname, + 'settings:PLC_NET_DNS1' : self.network_settings() [ 'interface_fields:dns1' ], + 'settings:PLC_NET_DNS2' : self.network_settings() [ 'interface_fields:dns2' ], +# } } ) ] } @@ -1289,11 +1179,11 @@ class Substrate: def localize_sfa_rspec (self,plc,options): - plc['sfa']['SFA_REGISTRY_HOST'] = plc['PLC_DB_HOST'] - plc['sfa']['SFA_AGGREGATE_HOST'] = plc['PLC_DB_HOST'] - plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST'] - plc['sfa']['SFA_DB_HOST'] = plc['PLC_DB_HOST'] - plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/' + plc['sfa']['settings']['SFA_REGISTRY_HOST'] = plc['settings']['PLC_DB_HOST'] + plc['sfa']['settings']['SFA_AGGREGATE_HOST'] = plc['settings']['PLC_DB_HOST'] + plc['sfa']['settings']['SFA_SM_HOST'] = plc['settings']['PLC_DB_HOST'] + plc['sfa']['settings']['SFA_DB_HOST'] = plc['settings']['PLC_DB_HOST'] + plc['sfa']['settings']['SFA_PLC_URL'] = 'https://%s:443/PLCAPI/' % plc['settings']['PLC_API_HOST'] return plc #################### release: @@ -1312,24 +1202,52 @@ class Substrate: print "Could not find box %s"%boxname return None - def list_boxes(self,box_or_names): - print 'Sensing', + # deal with the mix of boxes and names and stores the current focus + # as a list of Box instances in self.focus_all + def normalize (self, box_or_names): + self.focus_all=[] for box in box_or_names: if not isinstance(box,Box): box=self.get_box(box) - if not box: continue + if not box: + print 'Warning - could not handle box',box + self.focus_all.append(box) + # elaborate by type + self.focus_build = [ x for x in self.focus_all if isinstance(x,BuildBox) ] + self.focus_plc = [ x for x in self.focus_all if isinstance(x,PlcBox) ] + self.focus_qemu = [ x for x in self.focus_all if isinstance(x,QemuBox) ] + + def list_boxes(self): + print 'Sensing', + for box in self.focus_all: box.sense(self.options) print 'Done' - for box in box_or_names: - if not isinstance(box,Box): box=self.get_box(box) - if not box: continue + for box in self.focus_all: box.list(self.options.verbose) - def reboot_boxes(self,box_or_names): - for box in box_or_names: - if not isinstance(box,Box): box=self.get_box(box) - if not box: continue + def reboot_boxes(self): + for box in self.focus_all: box.reboot(self.options) + def sanity_check (self): + print 'Sanity check' + self.sanity_check_plc() + self.sanity_check_qemu() + + def sanity_check_plc (self): + pass + + def sanity_check_qemu (self): + all_nodes=[] + for box in self.focus_qemu: + all_nodes += box.node_names() + hash={} + for node in all_nodes: + if node not in hash: hash[node]=0 + hash[node]+=1 + for (node,count) in hash.items(): + if count!=1: print 'WARNING - duplicate node',node + + #################### # can be run as a utility to probe/display/manage the local infrastructure def main (self): @@ -1367,5 +1285,10 @@ class Substrate: if not boxes: boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box] - if self.options.reboot: self.reboot_boxes (boxes) - else: self.list_boxes (boxes) + self.normalize (boxes) + + if self.options.reboot: + self.reboot_boxes () + else: + self.list_boxes () + self.sanity_check ()