from TestSsh import TestSsh
from TestMapper import TestMapper
+# too painful to propagate this cleanly
+verbose=None
+
def header (message,banner=True):
if not message: return
if banner: print "===============",
# where to send notifications upon load_starting
self.substrate=substrate
- def list (self):
+ def list (self, verbose=False):
for i in self.pool_items: print i.line()
def line (self):
print 'Done'
for (vname,bname) in self.load_starting():
self.substrate.add_starting_dummy (bname, vname)
- print 'After starting: IP pool'
+ print "After having loaded 'starting': IP pool"
print self.line()
# OS-dependent ping option (support for macos, for convenience)
ping_timeout_option = None
self.test_ssh().run("shutdown -r now",message="Rebooting %s"%self.hostname,
dry_run=options.dry_run)
+ def hostname_fedora (self,virt=None):
+ result = "%s {"%self.hostname
+ if virt: result += "%s-"%virt
+ result += "%s"%self.fedora()
+ # too painful to propagate this cleanly
+ global verbose
+ if verbose:
+ result += "-%s" % self.uname()
+ result += "}"
+ return result
+
+ separator = "===composite==="
+
+ # probe the ssh link
+ # take this chance to gather useful stuff
+ def probe (self):
+ # try it only once
+ if self._probed is not None: return self._probed
+ composite_command = [ ]
+ composite_command += [ "hostname" ]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "uptime" ]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "uname", "-r"]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "cat" , "/etc/fedora-release" ]
+
+ # due to colons and all, this is going wrong on the local box (typically testmaster)
+        # I am reluctant to change TestSsh as it might break all over the place, so we special-case the local box here
+ if self.test_ssh().is_local():
+ probe_argv = [ "bash", "-c", " ".join (composite_command) ]
+ else:
+ probe_argv=self.test_ssh().actual_argv(composite_command)
+ composite=self.backquote ( probe_argv, trash_err=True )
+ self._hostname = self._uptime = self._uname = self._fedora = "** Unknown **"
+ if not composite:
+ print "root@%s unreachable"%self.hostname
+ self._probed=''
+ else:
+ try:
+ pieces = composite.split(Box.separator)
+ pieces = [ x.strip() for x in pieces ]
+ [self._hostname, self._uptime, self._uname, self._fedora] = pieces
+ # customize
+ self._uptime = ', '.join([ x.strip() for x in self._uptime.split(',')[2:]])
+ self._fedora = self._fedora.replace("Fedora release ","f").split(" ")[0]
+ except:
+ import traceback
+ print 'BEG issue with pieces',pieces
+ traceback.print_exc()
+ print 'END issue with pieces',pieces
+ self._probed=self._hostname
+ return self._probed
+
+ # use argv=['bash','-c',"the command line"]
def uptime(self):
+ self.probe()
if hasattr(self,'_uptime') and self._uptime: return self._uptime
- return '*undef* uptime'
- def sense_uptime (self):
- command=['uptime']
- self._uptime=self.backquote_ssh(command,trash_err=True).strip()
- if not self._uptime: self._uptime='unreachable'
+ return '*unprobed* uptime'
+ def uname(self):
+ self.probe()
+ if hasattr(self,'_uname') and self._uname: return self._uname
+ return '*unprobed* uname'
+ def fedora(self):
+ self.probe()
+ if hasattr(self,'_fedora') and self._fedora: return self._fedora
+ return '*unprobed* fedora'
def run(self,argv,message=None,trash_err=False,dry_run=False):
if dry_run:
result= subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0]
return result
- def probe (self):
- if self._probed is not None: return self._probed
- # first probe the ssh link
- probe_argv=self.test_ssh().actual_argv(['hostname'])
- self._probed=self.backquote ( probe_argv, trash_err=True )
- if not self._probed: print "root@%s unreachable"%self.hostname
- return self._probed
-
- # use argv=['bash','-c',"the command line"]
# if you have any shell-expanded arguments like *
# and if there's any chance the command is adressed to the local host
def backquote_ssh (self, argv, trash_err=False):
return
self.build_instances.append(BuildInstance(buildname, pid, self))
- def list(self):
+ def list(self, verbose=False):
if not self.build_instances:
- header ('No build process on %s (%s)'%(self.hostname,self.uptime()))
+ header ('No build process on %s (%s)'%(self.hostname_fedora(),self.uptime()))
else:
- header ("Builds on %s (%s)"%(self.hostname,self.uptime()))
+ header ("Builds on %s (%s)"%(self.hostname_fedora(),self.uptime()))
for b in self.build_instances:
header (b.line(),banner=False)
def reboot (self, options):
if not options.soft:
- self.reboot(options)
+ Box.reboot(self,options)
else:
- command=['pkill','vbuild']
+ self.soft_reboot (options)
+
+build_matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
+build_matcher_initvm=re.compile("\s*(?P<pid>[0-9]+).*initvm.*\s+(?P<buildname>[^\s]+)\s*\Z")
+
+class BuildLxcBox (BuildBox):
+ def soft_reboot (self, options):
+ command=['pkill','lbuild']
self.run_ssh(command,"Terminating vbuild processes",dry_run=options.dry_run)
# inspect box and find currently running builds
- matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
def sense(self, options):
- print 'b',
- self.sense_uptime()
- pids=self.backquote_ssh(['pgrep','vbuild'],trash_err=True)
+ print 'xb',
+ pids=self.backquote_ssh(['pgrep','lbuild'],trash_err=True)
if not pids: return
command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
ps_lines=self.backquote_ssh (command).split('\n')
for line in ps_lines:
if not line.strip() or line.find('PID')>=0: continue
- m=BuildBox.matcher.match(line)
+ m=build_matcher.match(line)
if m:
date=time.strftime('%Y-%m-%d',time.localtime(time.time()))
buildname=m.group('buildname').replace('@DATE@',date)
self.add_build (buildname,m.group('pid'))
- else: header('command %r returned line that failed to match'%command)
-
+ continue
+ m=build_matcher_initvm.match(line)
+ if m:
+            # buildname is expanded here
+ self.add_build (buildname,m.group('pid'))
+ continue
+ header('BuildLxcBox.sense: command %r returned line that failed to match'%command)
+ header(">>%s<<"%line)
+
############################################################
class PlcInstance:
- def __init__ (self, vservername, ctxid, plcbox):
- self.vservername=vservername
- self.ctxid=ctxid
+ def __init__ (self, plcbox):
self.plc_box=plcbox
# unknown yet
self.timestamp=0
-
+
def set_timestamp (self,timestamp): self.timestamp=timestamp
def set_now (self): self.timestamp=int(time.time())
def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
+class PlcLxcInstance (PlcInstance):
+ # does lxc have a context id of any kind ?
+ def __init__ (self, plcbox, lxcname, pid):
+ PlcInstance.__init__(self, plcbox)
+ self.lxcname = lxcname
+ self.pid = pid
+
def vplcname (self):
- return self.vservername.split('-')[-1]
+ return self.lxcname.split('-')[-1]
def buildname (self):
- return self.vservername.rsplit('-',2)[0]
+ return self.lxcname.rsplit('-',2)[0]
def line (self):
msg="== %s =="%(self.vplcname())
- msg += " [=%s]"%self.vservername
- if self.ctxid==0: msg+=" not (yet?) running"
- else: msg+=" (ctx=%s)"%self.ctxid
+ msg += " [=%s]"%self.lxcname
+ if self.pid==-1: msg+=" not (yet?) running"
+ else: msg+=" (pid=%s)"%self.pid
if self.timestamp: msg += " @ %s"%self.pretty_timestamp()
else: msg += " *unknown timestamp*"
return msg
def kill (self):
- msg="vserver stopping %s on %s"%(self.vservername,self.plc_box.hostname)
- self.plc_box.run_ssh(['vserver',self.vservername,'stop'],msg)
+ command="rsync lxc-driver.sh %s:/root"%self.plc_box.hostname
+ commands.getstatusoutput(command)
+ msg="lxc container stopping %s on %s"%(self.lxcname,self.plc_box.hostname)
+ self.plc_box.run_ssh(['/root/lxc-driver.sh','-c','stop_lxc','-n',self.lxcname],msg)
self.plc_box.forget(self)
+##########
class PlcBox (Box):
def __init__ (self, hostname, max_plcs):
Box.__init__(self,hostname)
self.plc_instances=[]
self.max_plcs=max_plcs
- def add_vserver (self,vservername,ctxid):
- for plc in self.plc_instances:
- if plc.vservername==vservername:
- header("WARNING, duplicate myplc %s running on %s"%\
- (vservername,self.hostname),banner=False)
- return
- self.plc_instances.append(PlcInstance(vservername,ctxid,self))
-
- def forget (self, plc_instance):
- self.plc_instances.remove(plc_instance)
+ def free_slots (self):
+ return self.max_plcs - len(self.plc_instances)
# fill one slot even though this one is not started yet
def add_dummy (self, plcname):
- dummy=PlcInstance('dummy_'+plcname,0,self)
+ dummy=PlcLxcInstance(self,'dummy_'+plcname,0)
dummy.set_now()
self.plc_instances.append(dummy)
- def line(self):
- msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_plcs,self.free_spots(),self.uname())
- return msg
-
- def list(self):
+ def forget (self, plc_instance):
+ self.plc_instances.remove(plc_instance)
+
+ def reboot (self, options):
+ if not options.soft:
+ Box.reboot(self,options)
+ else:
+ self.soft_reboot (options)
+
+ def list(self, verbose=False):
if not self.plc_instances:
- header ('No vserver running on %s'%(self.line()))
+ header ('No plc running on %s'%(self.line()))
else:
header ("Active plc VMs on %s"%self.line())
self.plc_instances.sort(timestamp_sort)
for p in self.plc_instances:
header (p.line(),banner=False)
- def free_spots (self):
- return self.max_plcs - len(self.plc_instances)
+## we do not do this at INRIA any more
+class PlcLxcBox (PlcBox):
- def uname(self):
- if hasattr(self,'_uname') and self._uname: return self._uname
- return '*undef* uname'
+ def add_lxc (self,lxcname,pid):
+ for plc in self.plc_instances:
+ if plc.lxcname==lxcname:
+ header("WARNING, duplicate myplc %s running on %s"%\
+ (lxcname,self.hostname),banner=False)
+ return
+ self.plc_instances.append(PlcLxcInstance(self,lxcname,pid))
- def plc_instance_by_vservername (self, vservername):
+
+ # a line describing the box
+ def line(self):
+ return "%s [max=%d,free=%d] (%s)"%(self.hostname_fedora(virt="lxc"),
+ self.max_plcs,self.free_slots(),
+ self.uptime(),
+ )
+
+ def plc_instance_by_lxcname (self, lxcname):
for p in self.plc_instances:
- if p.vservername==vservername: return p
+ if p.lxcname==lxcname: return p
return None
+
+ # essentially shutdown all running containers
+ def soft_reboot (self, options):
+ command="rsync lxc-driver.sh %s:/root"%self.hostname
+ commands.getstatusoutput(command)
+ self.run_ssh(['/root/lxc-driver.sh','-c','stop_all'],"Stopping all running lxc containers on %s"%(self.hostname,),
+ dry_run=options.dry_run)
- def reboot (self, options):
- if not options.soft:
- self.reboot(options)
- else:
- self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers",
- dry_run=options.dry_run)
+ # sense is expected to fill self.plc_instances with PlcLxcInstance's
+ # to describe the currently running VM's
def sense (self, options):
- print 'p',
- self._uname=self.backquote_ssh(['uname','-r']).strip()
- # try to find fullname (vserver_stat truncates to a ridiculously short name)
- # fetch the contexts for all vservers on that box
- map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
- context_map=self.backquote_ssh (map_command)
- # at this point we have a set of lines like
- # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
- ctx_dict={}
- for map_line in context_map.split("\n"):
- if not map_line: continue
- [path,xid] = map_line.split(':')
- ctx_dict[xid]=os.path.basename(os.path.dirname(path))
- # at this point ctx_id maps context id to vservername
-
- command=['vserver-stat']
- vserver_stat = self.backquote_ssh (command)
- for vserver_line in vserver_stat.split("\n"):
- if not vserver_line: continue
- context=vserver_line.split()[0]
- if context=="CTX": continue
- try:
- longname=ctx_dict[context]
- self.add_vserver(longname,context)
- except:
- print 'WARNING: found ctx %s in vserver_stat but was unable to figure a corresp. vserver'%context
-
- # scan timestamps
- running_vsnames = [ i.vservername for i in self.plc_instances ]
- command= ['grep','.']
- command += ['/vservers/%s.timestamp'%vs for vs in running_vsnames]
- command += ['/dev/null']
- ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
- for ts_line in ts_lines:
- if not ts_line.strip(): continue
- # expect /vservers/<vservername>.timestamp:<timestamp>
- try:
- (ts_file,timestamp)=ts_line.split(':')
- ts_file=os.path.basename(ts_file)
- (vservername,_)=os.path.splitext(ts_file)
- timestamp=int(timestamp)
- p=self.plc_instance_by_vservername(vservername)
- if not p:
- print 'WARNING zombie plc',self.hostname,ts_line
- print '... was expecting',vservername,'in',[i.vservername for i in self.plc_instances]
- continue
- p.set_timestamp(timestamp)
- except: print 'WARNING, could not parse ts line',ts_line
-
-
-
+ print "xp",
+ command="rsync lxc-driver.sh %s:/root"%self.hostname
+ commands.getstatusoutput(command)
+ command=['/root/lxc-driver.sh','-c','sense_all']
+ lxc_stat = self.backquote_ssh (command)
+ for lxc_line in lxc_stat.split("\n"):
+ if not lxc_line: continue
+ lxcname=lxc_line.split(";")[0]
+ pid=lxc_line.split(";")[1]
+ timestamp=lxc_line.split(";")[2]
+ self.add_lxc(lxcname,pid)
+ try: timestamp=int(timestamp)
+ except: timestamp=0
+ p=self.plc_instance_by_lxcname(lxcname)
+ if not p:
+ print 'WARNING zombie plc',self.hostname,lxcname
+ print '... was expecting',lxcname,'in',[i.lxcname for i in self.plc_instances]
+ continue
+ p.set_timestamp(timestamp)
############################################################
class QemuInstance:
return
self.qemu_instances.append(QemuInstance(nodename,pid,self))
+ def node_names (self):
+ return [ qi.nodename for qi in self.qemu_instances ]
+
def forget (self, qemu_instance):
self.qemu_instances.remove(qemu_instance)
self.qemu_instances.append(dummy)
def line (self):
- msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_qemus,self.free_spots(),self.driver())
- return msg
+ return "%s [max=%d,free=%d] (%s) %s"%(
+ self.hostname_fedora(virt="qemu"), self.max_qemus,self.free_slots(),
+ self.uptime(),self.driver())
- def list(self):
+ def list(self, verbose=False):
if not self.qemu_instances:
- header ('No qemu process on %s'%(self.line()))
+ header ('No qemu on %s'%(self.line()))
else:
- header ("Active qemu processes on %s"%(self.line()))
+ header ("Qemus on %s"%(self.line()))
self.qemu_instances.sort(timestamp_sort)
for q in self.qemu_instances:
header (q.line(),banner=False)
- def free_spots (self):
+ def free_slots (self):
return self.max_qemus - len(self.qemu_instances)
def driver(self):
def reboot (self, options):
if not options.soft:
- self.reboot(options)
+ Box.reboot(self,options)
else:
self.run_ssh(['pkill','qemu'],"Killing qemu instances",
dry_run=options.dry_run)
matcher=re.compile("\s*(?P<pid>[0-9]+).*-cdrom\s+(?P<nodename>[^\s]+)\.iso")
def sense(self, options):
- print 'q',
+ print 'qn',
modules=self.backquote_ssh(['lsmod']).split('\n')
- self._driver='*NO kqemu/kmv_intel MODULE LOADED*'
+ self._driver='*NO kqemu/kvm_intel MODULE LOADED*'
for module in modules:
if module.find('kqemu')==0:
self._driver='kqemu module loaded'
- # kvm might be loaded without vkm_intel (we dont have AMD)
+        # kvm might be loaded without kvm_intel (we don't have AMD)
elif module.find('kvm_intel')==0:
- self._driver='kvm_intel module loaded'
+ self._driver='kvm_intel OK'
########## find out running pids
pids=self.backquote_ssh(['pgrep','qemu'])
if not pids: return
for line in ps_lines:
if not line.strip() or line.find('PID') >=0 : continue
m=QemuBox.matcher.match(line)
- if m: self.add_node (m.group('nodename'),m.group('pid'))
- else: header('command %r returned line that failed to match'%command)
+ if m:
+ self.add_node (m.group('nodename'),m.group('pid'))
+ continue
+ header('QemuBox.sense: command %r returned line that failed to match'%command)
+ header(">>%s<<"%line)
########## retrieve alive instances and map to build
live_builds=[]
- command=['grep','.','*/*/qemu.pid','/dev/null']
+ command=['grep','.','/vservers/*/*/qemu.pid','/dev/null']
pid_lines=self.backquote_ssh(command,trash_err=True).split('\n')
for pid_line in pid_lines:
if not pid_line.strip(): continue
# expect <build>/<nodename>/qemu.pid:<pid>pid
try:
- (buildname,nodename,tail)=pid_line.split('/')
+ (_,__,buildname,nodename,tail)=pid_line.split('/')
(_,pid)=tail.split(':')
q=self.qemu_instance_by_pid (pid)
if not q: continue
# retrieve timestamps
if not live_builds: return
command= ['grep','.']
- command += ['%s/*/timestamp'%b for b in live_builds]
+ command += ['/vservers/%s/*/timestamp'%b for b in live_builds]
command += ['/dev/null']
ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
for ts_line in ts_lines:
if not ts_line.strip(): continue
# expect <build>/<nodename>/timestamp:<timestamp>
try:
- (buildname,nodename,tail)=ts_line.split('/')
+ (_,__,buildname,nodename,tail)=ts_line.split('/')
nodename=nodename.replace('qemu-','')
(_,timestamp)=tail.split(':')
timestamp=int(timestamp)
def set_now (self): self.timestamp=int(time.time())
def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
+ def is_running (self): return len(self.pids) != 0
def add_pid (self,pid):
self.pids.append(pid)
- def set_broken (self,plcindex, step):
+ def set_broken (self, plcindex, step):
self.broken_steps.append ( (plcindex, step,) )
+ def second_letter (self):
+ if not self.broken_steps: return '='
+ else:
+ really_broken = [ step for (i,step) in self.broken_steps if '_ignore' not in step ]
+ # W is for warning like what's in the build mail
+ if len(really_broken)==0: return 'W'
+ else: return 'B'
+
def line (self):
- double='=='
- if self.pids: double='*'+double[1]
- if self.broken_steps: double=double[0]+'B'
+ # make up a 2-letter sign
+ # first letter : '=', unless build is running : '*'
+ double = '*' if self.pids else '='
+ # second letter : '=' if fine, 'W' for warnings (only ignored steps) 'B' for broken
+ letter2 = self.second_letter()
+ double += letter2
msg = " %s %s =="%(double,self.buildname)
if not self.pids: pass
elif len(self.pids)==1: msg += " (pid=%s)"%self.pids[0]
else: msg += " !!!pids=%s!!!"%self.pids
msg += " @%s"%self.pretty_timestamp()
- if self.broken_steps:
- msg += "\n BROKEN IN STEPS"
- for (i,s) in self.broken_steps: msg += " %s@%s"%(s,i)
+ if letter2 != '=':
+ msg2 = ( ' BROKEN' if letter2 == 'B' else ' WARNING' )
+ # sometimes we have an empty plcindex
+ msg += " [%s="%msg2 + " ".join( [ "%s@%s"%(s,i) if i else s for (i,s) in self.broken_steps ] ) + "]"
return msg
class TestBox (Box):
matcher_proc=re.compile (".*/proc/(?P<pid>[0-9]+)/cwd.*/root/(?P<buildname>[^/]+)$")
matcher_grep=re.compile ("/root/(?P<buildname>[^/]+)/logs/trace.*:TRACE:\s*(?P<plcindex>[0-9]+).*step=(?P<step>\S+).*")
+ matcher_grep_missing=re.compile ("grep: /root/(?P<buildname>[^/]+)/logs/trace: No such file or directory")
def sense (self, options):
- print 't',
- self.sense_uptime()
+ print 'tm',
self.starting_ips=[x for x in self.backquote_ssh(['cat',Starting.location], trash_err=True).strip().split('\n') if x]
# scan timestamps on all tests
t=self.add_timestamp(buildname,timestamp)
except: print 'WARNING, could not parse ts line',ts_line
- command=['bash','-c',"grep KO /root/*/logs/trace* /dev/null" ]
+ # let's try to be robust here -- tests that fail very early like e.g.
+ # "Cannot make space for a PLC instance: vplc IP pool exhausted", that occurs as part of provision
+        # will result in a 'trace' symlink to a nonexistent 'trace-<>.txt' because no step has gone through
+ # simple 'trace' should exist though as it is created by run_log
+ command=['bash','-c',"grep KO /root/*/logs/trace /dev/null 2>&1" ]
trace_lines=self.backquote_ssh (command).split('\n')
for line in trace_lines:
if not line.strip(): continue
+ m=TestBox.matcher_grep_missing.match(line)
+ if m:
+ buildname=m.group('buildname')
+ self.add_broken(buildname,'','NO STEP DONE')
+ continue
m=TestBox.matcher_grep.match(line)
if m:
buildname=m.group('buildname')
plcindex=m.group('plcindex')
step=m.group('step')
self.add_broken(buildname,plcindex, step)
- else: header("command %r returned line that failed to match\n%s"%(command,line))
+ continue
+ header("TestBox.sense: command %r returned line that failed to match\n%s"%(command,line))
+ header(">>%s<<"%line)
pids = self.backquote_ssh (['pgrep','run_log'],trash_err=True)
if not pids: return
pid=m.group('pid')
buildname=m.group('buildname')
self.add_running_test(pid, buildname)
- else: header("command %r returned line that failed to match\n%s"%(command,line))
+ continue
+ header("TestBox.sense: command %r returned line that failed to match\n%s"%(command,line))
+ header(">>%s<<"%line)
def line (self):
- return "%s (%s)"%(self.hostname,self.uptime())
+ return self.hostname_fedora()
- def list (self):
- if not self.test_instances:
- header ("No known tests on %s"%self.line())
+ def list (self, verbose=False):
+ # verbose shows all tests
+ if verbose:
+ instances = self.test_instances
+ msg="tests"
else:
- header ("Known tests on %s"%self.line())
- self.test_instances.sort(timestamp_sort)
- for i in self.test_instances: print i.line()
+ instances = [ i for i in self.test_instances if i.is_running() ]
+ msg="running tests"
+
+ if not instances:
+ header ("No %s on %s"%(msg,self.line()))
+ else:
+ header ("%s on %s"%(msg,self.line()))
+ instances.sort(timestamp_sort)
+ for i in instances: print i.line()
+ # show 'starting' regardless of verbose
if self.starting_ips:
header ("Starting IP addresses on %s"%self.line())
self.starting_ips.sort()
for starting in self.starting_ips: print starting
+ else:
+ header ("Empty 'starting' on %s"%self.line())
############################################################
class Options: pass
self.options.reboot=False
self.options.soft=False
self.test_box = TestBox (self.test_box_spec())
- self.build_boxes = [ BuildBox(h) for h in self.build_boxes_spec() ]
- self.plc_boxes = [ PlcBox (h,m) for (h,m) in self.plc_boxes_spec ()]
+ self.build_lxc_boxes = [ BuildLxcBox(h) for h in self.build_lxc_boxes_spec() ]
+ self.plc_lxc_boxes = [ PlcLxcBox (h,m) for (h,m) in self.plc_lxc_boxes_spec ()]
self.qemu_boxes = [ QemuBox (h,m) for (h,m) in self.qemu_boxes_spec ()]
- self.default_boxes = self.plc_boxes + self.qemu_boxes
- self.all_boxes = self.build_boxes + [ self.test_box ] + self.plc_boxes + self.qemu_boxes
self._sensed=False
self.vplc_pool = Pool (self.vplc_ips(),"for vplcs",self)
self.vnode_pool = Pool (self.vnode_ips(),"for vnodes",self)
+
+ self.build_boxes = self.build_lxc_boxes
+ self.plc_boxes = self.plc_lxc_boxes
+ self.default_boxes = self.plc_boxes + self.qemu_boxes
+ self.all_boxes = self.build_boxes + [ self.test_box ] + self.plc_boxes + self.qemu_boxes
+
+ def summary_line (self):
+ msg = "["
+ msg += " %d xp"%len(self.plc_lxc_boxes)
+ msg += " %d tried plc boxes"%len(self.plc_boxes)
+ msg += "]"
+ return msg
def fqdn (self, hostname):
if hostname.find('.')<0: return "%s.%s"%(hostname,self.domain())
self._sensed=True
return True
- def list (self):
+ def list (self, verbose=False):
for b in self.default_boxes:
b.list()
max_free=0
# use the box that has max free spots for load balancing
for pb in self.plc_boxes:
- free=pb.free_spots()
+ free=pb.free_slots()
if free>max_free:
plc_boxname=pb.hostname
max_free=free
except:
msg=""
if not plc_boxname: msg += " PLC boxes are full"
- if not vplc_hostname: msg += " vplc IP pool exhausted"
- raise Exception,"Could not make space for a PLC instance:"+msg
+ if not vplc_hostname: msg += " vplc IP pool exhausted"
+ msg += " %s"%self.summary_line()
+ raise Exception,"Cannot make space for a PLC instance:"+msg
freed_plc_boxname=plc_instance_to_kill.plc_box.hostname
freed_vplc_hostname=plc_instance_to_kill.vplcname()
message='killing oldest plc instance = %s on %s'%(plc_instance_to_kill.line(),
'name': plc_name,
'vservername':vservername,
'vserverip':vplc_ip,
- 'PLC_DB_HOST':vplc_hostname,
- 'PLC_API_HOST':vplc_hostname,
- 'PLC_BOOT_HOST':vplc_hostname,
- 'PLC_WWW_HOST':vplc_hostname,
- 'PLC_NET_DNS1' : self.network_settings() [ 'interface_fields:dns1' ],
- 'PLC_NET_DNS2' : self.network_settings() [ 'interface_fields:dns2' ],
+# 'settings': {
+ 'settings:PLC_DB_HOST':vplc_hostname,
+ 'settings:PLC_API_HOST':vplc_hostname,
+ 'settings:PLC_BOOT_HOST':vplc_hostname,
+ 'settings:PLC_WWW_HOST':vplc_hostname,
+ 'settings:PLC_NET_DNS1' : self.network_settings() [ 'interface_fields:dns1' ],
+ 'settings:PLC_NET_DNS2' : self.network_settings() [ 'interface_fields:dns2' ],
+# }
} ) ]
}
max_free=0
# use the box that has max free spots for load balancing
for qb in self.qemu_boxes:
- free=qb.free_spots()
+ free=qb.free_slots()
if free>max_free:
qemu_boxname=qb.hostname
max_free=free
msg=""
if not qemu_boxname: msg += " QEMU boxes are full"
if not vnode_hostname: msg += " vnode IP pool exhausted"
- raise Exception,"Could not make space for a QEMU instance:"+msg
+ msg += " %s"%self.summary_line()
+ raise Exception,"Cannot make space for a QEMU instance:"+msg
freed_qemu_boxname=qemu_instance_to_kill.qemu_box.hostname
freed_vnode_hostname=short_hostname(qemu_instance_to_kill.nodename)
# kill it
nodemap={'host_box':qemu_boxname,
'node_fields:hostname':vnode_fqdn,
'interface_fields:ip':ip,
+ 'ipaddress_fields:ip_addr':ip,
'interface_fields:mac':mac,
}
nodemap.update(self.network_settings())
def localize_sfa_rspec (self,plc,options):
- plc['sfa']['SFA_REGISTRY_HOST'] = plc['PLC_DB_HOST']
- plc['sfa']['SFA_AGGREGATE_HOST'] = plc['PLC_DB_HOST']
- plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST']
- plc['sfa']['SFA_PLC_DB_HOST'] = plc['PLC_DB_HOST']
- plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/'
- for site in plc['sites']:
- for node in site['nodes']:
- plc['sfa']['sfa_slice_rspec']['part4'] = node['node_fields']['hostname']
+ plc['sfa']['settings']['SFA_REGISTRY_HOST'] = plc['settings']['PLC_DB_HOST']
+ plc['sfa']['settings']['SFA_AGGREGATE_HOST'] = plc['settings']['PLC_DB_HOST']
+ plc['sfa']['settings']['SFA_SM_HOST'] = plc['settings']['PLC_DB_HOST']
+ plc['sfa']['settings']['SFA_DB_HOST'] = plc['settings']['PLC_DB_HOST']
+ plc['sfa']['settings']['SFA_PLC_URL'] = 'https://%s:443/PLCAPI/' % plc['settings']['PLC_API_HOST']
return plc
#################### release:
#################### show results for interactive mode
def get_box (self,boxname):
for b in self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box] :
- if b.shortname()==boxname:
- return b
+ if b.shortname()==boxname: return b
+ try:
+ if b.shortname()==boxname.split('.')[0]: return b
+ except: pass
print "Could not find box %s"%boxname
return None
- def list_boxes(self,box_or_names):
- print 'Sensing',
+ # deal with the mix of boxes and names and stores the current focus
+ # as a list of Box instances in self.focus_all
+ def normalize (self, box_or_names):
+ self.focus_all=[]
for box in box_or_names:
if not isinstance(box,Box): box=self.get_box(box)
- if not box: continue
+ if not box:
+ print 'Warning - could not handle box',box
+ self.focus_all.append(box)
+ # elaborate by type
+ self.focus_build = [ x for x in self.focus_all if isinstance(x,BuildBox) ]
+ self.focus_plc = [ x for x in self.focus_all if isinstance(x,PlcBox) ]
+ self.focus_qemu = [ x for x in self.focus_all if isinstance(x,QemuBox) ]
+
+ def list_boxes(self):
+ print 'Sensing',
+ for box in self.focus_all:
box.sense(self.options)
print 'Done'
- for box in box_or_names:
- if not isinstance(box,Box): box=self.get_box(box)
- if not box: continue
- box.list()
+ for box in self.focus_all:
+ box.list(self.options.verbose)
- def reboot_boxes(self,box_or_names):
- for box in box_or_names:
- if not isinstance(box,Box): box=self.get_box(box)
- if not box: continue
+ def reboot_boxes(self):
+ for box in self.focus_all:
box.reboot(self.options)
+ def sanity_check (self):
+ print 'Sanity check'
+ self.sanity_check_plc()
+ self.sanity_check_qemu()
+
+ def sanity_check_plc (self):
+ pass
+
+ def sanity_check_qemu (self):
+ all_nodes=[]
+ for box in self.focus_qemu:
+ all_nodes += box.node_names()
+ hash={}
+ for node in all_nodes:
+ if node not in hash: hash[node]=0
+ hash[node]+=1
+ for (node,count) in hash.items():
+ if count!=1: print 'WARNING - duplicate node',node
+
+
####################
- # can be run as a utility to manage the local infrastructure
+ # can be run as a utility to probe/display/manage the local infrastructure
def main (self):
parser=OptionParser()
parser.add_option ('-r',"--reboot",action='store_true',dest='reboot',default=False,
help='reboot mode (use shutdown -r)')
parser.add_option ('-s',"--soft",action='store_true',dest='soft',default=False,
- help='soft mode for reboot (vserver stop or kill qemus)')
+ help='soft mode for reboot (terminates processes)')
parser.add_option ('-t',"--testbox",action='store_true',dest='testbox',default=False,
help='add test box')
parser.add_option ('-b',"--build",action='store_true',dest='builds',default=False,
if self.options.qemus: boxes += self.qemu_boxes
if self.options.all: boxes += self.all_boxes
- # default scope is -b -p -q
+ global verbose
+ verbose=self.options.verbose
+ # default scope is -b -p -q -t
if not boxes:
- boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes
+ boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box]
+
+ self.normalize (boxes)
- if self.options.reboot: self.reboot_boxes (boxes)
- else: self.list_boxes (boxes)
+ if self.options.reboot:
+ self.reboot_boxes ()
+ else:
+ self.list_boxes ()
+ self.sanity_check ()