#
# #################### history
#
+# see also Substrate.readme
+#
# This is a complete rewrite of TestResources/Tracker/Pool
# we don't use trackers anymore and just probe/sense the running
# boxes to figure out where we are
import re
import traceback
import subprocess
-import commands
import socket
from optparse import OptionParser
import utils
from TestSsh import TestSsh
from TestMapper import TestMapper
+from functools import reduce
+
+# too painful to propagate this cleanly
+verbose = None
-def header (message,banner=True):
+def header (message, banner=True):
if not message: return
- if banner: print "===============",
- print message
+ if banner:
+ print("===============", end=' ')
+ print(message)
sys.stdout.flush()
-def timestamp_sort(o1,o2): return o1.timestamp-o2.timestamp
+def timestamp_key(o): return o.timestamp
+
+def short_hostname (hostname):
+ return hostname.split('.')[0]
+
+####################
+# the place were other test instances tell about their not-yet-started
+# instances, that go undetected through sensing
+class Starting:
+
+ location = '/root/starting'
+
+ def __init__ (self):
+ self.tuples=[]
+
+ def load (self):
+ try:
+ with open(Starting.location) as starting:
+ self.tuples = [line.strip().split('@') for line in starting.readlines()]
+ except:
+ self.tuples = []
+
+ def vnames (self) :
+ self.load()
+ return [ x for (x, _) in self.tuples ]
+ def add (self, vname, bname):
+ if not vname in self.vnames():
+ with open(Starting.location, 'a') as out:
+ out.write("{}@{}\n".format(vname, bname))
+
+ def delete_vname (self, vname):
+ self.load()
+ if vname in self.vnames():
+ with open(Starting.location, 'w') as f:
+ for (v, b) in self.tuples:
+ if v != vname:
+ f.write("{}@{}\n".format(v, b))
+
####################
# pool class
# allows to pick an available IP among a pool
# that is, even if ip2 is not busy/pingable when the second next_free() is issued
class PoolItem:
- def __init__ (self,hostname,userdata):
- self.hostname=hostname
- self.userdata=userdata
+ def __init__ (self, hostname, userdata):
+ self.hostname = hostname
+ self.userdata = userdata
# slot holds 'busy' or 'free' or 'mine' or 'starting' or None
# 'mine' is for our own stuff, 'starting' from the concurrent tests
- self.status=None
- self.ip=None
+ self.status = None
+ self.ip = None
def line(self):
- return "Pooled %s (%s) -> %s"%(self.hostname,self.userdata, self.status)
+ return "Pooled {} ({}) -> {}".format(self.hostname, self.userdata, self.status)
def char (self):
- if self.status==None: return '?'
- elif self.status=='busy': return '*'
- elif self.status=='free': return '.'
- elif self.status=='mine': return 'M'
- elif self.status=='starting': return 'S'
+ if self.status == None: return '?'
+ elif self.status == 'busy': return '+'
+ elif self.status == 'free': return '-'
+ elif self.status == 'mine': return 'M'
+ elif self.status == 'starting': return 'S'
def get_ip(self):
if self.ip: return self.ip
class Pool:
- def __init__ (self, tuples,message):
- self.pool= [ PoolItem (h,u) for (h,u) in tuples ]
- self.message=message
+ def __init__ (self, tuples, message, substrate):
+ self.pool_items = [ PoolItem (hostname, userdata) for (hostname, userdata) in tuples ]
+ self.message = message
+ # where to send notifications upon load_starting
+ self.substrate = substrate
- def list (self):
- for i in self.pool: print i.line()
+ def list (self, verbose=False):
+ for i in self.pool_items: print(i.line())
def line (self):
- line=self.message
- for i in self.pool: line += ' ' + i.char()
+ line = self.message
+ for i in self.pool_items: line += ' ' + i.char()
return line
def _item (self, hostname):
- for i in self.pool:
- if i.hostname==hostname: return i
+ for i in self.pool_items:
+ if i.hostname == hostname: return i
raise Exception ("Could not locate hostname %s in pool %s"%(hostname,self.message))
def retrieve_userdata (self, hostname):
return self._item(hostname).userdata
def get_ip (self, hostname):
- try: return self._item(hostname).get_ip()
- except: return socket.gethostbyname(hostname)
+ try:
+ return self._item(hostname).get_ip()
+ except:
+ return socket.gethostbyname(hostname)
def set_mine (self, hostname):
try:
self._item(hostname).status='mine'
except:
- print 'WARNING: host %s not found in IP pool %s'%(hostname,self.message)
+ print('WARNING: host %s not found in IP pool %s'%(hostname,self.message))
def next_free (self):
- for i in self.pool:
+ for i in self.pool_items:
if i.status == 'free':
i.status='mine'
- return (i.hostname,i.userdata)
+ return (i.hostname, i.userdata)
return None
- # the place were other test instances tell about their not-yet-started
- # instances, that go undetected through sensing
- starting='/root/starting'
- def add_starting (self, name):
- try: items=[line.strip() for line in file(Pool.starting).readlines()]
- except: items=[]
- if not name in items:
- file(Pool.starting,'a').write(name+'\n')
- for i in self.pool:
- if i.hostname==name: i.status='mine'
-
- # we load this after actual sensing;
+ ####################
+ # we have a starting instance of our own
+ def add_starting (self, vname, bname):
+ Starting().add(vname, bname)
+ for i in self.pool_items:
+ if i.hostname == vname:
+ i.status = 'mine'
+
+ # load the starting instances from the common file
+ # remember that might be ours
+ # return the list of (vname,bname) that are not ours
def load_starting (self):
- try: items=[line.strip() for line in file(Pool.starting).readlines()]
- except: items=[]
- for i in self.pool:
- if i.hostname in items:
- if i.status=='free' : i.status='starting'
+ starting = Starting()
+ starting.load()
+ new_tuples = []
+ for (v, b) in starting.tuples:
+ for i in self.pool_items:
+ if i.hostname == v and i.status == 'free':
+ i.status = 'starting'
+ new_tuples.append( (v, b,) )
+ return new_tuples
def release_my_starting (self):
- for i in self.pool:
- if i.status=='mine':
- self.del_starting(i.hostname)
- i.status=None
-
- def del_starting (self, name):
- try: items=[line.strip() for line in file(Pool.starting).readlines()]
- except: items=[]
- if name in items:
- f=file(Pool.starting,'w')
- for item in items:
- if item != name: f.write(item+'\n')
- f.close()
-
+ for i in self.pool_items:
+ if i.status == 'mine':
+ Starting().delete_vname(i.hostname)
+ i.status = None
+
+
##########
def _sense (self):
- for item in self.pool:
+ for item in self.pool_items:
if item.status is not None:
+ print(item.char(), end=' ')
continue
if self.check_ping (item.hostname):
- item.status='busy'
+ item.status = 'busy'
+ print('*', end=' ')
else:
- item.status='free'
+ item.status = 'free'
+ print('.', end=' ')
def sense (self):
- print 'Sensing IP pool',self.message,
+ print('Sensing IP pool', self.message, end=' ')
self._sense()
- print 'Done'
- self.load_starting()
- print 'After starting: IP pool'
- print self.line()
-
+ print('Done')
+ for (vname,bname) in self.load_starting():
+ self.substrate.add_starting_dummy(bname, vname)
+ print("After having loaded 'starting': IP pool")
+ print(self.line())
# OS-dependent ping option (support for macos, for convenience)
ping_timeout_option = None
# returns True when a given hostname/ip responds to ping
- def check_ping (self,hostname):
+ def check_ping (self, hostname):
if not Pool.ping_timeout_option:
- (status,osname) = commands.getstatusoutput("uname -s")
+ (status, osname) = subprocess.getstatusoutput("uname -s")
if status != 0:
- raise Exception, "TestPool: Cannot figure your OS name"
+ raise Exception("TestPool: Cannot figure your OS name")
if osname == "Linux":
- Pool.ping_timeout_option="-w"
+ Pool.ping_timeout_option = "-w"
elif osname == "Darwin":
- Pool.ping_timeout_option="-t"
+ Pool.ping_timeout_option = "-t"
- command="ping -c 1 %s 1 %s"%(Pool.ping_timeout_option,hostname)
- (status,output) = commands.getstatusoutput(command)
- if status==0: print '*',
- else: print '.',
+ command="ping -c 1 {} 1 {}".format(Pool.ping_timeout_option, hostname)
+ (status, output) = subprocess.getstatusoutput(command)
return status == 0
####################
class Box:
- def __init__ (self,hostname):
- self.hostname=hostname
- def short_hostname (self):
- return self.hostname.split('.')[0]
- def test_ssh (self): return TestSsh(self.hostname,username='root',unknown_host=False)
- def reboot (self):
- self.test_ssh().run("shutdown -r now",message="Rebooting %s"%self.hostname)
-
- def run(self,argv,message=None,trash_err=False,dry_run=False):
+ def __init__ (self, hostname):
+ self.hostname = hostname
+ self._probed = None
+ def shortname (self):
+ return short_hostname(self.hostname)
+ def test_ssh (self):
+ return TestSsh(self.hostname, username='root', unknown_host=False)
+ def reboot (self, options):
+ self.test_ssh().run("shutdown -r now",
+ message="Rebooting {}".format(self.hostname),
+ dry_run=options.dry_run)
+
+ def hostname_fedora (self, virt=None):
+ # this truly is an opening bracket
+ result = "{}".format(self.hostname) + " {"
+ if virt:
+ result += "{}-".format(virt)
+ result += "{} {}".format(self.fedora(), self.memory())
+ # too painful to propagate this cleanly
+ global verbose
+ if verbose:
+ result += "-{}".format(self.uname())
+ # and the matching closing bracket
+ result += "}"
+ return result
+
+ separator = "===composite==="
+
+ # probe the ssh link
+ # take this chance to gather useful stuff
+ def probe (self):
+ # try it only once
+ if self._probed is not None: return self._probed
+ composite_command = [ ]
+ composite_command += [ "hostname" ]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "uptime" ]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "uname", "-r"]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "cat" , "/etc/fedora-release" ]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "grep", "MemTotal", "/proc/meminfo" ]
+
+ # due to colons and all, this is going wrong on the local box (typically testmaster)
+ # I am reluctant to change TestSsh as it might break all over the place, so
+ if self.test_ssh().is_local():
+ probe_argv = [ "bash", "-c", " ".join (composite_command) ]
+ else:
+ probe_argv = self.test_ssh().actual_argv(composite_command)
+ composite = self.backquote ( probe_argv, trash_err=True )
+ self._hostname = self._uptime = self._uname = self._fedora = self._memory = "** Unknown **"
+ if not composite:
+ print("root@{} unreachable".format(self.hostname))
+ self._probed = ''
+ else:
+ try:
+ pieces = composite.split(Box.separator)
+ pieces = [ x.strip() for x in pieces ]
+ # get raw data
+ [hostname, uptime, uname, fedora, memory] = pieces
+ # customize
+ self._hostname = hostname
+ self._uptime = ', '.join([ x.strip() for x in uptime.split(',')[2:]]).replace("load average", "load")
+ self._uname = uname
+ self._fedora = fedora.replace("Fedora release ","f").split(" ")[0]
+ # translate into Mb
+ self._memory = int(memory.split()[1])/(1024)
+ except Exception as e:
+ import traceback
+ print('BEG issue with pieces')
+ traceback.print_exc()
+ self._probed = self._hostname
+ return self._probed
+
+ # use argv=['bash','-c',"the command line"]
+ def uptime(self):
+ self.probe()
+ if hasattr(self,'_uptime') and self._uptime: return self._uptime
+ return '*unprobed* uptime'
+ def uname(self):
+ self.probe()
+ if hasattr(self,'_uname') and self._uname: return self._uname
+ return '*unprobed* uname'
+ def fedora(self):
+ self.probe()
+ if hasattr(self,'_fedora') and self._fedora: return self._fedora
+ return '*unprobed* fedora'
+ def memory(self):
+ self.probe()
+ if hasattr(self,'_memory') and self._memory: return "{} Mb".format(self._memory)
+ return '*unprobed* memory'
+
+ def run(self, argv, message=None, trash_err=False, dry_run=False):
if dry_run:
- print 'DRY_RUN:',
- print " ".join(argv)
+ print('DRY_RUN:', end=' ')
+ print(" ".join(argv))
return 0
else:
header(message)
if not trash_err:
return subprocess.call(argv)
else:
- return subprocess.call(argv,stderr=file('/dev/null','w'))
+ with open('/dev/null', 'w') as null:
+ return subprocess.call(argv, stderr=null)
- def run_ssh (self, argv, message, trash_err=False):
+ def run_ssh (self, argv, message, trash_err=False, dry_run=False):
ssh_argv = self.test_ssh().actual_argv(argv)
- result=self.run (ssh_argv, message, trash_err)
- if result!=0:
- print "WARNING: failed to run %s on %s"%(" ".join(argv),self.hostname)
+ result = self.run (ssh_argv, message, trash_err, dry_run=dry_run)
+ if result != 0:
+ print("WARNING: failed to run {} on {}".format(" ".join(argv), self.hostname))
return result
def backquote (self, argv, trash_err=False):
+ # in python3 we need to set universal_newlines=True
if not trash_err:
- result= subprocess.Popen(argv,stdout=subprocess.PIPE).communicate()[0]
+ out_err = subprocess.Popen(argv, stdout=subprocess.PIPE,
+ universal_newlines=True).communicate()
else:
- result= subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0]
- return result
-
+ with open('/dev/null', 'w') as null:
+ out_err = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=null,
+ universal_newlines=True).communicate()
+ # only interested in stdout here
+ return out_err[0]
+
+ # if you have any shell-expanded arguments like *
+ # and if there's any chance the command is adressed to the local host
def backquote_ssh (self, argv, trash_err=False):
- # first probe the ssh link
- probe_argv=self.test_ssh().actual_argv(['hostname'])
- hostname=self.backquote ( probe_argv, trash_err=True )
- if not hostname:
- print "root@%s unreachable"%self.hostname
- return ''
- else:
- return self.backquote( self.test_ssh().actual_argv(argv), trash_err)
+ if not self.probe(): return ''
+ return self.backquote(self.test_ssh().actual_argv(argv), trash_err)
############################################################
class BuildInstance:
def __init__ (self, buildname, pid, buildbox):
- self.buildname=buildname
- self.buildbox=buildbox
- self.pids=[pid]
+ self.buildname = buildname
+ self.buildbox = buildbox
+ self.pids = [pid]
def add_pid(self,pid):
self.pids.append(pid)
def line (self):
- return "== %s == (pids=%r)"%(self.buildname,self.pids)
+ return "== {} == (pids={})".format(self.buildname, self.pids)
class BuildBox (Box):
- def __init__ (self,hostname):
- Box.__init__(self,hostname)
- self.build_instances=[]
+ def __init__ (self, hostname):
+ Box.__init__(self, hostname)
+ self.build_instances = []
- def add_build (self,buildname,pid):
+ def add_build(self, buildname, pid):
for build in self.build_instances:
- if build.buildname==buildname:
+ if build.buildname == buildname:
build.add_pid(pid)
return
self.build_instances.append(BuildInstance(buildname, pid, self))
- def list(self):
+ def list(self, verbose=False):
if not self.build_instances:
- header ('No build process on %s (%s)'%(self.hostname,self.uptime()))
+ header ('No build process on {} ({})'.format(self.hostname_fedora(), self.uptime()))
else:
- header ("Builds on %s (%s)"%(self.hostname,self.uptime()))
+ header ("Builds on {} ({})".format(self.hostname_fedora(), self.uptime()))
for b in self.build_instances:
- header (b.line(),banner=False)
+ header (b.line(), banner=False)
- def uptime(self):
- if hasattr(self,'_uptime') and self._uptime: return self._uptime
- return '*undef* uptime'
+ def reboot (self, options):
+ if not options.soft:
+ Box.reboot(self, options)
+ else:
+ self.soft_reboot (options)
+
+build_matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
+build_matcher_initvm=re.compile("\s*(?P<pid>[0-9]+).*initvm.*\s+(?P<buildname>[^\s]+)\s*\Z")
+
+class BuildLxcBox (BuildBox):
+ def soft_reboot (self, options):
+ command=['pkill','lbuild']
+ self.run_ssh(command, "Terminating vbuild processes", dry_run=options.dry_run)
# inspect box and find currently running builds
- matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
- def sense(self,reboot=False,verbose=True):
- if reboot:
- self.reboot(box)
- return
- print 'b',
- command=['uptime']
- self._uptime=self.backquote_ssh(command,trash_err=True).strip()
- if not self._uptime: self._uptime='unreachable'
- pids=self.backquote_ssh(['pgrep','build'],trash_err=True)
+ def sense(self, options):
+ print('xb', end=' ')
+ pids = self.backquote_ssh(['pgrep','lbuild'], trash_err=True)
if not pids: return
- command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
- ps_lines=self.backquote_ssh (command).split('\n')
+ command = ['ps', '-o', 'pid,command'] + [ pid for pid in pids.split("\n") if pid]
+ ps_lines = self.backquote_ssh(command).split('\n')
for line in ps_lines:
- if not line.strip() or line.find('PID')>=0: continue
- m=BuildBox.matcher.match(line)
- if m: self.add_build (m.group('buildname'),m.group('pid'))
- else: header('command %r returned line that failed to match'%command)
-
+ if not line.strip() or line.find('PID') >= 0: continue
+ m = build_matcher.match(line)
+ if m:
+ date = time.strftime('%Y-%m-%d', time.localtime(time.time()))
+ buildname = m.group('buildname').replace('@DATE@', date)
+ self.add_build(buildname, m.group('pid'))
+ continue
+ m = build_matcher_initvm.match(line)
+ if m:
+ # buildname is expansed here
+ self.add_build(buildname, m.group('pid'))
+ continue
+ header('BuildLxcBox.sense: command {} returned line that failed to match'.format(command))
+ header(">>{}<<".format(line))
+
############################################################
class PlcInstance:
- def __init__ (self, vservername, ctxid, plcbox):
- self.vservername=vservername
- self.ctxid=ctxid
- self.plc_box=plcbox
+ def __init__ (self, plcbox):
+ self.plc_box = plcbox
# unknown yet
- self.timestamp=0
-
- def set_timestamp (self,timestamp): self.timestamp=timestamp
- def set_now (self): self.timestamp=int(time.time())
- def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
+ self.timestamp = 0
+
+ def set_timestamp (self,timestamp):
+ self.timestamp = timestamp
+ def set_now (self):
+ self.timestamp = int(time.time())
+ def pretty_timestamp (self):
+ return time.strftime("%Y-%m-%d:%H-%M", time.localtime(self.timestamp))
+
+class PlcLxcInstance (PlcInstance):
+ # does lxc have a context id of any kind ?
+ def __init__ (self, plcbox, lxcname, pid):
+ PlcInstance.__init__(self, plcbox)
+ self.lxcname = lxcname
+ self.pid = pid
def vplcname (self):
- return self.vservername.split('-')[-1]
+ return self.lxcname.split('-')[-1]
+ def buildname (self):
+ return self.lxcname.rsplit('-',2)[0]
def line (self):
- msg="== %s == (ctx=%s)"%(self.vservername,self.ctxid)
- if self.timestamp: msg += " @ %s"%self.pretty_timestamp()
+ msg="== {} ==".format(self.vplcname())
+ msg += " [={}]".format(self.lxcname)
+ if self.pid==-1: msg+=" not (yet?) running"
+ else: msg+=" (pid={})".format(self.pid)
+ if self.timestamp: msg += " @ {}".format(self.pretty_timestamp())
else: msg += " *unknown timestamp*"
- if self.ctxid==0: msg+=" not (yet?) running"
return msg
def kill (self):
- msg="vserver stopping %s on %s"%(self.vservername,self.plc_box.hostname)
- self.plc_box.run_ssh(['vserver',self.vservername,'stop'],msg)
+ command="rsync lxc-driver.sh {}:/root".format(self.plc_box.hostname)
+ subprocess.getstatusoutput(command)
+ msg="lxc container stopping {} on {}".format(self.lxcname, self.plc_box.hostname)
+ self.plc_box.run_ssh(['/root/lxc-driver.sh', '-c', 'stop_lxc', '-n', self.lxcname], msg)
self.plc_box.forget(self)
+##########
class PlcBox (Box):
def __init__ (self, hostname, max_plcs):
- Box.__init__(self,hostname)
- self.plc_instances=[]
- self.max_plcs=max_plcs
+ Box.__init__(self, hostname)
+ self.plc_instances = []
+ self.max_plcs = max_plcs
- def add_vserver (self,vservername,ctxid):
- for plc in self.plc_instances:
- if plc.vservername==vservername:
- header("WARNING, duplicate myplc %s running on %s"%\
- (vservername,self.hostname),banner=False)
- return
- self.plc_instances.append(PlcInstance(vservername,ctxid,self))
-
- def forget (self, plc_instance):
- self.plc_instances.remove(plc_instance)
+ def free_slots (self):
+ return self.max_plcs - len(self.plc_instances)
# fill one slot even though this one is not started yet
def add_dummy (self, plcname):
- dummy=PlcInstance('dummy_'+plcname,0,self)
+ dummy=PlcLxcInstance(self, 'dummy_'+plcname, 0)
dummy.set_now()
self.plc_instances.append(dummy)
- def line(self):
- msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_plcs,self.free_spots(),self.uname())
- return msg
-
- def list(self):
+ def forget (self, plc_instance):
+ self.plc_instances.remove(plc_instance)
+
+ def reboot (self, options):
+ if not options.soft:
+ Box.reboot(self, options)
+ else:
+ self.soft_reboot (options)
+
+ def list(self, verbose=False):
if not self.plc_instances:
- header ('No vserver running on %s'%(self.line()))
+ header ('No plc running on {}'.format(self.line()))
else:
- header ("Active plc VMs on %s"%self.line())
+ header ("Active plc VMs on {}".format(self.line()))
+ self.plc_instances.sort(key=timestamp_key)
for p in self.plc_instances:
- header (p.line(),banner=False)
-
- def free_spots (self):
- return self.max_plcs - len(self.plc_instances)
-
- def uname(self):
- if hasattr(self,'_uname') and self._uname: return self._uname
- return '*undef* uname'
+ header (p.line(), banner=False)
- def plc_instance_by_vservername (self, vservername):
- for p in self.plc_instances:
- if p.vservername==vservername: return p
- return None
+## we do not this at INRIA any more
+class PlcLxcBox (PlcBox):
- def sense (self, reboot=False, soft=False):
- if reboot:
- # remove mark for all running servers to avoid resurrection
- stop_command=['rm','-rf','/etc/vservers/*/apps/init/mark']
- self.run_ssh(stop_command,"Removing all vserver marks on %s"%self.hostname)
- if not soft:
- self.reboot()
+ def add_lxc (self, lxcname, pid):
+ for plc in self.plc_instances:
+ if plc.lxcname == lxcname:
+ header("WARNING, duplicate myplc {} running on {}"\
+ .format(lxcname, self.hostname), banner=False)
return
- else:
- self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers")
- return
- print 'p',
- self._uname=self.backquote_ssh(['uname','-r']).strip()
- # try to find fullname (vserver_stat truncates to a ridiculously short name)
- # fetch the contexts for all vservers on that box
- map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
- context_map=self.backquote_ssh (map_command)
- # at this point we have a set of lines like
- # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
- ctx_dict={}
- for map_line in context_map.split("\n"):
- if not map_line: continue
- [path,xid] = map_line.split(':')
- ctx_dict[xid]=os.path.basename(os.path.dirname(path))
- # at this point ctx_id maps context id to vservername
-
- command=['vserver-stat']
- vserver_stat = self.backquote_ssh (command)
- for vserver_line in vserver_stat.split("\n"):
- if not vserver_line: continue
- context=vserver_line.split()[0]
- if context=="CTX": continue
- longname=ctx_dict[context]
- self.add_vserver(longname,context)
-# print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
-
- # scan timestamps
- running_vsnames = [ i.vservername for i in self.plc_instances ]
- command= ['grep','.']
- command += ['/vservers/%s/timestamp'%vs for vs in running_vsnames]
- command += ['/dev/null']
- ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
- for ts_line in ts_lines:
- if not ts_line.strip(): continue
- # expect /vservers/<vservername>/timestamp:<timestamp>
- try:
- (_,__,vservername,tail)=ts_line.split('/')
- (_,timestamp)=tail.split(':')
- timestamp=int(timestamp)
- p=self.plc_instance_by_vservername(vservername)
- if not p:
- print 'WARNING unattached plc instance',ts_line
- print 'was expecting to find',vservername,'in',[i.vservername for i in self.plc_instances]
- continue
- p.set_timestamp(timestamp)
- except: print 'WARNING, could not parse ts line',ts_line
-
+ self.plc_instances.append(PlcLxcInstance(self, lxcname, pid))
+ # a line describing the box
+ def line(self):
+ return "{} [max={},free={}] ({})".format(self.hostname_fedora(virt="lxc"),
+ self.max_plcs, self.free_slots(),
+ self.uptime())
+
+ def plc_instance_by_lxcname(self, lxcname):
+ for p in self.plc_instances:
+ if p.lxcname == lxcname:
+ return p
+ return None
+
+ # essentially shutdown all running containers
+ def soft_reboot(self, options):
+ command="rsync lxc-driver.sh {}:/root".format(self.hostname)
+ subprocess.getstatusoutput(command)
+ self.run_ssh( ['/root/lxc-driver.sh','-c','stop_all'],
+ "Stopping all running lxc containers on {}".format(self.hostname),
+ dry_run=options.dry_run)
+
+
+ # sense is expected to fill self.plc_instances with PlcLxcInstance's
+ # to describe the currently running VM's
+ def sense(self, options):
+ print("xp", end=' ')
+ command = "rsync lxc-driver.sh {}:/root".format(self.hostname)
+ subprocess.getstatusoutput(command)
+ command = ['/root/lxc-driver.sh', '-c', 'sense_all']
+ lxc_stat = self.backquote_ssh (command)
+ for lxc_line in lxc_stat.split("\n"):
+ if not lxc_line: continue
+ lxcname = lxc_line.split(";")[0]
+ pid = lxc_line.split(";")[1]
+ timestamp = lxc_line.split(";")[2]
+ self.add_lxc(lxcname,pid)
+ try: timestamp = int(timestamp)
+ except: timestamp = 0
+ p = self.plc_instance_by_lxcname(lxcname)
+ if not p:
+ print('WARNING zombie plc',self.hostname,lxcname)
+ print('... was expecting',lxcname,'in',[i.lxcname for i in self.plc_instances])
+ continue
+ p.set_timestamp(timestamp)
############################################################
class QemuInstance:
- def __init__ (self, nodename, pid, qemubox):
- self.nodename=nodename
- self.pid=pid
- self.qemu_box=qemubox
+ def __init__(self, nodename, pid, qemubox):
+ self.nodename = nodename
+ self.pid = pid
+ self.qemu_box = qemubox
# not known yet
- self.buildname=None
- self.timestamp=0
+ self.buildname = None
+ self.timestamp = 0
- def set_buildname (self,buildname): self.buildname=buildname
- def set_timestamp (self,timestamp): self.timestamp=timestamp
- def set_now (self): self.timestamp=int(time.time())
- def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
+ def set_buildname (self, buildname):
+ self.buildname = buildname
+ def set_timestamp (self, timestamp):
+ self.timestamp = timestamp
+ def set_now (self):
+ self.timestamp = int(time.time())
+ def pretty_timestamp (self):
+ return time.strftime("%Y-%m-%d:%H-%M", time.localtime(self.timestamp))
def line (self):
- msg = "== %s == (pid=%s)"%(self.nodename,self.pid)
- if self.buildname: msg += " <--> %s"%self.buildname
- else: msg += " *unknown build*"
- if self.timestamp: msg += " @ %s"%self.pretty_timestamp()
- else: msg += " *unknown timestamp*"
- if self.pid: msg += " pid=%s"%self.pid
+ msg = "== {} ==".format(short_hostname(self.nodename))
+ msg += " [={}]".format(self.buildname)
+ if self.pid: msg += " (pid={})".format(self.pid)
else: msg += " not (yet?) running"
+ if self.timestamp: msg += " @ {}".format(self.pretty_timestamp())
+ else: msg += " *unknown timestamp*"
return msg
def kill(self):
- if self.pid==0:
- print "cannot kill qemu %s with pid==0"%self.nodename
+ if self.pid == 0:
+ print("cannot kill qemu {} with pid==0".format(self.nodename))
return
- msg="Killing qemu %s with pid=%s on box %s"%(self.nodename,self.pid,self.qemu_box.hostname)
- self.qemu_box.run_ssh(['kill',"%s"%self.pid],msg)
+ msg = "Killing qemu {} with pid={} on box {}".format(self.nodename, self.pid, self.qemu_box.hostname)
+ self.qemu_box.run_ssh(['kill', "{}".format(self.pid)], msg)
self.qemu_box.forget(self)
class QemuBox (Box):
def __init__ (self, hostname, max_qemus):
- Box.__init__(self,hostname)
- self.qemu_instances=[]
- self.max_qemus=max_qemus
+ Box.__init__(self, hostname)
+ self.qemu_instances = []
+ self.max_qemus = max_qemus
- def add_node (self,nodename,pid):
+ def add_node(self, nodename, pid):
for qemu in self.qemu_instances:
- if qemu.nodename==nodename:
- header("WARNING, duplicate qemu %s running on %s"%\
- (nodename,self.hostname), banner=False)
+ if qemu.nodename == nodename:
+ header("WARNING, duplicate qemu {} running on {}"\
+ .format(nodename,self.hostname), banner=False)
return
- self.qemu_instances.append(QemuInstance(nodename,pid,self))
+ self.qemu_instances.append(QemuInstance(nodename, pid, self))
+
+ def node_names (self):
+ return [ qi.nodename for qi in self.qemu_instances ]
def forget (self, qemu_instance):
self.qemu_instances.remove(qemu_instance)
# fill one slot even though this one is not started yet
- def add_dummy (self, nodename):
- dummy=QemuInstance('dummy_'+nodename,0,self)
+ def add_dummy(self, nodename):
+ dummy=QemuInstance('dummy_'+nodename, 0, self)
dummy.set_now()
self.qemu_instances.append(dummy)
def line (self):
- msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_qemus,self.free_spots(),self.driver())
- return msg
+ return "{} [max={},free={}] ({}) {}"\
+ .format(self.hostname_fedora(virt="qemu"),
+ self.max_qemus, self.free_slots(),
+ self.uptime(), self.driver())
- def list(self):
+ def list(self, verbose=False):
if not self.qemu_instances:
- header ('No qemu process on %s'%(self.line()))
+ header ('No qemu on {}'.format(self.line()))
else:
- header ("Active qemu processes on %s"%(self.line()))
+ header ("Qemus on {}".format(self.line()))
+ self.qemu_instances.sort(key=timestamp_key)
for q in self.qemu_instances:
- header (q.line(),banner=False)
+ header (q.line(), banner=False)
- def free_spots (self):
+ def free_slots (self):
return self.max_qemus - len(self.qemu_instances)
def driver(self):
- if hasattr(self,'_driver') and self._driver: return self._driver
+ if hasattr(self,'_driver') and self._driver:
+ return self._driver
return '*undef* driver'
- def qemu_instance_by_pid (self,pid):
+ def qemu_instance_by_pid(self, pid):
for q in self.qemu_instances:
- if q.pid==pid: return q
+ if q.pid == pid:
+ return q
return None
- def qemu_instance_by_nodename_buildname (self,nodename,buildname):
+ def qemu_instance_by_nodename_buildname (self, nodename, buildname):
for q in self.qemu_instances:
- if q.nodename==nodename and q.buildname==buildname:
+ if q.nodename == nodename and q.buildname == buildname:
return q
return None
+ def reboot (self, options):
+ if not options.soft:
+ Box.reboot(self, options)
+ else:
+ self.run_ssh(['pkill','qemu'], "Killing qemu instances",
+ dry_run=options.dry_run)
+
matcher=re.compile("\s*(?P<pid>[0-9]+).*-cdrom\s+(?P<nodename>[^\s]+)\.iso")
- def sense(self, reboot=False, soft=False):
- if reboot:
- if not soft:
- self.reboot()
- else:
- self.run_ssh(box,['pkill','qemu'],"Killing qemu instances")
- return
- print 'q',
- modules=self.backquote_ssh(['lsmod']).split('\n')
- self._driver='*NO kqemu/kmv_intel MODULE LOADED*'
+
+ def sense(self, options):
+ print('qn', end=' ')
+ modules = self.backquote_ssh(['lsmod']).split('\n')
+ self._driver = '*NO kqemu/kvm_intel MODULE LOADED*'
for module in modules:
- if module.find('kqemu')==0:
- self._driver='kqemu module loaded'
- # kvm might be loaded without vkm_intel (we dont have AMD)
- elif module.find('kvm_intel')==0:
- self._driver='kvm_intel module loaded'
+ if module.find('kqemu') == 0:
+ self._driver = 'kqemu module loaded'
+ # kvm might be loaded without kvm_intel (we dont have AMD)
+ elif module.find('kvm_intel') == 0:
+ self._driver = 'kvm_intel OK'
########## find out running pids
- pids=self.backquote_ssh(['pgrep','qemu'])
- if not pids: return
- command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
- ps_lines = self.backquote_ssh (command).split("\n")
+ pids = self.backquote_ssh(['pgrep','qemu'])
+ if not pids:
+ return
+ command = ['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
+ ps_lines = self.backquote_ssh(command).split("\n")
for line in ps_lines:
- if not line.strip() or line.find('PID') >=0 : continue
- m=QemuBox.matcher.match(line)
- if m: self.add_node (m.group('nodename'),m.group('pid'))
- else: header('command %r returned line that failed to match'%command)
+ if not line.strip() or line.find('PID') >=0 :
+ continue
+ m = QemuBox.matcher.match(line)
+ if m:
+ self.add_node(m.group('nodename'), m.group('pid'))
+ continue
+ header('QemuBox.sense: command {} returned line that failed to match'.format(command))
+ header(">>{}<<".format(line))
########## retrieve alive instances and map to build
live_builds=[]
- command=['grep','.','*/*/qemu.pid','/dev/null']
- pid_lines=self.backquote_ssh(command,trash_err=True).split('\n')
+ command = ['grep', '.', '/vservers/*/*/qemu.pid', '/dev/null']
+ pid_lines = self.backquote_ssh(command, trash_err=True).split('\n')
for pid_line in pid_lines:
- if not pid_line.strip(): continue
+ if not pid_line.strip():
+ continue
# expect <build>/<nodename>/qemu.pid:<pid>pid
try:
- (buildname,nodename,tail)=pid_line.split('/')
- (_,pid)=tail.split(':')
- q=self.qemu_instance_by_pid (pid)
- if not q: continue
+ (_, __, buildname, nodename, tail) = pid_line.split('/')
+ (_,pid) = tail.split(':')
+ q = self.qemu_instance_by_pid(pid)
+ if not q:
+ continue
q.set_buildname(buildname)
live_builds.append(buildname)
- except: print 'WARNING, could not parse pid line',pid_line
+ except:
+ print('WARNING, could not parse pid line', pid_line)
# retrieve timestamps
- command= ['grep','.']
- command += ['%s/*/timestamp'%b for b in live_builds]
+ if not live_builds:
+ return
+ command = ['grep','.']
+ command += ['/vservers/{}/*/timestamp'.format(b) for b in live_builds]
command += ['/dev/null']
- ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
+ ts_lines = self.backquote_ssh(command, trash_err=True).split('\n')
for ts_line in ts_lines:
- if not ts_line.strip(): continue
+ if not ts_line.strip():
+ continue
# expect <build>/<nodename>/timestamp:<timestamp>
try:
- (buildname,nodename,tail)=ts_line.split('/')
- nodename=nodename.replace('qemu-','')
- (_,timestamp)=tail.split(':')
- timestamp=int(timestamp)
- q=self.qemu_instance_by_nodename_buildname(nodename,buildname)
+ (_, __, buildname, nodename, tail) = ts_line.split('/')
+ nodename = nodename.replace('qemu-', '')
+ (_, timestamp) = tail.split(':')
+ timestamp = int(timestamp)
+ q = self.qemu_instance_by_nodename_buildname(nodename, buildname)
if not q:
- print 'WARNING unattached qemu instance',ts_line,nodename,buildname
+ # this warning corresponds to qemu instances that were not killed properly
+ # and that have a dangling qemu.pid - and not even all of them as they need
+ # to be attached to a build that has a node running...
+ # it is more confusing than helpful, so let's just trash it
+ #print 'WARNING zombie qemu',self.hostname,ts_line
+ #print '... was expecting (',short_hostname(nodename),buildname,') in',\
+ # [ (short_hostname(i.nodename),i.buildname) for i in self.qemu_instances ]
continue
q.set_timestamp(timestamp)
- except: print 'WARNING, could not parse ts line',ts_line
+ except:
+ print('WARNING, could not parse ts line',ts_line)
+
+####################
+class TestInstance:
+ def __init__(self, buildname, pid=0):
+ self.pids = []
+ if pid != 0:
+ self.pid.append(pid)
+ self.buildname = buildname
+ # latest trace line
+ self.trace = ''
+ # has a KO test
+ self.broken_steps = []
+ self.timestamp = 0
+
+ def set_timestamp(self, timestamp):
+ self.timestamp = timestamp
+ def set_now(self):
+ self.timestamp = int(time.time())
+ def pretty_timestamp(self):
+ return time.strftime("%Y-%m-%d:%H-%M", time.localtime(self.timestamp))
+ def is_running (self):
+ return len(self.pids) != 0
+ def add_pid(self, pid):
+ self.pids.append(pid)
+ def set_broken(self, plcindex, step):
+ self.broken_steps.append( (plcindex, step,) )
+
+ def second_letter(self):
+ if not self.broken_steps:
+ return '='
+ else:
+ really_broken = [ step for (i,step) in self.broken_steps if '_ignore' not in step ]
+ # W is for warning like what's in the build mail
+ if len(really_broken) == 0:
+ return 'W'
+ else:
+ return 'B'
+
+ def line (self):
+ # make up a 2-letter sign
+ # first letter : '=', unless build is running : '*'
+ double = '*' if self.pids else '='
+ # second letter : '=' if fine, 'W' for warnings (only ignored steps) 'B' for broken
+ letter2 = self.second_letter()
+ double += letter2
+ msg = " {} {} ==".format(double, self.buildname)
+ if not self.pids:
+ pass
+ elif len(self.pids)==1:
+ msg += " (pid={})".format(self.pids[0])
+ else:
+ msg += " !!!pids={}!!!".format(self.pids)
+ msg += " @{}".format(self.pretty_timestamp())
+ if letter2 != '=':
+ msg2 = ( ' BROKEN' if letter2 == 'B' else ' WARNING' )
+ # sometimes we have an empty plcindex
+ msg += " [{}=".format(msg2) \
+ + " ".join(["{}@{}".format(s, i) if i else s for (i, s) in self.broken_steps]) \
+ + "]"
+ return msg
+
+class TestBox(Box):
+ def __init__(self, hostname):
+ Box.__init__(self, hostname)
+ self.starting_ips = []
+ self.test_instances = []
+
+ def reboot(self, options):
+ # can't reboot a vserver VM
+ self.run_ssh(['pkill', 'run_log'], "Terminating current runs",
+ dry_run=options.dry_run)
+ self.run_ssh(['rm', '-f', Starting.location], "Cleaning {}".format(Starting.location),
+ dry_run=options.dry_run)
+
+ def get_test(self, buildname):
+ for i in self.test_instances:
+ if i.buildname == buildname:
+ return i
+
+ # we scan ALL remaining test results, even the ones not running
+ def add_timestamp(self, buildname, timestamp):
+ i = self.get_test(buildname)
+ if i:
+ i.set_timestamp(timestamp)
+ else:
+ i = TestInstance(buildname, 0)
+ i.set_timestamp(timestamp)
+ self.test_instances.append(i)
+
+ def add_running_test(self, pid, buildname):
+ i = self.get_test(buildname)
+ if not i:
+ self.test_instances.append(TestInstance(buildname, pid))
+ return
+ if i.pids:
+ print("WARNING: 2 concurrent tests run on same build {}".format(buildname))
+ i.add_pid(pid)
+
+ def add_broken(self, buildname, plcindex, step):
+ i = self.get_test(buildname)
+ if not i:
+ i = TestInstance(buildname)
+ self.test_instances.append(i)
+ i.set_broken(plcindex, step)
+
+ matcher_proc=re.compile (".*/proc/(?P<pid>[0-9]+)/cwd.*/root/(?P<buildname>[^/]+)$")
+ matcher_grep=re.compile ("/root/(?P<buildname>[^/]+)/logs/trace.*:TRACE:\s*(?P<plcindex>[0-9]+).*step=(?P<step>\S+).*")
+ matcher_grep_missing=re.compile ("grep: /root/(?P<buildname>[^/]+)/logs/trace: No such file or directory")
+
+ def sense(self, options):
+ print('tm', end=' ')
+ self.starting_ips = [x for x in self.backquote_ssh(['cat',Starting.location], trash_err=True).strip().split('\n') if x]
+
+ # scan timestamps on all tests
+ # this is likely to not invoke ssh so we need to be a bit smarter to get * expanded
+ # xxx would make sense above too
+ command = ['bash', '-c', "grep . /root/*/timestamp /dev/null"]
+ ts_lines = self.backquote_ssh(command, trash_err=True).split('\n')
+ for ts_line in ts_lines:
+ if not ts_line.strip():
+ continue
+ # expect /root/<buildname>/timestamp:<timestamp>
+ try:
+ (ts_file, timestamp) = ts_line.split(':')
+ ts_file = os.path.dirname(ts_file)
+ buildname = os.path.basename(ts_file)
+ timestamp = int(timestamp)
+ t = self.add_timestamp(buildname, timestamp)
+ except:
+ print('WARNING, could not parse ts line', ts_line)
+
+ # let's try to be robust here -- tests that fail very early like e.g.
+ # "Cannot make space for a PLC instance: vplc IP pool exhausted", that occurs as part of provision
+ # will result in a 'trace' symlink to an inexisting 'trace-<>.txt' because no step has gone through
+ # simple 'trace' should exist though as it is created by run_log
+ command = ['bash', '-c', "grep KO /root/*/logs/trace /dev/null 2>&1" ]
+ trace_lines = self.backquote_ssh(command).split('\n')
+ for line in trace_lines:
+ if not line.strip():
+ continue
+ m = TestBox.matcher_grep_missing.match(line)
+ if m:
+ buildname = m.group('buildname')
+ self.add_broken(buildname, '', 'NO STEP DONE')
+ continue
+ m = TestBox.matcher_grep.match(line)
+ if m:
+ buildname = m.group('buildname')
+ plcindex = m.group('plcindex')
+ step = m.group('step')
+ self.add_broken(buildname, plcindex, step)
+ continue
+ header("TestBox.sense: command {} returned line that failed to match\n{}".format(command, line))
+ header(">>{}<<".format(line))
+
+ pids = self.backquote_ssh (['pgrep', 'run_log'], trash_err=True)
+ if not pids:
+ return
+ command = ['ls','-ld'] + ["/proc/{}/cwd".format(pid) for pid in pids.split("\n") if pid]
+ ps_lines = self.backquote_ssh(command).split('\n')
+ for line in ps_lines:
+ if not line.strip():
+ continue
+ m = TestBox.matcher_proc.match(line)
+ if m:
+ pid = m.group('pid')
+ buildname = m.group('buildname')
+ self.add_running_test(pid, buildname)
+ continue
+ header("TestBox.sense: command {} returned line that failed to match\n{}".format(command, line))
+ header(">>{}<<".format(line))
+
+
+ def line (self):
+ return self.hostname_fedora()
+
+ def list (self, verbose=False):
+ # verbose shows all tests
+ if verbose:
+ instances = self.test_instances
+ msg="tests"
+ else:
+ instances = [ i for i in self.test_instances if i.is_running() ]
+ msg="running tests"
+
+ if not instances:
+ header ("No {} on {}".format(msg, self.line()))
+ else:
+ header ("{} on {}".format(msg, self.line()))
+ instances.sort(sort=timestamp_sort)
+ for i in instances:
+ print(i.line())
+ # show 'starting' regardless of verbose
+ if self.starting_ips:
+ header("Starting IP addresses on {}".format(self.line()))
+ self.starting_ips.sort()
+ for starting in self.starting_ips:
+ print(starting)
+ else:
+ header("Empty 'starting' on {}".format(self.line()))
############################################################
class Options: pass
class Substrate:
- def __init__ (self):
- self.options=Options()
- self.options.dry_run=False
- self.options.verbose=False
- self.options.probe=True
- self.options.soft=True
- self.build_boxes = [ BuildBox(h) for h in self.build_boxes_spec() ]
- self.plc_boxes = [ PlcBox (h,m) for (h,m) in self.plc_boxes_spec ()]
- self.qemu_boxes = [ QemuBox (h,m) for (h,m) in self.qemu_boxes_spec ()]
- self.all_boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes
- self._sensed=False
-
- self.vplc_pool = Pool (self.vplc_ips(),"for vplcs")
- self.vnode_pool = Pool (self.vnode_ips(),"for vnodes")
+ def __init__(self):
+ self.options = Options()
+ self.options.dry_run = False
+ self.options.verbose = False
+ self.options.reboot = False
+ self.options.soft = False
+ self.test_box = TestBox (self.test_box_spec())
+ self.build_lxc_boxes = [ BuildLxcBox(h) for h in self.build_lxc_boxes_spec() ]
+ self.plc_lxc_boxes = [ PlcLxcBox (h, m) for (h, m) in self.plc_lxc_boxes_spec ()]
+ self.qemu_boxes = [ QemuBox (h, m) for (h, m) in self.qemu_boxes_spec ()]
+ self._sensed = False
+
+ self.vplc_pool = Pool(self.vplc_ips(), "for vplcs", self)
+ self.vnode_pool = Pool(self.vnode_ips(), "for vnodes", self)
+
+ self.build_boxes = self.build_lxc_boxes
+ self.plc_boxes = self.plc_lxc_boxes
+ self.default_boxes = self.plc_boxes + self.qemu_boxes
+ self.all_boxes = self.build_boxes + [ self.test_box ] + self.plc_boxes + self.qemu_boxes
+
+ def summary_line (self):
+ msg = "["
+ msg += " {} xp".format(len(self.plc_lxc_boxes))
+ msg += " {} tried plc boxes".format(len(self.plc_boxes))
+ msg += "]"
+ return msg
def fqdn (self, hostname):
- if hostname.find('.')<0: return "%s.%s"%(hostname,self.domain())
- def short_hostname (self, hostname):
- if hostname.find('.')>=0: return hostname.split('.')[0]
+ if hostname.find('.') < 0:
+ return "{}.{}".format(hostname, self.domain())
+ return hostname
# return True if actual sensing takes place
- def sense (self,force=False):
- if self._sensed and not force: return False
- print 'Sensing local substrate...',
- for b in self.all_boxes: b.sense()
- print 'Done'
- self._sensed=True
+ def sense(self, force=False):
+ if self._sensed and not force:
+ return False
+ print('Sensing local substrate...', end=' ')
+ for b in self.default_boxes:
+ b.sense(self.options)
+ print('Done')
+ self._sensed = True
return True
- def add_dummy_plc (self, plc_boxname, plcname):
+ def list(self, verbose=False):
+ for b in self.default_boxes:
+ b.list()
+
+ def add_dummy_plc(self, plc_boxname, plcname):
for pb in self.plc_boxes:
- if pb.hostname==plc_boxname:
+ if pb.hostname == plc_boxname:
pb.add_dummy(plcname)
- def add_dummy_qemu (self, qemu_boxname, qemuname):
+ return True
+ def add_dummy_qemu(self, qemu_boxname, qemuname):
for qb in self.qemu_boxes:
- if qb.hostname==qemu_boxname:
+ if qb.hostname == qemu_boxname:
qb.add_dummy(qemuname)
+ return True
+
+ def add_starting_dummy(self, bname, vname):
+ return self.add_dummy_plc(bname, vname) or self.add_dummy_qemu(bname, vname)
##########
- def provision (self,plcs,options):
+ def provision(self, plcs, options):
try:
# attach each plc to a plc box and an IP address
- plcs = [ self.provision_plc (plc,options) for plc in plcs ]
+ plcs = [ self.provision_plc(plc, options) for plc in plcs ]
# attach each node/qemu to a qemu box with an IP address
- plcs = [ self.provision_qemus (plc,options) for plc in plcs ]
+ plcs = [ self.provision_qemus(plc,options) for plc in plcs ]
# update the SFA spec accordingly
- plcs = [ self.localize_sfa_rspec(plc,options) for plc in plcs ]
+ plcs = [ self.localize_sfa_rspec(plc, options) for plc in plcs ]
+ self.list()
return plcs
- except Exception, e:
- print '* Could not provision this test on current substrate','--',e,'--','exiting'
+ except Exception as e:
+ print('* Could not provision this test on current substrate','--',e,'--','exiting')
traceback.print_exc()
sys.exit(1)
# it is expected that a couple of options like ips_bplc and ips_vplc
# are set or unset together
@staticmethod
- def check_options (x,y):
- if not x and not y: return True
- return len(x)==len(y)
+ def check_options(x, y):
+ if not x and not y:
+ return True
+ return len(x) == len(y)
# find an available plc box (or make space)
# and a free IP address (using options if present)
- def provision_plc (self, plc, options):
+ def provision_plc(self, plc, options):
- assert Substrate.check_options (options.ips_bplc, options.ips_vplc)
+ assert Substrate.check_options(options.ips_bplc, options.ips_vplc)
#### let's find an IP address for that plc
# look in options
# we don't check anything here,
# it is the caller's responsability to cleanup and make sure this makes sense
plc_boxname = options.ips_bplc.pop()
- vplc_hostname=options.ips_vplc.pop()
+ vplc_hostname = options.ips_vplc.pop()
else:
- if self.sense(): self.list_all()
- plc_boxname=None
- vplc_hostname=None
+ if self.sense():
+ self.list()
+ plc_boxname = None
+ vplc_hostname = None
# try to find an available IP
self.vplc_pool.sense()
- couple=self.vplc_pool.next_free()
+ couple = self.vplc_pool.next_free()
if couple:
- (vplc_hostname,unused)=couple
+ (vplc_hostname, unused) = couple
#### we need to find one plc box that still has a slot
- max_free=0
+ max_free = 0
# use the box that has max free spots for load balancing
for pb in self.plc_boxes:
- free=pb.free_spots()
- if free>max_free:
- plc_boxname=pb.hostname
- max_free=free
+ free = pb.free_slots()
+ if free > max_free:
+ plc_boxname = pb.hostname
+ max_free = free
# if there's no available slot in the plc_boxes, or we need a free IP address
# make space by killing the oldest running instance
if not plc_boxname or not vplc_hostname:
# find the oldest of all our instances
- all_plc_instances=reduce(lambda x, y: x+y,
- [ pb.plc_instances for pb in self.plc_boxes ],
- [])
- all_plc_instances.sort(timestamp_sort)
+ all_plc_instances = reduce(lambda x, y: x+y,
+ [ pb.plc_instances for pb in self.plc_boxes ],
+ [])
+ all_plc_instances.sort(key=timestamp_key)
try:
- plc_instance_to_kill=all_plc_instances[0]
+ plc_instance_to_kill = all_plc_instances[0]
except:
- msg=""
- if not plc_boxname: msg += " PLC boxes are full"
- if not vplc_hostname: msg += " vplc IP pool exhausted"
- raise Exception,"Could not make space for a PLC instance:"+msg
- freed_plc_boxname=plc_instance_to_kill.plc_box.hostname
- freed_vplc_hostname=plc_instance_to_kill.vplcname()
- message='killing oldest plc instance = %s on %s'%(plc_instance_to_kill.line(),
- freed_plc_boxname)
+ msg = ""
+ if not plc_boxname:
+ msg += " PLC boxes are full"
+ if not vplc_hostname:
+ msg += " vplc IP pool exhausted"
+ msg += " {}".format(self.summary_line())
+ raise Exception("Cannot make space for a PLC instance:" + msg)
+ freed_plc_boxname = plc_instance_to_kill.plc_box.hostname
+ freed_vplc_hostname = plc_instance_to_kill.vplcname()
+ message = 'killing oldest plc instance = {} on {}'\
+ .format(plc_instance_to_kill.line(), freed_plc_boxname)
plc_instance_to_kill.kill()
# use this new plcbox if that was the problem
if not plc_boxname:
- plc_boxname=freed_plc_boxname
+ plc_boxname = freed_plc_boxname
# ditto for the IP address
if not vplc_hostname:
- vplc_hostname=freed_vplc_hostname
+ vplc_hostname = freed_vplc_hostname
# record in pool as mine
self.vplc_pool.set_mine(vplc_hostname)
#
- self.add_dummy_plc(plc_boxname,plc['name'])
+ self.add_dummy_plc(plc_boxname, plc['name'])
vplc_ip = self.vplc_pool.get_ip(vplc_hostname)
- self.vplc_pool.add_starting(vplc_hostname)
+ self.vplc_pool.add_starting(vplc_hostname, plc_boxname)
#### compute a helpful vserver name
# remove domain in hostname
- vplc_short = self.short_hostname(vplc_hostname)
- vservername = "%s-%d-%s" % (options.buildname,plc['index'],vplc_short)
- plc_name = "%s_%s"%(plc['name'],vplc_short)
+ vplc_short = short_hostname(vplc_hostname)
+ vservername = "{}-{}-{}".format(options.buildname, plc['index'], vplc_short)
+ plc_name = "{}_{}".format(plc['name'], vplc_short)
- utils.header( 'PROVISION plc %s in box %s at IP %s as %s'%\
- (plc['name'],plc_boxname,vplc_hostname,vservername))
+ utils.header('PROVISION plc {} in box {} at IP {} as {}'\
+ .format(plc['name'], plc_boxname, vplc_hostname, vservername))
#### apply in the plc_spec
# # informative
- # label=options.personality.replace("linux","")
- mapper = {'plc': [ ('*' , {'host_box':plc_boxname,
- # 'name':'%s-'+label,
- 'name': plc_name,
- 'vservername':vservername,
- 'vserverip':vplc_ip,
- 'PLC_DB_HOST':vplc_hostname,
- 'PLC_API_HOST':vplc_hostname,
- 'PLC_BOOT_HOST':vplc_hostname,
- 'PLC_WWW_HOST':vplc_hostname,
- 'PLC_NET_DNS1' : self.network_settings() [ 'interface_fields:dns1' ],
- 'PLC_NET_DNS2' : self.network_settings() [ 'interface_fields:dns2' ],
- } ) ]
- }
+ # label = options.personality.replace("linux","")
+ mapper = {'plc' : [ ('*' , {'host_box' : plc_boxname,
+ 'name' : plc_name,
+ 'vservername' : vservername,
+ 'vserverip' : vplc_ip,
+ 'settings:PLC_DB_HOST' : vplc_hostname,
+ 'settings:PLC_API_HOST' : vplc_hostname,
+ 'settings:PLC_BOOT_HOST' : vplc_hostname,
+ 'settings:PLC_WWW_HOST' : vplc_hostname,
+ 'settings:PLC_NET_DNS1' : self.network_settings() [ 'interface_fields:dns1' ],
+ 'settings:PLC_NET_DNS2' : self.network_settings() [ 'interface_fields:dns2' ],
+ } ) ]
+ }
# mappers only work on a list of plcs
- return TestMapper([plc],options).map(mapper)[0]
+ return TestMapper([plc], options).map(mapper)[0]
##########
- def provision_qemus (self, plc, options):
+ def provision_qemus(self, plc, options):
- assert Substrate.check_options (options.ips_bnode, options.ips_vnode)
+ assert Substrate.check_options(options.ips_bnode, options.ips_vnode)
- test_mapper = TestMapper ([plc], options)
+ test_mapper = TestMapper([plc], options)
nodenames = test_mapper.node_names()
- maps=[]
+ maps = []
for nodename in nodenames:
if options.ips_vnode:
# as above, it's a rerun, take it for granted
- qemu_boxname=options.ips_bnode.pop()
- vnode_hostname=options.ips_vnode.pop()
+ qemu_boxname = options.ips_bnode.pop()
+ vnode_hostname = options.ips_vnode.pop()
else:
- if self.sense(): self.list_all()
- qemu_boxname=None
- vnode_hostname=None
+ if self.sense():
+ self.list()
+ qemu_boxname = None
+ vnode_hostname = None
# try to find an available IP
self.vnode_pool.sense()
- couple=self.vnode_pool.next_free()
+ couple = self.vnode_pool.next_free()
if couple:
- (vnode_hostname,unused)=couple
+ (vnode_hostname, unused) = couple
# find a physical box
- max_free=0
+ max_free = 0
# use the box that has max free spots for load balancing
for qb in self.qemu_boxes:
- free=qb.free_spots()
+ free = qb.free_slots()
if free>max_free:
- qemu_boxname=qb.hostname
- max_free=free
+ qemu_boxname = qb.hostname
+ max_free = free
# if we miss the box or the IP, kill the oldest instance
if not qemu_boxname or not vnode_hostname:
# find the oldest of all our instances
- all_qemu_instances=reduce(lambda x, y: x+y,
- [ qb.qemu_instances for qb in self.qemu_boxes ],
- [])
- all_qemu_instances.sort(timestamp_sort)
+ all_qemu_instances = reduce(lambda x, y: x+y,
+ [ qb.qemu_instances for qb in self.qemu_boxes ],
+ [])
+ all_qemu_instances.sort(key=timestamp_key)
try:
- qemu_instance_to_kill=all_qemu_instances[0]
+ qemu_instance_to_kill = all_qemu_instances[0]
except:
- msg=""
- if not qemu_boxname: msg += " QEMU boxes are full"
- if not vnode_hostname: msg += " vnode IP pool exhausted"
- raise Exception,"Could not make space for a QEMU instance:"+msg
- freed_qemu_boxname=qemu_instance_to_kill.qemu_box.hostname
- freed_vnode_hostname=self.short_hostname(qemu_instance_to_kill.nodename)
+ msg = ""
+ if not qemu_boxname:
+ msg += " QEMU boxes are full"
+ if not vnode_hostname:
+ msg += " vnode IP pool exhausted"
+ msg += " {}".format(self.summary_line())
+ raise Exception("Cannot make space for a QEMU instance:"+msg)
+ freed_qemu_boxname = qemu_instance_to_kill.qemu_box.hostname
+ freed_vnode_hostname = short_hostname(qemu_instance_to_kill.nodename)
# kill it
- message='killing oldest qemu node = %s on %s'%(qemu_instance_to_kill.line(),
- freed_qemu_boxname)
+ message = 'killing oldest qemu node = {} on {}'.format(qemu_instance_to_kill.line(),
+ freed_qemu_boxname)
qemu_instance_to_kill.kill()
# use these freed resources where needed
if not qemu_boxname:
- qemu_boxname=freed_qemu_boxname
+ qemu_boxname = freed_qemu_boxname
if not vnode_hostname:
- vnode_hostname=freed_vnode_hostname
+ vnode_hostname = freed_vnode_hostname
self.vnode_pool.set_mine(vnode_hostname)
- self.add_dummy_qemu (qemu_boxname,nodename)
- mac=self.vnode_pool.retrieve_userdata(vnode_hostname)
- ip=self.vnode_pool.get_ip (vnode_hostname)
- self.vnode_pool.add_starting(vnode_hostname)
+ self.add_dummy_qemu(qemu_boxname, vnode_hostname)
+ mac = self.vnode_pool.retrieve_userdata(vnode_hostname)
+ ip = self.vnode_pool.get_ip(vnode_hostname)
+ self.vnode_pool.add_starting(vnode_hostname, qemu_boxname)
vnode_fqdn = self.fqdn(vnode_hostname)
- nodemap={'host_box':qemu_boxname,
- 'node_fields:hostname':vnode_fqdn,
- 'interface_fields:ip':ip,
- 'interface_fields:mac':mac,
- }
+ nodemap = {'host_box' : qemu_boxname,
+ 'node_fields:hostname' : vnode_fqdn,
+ 'interface_fields:ip' : ip,
+ 'ipaddress_fields:ip_addr' : ip,
+ 'interface_fields:mac' : mac,
+ }
nodemap.update(self.network_settings())
- maps.append ( (nodename, nodemap) )
+ maps.append( (nodename, nodemap) )
- utils.header("PROVISION node %s in box %s at IP %s with MAC %s"%\
- (nodename,qemu_boxname,vnode_hostname,mac))
+ utils.header("PROVISION node {} in box {} at IP {} with MAC {}"\
+ .format(nodename, qemu_boxname, vnode_hostname, mac))
return test_mapper.map({'node':maps})[0]
- def localize_sfa_rspec (self,plc,options):
+ def localize_sfa_rspec(self, plc, options):
- plc['sfa']['SFA_REGISTRY_HOST'] = plc['PLC_DB_HOST']
- plc['sfa']['SFA_AGGREGATE_HOST'] = plc['PLC_DB_HOST']
- plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST']
- plc['sfa']['SFA_PLC_DB_HOST'] = plc['PLC_DB_HOST']
- plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/'
- for site in plc['sites']:
- for node in site['nodes']:
- plc['sfa']['sfa_slice_rspec']['part4'] = node['node_fields']['hostname']
- return plc
+ plc['sfa']['settings']['SFA_REGISTRY_HOST'] = plc['settings']['PLC_DB_HOST']
+ plc['sfa']['settings']['SFA_AGGREGATE_HOST'] = plc['settings']['PLC_DB_HOST']
+ plc['sfa']['settings']['SFA_SM_HOST'] = plc['settings']['PLC_DB_HOST']
+ plc['sfa']['settings']['SFA_DB_HOST'] = plc['settings']['PLC_DB_HOST']
+ plc['sfa']['settings']['SFA_PLC_URL'] = 'https://{}:443/PLCAPI/'.format(plc['settings']['PLC_API_HOST'])
+ return plc
#################### release:
- def release (self,options):
+ def release(self, options):
self.vplc_pool.release_my_starting()
self.vnode_pool.release_my_starting()
pass
#################### show results for interactive mode
- def list_all (self):
- self.sense()
- for b in self.all_boxes: b.list()
-
- def get_box (self,box):
- for b in self.build_boxes + self.plc_boxes + self.qemu_boxes:
- if b.short_hostname()==box:
+ def get_box(self, boxname):
+ for b in self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box] :
+ if b.shortname() == boxname:
return b
- print "Could not find box %s"%box
+ try:
+ if b.shortname() == boxname.split('.')[0]:
+ return b
+ except:
+ pass
+ print("Could not find box {}".format(boxname))
return None
- def list_box(self,box):
- b=self.get_box(box)
- if not b: return
- b.sense()
- b.list()
-
- # can be run as a utility to manage the local infrastructure
- def main (self):
- parser=OptionParser()
- parser.add_option ('-v',"--verbose",action='store_true',dest='verbose',default=False,
- help='verbose mode')
- (options,args)=parser.parse_args()
- if options.verbose:
- self.options.verbose=True
- if not args:
- self.list_all()
+ # deal with the mix of boxes and names and stores the current focus
+ # as a list of Box instances in self.focus_all
+ def normalize(self, box_or_names):
+ self.focus_all = []
+ for box in box_or_names:
+ if not isinstance(box, Box):
+ box = self.get_box(box)
+ if not box:
+ print('Warning - could not handle box',box)
+ self.focus_all.append(box)
+ # elaborate by type
+ self.focus_build = [ x for x in self.focus_all if isinstance(x, BuildBox) ]
+ self.focus_plc = [ x for x in self.focus_all if isinstance(x, PlcBox) ]
+ self.focus_qemu = [ x for x in self.focus_all if isinstance(x, QemuBox) ]
+
+ def list_boxes(self):
+ print('Sensing', end=' ')
+ for box in self.focus_all:
+ box.sense(self.options)
+ print('Done')
+ for box in self.focus_all:
+ box.list(self.options.verbose)
+
+ def reboot_boxes(self):
+ for box in self.focus_all:
+ box.reboot(self.options)
+
+ def sanity_check(self):
+ print('Sanity check')
+ self.sanity_check_plc()
+ self.sanity_check_qemu()
+
+ def sanity_check_plc(self):
+ pass
+
+ def sanity_check_qemu(self):
+ all_nodes = []
+ for box in self.focus_qemu:
+ all_nodes += box.node_names()
+ hash = {}
+ for node in all_nodes:
+ if node not in hash:
+ hash[node] = 0
+ hash[node]+=1
+ for (node,count) in list(hash.items()):
+ if count!=1:
+ print('WARNING - duplicate node', node)
+
+
+ ####################
+ # can be run as a utility to probe/display/manage the local infrastructure
+ def main(self):
+ parser = OptionParser()
+ parser.add_option('-r', "--reboot", action='store_true', dest='reboot', default=False,
+ help='reboot mode (use shutdown -r)')
+ parser.add_option('-s', "--soft", action='store_true', dest='soft', default=False,
+ help='soft mode for reboot (terminates processes)')
+ parser.add_option('-t', "--testbox", action='store_true', dest='testbox', default=False,
+ help='add test box')
+ parser.add_option('-b', "--build", action='store_true', dest='builds', default=False,
+ help='add build boxes')
+ parser.add_option('-p', "--plc", action='store_true', dest='plcs', default=False,
+ help='add plc boxes')
+ parser.add_option('-q', "--qemu", action='store_true', dest='qemus', default=False,
+ help='add qemu boxes')
+ parser.add_option('-a', "--all", action='store_true', dest='all', default=False,
+ help='address all known boxes, like -b -t -p -q')
+ parser.add_option('-v', "--verbose", action='store_true', dest='verbose', default=False,
+ help='verbose mode')
+ parser.add_option('-n', "--dry_run", action='store_true', dest='dry_run', default=False,
+ help='dry run mode')
+ (self.options, args) = parser.parse_args()
+
+ boxes = args
+ if self.options.testbox: boxes += [self.test_box]
+ if self.options.builds: boxes += self.build_boxes
+ if self.options.plcs: boxes += self.plc_boxes
+ if self.options.qemus: boxes += self.qemu_boxes
+ if self.options.all: boxes += self.all_boxes
+
+ global verbose
+ verbose = self.options.verbose
+ # default scope is -b -p -q -t
+ if not boxes:
+ boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box]
+
+ self.normalize(boxes)
+
+ if self.options.reboot:
+ self.reboot_boxes()
else:
- for box in args:
- self.list_box(box)
+ self.list_boxes()
+ self.sanity_check()