From f8436f56fe24e8e545c9b3669897b8332dddffbc Mon Sep 17 00:00:00 2001 From: Thierry Parmentelat Date: Thu, 15 Sep 2011 14:27:31 +0200 Subject: [PATCH] first draft for a tracker-less rewrite --- system/LocalSubstrate.inria.py | 70 ++ system/LocalTestResources.sample.inria | 67 -- system/LocalTestResources.sample.princeton | 58 -- system/Substrate.py | 742 +++++++++++++++++++++ system/TestMain.py | 36 +- system/TestMapper.py | 6 +- system/TestNode.py | 7 +- system/TestPlc.py | 13 +- system/TestPool.py | 125 ---- system/TestResources.py | 242 ------- system/TestSsh.py | 29 +- system/Trackers.py | 160 ----- system/config_default.py | 1 + 13 files changed, 879 insertions(+), 677 deletions(-) create mode 100755 system/LocalSubstrate.inria.py delete mode 100755 system/LocalTestResources.sample.inria delete mode 100755 system/LocalTestResources.sample.princeton create mode 100644 system/Substrate.py delete mode 100644 system/TestPool.py delete mode 100644 system/TestResources.py delete mode 100644 system/Trackers.py diff --git a/system/LocalSubstrate.inria.py b/system/LocalSubstrate.inria.py new file mode 100755 index 0000000..465d8b2 --- /dev/null +++ b/system/LocalSubstrate.inria.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# Thierry Parmentelat +# Copyright (C) 2010 INRIA +# +# this is only an example file +# the actual file is installed in your testmaster box as /root/LocalTestResources.py +# + +if __name__ == '__main__': + import sys, os.path + sys.path.append(os.path.expanduser("~/git-tests/system")) + +from Substrate import Substrate + +# domain name .pl.sophia.inria.fr is implicit on our network +class OnelabSubstrate (Substrate): + + # the build boxes we use + def build_boxes_spec (self): + return [ 'liquid', 'reed', 'velvet', ] + + # the vs-capable box for PLCs + def plc_boxes_spec (self): + return [ ('vs64-1', 10), # how many plcs max in this box + ] + + # vplc01 to 15 + def vplc_ips (self): + return [ ( 'vplc%02d'%i, # DNS name +# '02:34:56:00:ee:%02d'%i) # MAC address + 'unused') # MAC address + for i in range(1,5) ] # 21 + + def qemu_boxes_spec (self): + return [ +# ('kvm64-1', 3), # how many plcs max in this box + ('kvm64-2', 3), +# ('kvm64-3', 3), +# ('kvm64-4', 3), +# ('kvm64-5', 3), +# ('kvm64-6', 3), + ] + + # the nodes pool has a MAC address as user-data (3rd elt in tuple) + def vnode_ips (self): + return [ ( 'vnode%02d'%i, # DNS name + '02:34:56:00:00:%02d'%i) # MAC address + for i in range(1,5) ] # 21 + + # local network settings + def domain (self): + return 'pl.sophia.inria.fr' + + def network_settings (self): + return { 'interface_fields:gateway':'138.96.112.250', + 'interface_fields:network':'138.96.112.0', + 'interface_fields:broadcast':'138.96.119.255', + 'interface_fields:netmask':'255.255.248.0', + 'interface_fields:dns1': '138.96.112.1', + 'interface_fields:dns2': '138.96.112.2', + } + +# the hostname for the testmaster - in case we'd like to run this remotely + def testmaster (self): + return 'testmaster' + +local_substrate = OnelabSubstrate () + +if __name__ == '__main__': + local_substrate.main() diff --git a/system/LocalTestResources.sample.inria b/system/LocalTestResources.sample.inria deleted file mode 100755 index 8eaad0f..0000000 --- a/system/LocalTestResources.sample.inria +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/python -# Thierry Parmentelat -# Copyright (C) 2010 INRIA -# -# this is only an example file -# the actual file is installed in your testmaster box as /root/LocalTestResources.py -# - -if __name__ == '__main__': - import sys, os.path - sys.path.append(os.path.expanduser("~/git-tests/system")) - -from TestResources import TestResources - -class OnelabTestResources (TestResources): - - # we use only one for now but who knows - def plc_boxes (self): - return [ 'testplc.pl.sophia.inria.fr' ] - - def network_dict (self): - return { 'interface_fields:gateway':'138.96.112.250', - 'interface_fields:network':'138.96.112.0', - 'interface_fields:broadcast':'138.96.119.255', - 'interface_fields:netmask':'255.255.248.0', - 'interface_fields:dns1': '138.96.112.1', - 'interface_fields:dns2': '138.96.112.2', - } - - def nodes_ip_pool (self): - return [ ( 'vnode%02d.pl.sophia.inria.fr'%i, - '138.96.112.%d'%(110+i), - '02:34:56:00:00:%02d'%i) for i in range(1,8) ] - - # 32bits : qemu32-[1-5] + 64bits : qemu64-[1-3] - # removing qemu64-3 until it gets on the right IP segment again - def qemus_ip_pool (self): - return [ - ('kvm64-1.pl.sophia.inria.fr', None, None), - ('kvm64-2.pl.sophia.inria.fr', None, None), - ('kvm64-3.pl.sophia.inria.fr', None, None), - ('kvm64-4.pl.sophia.inria.fr', None, None), - ('kvm64-5.pl.sophia.inria.fr', None, None), - ('kvm64-6.pl.sophia.inria.fr', None, None), - ] - - def max_qemus (self): - # let's be tight; nighlty builds.sh wipes it clean beforehand - return len(self.qemus_ip_pool()) - - # 1 to 15 - def plcs_ip_pool (self): - return [ ( 'vplc%02d.pl.sophia.inria.fr'%i, - '138.96.112.%d'%(70+i), - '02:34:56:00:ee:%02d'%i) for i in range(1,16) ] - - def max_plcs (self): - # leave space for the triangle setups - return len(self.plcs_ip_pool())-3 - - def preferred_hostname (self): - return None - -local_resources = OnelabTestResources () - -if __name__ == '__main__': - for (h,_,__) in local_resources.qemus_ip_pool(): print h diff --git a/system/LocalTestResources.sample.princeton b/system/LocalTestResources.sample.princeton deleted file mode 100755 index aa623a1..0000000 --- a/system/LocalTestResources.sample.princeton +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/python -# Thierry Parmentelat -# Copyright (C) 2010 INRIA -# -# this is the file that gets installed at onelab -# in our testmaster box as /root/LocalTestResources.py -# - -if __name__ == '__main__': - import sys, os.path - sys.path.append(os.path.expanduser("~/git-tests/system")) - -from TestResources import TestResources - -class PlanetlabTestResources (TestResources): - - # we use only one for now but who knows - def plc_boxes (self): - return [ 'testbox.test.planet-lab.org' ] - - def network_dict (self): - return { 'interface_fields:gateway': '128.112.139.1', - 'interface_fields:network': '128.112.139.0', - 'interface_fields:broadcast': '128.112.139.127', - 'interface_fields:netmask': '255.255.255.128', - 'interface_fields:dns1': '128.112.136.10', - 'interface_fields:dns2': '128.112.136.12', - } - - def nodes_ip_pool (self): - return [ ("node-01.test.planet-lab.org", "128.112.139.44", "de:ad:be:ef:00:10"), - ("node-02.test.planet-lab.org", "128.112.139.66", "de:ad:be:ef:00:20"), - ] - - def qemus_ip_pool (self): - return [ ( 'testqemu1.test.planet-lab.org', None, None ) ] - - def max_qemus (self): - return 1 - - def plcs_ip_pool (self): - return [ ("pl-service-08.CS.Princeton.EDU","128.112.139.34", "de:ad:be:ef:ff:01"), - ("pl-service-09.CS.Princeton.EDU","128.112.139.35", "de:ad:be:ef:ff:02"), - ("pl-service-10.CS.Princeton.EDU","128.112.139.36", "de:ad:be:ef:ff:03"), - ("pl-service-11.CS.Princeton.EDU","128.112.139.37", "de:ad:be:ef:ff:04"), - ("pl-service-12.CS.Princeton.EDU","128.112.139.41", "de:ad:be:ef:ff:05"), - ] - - def max_plcs (self): - return 4 - - def preferred_hostname (self): - return "plc" - -local_resources = PlanetlabTestResources () - -if __name__ == '__main__': - for (h,_,__) in local_resources.qemus_ip_pool(): print h diff --git a/system/Substrate.py b/system/Substrate.py new file mode 100644 index 0000000..e660b47 --- /dev/null +++ b/system/Substrate.py @@ -0,0 +1,742 @@ +# +# Thierry Parmentelat +# Copyright (C) 2010 INRIA +# +# #################### history +# +# This is a complete rewrite of TestResources/Tracker/Pool +# we don't use trackers anymore and just probe/sense the running +# boxes to figure out where we are +# in order to implement some fairness in the round-robin allocation scheme +# we need an indication of the 'age' of each running entity, +# hence the 'timestamp-*' steps in TestPlc +# +# this should be much more flexible: +# * supports several plc boxes +# * supports several qemu guests per host +# * no need to worry about tracker being in sync or not +# +# #################### howto use +# +# each site is to write its own LocalSubstrate.py, +# (see e.g. LocalSubstrate.inria.py) +# LocalSubstrate.py is expected to be in /root on the testmaster box +# and needs to define +# MYPLCs +# . the vserver-capable boxes used for hosting myplcs +# . and their admissible load (max # of myplcs) +# . the pool of DNS-names and IP-addresses available for myplcs +# QEMU nodes +# . the kvm-qemu capable boxes to host qemu instances +# . and their admissible load (max # of myplcs) +# . the pool of DNS-names and IP-addresses available for nodes +# +# #################### + +import os.path, sys +import time +import re +import traceback +import subprocess +import commands +import socket +from optparse import OptionParser + +import utils +from TestSsh import TestSsh +from TestMapper import TestMapper + +def header (message,banner=True): + if not message: return + if banner: print "===============", + print message + sys.stdout.flush() + +def timestamp_sort(o1,o2): + if not o1.timestamp: return -1 + elif not o2.timestamp: return 1 + else: return o2.timestamp-o1.timestamp + +#################### +# pool class +# allows to pick an available IP among a pool +# input is expressed as a list of tuples (hostname,ip,user_data) +# that can be searched iteratively for a free slot +# e.g. +# pool = [ (hostname1,user_data1), +# (hostname2,user_data2), +# (hostname3,user_data2), +# (hostname4,user_data4) ] +# assuming that ip1 and ip3 are taken (pingable), then we'd get +# pool=Pool(pool) +# pool.next_free() -> entry2 +# pool.next_free() -> entry4 +# pool.next_free() -> None +# that is, even if ip2 is not busy/pingable when the second next_free() is issued + +class PoolItem: + def __init__ (self,hostname,userdata): + self.hostname=hostname + self.userdata=userdata + # slot holds 'busy' or 'free' or 'fake' or None + self.status=None + self.ip=None + + def line(self): + return "Pooled %s (%s) -> %s"%(self.hostname,self.userdata, self.status) + def get_ip(self): + if self.ip: return self.ip + ip=socket.gethostbyname(self.hostname) + self.ip=ip + return ip + +class Pool: + + def __init__ (self, tuples,message): + self.pool= [ PoolItem (h,u) for (h,u) in tuples ] + self.message=message + self._sensed=False + + def sense (self): + if self._sensed: return + print 'Checking IP pool',self.message, + for item in self.pool: + if self.check_ping (item.hostname): item.status='busy' + else: item.status='free' + self._sensed=True + print 'Done' + + def list (self): + for i in self.pool: print i.line() + + def retrieve_userdata (self, hostname): + for i in self.pool: + if i.hostname==hostname: return i.userdata + return None + + def get_ip (self, hostname): + # use cached if in pool + for i in self.pool: + if i.hostname==hostname: return i.get_ip() + # otherwise just ask dns again + return socket.gethostbyname(hostname) + + def next_free (self): + for i in self.pool: + if i.status in ['busy','fake']: continue + i.status='fake' + return (i.hostname,i.userdata) + raise Exception,"No IP address available in pool %s"%self.message + +# OS-dependent ping option (support for macos, for convenience) + ping_timeout_option = None +# checks whether a given hostname/ip responds to ping + def check_ping (self,hostname): + if not Pool.ping_timeout_option: + (status,osname) = commands.getstatusoutput("uname -s") + if status != 0: + raise Exception, "TestPool: Cannot figure your OS name" + if osname == "Linux": + Pool.ping_timeout_option="-w" + elif osname == "Darwin": + Pool.ping_timeout_option="-t" + + command="ping -c 1 %s 1 %s"%(Pool.ping_timeout_option,hostname) + (status,output) = commands.getstatusoutput(command) + if status==0: print '+', + else: print '-', + return status == 0 + +#################### +class Box: + def __init__ (self,hostname): + self.hostname=hostname + def simple_hostname (self): + return self.hostname.split('.')[0] + def test_ssh (self): return TestSsh(self.hostname,username='root',unknown_host=False) + def reboot (self): + self.test_ssh().run("shutdown -r now",message="Rebooting %s"%self.hostname) + + def run(self,argv,message=None,trash_err=False,dry_run=False): + if dry_run: + print 'DRY_RUN:', + print " ".join(argv) + return 0 + else: + header(message) + if not trash_err: + return subprocess.call(argv) + else: + return subprocess.call(argv,stderr=file('/dev/null','w')) + + def run_ssh (self, argv, message, trash_err=False): + ssh_argv = self.test_ssh().actual_argv(argv) + result=self.run (ssh_argv, message, trash_err) + if result!=0: + print "WARNING: failed to run %s on %s"%(" ".join(argv),self.hostname) + return result + + def backquote (self, argv, trash_err=False): + if not trash_err: + return subprocess.Popen(argv,stdout=subprocess.PIPE).communicate()[0] + else: + return subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0] + + def backquote_ssh (self, argv, trash_err=False): + # first probe the ssh link + probe_argv=self.test_ssh().actual_argv(['hostname']) + hostname=self.backquote ( probe_argv, trash_err=True ) + if not hostname: + print "root@%s unreachable"%self.hostname + return '' + else: + return self.backquote( self.test_ssh().actual_argv(argv), trash_err) + +############################################################ +class BuildInstance: + def __init__ (self, buildname, pid, buildbox): + self.buildname=buildname + self.buildbox=buildbox + self.pids=[pid] + + def add_pid(self,pid): + self.pids.append(pid) + + def line (self): + return "== %s == (pids=%r)"%(self.buildname,self.pids) + +class BuildBox (Box): + def __init__ (self,hostname): + Box.__init__(self,hostname) + self.build_instances=[] + + def add_build (self,buildname,pid): + for build in self.build_instances: + if build.buildname==buildname: + build.add_pid(pid) + return + self.build_instances.append(BuildInstance(buildname, pid, self)) + + def list(self): + if not self.build_instances: + header ('No build process on %s (%s)'%(self.hostname,self.uptime())) + else: + header ("Builds on %s (%s)"%(self.hostname,self.uptime())) + for b in self.build_instances: + header (b.line(),banner=False) + + def uptime(self): + if hasattr(self,'_uptime') and self._uptime: return self._uptime + return '*undef* uptime' + + # inspect box and find currently running builds + matcher=re.compile("\s*(?P[0-9]+).*-[bo]\s+(?P[^\s]+)(\s|\Z)") + def sense(self,reboot=False,verbose=True): + if reboot: + self.reboot(box) + return + print 'b', + command=['uptime'] + self._uptime=self.backquote_ssh(command,trash_err=True).strip() + if not self._uptime: self._uptime='unreachable' + pids=self.backquote_ssh(['pgrep','build'],trash_err=True) + if not pids: return + command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] + ps_lines=self.backquote_ssh (command).split('\n') + for line in ps_lines: + if not line.strip() or line.find('PID')>=0: continue + m=BuildBox.matcher.match(line) + if m: self.add_build (m.group('buildname'),m.group('pid')) + else: header('command %r returned line that failed to match'%command) + +############################################################ +class PlcInstance: + def __init__ (self, vservername, ctxid, plcbox): + self.vservername=vservername + self.ctxid=ctxid + self.plc_box=plcbox + # unknown yet + self.timestamp=None + + def set_timestamp (self,timestamp): self.timestamp=timestamp + def set_now (self): self.timestamp=int(time.time()) + def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp)) + + def line (self): + msg="== %s == (ctx=%s)"%(self.vservername,self.ctxid) + if self.timestamp: msg += " @ %s"%self.pretty_timestamp() + else: msg += " *unknown timestamp*" + if self.ctxid==0: msg+=" not (yet?) running" + return msg + + def kill (self): + msg="vserver stopping %s on %s"%(self.vservername,self.plc_box.hostname) + self.plc_box.run_ssh(['vserver',self.vservername,'stop'],msg) + self.plc_box.forget(self) + +class PlcBox (Box): + def __init__ (self, hostname, max_plcs): + Box.__init__(self,hostname) + self.plc_instances=[] + self.max_plcs=max_plcs + + def add_vserver (self,vservername,ctxid): + for plc in self.plc_instances: + if plc.vservername==vservername: + header("WARNING, duplicate myplc %s running on %s"%\ + (vservername,self.hostname),banner=False) + return + self.plc_instances.append(PlcInstance(vservername,ctxid,self)) + + def forget (self, plc_instance): + self.plc_instances.remove(plc_instance) + + # fill one slot even though this one is not started yet + def add_fake (self, plcname): + fake=PlcInstance('fake_'+plcname,0,self) + fake.set_now() + self.plc_instances.append(fake) + + def line(self): + msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_plcs,self.free_spots(),self.uname()) + return msg + + def list(self): + if not self.plc_instances: + header ('No vserver running on %s'%(self.line())) + else: + header ("Active plc VMs on %s"%self.line()) + for p in self.plc_instances: + header (p.line(),banner=False) + + def free_spots (self): + return self.max_plcs - len(self.plc_instances) + + def uname(self): + if hasattr(self,'_uname') and self._uname: return self._uname + return '*undef* uname' + + def plc_instance_by_vservername (self, vservername): + for p in self.plc_instances: + if p.vservername==vservername: return p + return None + + def sense (self, reboot=False, soft=False): + if reboot: + # remove mark for all running servers to avoid resurrection + stop_command=['rm','-rf','/etc/vservers/*/apps/init/mark'] + self.run_ssh(stop_command,"Removing all vserver marks on %s"%self.hostname) + if not soft: + self.reboot() + return + else: + self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers") + return + print 'p', + self._uname=self.backquote_ssh(['uname','-r']).strip() + # try to find fullname (vserver_stat truncates to a ridiculously short name) + # fetch the contexts for all vservers on that box + map_command=['grep','.','/etc/vservers/*/context','/dev/null',] + context_map=self.backquote_ssh (map_command) + # at this point we have a set of lines like + # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144 + ctx_dict={} + for map_line in context_map.split("\n"): + if not map_line: continue + [path,xid] = map_line.split(':') + ctx_dict[xid]=os.path.basename(os.path.dirname(path)) + # at this point ctx_id maps context id to vservername + + command=['vserver-stat'] + vserver_stat = self.backquote_ssh (command) + for vserver_line in vserver_stat.split("\n"): + if not vserver_line: continue + context=vserver_line.split()[0] + if context=="CTX": continue + longname=ctx_dict[context] + self.add_vserver(longname,context) +# print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals() + + # scan timestamps + command= ['grep','.'] + command += ['/vservers/%s/timestamp'%b for b in ctx_dict.values()] + command += ['/dev/null'] + ts_lines=self.backquote_ssh(command,trash_err=True).split('\n') + for ts_line in ts_lines: + if not ts_line.strip(): continue + # expect /vservers//timestamp: + try: + (_,__,vservername,tail)=ts_line.split('/') + (_,timestamp)=tail.split(':') + timestamp=int(timestamp) + q=self.plc_instance_by_vservername(vservername) + if not q: + print 'WARNING unattached plc instance',ts_line + continue + q.set_timestamp(timestamp) + except: print 'WARNING, could not parse ts line',ts_line + + + + +############################################################ +class QemuInstance: + def __init__ (self, nodename, pid, qemubox): + self.nodename=nodename + self.pid=pid + self.qemu_box=qemubox + # not known yet + self.buildname=None + self.timestamp=None + + def set_buildname (self,buildname): self.buildname=buildname + def set_timestamp (self,timestamp): self.timestamp=timestamp + def set_now (self): self.timestamp=int(time.time()) + def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp)) + + def line (self): + msg = "== %s == (pid=%s)"%(self.nodename,self.pid) + if self.buildname: msg += " <--> %s"%self.buildname + else: msg += " *unknown build*" + if self.timestamp: msg += " @ %s"%self.pretty_timestamp() + else: msg += " *unknown timestamp*" + if self.pid: msg += "pid=%s"%self.pid + else: msg += " not (yet?) running" + return msg + + def kill(self): + if self.pid==0: print "cannot kill qemu %s with pid==0"%self.nodename + msg="Killing qemu %s with pid=%s on box %s"%(self.nodename,self.pid,self.qemu_box.hostname) + self.qemu_box.run_ssh(['kill',"%s"%self.pid],msg) + self.qemu_box.forget(self) + + +class QemuBox (Box): + def __init__ (self, hostname, max_qemus): + Box.__init__(self,hostname) + self.qemu_instances=[] + self.max_qemus=max_qemus + + def add_node (self,nodename,pid): + for qemu in self.qemu_instances: + if qemu.nodename==nodename: + header("WARNING, duplicate qemu %s running on %s"%\ + (nodename,self.hostname), banner=False) + return + self.qemu_instances.append(QemuInstance(nodename,pid,self)) + + def forget (self, qemu_instance): + self.qemu_instances.remove(qemu_instance) + + # fill one slot even though this one is not started yet + def add_fake (self, nodename): + fake=QemuInstance('fake_'+nodename,0,self) + fake.set_now() + self.qemu_instances.append(fake) + + def line (self): + msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_qemus,self.free_spots(),self.driver()) + return msg + + def list(self): + if not self.qemu_instances: + header ('No qemu process on %s'%(self.line())) + else: + header ("Active qemu processes on %s"%(self.line())) + for q in self.qemu_instances: + header (q.line(),banner=False) + + def free_spots (self): + return self.max_qemus - len(self.qemu_instances) + + def driver(self): + if hasattr(self,'_driver') and self._driver: return self._driver + return '*undef* driver' + + def qemu_instance_by_pid (self,pid): + for q in self.qemu_instances: + if q.pid==pid: return q + return None + + def qemu_instance_by_nodename_buildname (self,nodename,buildname): + for q in self.qemu_instances: + if q.nodename==nodename and q.buildname==buildname: + return q + return None + + matcher=re.compile("\s*(?P[0-9]+).*-cdrom\s+(?P[^\s]+)\.iso") + def sense(self, reboot=False, soft=False): + if reboot: + if not soft: + self.reboot() + else: + self.run_ssh(box,['pkill','qemu'],"Killing qemu instances") + return + print 'q', + modules=self.backquote_ssh(['lsmod']).split('\n') + self._driver='*NO kqemu/kmv_intel MODULE LOADED*' + for module in modules: + if module.find('kqemu')==0: + self._driver='kqemu module loaded' + # kvm might be loaded without vkm_intel (we dont have AMD) + elif module.find('kvm_intel')==0: + self._driver='kvm_intel module loaded' + ########## find out running pids + pids=self.backquote_ssh(['pgrep','qemu']) + if not pids: return + command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] + ps_lines = self.backquote_ssh (command).split("\n") + for line in ps_lines: + if not line.strip() or line.find('PID') >=0 : continue + m=QemuBox.matcher.match(line) + if m: self.add_node (m.group('nodename'),m.group('pid')) + else: header('command %r returned line that failed to match'%command) + ########## retrieve alive instances and map to build + live_builds=[] + command=['grep','.','*/*/qemu.pid','/dev/null'] + pid_lines=self.backquote_ssh(command,trash_err=True).split('\n') + for pid_line in pid_lines: + if not pid_line.strip(): continue + # expect //qemu.pid:pid + try: + (buildname,nodename,tail)=pid_line.split('/') + (_,pid)=tail.split(':') + q=self.qemu_instance_by_pid (pid) + if not q: continue + q.set_buildname(buildname) + live_builds.append(buildname) + except: print 'WARNING, could not parse pid line',pid_line + # retrieve timestamps + command= ['grep','.'] + command += ['%s/*/timestamp'%b for b in live_builds] + command += ['/dev/null'] + ts_lines=self.backquote_ssh(command,trash_err=True).split('\n') + for ts_line in ts_lines: + if not ts_line.strip(): continue + # expect //timestamp: + try: + (buildname,nodename,tail)=ts_line.split('/') + nodename=nodename.replace('qemu-','') + (_,timestamp)=tail.split(':') + timestamp=int(timestamp) + q=self.qemu_instance_by_nodename_buildname(nodename,buildname) + if not q: + print 'WARNING unattached qemu instance',ts_line,nodename,buildname + continue + q.set_timestamp(timestamp) + except: print 'WARNING, could not parse ts line',ts_line + +############################################################ +class Options: pass + +class Substrate: + + def test (self): + self.sense() + + def __init__ (self): + self.options=Options() + self.options.dry_run=False + self.options.verbose=False + self.options.probe=True + self.options.soft=True + self.build_boxes = [ BuildBox(h) for h in self.build_boxes_spec() ] + self.plc_boxes = [ PlcBox (h,m) for (h,m) in self.plc_boxes_spec ()] + self.qemu_boxes = [ QemuBox (h,m) for (h,m) in self.qemu_boxes_spec ()] + self.all_boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes + self._sensed=False + + self.vplc_pool = Pool (self.vplc_ips(),"for vplcs") + self.vnode_pool = Pool (self.vnode_ips(),"for vnodes") + + self.vnode_pool.list() + + +# def build_box_names (self): +# return [ h for h in self.build_boxes_spec() ] +# def plc_boxes (self): +# return [ h for (h,m) in self.plc_boxes_spec() ] +# def qemu_boxes (self): +# return [ h for (h,m) in self.qemu_boxes_spec() ] + + def sense (self,force=False): + if self._sensed and not force: return + print 'Sensing local substrate...', + for b in self.all_boxes: b.sense() + print 'Done' + self._sensed=True + + ########## + def provision (self,plcs,options): + try: + self.sense() + self.list_all() + # attach each plc to a plc box and an IP address + plcs = [ self.provision_plc (plc,options) for plc in plcs ] + # attach each node/qemu to a qemu box with an IP address + plcs = [ self.provision_qemus (plc,options) for plc in plcs ] + # update the SFA spec accordingly + plcs = [ self.localize_sfa_rspec(plc,options) for plc in plcs ] + return plcs + except Exception, e: + print '* Could not provision this test on current substrate','--',e,'--','exiting' + traceback.print_exc() + sys.exit(1) + + # find an available plc box (or make space) + # and a free IP address (using options if present) + def provision_plc (self, plc, options): + #### we need to find one plc box that still has a slot + plc_box=None + max_free=0 + # use the box that has max free spots for load balancing + for pb in self.plc_boxes: + free=pb.free_spots() + if free>max_free: + plc_box=pb + max_free=free + # everything is already used + if not plc_box: + # find the oldest of all our instances + all_plc_instances=reduce(lambda x, y: x+y, + [ pb.plc_instances for pb in self.plc_boxes ], + []) + all_plc_instances.sort(timestamp_sort) + plc_instance_to_kill=all_plc_instances[0] + plc_box=plc_instance_to_kill.plc_box + plc_instance_to_kill.kill() + print 'killed oldest = %s on %s'%(plc_instance_to_kill.line(), + plc_instance_to_kill.plc_box.hostname) + + utils.header( 'plc %s -> box %s'%(plc['name'],plc_box.line())) + plc_box.add_fake(plc['name']) + #### OK we have a box to run in, let's find an IP address + # look in options + if options.ips_vplc: + vplc_hostname=options.ips_vplc.pop() + else: + self.vplc_pool.sense() + (vplc_hostname,unused)=self.vplc_pool.next_free() + vplc_ip = self.vplc_pool.get_ip(vplc_hostname) + + #### compute a helpful vserver name + # remove domain in hostname + vplc_simple = vplc_hostname.split('.')[0] + vservername = "%s-%d-%s" % (options.buildname,plc['index'],vplc_simple) + plc_name = "%s_%s"%(plc['name'],vplc_simple) + + #### apply in the plc_spec + # # informative + # label=options.personality.replace("linux","") + mapper = {'plc': [ ('*' , {'hostname':plc_box.hostname, + # 'name':'%s-'+label, + 'name': plc_name, + 'vservername':vservername, + 'vserverip':vplc_ip, + 'PLC_DB_HOST':vplc_hostname, + 'PLC_API_HOST':vplc_hostname, + 'PLC_BOOT_HOST':vplc_hostname, + 'PLC_WWW_HOST':vplc_hostname, + 'PLC_NET_DNS1' : self.network_settings() [ 'interface_fields:dns1' ], + 'PLC_NET_DNS2' : self.network_settings() [ 'interface_fields:dns2' ], + } ) ] + } + + utils.header("Attaching %s on IP %s in vserver %s"%(plc['name'],vplc_hostname,vservername)) + # mappers only work on a list of plcs + return TestMapper([plc],options).map(mapper)[0] + + ########## + def provision_qemus (self, plc, options): + test_mapper = TestMapper ([plc], options) + nodenames = test_mapper.node_names() + maps=[] + for nodename in nodenames: + #### similarly we want to find a qemu box that can host us + qemu_box=None + max_free=0 + # use the box that has max free spots for load balancing + for qb in self.qemu_boxes: + free=qb.free_spots() + if free>max_free: + qemu_box=qb + max_free=free + # everything is already used + if not qemu_box: + # find the oldest of all our instances + all_qemu_instances=reduce(lambda x, y: x+y, + [ qb.qemu_instances for qb in self.qemu_boxes ], + []) + all_qemu_instances.sort(timestamp_sort) + qemu_instance_to_kill=all_qemu_instances[0] + qemu_box=qemu_instance_to_kill.qemu_box + qemu_instance_to_kill.kill() + print 'killed oldest = %s on %s'%(qemu_instance_to_kill.line(), + qemu_instance_to_kill.qemu_box.hostname) + + utils.header( 'node %s -> qemu box %s'%(nodename,qemu_box.line())) + qemu_box.add_fake(nodename) + #### OK we have a box to run in, let's find an IP address + # look in options + if options.ips_vnode: + qemu_hostname=options.ips_vnode.pop() + mac=self.vnode_pool.retrieve_userdata(qemu_hostname) + print 'case 1 hostname',qemu_hostname,'mac',mac + else: + self.vnode_pool.sense() + (qemu_hostname,mac)=self.vnode_pool.next_free() + print 'case 2 hostname',qemu_hostname,'mac',mac + ip=self.vnode_pool.get_ip (qemu_hostname) + utils.header("Attaching %s on IP %s MAC %s"%(plc['name'],qemu_hostname,mac)) + + if qemu_hostname.find('.')<0: + qemu_hostname += "."+self.domain() + nodemap={'host_box':qemu_box.hostname, + 'node_fields:hostname':qemu_hostname, + 'interface_fields:ip':ip, + 'interface_fields:mac':mac, + } + nodemap.update(self.network_settings()) + maps.append ( (nodename, nodemap) ) + + return test_mapper.map({'node':maps})[0] + + def localize_sfa_rspec (self,plc,options): + + plc['sfa']['SFA_REGISTRY_HOST'] = plc['PLC_DB_HOST'] + plc['sfa']['SFA_AGGREGATE_HOST'] = plc['PLC_DB_HOST'] + plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST'] + plc['sfa']['SFA_PLC_DB_HOST'] = plc['PLC_DB_HOST'] + plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/' + for site in plc['sites']: + for node in site['nodes']: + plc['sfa']['sfa_slice_rspec']['part4'] = node['node_fields']['hostname'] + return plc + + #################### show results for interactive mode + def list_all (self): + self.sense() + for b in self.all_boxes: b.list() + + def get_box (self,box): + for b in self.build_boxes + self.plc_boxes + self.qemu_boxes: + if b.simple_hostname()==box: + return b + print "Could not find box %s"%box + return None + + def list_box(self,box): + b=self.get_box(box) + if not b: return + b.sense() + b.list() + + # can be run as a utility to manage the local infrastructure + def main (self): + parser=OptionParser() + (options,args)=parser.parse_args() + if not args: + self.list_all() + else: + for box in args: + self.list_box(box) diff --git a/system/TestMain.py b/system/TestMain.py index 2645fe1..6b18b26 100755 --- a/system/TestMain.py +++ b/system/TestMain.py @@ -14,9 +14,9 @@ from TestPlc import TestPlc from TestSite import TestSite from TestNode import TestNode -# add $HOME in PYTHONPATH so we can import LocalTestResources.py +# add $HOME in PYTHONPATH so we can import LocalSubstrate.py sys.path.append(os.environ['HOME']) -import LocalTestResources +import LocalSubstrate class TestMain: @@ -75,7 +75,7 @@ arch-rpms-url defaults to the last value used, as stored in arg-arch-rpms-url, no default config defaults to the last value used, as stored in arg-config, or %r -ips_node, ips_plc and ips_qemu defaults to the last value used, as stored in arg-ips-{node,plc,qemu}, +ips_vnode, ips_vplc and ips_qemu defaults to the last value used, as stored in arg-ips-{node,vplc,qemu}, default is to use IP scanning steps refer to a method in TestPlc or to a step_* module === @@ -100,9 +100,9 @@ steps refer to a method in TestPlc or to a step_* module help="Run all default steps") parser.add_option("-l","--list",action="store_true",dest="list_steps", default=False, help="List known steps") - parser.add_option("-N","--nodes",action="append", dest="ips_node", default=[], + parser.add_option("-N","--nodes",action="append", dest="ips_vnode", default=[], help="Specify the set of hostname/IP's to use for nodes") - parser.add_option("-P","--plcs",action="append", dest="ips_plc", default=[], + parser.add_option("-P","--plcs",action="append", dest="ips_vplc", default=[], help="Specify the set of hostname/IP's to use for plcs") parser.add_option("-Q","--qemus",action="append", dest="ips_qemu", default=[], help="Specify the set of hostname/IP's to use for qemu boxes") @@ -137,20 +137,20 @@ steps refer to a method in TestPlc or to a step_* module result.append(el) return result # flatten relevant options - for optname in ['config','exclude','ips_node','ips_plc','ips_qemu']: + for optname in ['config','exclude','ips_vnode','ips_vplc','ips_qemu']: setattr(self.options,optname, flatten ( [ arg.split() for arg in getattr(self.options,optname) ] )) # handle defaults and option persistence for (recname,filename,default,need_reverse) in ( ('build_url','arg-build-url',TestMain.default_build_url,None) , - ('ips_node','arg-ips-node',[],True) , - ('ips_plc','arg-ips-plc',[],True) , + ('ips_vnode','arg-ips-vnode',[],True) , + ('ips_vplc','arg-ips-vplc',[],True) , ('ips_qemu','arg-ips-qemu',[],True) , ('config','arg-config',TestMain.default_config,False) , ('arch_rpms_url','arg-arch-rpms-url',"",None) , - ('personality','arg-personality',"linux32",None), - ('pldistro','arg-pldistro',"planetlab",None), - ('fcdistro','arg-fcdistro','centos5',None), + ('personality','arg-personality',"linux64",None), + ('pldistro','arg-pldistro',"onelab",None), + ('fcdistro','arg-fcdistro','f14',None), ) : # print 'handling',recname path=filename @@ -245,20 +245,20 @@ steps refer to a method in TestPlc or to a step_* module raise # run localize as defined by local_resources - all_plc_specs = LocalTestResources.local_resources.localize(all_plc_specs,self.options) + all_plc_specs = LocalSubstrate.local_substrate.provision(all_plc_specs,self.options) # remember plc IP address(es) if not specified - ips_plc_file=open('arg-ips-plc','w') + ips_vplc_file=open('arg-ips-vplc','w') for plc_spec in all_plc_specs: - ips_plc_file.write("%s\n"%plc_spec['PLC_API_HOST']) - ips_plc_file.close() + ips_vplc_file.write("%s\n"%plc_spec['PLC_API_HOST']) + ips_vplc_file.close() # ditto for nodes - ips_node_file=open('arg-ips-node','w') + ips_vnode_file=open('arg-ips-vnode','w') for plc_spec in all_plc_specs: for site_spec in plc_spec['sites']: for node_spec in site_spec['nodes']: - ips_node_file.write("%s\n"%node_spec['node_fields']['hostname']) - ips_node_file.close() + ips_vnode_file.write("%s\n"%node_spec['node_fields']['hostname']) + ips_vnode_file.close() # ditto for qemu boxes ips_qemu_file=open('arg-ips-qemu','w') for plc_spec in all_plc_specs: diff --git a/system/TestMapper.py b/system/TestMapper.py index c0358a5..7d0547f 100644 --- a/system/TestMapper.py +++ b/system/TestMapper.py @@ -6,8 +6,7 @@ # mapper class # # this works on a spec as defined in a config file -# and allows to remap various fields, typically to another testbox -# see an example in config_onelab_testbox32.py +# and allows to remap various fields on the local substrate # import utils @@ -58,6 +57,9 @@ class TestMapper: utils.header ("WARNING : inserting key %s for path %s on %s %s"%( step,path,type,name)) # apply formatting if '%s' found in the value + if v is None: + if self.options.verbose: print "TestMapper WARNING - None value - ignored, key=",k + continue if v.find('%s')>=0: v=v%obj[k] if self.options.verbose: diff --git a/system/TestNode.py b/system/TestNode.py index 4b499e7..11e9391 100644 --- a/system/TestNode.py +++ b/system/TestNode.py @@ -198,8 +198,13 @@ class TestNode: utils.header("TestNode.qemu_start : %s model %s taken as real node"%(self.name(),model)) return True + def timestamp_qemu (self): + test_box = self.test_box() + test_box.run_in_buildname("mkdir -p %s"%self.nodedir()) + now=int(time.time()) + return test_box.run_in_buildname("echo %d > %s/timestamp"%(now,self.nodedir()))==0 + def start_qemu (self): - options = self.test_plc.options test_box = self.test_box() utils.header("Starting qemu node %s on %s"%(self.name(),test_box.hostname())) diff --git a/system/TestPlc.py b/system/TestPlc.py index 1ccdc80..fbd3295 100644 --- a/system/TestPlc.py +++ b/system/TestPlc.py @@ -86,7 +86,7 @@ SEPSFA='' class TestPlc: default_steps = [ - 'show', 'local_pre', SEP, + 'show', 'timestamp_plc', 'timestamp_qemu', SEP, 'vs_delete','vs_create','plc_install', 'plc_configure', 'plc_start', SEP, 'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP, 'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP, @@ -441,6 +441,12 @@ class TestPlc: print '+\tqemu box %s'%node_spec['host_box'] print '+\thostname=%s'%node_spec['node_fields']['hostname'] + # write a timestamp in /vservers/<>/ + def timestamp_plc (self): + now=int(time.time()) + utils.system(self.test_ssh.actual_command("mkdir -p /vservers/%s"%self.vservername)) + return utils.system(self.test_ssh.actual_command("echo %d > /vservers/%s/timestamp"%(now,self.vservername)))==0 + def local_pre (self): "run site-dependant pre-test script as defined in LocalTestResources" from LocalTestResources import local_resources @@ -1056,6 +1062,11 @@ class TestPlc: "all nodes: start the qemu instance (also runs qemu-bridge-init start)" pass + @node_mapper + def timestamp_qemu (self) : + "all nodes: start the qemu instance (also runs qemu-bridge-init start)" + pass + def check_tcp (self): "check TCP connectivity between 2 slices (or in loopback if only one is defined)" specs = self.plc_spec['tcp_test'] diff --git a/system/TestPool.py b/system/TestPool.py deleted file mode 100644 index f1ffa6d..0000000 --- a/system/TestPool.py +++ /dev/null @@ -1,125 +0,0 @@ -# -# Thierry Parmentelat -# Copyright (C) 2010 INRIA -# -# -# pool class -# -# allows to pick an available IP among a pool -# -# input is expressed as a list of tuples (hostname,ip,user_data) -# that can be searched iteratively for a free slot -# TestPoolIP : look for a free IP address -# TestPoolQemu : look for a test_box with no qemu running -# e.g. -# pool = [ (hostname1,ip1,user_data1), -# (hostname2,ip2,user_data2), -# (hostname3,ip3,user_data2), -# (hostname4,ip4,user_data4) ] -# assuming that ip1 and ip3 are taken (pingable), then we'd get -# pool=TestPoolIP(pool) -# pool.next_free() -> entry2 -# pool.next_free() -> entry4 -# pool.next_free() -> None -# that is, even if ip2 is not busy/pingable when the second next_free() is issued - -import commands -import utils - -class TestPool: - - def __init__ (self, pool, options,message): - self.pool=pool - self.options=options - self.busy=[] - self.message=message - - # let's be flexible - def match (self,triple,hostname_or_ip): - (h,i,u)=triple - return h.find(hostname_or_ip)>=0 or (i and i.find(hostname_or_ip)>=0) or hostname_or_ip.find(h)==0 - - def locate_entry (self, hostname_or_ip): - for (h,i,u) in self.pool: - if self.match ( (h,i,u,), hostname_or_ip): - self.busy.append(h) - return (h,i,u) - utils.header('TestPool.locate_entry: Could not locate entry for %r in pool:'%hostname_or_ip) - return None - - # the hostnames provided (from a tracker) are considered last - def next_free (self, tracker_hostnames): - utils.header('TestPool is looking for a %s'%self.message) - # create 2 lists of (h,i,u) entries, the ones not in the tracker, and the ones in the tracker - in_track_pool=[] - out_track_pool=[] - for (h,i,u) in self.pool: - in_tracker=False - for hostname in tracker_hostnames: - if self.match ( (h,i,u,) , hostname) : in_tracker = True - if in_tracker: in_track_pool.append ( (h,i,u,) ) - else: out_track_pool.append ( (h,i,u,) ) - # consider outsiders first - for (hostname,ip,user_data) in out_track_pool + in_track_pool: - utils.header ('* candidate %s' % hostname) - for (hostname,ip,user_data) in out_track_pool + in_track_pool: - if hostname in self.busy: - continue - utils.header('TestPool : checking %s'%hostname) - if self.free_hostname(hostname): - utils.header('%s is available'%hostname) - self.busy.append(hostname) - return (hostname,ip,user_data) - else: - self.busy.append(hostname) - raise Exception, "No space left in pool (%s)"%self.message - -class TestPoolIP (TestPool): - - def __init__ (self,pool,options): - TestPool.__init__(self,pool,options,"free IP address") - - def free_hostname (self, hostname): - return not self.check_ping(hostname) - -# OS-dependent ping option (support for macos, for convenience) - ping_timeout_option = None -# checks whether a given hostname/ip responds to ping - def check_ping (self,hostname): - if not TestPoolIP.ping_timeout_option: - (status,osname) = commands.getstatusoutput("uname -s") - if status != 0: - raise Exception, "TestPool: Cannot figure your OS name" - if osname == "Linux": - TestPoolIP.ping_timeout_option="-w" - elif osname == "Darwin": - TestPoolIP.ping_timeout_option="-t" - - if self.options.verbose: - utils.header ("TestPoolIP: pinging %s"%hostname) - command="ping -c 1 %s 1 %s"%(TestPoolIP.ping_timeout_option,hostname) - (status,output) = commands.getstatusoutput(command) - return status == 0 - -class TestPoolQemu (TestPool): - - def __init__ (self,pool,options): - TestPool.__init__(self,pool,options,"free qemu box") - - def free_hostname (self, hostname): - return not self.busy_qemu(hostname) - - # is there a qemu runing on that box already ? - def busy_qemu (self, hostname): - if self.options.verbose: - utils.header("TestPoolQemu: checking for running qemu instances on %s"%hostname) - command="ssh -o ConnectTimeout=5 root@%s ps -e -o cmd"%hostname - (status,output) = commands.getstatusoutput(command) - # if we fail to run that, let's assume we don't have ssh access, so - # we pretend the box is busy - if status!=0: - return True - elif output.find("qemu") >=0 : - return True - else: - return False diff --git a/system/TestResources.py b/system/TestResources.py deleted file mode 100644 index c75bebb..0000000 --- a/system/TestResources.py +++ /dev/null @@ -1,242 +0,0 @@ -# Thierry Parmentelat -# Copyright (C) 2010 INRIA -# -import sys -import traceback - -import utils -from TestMapper import TestMapper -from TestPool import TestPoolQemu, TestPoolIP -from Trackers import TrackerPlc, TrackerQemu - -class TestResources: - - # need more specialization, see an example in LocalTestResources.sample.inria - - ########## - def localize (self,plcs,options): - try: - plcs = self.localize_qemus(plcs,options) - except Exception, e: - print '* Could not localize qemus','--',e,'--','exiting' - traceback.print_exc() - sys.exit(1) - try: - plcs = self.localize_nodes(plcs,options) - except Exception,e: - print '* Could not localize nodes','--',e,'--','exiting' - sys.exit(1) - try: - plcs = self.localize_plcs(plcs,options) - except Exception,e: - print '* Could not localize plcs','--',e,'--','exiting' - sys.exit(1) - try: - plcs = self.localize_rspec(plcs,options) - except Exception,e: - print '* Could not localize RSpec','--',e,'--','exiting' - sys.exit(1) - return plcs - - def localize_qemus (self,plcs,options): - - # all plcs on the same vserver box - plc_box = self.plc_boxes()[0] - - # informative - label=options.personality.replace("linux","") - - node_map = [] - qemu_pool = TestPoolQemu (self.qemus_ip_pool(), options) - - for index in range(1,options.size+1): - if options.ips_qemu: - ip_or_hostname=options.ips_qemu.pop() - (hostname,ip,unused)=qemu_pool.locate_entry(ip_or_hostname) - else: - tracker=TrackerQemu(options,instances=self.max_qemus()-1) - (hostname,ip,unused) = qemu_pool.next_free(tracker.hostnames()) - - node_map += [ ('node%d'%index, {'host_box':hostname},) ] - - mapper = {'plc': [ ('*' , {'hostname':plc_box, - 'PLC_DB_HOST':plc_box, - 'PLC_API_HOST':plc_box, - 'PLC_BOOT_HOST':plc_box, - 'PLC_WWW_HOST':plc_box, - 'name':'%s-'+label } ) - ], - 'node': node_map, - } - - return TestMapper(plcs,options).map(mapper) - - - def localize_nodes (self, plcs, options): - - ip_pool = TestPoolIP (self.nodes_ip_pool(),options) - network_dict = self.network_dict() - - test_mapper = TestMapper (plcs, options) - - all_nodenames = test_mapper.node_names() - maps = [] - for nodename in all_nodenames: - if options.ips_node: - ip_or_hostname=options.ips_node.pop() - (hostname,ip,mac)=ip_pool.locate_entry(ip_or_hostname) - else: - tracker=TrackerQemu(options,instances=self.max_qemus()-1) - (hostname,ip,mac) = ip_pool.next_free(tracker.nodenames()) - # myplc needs a fqdn or whines otherwise - if hostname.find('.')<0: hostname += "." + self.domain() - utils.header('Attaching node %s to %s (%s)'%(nodename,hostname,ip)) - node_dict= {'node_fields:hostname':hostname, - 'interface_fields:ip':ip, - 'interface_fields:mac':mac, - } - - node_dict.update(network_dict) - maps.append ( ( nodename, node_dict) ) - - plc_map = [ ( '*' , { 'PLC_NET_DNS1' : network_dict [ 'interface_fields:dns1' ], - 'PLC_NET_DNS2' : network_dict [ 'interface_fields:dns2' ], } ) ] - - return test_mapper.map ({'node': maps, 'plc' : plc_map } ) - - - def localize_plcs (self,plcs,options): - - ip_pool = TestPoolIP (self.plcs_ip_pool(),options) - - plc_counter=0 - for plc in plcs: - if options.ips_plc : - ip_or_hostname=options.ips_plc.pop() - (hostname,ip,mac)=ip_pool.locate_entry(ip_or_hostname) - if options.verbose: - utils.header("Using user-provided %s %s for plc %s"%( - hostname,ip_or_hostname,plc['name'])) - else: - tracker = TrackerPlc(options,instances=self.max_plcs()) - (hostname,ip,mac)=ip_pool.next_free(tracker.plcnames()) - if options.verbose: - utils.header("Using auto-allocated %s %s for plc %s"%( - hostname,ip,plc['name'])) - - ### rewrite fields in plc - # compute a helpful vserver name - remove domain in hostname - simplehostname = hostname.split('.')[0] - preferred_hostname = self.preferred_hostname() - vservername = options.buildname - if len(plcs) == 1 : - vservername = "%s-%s" % (vservername,simplehostname) - #ugly hack for "vuname: vc_set_vhi_name(): Arg list too long" errors - if len(vservername) > 38 and preferred_hostname is not None: - vservername = "%s-%s" % (options.buildname,preferred_hostname) - else: - plc_counter += 1 - vservername = "%s-%d-%s" % (vservername,plc_counter,simplehostname) - #ugly hack for "vuname: vc_set_vhi_name(): Arg list too long" errors - if len(vservername) > 38 and preferred_hostname is not None: - vservername = "%s-%d-%s" % (options.buildname,plc_counter,preferred_hostname) - - # apply - plc['vservername']=vservername - plc['vserverip']=ip - plc['name'] = "%s_%s"%(plc['name'],simplehostname) - utils.header("Attaching plc %s to vserver %s (%s)"%( - plc['name'],plc['vservername'],plc['vserverip'])) - for key in [ 'PLC_DB_HOST', 'PLC_API_HOST', 'PLC_WWW_HOST', 'PLC_BOOT_HOST',]: - plc[key] = hostname - - return plcs - - # as a plc step this should return a boolean - def step_pre (self,plc): - return self.trqemu_record (plc) and self.trqemu_make_space(plc) \ - and self.trplc_record (plc) and self.trplc_make_space(plc) - - def step_post (self,plc): - return True - - def step_release (self,plc): - return self.trqemu_release(plc) and self.trplc_release(plc) - - def step_release_plc (self,plc): - return self.trplc_release(plc) - - def step_release_qemu (self,plc): - return self.trqemu_release(plc) - - def step_list (self,plc): - return self.trqemu_list(plc) and self.trplc_list(plc) - - #################### - def trplc_record (self,plc): - tracker = TrackerPlc(plc.options,instances=self.max_plcs()) - tracker.record(plc.test_ssh.hostname,plc.vservername) - tracker.store() - return True - - def trplc_release (self,plc): - tracker = TrackerPlc(plc.options,instances=self.max_plcs()) - tracker.release(plc.test_ssh.hostname,plc.vservername) - tracker.store() - return True - - def trplc_make_space (self,plc): - tracker = TrackerPlc(plc.options,instances=self.max_plcs()) - tracker.make_space() - tracker.store() - return True - - def trplc_list (self,plc): - TrackerPlc(plc.options,instances=self.max_plcs()).list() - return True - - ### - def trqemu_record (self,plc): - tracker=TrackerQemu(plc.options,instances=self.max_qemus()-1) - for site_spec in plc.plc_spec['sites']: - for node_spec in site_spec['nodes']: - tracker.record(node_spec['host_box'],plc.options.buildname,node_spec['node_fields']['hostname']) - tracker.store() - return True - - def trqemu_release (self,plc): - tracker=TrackerQemu(plc.options,instances=self.max_qemus()-1) - for site_spec in plc.plc_spec['sites']: - for node_spec in site_spec['nodes']: - tracker.release(node_spec['host_box'],plc.options.buildname,node_spec['node_fields']['hostname']) - tracker.store() - return True - - def trqemu_make_space (self,plc): - tracker=TrackerQemu(plc.options,instances=self.max_qemus()-1) - for site_spec in plc.plc_spec['sites']: - for node_spec in site_spec['nodes']: - tracker.make_space() - tracker.store() - return True - - def trqemu_list (self,plc): - TrackerQemu(plc.options,instances=self.max_qemus()-1).list() - return True - - ### - def localize_rspec (self,plcs,options): - - utils.header ("Localize SFA Slice RSpec") - - for plc in plcs: - for site in plc['sites']: - for node in site['nodes']: - plc['sfa']['sfa_slice_rspec']['part4'] = node['node_fields']['hostname'] - plc['sfa']['SFA_REGISTRY_HOST'] = plc['PLC_DB_HOST'] - plc['sfa']['SFA_AGGREGATE_HOST'] = plc['PLC_DB_HOST'] - plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST'] - plc['sfa']['SFA_PLC_DB_HOST'] = plc['PLC_DB_HOST'] - plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/' - - return plcs diff --git a/system/TestSsh.py b/system/TestSsh.py index 1028a18..cae5fa4 100644 --- a/system/TestSsh.py +++ b/system/TestSsh.py @@ -47,16 +47,18 @@ class TestSsh: utils.header("WARNING : something wrong in is_local_hostname with hostname=%s"%hostname) return False - def __init__(self,hostname,buildname=None,key=None, username=None): + def __init__(self,hostname,buildname=None,key=None, username=None,unknown_host=True): self.hostname=hostname self.buildname=buildname self.key=key self.username=username + self.unknown_host=unknown_host def is_local(self): return TestSsh.is_local_hostname(self.hostname) - std_options="-o BatchMode=yes -o StrictHostKeyChecking=no -o CheckHostIP=no -o ConnectTimeout=5 -o UserKnownHostsFile=/dev/null " + std_options="-o BatchMode=yes -o StrictHostKeyChecking=no -o CheckHostIP=no -o ConnectTimeout=5 " + unknown_option="-o UserKnownHostsFile=/dev/null " def key_part (self): if not self.key: @@ -77,12 +79,33 @@ class TestSsh: if not keep_stdin: ssh_command += "-n " ssh_command += TestSsh.std_options + if self.unknown_host: ssh_command += TestSsh.unknown_option ssh_command += self.key_part() ssh_command += "%s %s" %(self.hostname_part(),TestSsh.backslash_shell_specials(command)) return ssh_command - def run(self, command,background=False): + # same in argv form + def actual_argv (self, argv,keep_stdin=False): + if self.is_local(): + return argv + ssh_argv=[] + ssh_argv.append('ssh') + if not keep_stdin: ssh_argv.append('-n') + ssh_argv += TestSsh.std_options.split() + if self.unknown_host: ssh_argv += TestSsh.unknown_option.split() + ssh_argv += self.key_part().split() + ssh_argv.append(self.hostname_part()) + ssh_argv += argv + return ssh_argv + + def header (self,message): + if not message: return + print "===============",message + sys.stdout.flush() + + def run(self, command,message=None,background=False): local_command = self.actual_command(command) + self.header(message) return utils.system(local_command,background) def clean_dir (self,dirname): diff --git a/system/Trackers.py b/system/Trackers.py deleted file mode 100644 index de26b31..0000000 --- a/system/Trackers.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/python - -# Thierry Parmentelat -# Copyright (C) 2010 INRIA -# -import os - -import utils -from TestSsh import TestSsh - -# 2 types of trackers -# (*) plc trackers remembers the running myplcs -# (*) qemu trackers keeps track of the running qemu nodes -# -# trackers allow us to let the test run after the build has finished, -# and to kill/stop the oldest instances later when we need space -# - -#################### Tracker -class Tracker: - - def __init__ (self, options,filename, instances): - self.options=options - self.filename=filename - self.instances=instances - try: - tracks=file(self.filename).readlines() - tracks = [ track.strip() for track in tracks ] - except: - tracks=[] - self.tracks = [track for track in tracks if track] - - def list (self): - try: - contents=file(self.filename).read() - print "==>",self.filename,"<==" - print contents - except: - print "xxxxxxxxxxxx",self.filename,"not found" - - def store (self): - out = file(self.filename,'w') - for track in self.tracks: - out.write('%s\n'%(track)) - out.close() - - def record (self,track): - for already in self.tracks: - if already==track: - print '%s is already included in %s'%(already,self.filename) - return - if self.options.dry_run: - print 'dry_run: Tracker.record - skipping %s'%(track) - return - self.tracks.append( track ) - print "Recorded %s in tracker %s"%(track,self.filename) - - # this stops the instances currently attached with this test session and release tracker - def release (self,track): - for already in self.tracks: - if already==track: - if self.options.dry_run: - print 'dry_run: Tracker.release - skipping %s'%(track) - return - self.tracks.remove(track) - print "Releasing %s in tracker %s"%(track,self.filename) - command = self.stop_command (track) - utils.header("Trackers.make_space track : %s"%command) - utils.system(command) - print '%s was not found in %s'%(track,self.filename) - return - - # this actually stops the old instances, so that the total fits in the number of instances - def make_space (self): - # number of instances to stop - how_many=len(self.tracks)-self.instances - # nothing todo until we have more than keep_vservers in the tracker - if how_many <= 0: - print 'Tracker.make_space : limit %d not reached'%self.instances - return - to_stop = self.tracks[:how_many] - for track in to_stop: - command = self.stop_command (track) - utils.header("Trackers.make_space track : %s"%command) - utils.system(command) - if not self.options.dry_run: - self.tracks = self.tracks[how_many:] - - # this stops ALL known instances - def cleanup (self): - for track in self.tracks: - command=self.stop_command(track) - utils.header("Trackers.cleanup track : %s"%command) - utils.system(command) - if not self.options.dry_run: - self.tracks=[] - -class TrackerPlc (Tracker): - - DEFAULT_FILENAME=os.environ['HOME']+"/tracker-plcs" - # how many concurrent plcs are we keeping alive - adjust with the IP pool size - DEFAULT_MAX_INSTANCES = 12 - - def __init__ (self,options,filename=None,instances=0): - if not filename: filename=TrackerPlc.DEFAULT_FILENAME - if not instances: instances=TrackerPlc.DEFAULT_MAX_INSTANCES - Tracker.__init__(self,options,filename,instances) - - def record (self, hostname, vservername): - Tracker.record (self,"%s@%s"%(hostname,vservername)) - - def release (self, hostname, vservername): - Tracker.release (self,"%s@%s"%(hostname,vservername)) - - def stop_command (self, track): - (hostname,vservername) = track.split('@') - return TestSsh(hostname).actual_command("vserver --silent %s stop"%vservername) - - def plcnames (self): - return [ self.plcname(track) for track in self.tracks ] - - def plcname (self, track): - (hostname,vservername) = track.split('@') - return vservername.rsplit('-',1)[1] - -class TrackerQemu (Tracker): - - DEFAULT_FILENAME=os.environ['HOME']+"/tracker-qemus" - # how many concurrent plcs are we keeping alive - adjust with the IP pool size - DEFAULT_MAX_INSTANCES = 3 - - def __init__ (self,options,filename=None,instances=0): - if not filename: filename=TrackerQemu.DEFAULT_FILENAME - if not instances: instances=TrackerQemu.DEFAULT_MAX_INSTANCES - Tracker.__init__(self,options,filename,instances) - - def record (self, hostname, buildname, nodename): - Tracker.record (self,"%s@%s@%s"%(hostname,buildname,nodename)) - - def release (self, hostname, buildname, nodename): - Tracker.release (self,"%s@%s@%s"%(hostname,buildname,nodename)) - - def stop_command (self, track): - (hostname,buildname,nodename) = track.split('@') - return TestSsh(hostname).actual_command("%s/qemu-%s/qemu-kill-node this"%(buildname,nodename)) - - def hostnames (self): - return [ self.hostname(track) for track in self.tracks ] - - def hostname (self, track): - (hostname,buildname,nodename) = track.split('@') - return hostname - - def nodenames (self): - return [ self.nodename(track) for track in self.tracks ] - - def nodename (self, track): - (hostname,buildname,nodename) = track.split('@') - return nodename - diff --git a/system/config_default.py b/system/config_default.py index df41bab..16a4e90 100644 --- a/system/config_default.py +++ b/system/config_default.py @@ -260,6 +260,7 @@ def leases (options, index): def plc (options,index) : return { + 'index' : index, 'name' : 'onetest%d'%index, # as of yet, not sure we can handle foreign hosts, but this is required though 'hostname' : 'deferred-myplc-hostbox-%d'%index, -- 2.43.0