Merge branch 'master' into sfa-geni-v3
author Thierry Parmentelat <thierry.parmentelat@inria.fr>
Wed, 17 Apr 2013 07:29:10 +0000 (09:29 +0200)
committer Thierry Parmentelat <thierry.parmentelat@inria.fr>
Wed, 17 Apr 2013 07:29:10 +0000 (09:29 +0200)
14 files changed:
system/Completer.py [new file with mode: 0755]
system/LocalSubstrate.inria.py
system/PlcapiUrlScanner.py [new file with mode: 0755]
system/Substrate.py
system/TestBoxQemu.py
system/TestMain.py
system/TestNode.py
system/TestPlc.py
system/TestSlice.py
system/TestSliceSfa.py
system/TestSsh.py
system/lxc-driver.sh
system/template-qemu/qemu-bridge-init
tests.spec

diff --git a/system/Completer.py b/system/Completer.py
new file mode 100755 (executable)
index 0000000..0ebf6e6
--- /dev/null
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+import sys, time
+from datetime import datetime, timedelta
+
+import utils
+
+### generic code for waiting until any number of tasks have completed
+### within a given timeframe, i.e. given some timeout/silent/period
+### takes as argument a list of tasks that are instances
+### of a CompleterTask subclass
+class Completer:
+    def __init__ (self, tasks, verbose=True):
+        self.tasks=tasks
+        self.verbose=verbose
+    def run (self, timeout_timedelta, silent_timedelta, period=None):
+        timeout = datetime.now()+timeout_timedelta
+        timeout_minutes = timeout_timedelta.total_seconds()/60
+        graceout = datetime.now()+silent_timedelta
+        silent_minutes = silent_timedelta.total_seconds()/60
+        period_seconds=int(period.total_seconds())
+        if self.verbose:
+            utils.header("max timeout is %d minutes, silent for %d minutes (period is %s s)"%\
+                             (timeout_minutes,silent_minutes,period_seconds))
+        tasks=self.tasks
+        while tasks:
+            fine=[]
+            for task in tasks:
+                success=task.run (silent=datetime.now() <= graceout)
+                if success: fine.append(task)
+            for task in fine: tasks.remove(task)
+            if not tasks: return True
+            if datetime.now() > timeout:
+                for task in tasks: 
+                    print task.failure_message()
+                return False
+            if self.verbose:
+                print '%ds..'%period_seconds,
+            time.sleep(period_seconds)
+        # in case we're empty 
+        return True
+
+
+#################### CompleterTask
+### . run(silent)  (return True or False)
+###   silent is an input boolean indicating if we're within the silent period
+### . failure_message()    (return a message, printed by Completer on timeout)
+
+########## expectations (+ first arg self)
+# failure_message()     (to describe what went wrong once it's over)
+# -- and --
+# run (silent)  
+# -- or -- 
+# actual_run()
+# message()
+
+class CompleterTask:
+    def run (self, silent):
+        result=self.actual_run()
+        if silent:
+            print '+' if result else '.',
+            sys.stdout.flush()
+        else:
+            print self.message(),"->","OK" if result else "KO"
+        return result
+    def message (self): return "you-need-to-redefine-message"
+    def failure_message (self): return "you-need-to-redefine-failure_message"
+
+# random result
+class TaskTest (CompleterTask):
+    counter=1
+    def __init__ (self,max):
+        import random
+        self.counter=TaskTest.counter
+        TaskTest.counter+=1
+        self.delay=random.random()*max
+        self.fire=datetime.now()+timedelta(seconds=self.delay)
+    def actual_run(self):
+        return datetime.now()>=self.fire
+    def message (self):
+        return "Task %d - delay was %d s"%(self.counter,self.delay)
+
+    def failure_message (self): return "BOTTOM LINE: FAILURE with task (%s)"%self.counter
+
+def main ():
+    import sys
+    if len(sys.argv)!=6:
+        print "Usage: <command> number_tasks max_random timeout_s silent_s period_s"
+        sys.exit(1)
+    [number,max,timeout,silent,period]= [ int(x) for x in sys.argv[1:]]
+    tasks = [ TaskTest(max) for i in range(number)]
+    success=Completer(tasks,verbose=True).run(timedelta(seconds=timeout),
+                                              timedelta(seconds=silent),
+                                              timedelta(seconds=period))
+    print "OVERALL",success
+
+if __name__ == '__main__':
+    main()
index f512440..b9f81d5 100755 (executable)
@@ -20,17 +20,19 @@ class OnelabSubstrate (Substrate):
 
    # the build boxes we use 
    def build_boxes_spec (self):
-      return [ 'liquid', 'reed', 'velvet', 'gorillaz', ]
+      return [ 'liquid', 'reed', 'velvet' ]
 
    # the vs-capable box for PLCs
    def plc_vs_boxes_spec (self):
-      return [ ('vs64-1', 35),  # how many plcs max in this box
+      return [ ('warhol', 35),  # how many plcs max in this box
                ]  
 
    # the lxc-capable box for PLCs
    def plc_lxc_boxes_spec (self):
-      return [ ('lxc64-1', 35),  # how many plcs max in this box
-               ]  
+      return [ 
+         ('gotan', 30),         # how many plcs max in this box 
+         ('deathvegas', 30),    
+         ]  
 
    # vplc01 to 40
    def vplc_ips (self):
@@ -40,12 +42,13 @@ class OnelabSubstrate (Substrate):
 
    def qemu_boxes_spec (self):
       return [ # how many qemus max in this box
+# enfoui runs f18 - also can't seem to enter BIOS on that one
+         ('kvm64-6', 4), # 4 cores, 8Gb
          ('kvm64-1', 3), # 4 cores, 4Gb
          ('kvm64-2', 3), # 4 cores, 4Gb
-         ('kvm64-3', 2), # 2 cores, 4Gb
-         ('kvm64-4', 1), # 4 cores, 8Gb
+         ('kvm64-4', 2), # 4 cores, 8Gb
+         ('kvm64-3', 1), # 2 cores, 4Gb
          ('kvm64-5', 1), # 2 cores, 4Gb
-         ('kvm64-6', 2), # 2 cores, 4Gb
          ]
 
    # the nodes pool has a MAC address as user-data (3rd elt in tuple)
diff --git a/system/PlcapiUrlScanner.py b/system/PlcapiUrlScanner.py
new file mode 100755 (executable)
index 0000000..c790bd9
--- /dev/null
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+#
+# this checks various forms of URLs for reaching a PLCAPI
+# i.e. with http:// or https:// (only https with a trailing slash is required to work)
+# with or without a trailing slash
+# using a hostname or an IP, with the default port or an explicit :80 / :443
+
+import socket
+import xmlrpclib
+import traceback
+
+class PlcapiUrlScanner:
+
+    # turns out the config has an ip but no name..
+    def __init__ (self, auth, hostname=None, ip=None, verbose=False):
+        self.auth=auth
+        if not hostname and not ip:
+            raise Exception,"PlcapiUrlScanner needs _some_ input"
+        if hostname:
+            if not ip: 
+                try:    ip=socket.gethostbyname(hostname)
+                except: 
+                    hostname="%s.pl.sophia.inria.fr"%hostname
+                    ip=socket.gethostbyname(hostname)
+        else:
+            if not hostname: hostname=socket.gethostbyaddr(ip)[0]
+        self.hostname=hostname
+        self.ip=ip
+        self.verbose=verbose
+        
+    def try_url (self,url):
+        try:
+            xmlrpclib.ServerProxy (url, verbose=self.verbose, allow_none=True).GetNodes(self.auth)
+            print 'YES',url
+            return True
+        except xmlrpclib.ProtocolError as e:
+            print '... (http error %s)'%e.errcode,url
+            return False
+        except Exception as e:
+            print '---',type(e).__name__,url,e
+            if self.verbose: traceback.print_exc()
+            return False
+
+    def try_url_required (self, url, required):
+        result=self.try_url(url)
+        if required and not result:     return False
+        else:                           return True
+
+    def scan(self):
+        overall=True
+        for protocol in ['http','https']:
+            for dest in [ self.hostname, self.ip ]:
+                for port in [ '',':80',':443']:
+                    for path in ['PLCAPI','PLCAPI/']:
+                        if protocol=='http' and port==':443': continue
+                        if protocol=='https' and port==':80': continue
+                        required = (protocol=='https') and (path=='PLCAPI/')
+                        url="%s://%s%s/%s"%(protocol,dest,port,path)
+                        if not self.try_url_required (url,required): overall=False
+        return overall
+
+from optparse import OptionParser
+import sys
+
+auth={'AuthMethod':'password','Username':'root@test.onelab.eu','AuthString':'test++'}
+
+def main ():
+    usage="%prog hostname"
+    parser=OptionParser()
+    parser.add_option("-v","--verbose",dest='verbose',action='store_true',default=False)
+    (options,args)=parser.parse_args()
+    if len(args)!=1:
+        parser.print_help()
+        sys.exit(1)
+    hostname=args[0]
+    success=PlcapiUrlScanner (auth=auth, hostname=hostname,verbose=options.verbose).scan()
+    sys.exit(0 if success else -1)
+
+if __name__ == '__main__':
+    main()
index b0d28f0..fe6d6c7 100644 (file)
@@ -708,11 +708,11 @@ class QemuBox (Box):
     def sense(self, options):
         print 'qn',
         modules=self.backquote_ssh(['lsmod']).split('\n')
-        self._driver='*NO kqemu/kmv_intel MODULE LOADED*'
+        self._driver='*NO kqemu/kvm_intel MODULE LOADED*'
         for module in modules:
             if module.find('kqemu')==0:
                 self._driver='kqemu module loaded'
-            # kvm might be loaded without vkm_intel (we dont have AMD)
+            # kvm might be loaded without kvm_intel (we don't have AMD)
             elif module.find('kvm_intel')==0:
                 self._driver='kvm_intel module loaded'
         ########## find out running pids
@@ -730,13 +730,13 @@ class QemuBox (Box):
             header(">>%s<<"%line)
         ########## retrieve alive instances and map to build
         live_builds=[]
-        command=['grep','.','*/*/qemu.pid','/dev/null']
+        command=['grep','.','/vservers/*/*/qemu.pid','/dev/null']
         pid_lines=self.backquote_ssh(command,trash_err=True).split('\n')
         for pid_line in pid_lines:
             if not pid_line.strip(): continue
             # expect <build>/<nodename>/qemu.pid:<pid>pid
             try:
-                (buildname,nodename,tail)=pid_line.split('/')
+                (_,__,buildname,nodename,tail)=pid_line.split('/')
                 (_,pid)=tail.split(':')
                 q=self.qemu_instance_by_pid (pid)
                 if not q: continue
index e2527e7..6d118a1 100644 (file)
@@ -7,6 +7,9 @@ import os.path
 import utils
 from TestSsh import TestSsh
 
+# April 2013 - using /vservers/ for hosting this stuff as well, making IT-like stuff easier
+KVMROOT="/vservers"
+
 # xxx this should probably inherit TestSsh
 class TestBoxQemu:
 
@@ -14,7 +17,7 @@ class TestBoxQemu:
         self.hostname_value=hostname
         self.buildname=buildname
         self.key=key
-        self.test_ssh=TestSsh(self.hostname_value,self.buildname,self.key)
+        self.test_ssh=TestSsh(self.hostname_value,self.buildname,self.key,root=KVMROOT)
         
     def hostname (self):
         return self.hostname_value
@@ -22,18 +25,18 @@ class TestBoxQemu:
     def is_local(self):
         return self.test_ssh.is_local()
     
-    def run_in_buildname (self,command,background=False):
+    def run_in_buildname (self,command,background=False, dry_run=False):
         message="On %s: running %s"%(self.hostname(),command)
         if background: message += " &"
         utils.header(message)
-        return self.test_ssh.run_in_buildname (command,background)
+        return self.test_ssh.run_in_buildname (command,background, dry_run)
 
     # xxx could/should use rsync instead
-    def copy (self,local_file,recursive=False):
-        return self.test_ssh.copy (local_file,recursive)
+    def copy (self,local_file,recursive=False,dry_run=False):
+        return self.test_ssh.copy (local_file,recursive,dry_run=dry_run)
 
-    def clean_dir (self,dirname):
-        return self.test_ssh.clean_dir(dirname)
+    def rmdir (self,dirname, dry_run=False):
+        return self.test_ssh.rmdir(dirname, dry_run=dry_run)
 
     def mkdir (self,dirname):
         return self.test_ssh.mkdir(dirname)
index c72aa99..ae4a861 100755 (executable)
@@ -189,8 +189,9 @@ steps refer to a method in TestPlc or to a step_* module
                           help="prompts before each step")
         parser.add_option("-n","--dry-run", action="store_true", dest="dry_run", default=False,
                           help="Show environment and exits")
-        parser.add_option("-r","--restart-nm", action="store_true", dest="forcenm", default=False, 
-                          help="Force the NM to restart in ssh_slices step")
+# dropped when Completer.py was added
+#        parser.add_option("-r","--restart-nm", action="store_true", dest="forcenm", default=False, 
+#                          help="Force the NM to restart in ssh_slices step")
         parser.add_option("-t","--trace", action="store", dest="trace_file", default=None,
                           help="Trace file location")
         (self.options, self.args) = parser.parse_args()
index 9a980f5..d65ec81 100644 (file)
@@ -18,6 +18,8 @@ class TestNode:
     def name(self):
         return self.node_spec['node_fields']['hostname']
     
+    def dry_run (self):
+        return self.test_plc.options.dry_run
     @staticmethod
     def is_qemu_model (model):
         return model.find("qemu") >= 0
@@ -142,7 +144,7 @@ class TestNode:
 
         filename="%s/%s.iso"%(self.nodedir(),self.name())
         utils.header('Storing boot medium into %s'%filename)
-        if self.test_plc.options.dry_run:
+        if self.dry_run():
             print "Dry_run: skipped writing of iso image"
             return True
         else:
@@ -169,7 +171,7 @@ class TestNode:
 
     def nodestate_show (self):
         "all nodes: show PLCAPI boot_state"
-        if self.test_plc.options.dry_run:
+        if self.dry_run():
             print "Dry_run: skipped getting current node state"
             return True
         state=self.test_plc.apiserver.GetNodes(self.test_plc.auth_root(), self.name(), ['boot_state'])[0]['boot_state']
@@ -186,7 +188,7 @@ class TestNode:
         auth=self.test_plc.auth_root()
         target_arch=self.test_plc.apiserver.GetPlcRelease(auth)['build']['target-arch']
         conf_filename="%s/qemu.conf"%(self.nodedir())
-        if self.test_plc.options.dry_run:
+        if self.dry_run():
             print "dry_run: skipped actual storage of qemu.conf"
             return True
         utils.header('Storing qemu config for %s in %s'%(self.name(),conf_filename))
@@ -204,10 +206,11 @@ class TestNode:
         # if relevant, push the qemu area onto the host box
         if self.test_box().is_local():
             return True
+        dry_run=self.dry_run()
         utils.header ("Cleaning any former sequel of %s on %s"%(self.name(),self.host_box()))
-        self.test_box().run_in_buildname("rm -rf %s"%self.nodedir())
+        self.test_box().rmdir(self.nodedir(), dry_run=dry_run)
         utils.header ("Transferring configuration files for node %s onto %s"%(self.name(),self.host_box()))
-        return self.test_box().copy(self.nodedir(),recursive=True)==0
+        return self.test_box().copy(self.nodedir(),recursive=True,dry_run=dry_run)==0
             
     def qemu_start (self):
         "all nodes: start the qemu instance (also runs qemu-bridge-init start)"
@@ -222,22 +225,23 @@ class TestNode:
     def timestamp_qemu (self):
         "all nodes: start the qemu instance (also runs qemu-bridge-init start)"
         test_box = self.test_box()
-        test_box.run_in_buildname("mkdir -p %s"%self.nodedir())
+        test_box.run_in_buildname("mkdir -p %s"%self.nodedir(), dry_run=self.dry_run())
         now=int(time.time())
-        return test_box.run_in_buildname("echo %d > %s/timestamp"%(now,self.nodedir()))==0
+        return test_box.run_in_buildname("echo %d > %s/timestamp"%(now,self.nodedir()), dry_run=self.dry_run())==0
 
     def start_qemu (self):
         test_box = self.test_box()
         utils.header("Starting qemu node %s on %s"%(self.name(),test_box.hostname()))
 
-        test_box.run_in_buildname("%s/qemu-bridge-init start >> %s/log.txt"%(self.nodedir(),self.nodedir()))
+        test_box.run_in_buildname("%s/qemu-bridge-init start >> %s/log.txt"%(self.nodedir(),self.nodedir()),
+                                  dry_run=self.dry_run())
         # kick it off in background, as it would otherwise hang
         test_box.run_in_buildname("%s/qemu-start-node 2>&1 >> %s/log.txt"%(self.nodedir(),self.nodedir()))
 
     def list_qemu (self):
         utils.header("Listing qemu for host %s on box %s"%(self.name(),self.test_box().hostname()))
         command="%s/qemu-kill-node -l %s"%(self.nodedir(),self.name())
-        self.test_box().run_in_buildname(command)
+        self.test_box().run_in_buildname(command, dry_run=self.dry_run())
         return True
 
     def kill_qemu (self):
@@ -246,7 +250,7 @@ class TestNode:
         # kill the right processes 
         utils.header("Stopping qemu for node %s on box %s"%(self.name(),self.test_box().hostname()))
         command="%s/qemu-kill-node %s"%(self.nodedir(),self.name())
-        self.test_box().run_in_buildname(command)
+        self.test_box().run_in_buildname(command, dry_run=self.dry_run())
         return True
 
     def gather_qemu_logs (self):
@@ -254,7 +258,7 @@ class TestNode:
             return True
         remote_log="%s/log.txt"%self.nodedir()
         local_log="logs/node.qemu.%s.txt"%self.name()
-        self.test_box().test_ssh.fetch(remote_log,local_log)
+        self.test_box().test_ssh.fetch(remote_log,local_log,dry_run=self.dry_run())
 
     def keys_clear_known_hosts (self):
         "remove test nodes entries from the local known_hosts file"
index a58ae04..e001526 100644 (file)
@@ -1,13 +1,13 @@
 # Thierry Parmentelat <thierry.parmentelat@inria.fr>
 # Copyright (C) 2010 INRIA 
 #
-import os, os.path
-import datetime
-import time
 import sys
+import time
+import os, os.path
 import traceback
-from types import StringTypes
 import socket
+from datetime import datetime, timedelta
+from types import StringTypes
 
 import utils
 from TestSite import TestSite
@@ -20,6 +20,8 @@ from TestBoxQemu import TestBoxQemu
 from TestSsh import TestSsh
 from TestApiserver import TestApiserver
 from TestAuthSfa import TestAuthSfa
+from PlcapiUrlScanner import PlcapiUrlScanner
+from Completer import Completer, CompleterTask
 
 # step methods must take (self) and return a boolean (options is a member of the class)
 
@@ -83,7 +85,8 @@ class TestPlc:
         'show', SEP,
         'vs_delete','timestamp_vs','vs_create', SEP,
         'plc_install', 'plc_configure', 'plc_start', SEP,
-        'keys_fetch', 'keys_store', 'keys_clear_known_hosts', 'speed_up_slices', SEP,
+        'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP,
+        'plcapi_urls','speed_up_slices', SEP,
         'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP,
 # slices created under plcsh interactively seem to be fine but these ones don't have the tags
 # keep this our of the way for now
@@ -98,7 +101,7 @@ class TestPlc:
         # we used to run plcsh_stress_test, and then ssh_node_debug and ssh_node_boot
         # but as the stress test might take a while, we sometimes missed the debug mode..
         'ssh_node_debug@1', 'plcsh_stress_test@1', SEP,
-        'ssh_node_boot@1', 'ssh_slice', 'check_initscripts', SEP,
+        'ssh_node_boot@1', 'node_bmlogs@1', 'ssh_slice', 'ssh_slice_basics', 'check_initscripts', SEP,
         'ssh_slice_sfa@1', 'sfa_delete_slice@1', 'sfa_delete_user@1', SEPSFA,
         'cross_check_tcp@1', 'check_system_slice', SEP,
         'empty_slices', 'ssh_slice_off', 'fill_slices', SEP,
@@ -177,49 +180,49 @@ class TestPlc:
        pass
 
     def actual_command_in_guest (self,command):
-        return self.test_ssh.actual_command(self.host_to_guest(command))
+        return self.test_ssh.actual_command(self.host_to_guest(command),dry_run=self.options.dry_run)
     
     def start_guest (self):
-      return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))
+      return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()),dry_run=self.options.dry_run)
     
     def stop_guest (self):
-      return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host()))
+      return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host()),dry_run=self.options.dry_run)
     
     def run_in_guest (self,command):
-        return utils.system(self.actual_command_in_guest(command))
+        return utils.system(self.actual_command_in_guest(command),dry_run=self.options.dry_run)
     
     def run_in_host (self,command):
-        return self.test_ssh.run_in_buildname(command)
+        return self.test_ssh.run_in_buildname(command, dry_run=self.options.dry_run)
 
     #command gets run in the plc's vm
     def host_to_guest(self,command):
         if self.options.plcs_use_lxc:
-            return "ssh -o StrictHostKeyChecking=no %s %s"%(self.vserverip,command)
+            return "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null %s %s"%(self.vserverip,command)
         else:
             return "vserver %s exec %s"%(self.vservername,command)
     
     def vm_root_in_host(self):
         if self.options.plcs_use_lxc:
-            return "/var/lib/lxc/%s/rootfs/"%(self.vservername)
+            return "/vservers/%s/rootfs/"%(self.vservername)
         else:
             return "/vservers/%s"%(self.vservername)
 
     def vm_timestamp_path (self):
         if self.options.plcs_use_lxc:
-            return "/var/lib/lxc/%s/%s.timestamp"%(self.vservername,self.vservername)
+            return "/vservers/%s/%s.timestamp"%(self.vservername,self.vservername)
         else:
             return "/vservers/%s.timestamp"%(self.vservername)
 
     #start/stop the vserver
     def start_guest_in_host(self):
         if self.options.plcs_use_lxc:
-            return "lxc-start --daemon --name=%s"%(self.vservername)
+            return "virsh -c lxc:// start %s"%(self.vservername)
         else:
             return "vserver %s start"%(self.vservername)
     
     def stop_guest_in_host(self):
         if self.options.plcs_use_lxc:
-            return "lxc-stop --name=%s"%(self.vservername)
+            return "virsh -c lxc:// destroy %s"%(self.vservername)
         else:
             return "vserver %s stop"%(self.vservername)
     
@@ -543,8 +546,9 @@ class TestPlc:
         stamp_path=self.vm_timestamp_path()
         self.run_in_host("rm -f %s"%stamp_path)
         if self.options.plcs_use_lxc:
-            self.run_in_host("lxc-stop --name %s"%self.vservername)
-            self.run_in_host("lxc-destroy --name %s"%self.vservername)
+            self.run_in_host("virsh -c lxc:// destroy %s"%self.vservername)
+            self.run_in_host("virsh -c lxc:// undefine %s"%self.vservername)
+            self.run_in_host("rm -fr /vservers/%s"%self.vservername)
             return True
         else:
             self.run_in_host("vserver --silent %s delete"%self.vservername)
@@ -923,58 +927,58 @@ class TestPlc:
         return res
 
     # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
-    def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15):
+    def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period_seconds=15):
         if self.options.dry_run:
             print 'dry_run'
             return True
-        # compute timeout
-        timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
-        graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
+
+        class CompleterTaskBootState (CompleterTask):
+            def __init__ (self, test_plc,hostname):
+                self.test_plc=test_plc
+                self.hostname=hostname
+                self.last_boot_state='undef'
+            def actual_run (self):
+                try:
+                    node = self.test_plc.apiserver.GetNodes(self.test_plc.auth_root(), [ self.hostname ],
+                                                               ['boot_state'])[0]
+                    self.last_boot_state = node['boot_state'] 
+                    return self.last_boot_state == target_boot_state
+                except:
+                    return False
+            def message (self):
+                return "CompleterTaskBootState with node %s"%self.hostname
+            def failure_message (self):
+                return "node %s in state %s - expected %s"%(self.hostname,self.last_boot_state,target_boot_state)
+                
+        timeout = timedelta(minutes=timeout_minutes)
+        graceout = timedelta(minutes=silent_minutes)
+        period   = timedelta(seconds=period_seconds)
         # the nodes that haven't checked yet - start with a full list and shrink over time
-        tocheck = self.all_hostnames()
-        utils.header("checking nodes %r"%tocheck)
-        # create a dict hostname -> status
-        status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
-        while tocheck:
-            # get their status
-            tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
-            # update status
-            for array in tocheck_status:
-                hostname=array['hostname']
-                boot_state=array['boot_state']
-                if boot_state == target_boot_state:
-                    utils.header ("%s has reached the %s state"%(hostname,target_boot_state))
-                else:
-                    # if it's a real node, never mind
-                    (site_spec,node_spec)=self.locate_hostname(hostname)
-                    if TestNode.is_real_model(node_spec['node_fields']['model']):
-                        utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
-                        # let's cheat
-                        boot_state = target_boot_state
-                    elif datetime.datetime.now() > graceout:
-                        utils.header ("%s still in '%s' state"%(hostname,boot_state))
-                        graceout=datetime.datetime.now()+datetime.timedelta(1)
-                status[hostname] = boot_state
-            # refresh tocheck
-            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ]
-            if not tocheck:
-                return True
-            if datetime.datetime.now() > timeout:
-                for hostname in tocheck:
-                    utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
-                return False
-            # otherwise, sleep for a while
-            time.sleep(period)
-        # only useful in empty plcs
-        return True
+        utils.header("checking nodes boot state (expected %s)"%target_boot_state)
+        tasks = [ CompleterTaskBootState (self,hostname) \
+                      for (hostname,_) in self.all_node_infos() ]
+        return Completer (tasks).run (timeout, graceout, period)
 
     def nodes_booted(self):
         return self.nodes_check_boot_state('boot',timeout_minutes=30,silent_minutes=28)
 
-    def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=15):
-        # compute timeout
-        timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
-        graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
+    def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period_seconds=15):
+        class CompleterTaskNodeSsh (CompleterTask):
+            def __init__ (self, hostname, qemuname, boot_state, local_key):
+                self.hostname=hostname
+                self.qemuname=qemuname
+                self.boot_state=boot_state
+                self.local_key=local_key
+            def run (self, silent):
+                command = TestSsh (self.hostname,key=self.local_key).actual_command("hostname;uname -a")
+                return utils.system (command, silent=silent)==0
+            def failure_message (self):
+                return "Cannot reach %s @ %s in %s mode"%(self.hostname, self.qemuname, self.boot_state)
+
+        # various delays 
+        timeout  = timedelta(minutes=timeout_minutes)
+        graceout = timedelta(minutes=silent_minutes)
+        period   = timedelta(seconds=period_seconds)
         vservername=self.vservername
         if debug: 
             message="debug"
@@ -982,39 +986,11 @@ class TestPlc:
         else: 
             message="boot"
            local_key = "keys/key_admin.rsa"
+        utils.header("checking ssh access to nodes (expected in %s mode)"%message)
         node_infos = self.all_node_infos()
-        utils.header("checking ssh access (expected in %s mode) to nodes:"%message)
-        for (nodename,qemuname) in node_infos:
-            utils.header("hostname=%s -- qemubox=%s"%(nodename,qemuname))
-        utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
-                         (timeout_minutes,silent_minutes,period))
-        while node_infos:
-            for node_info in node_infos:
-                (hostname,qemuname) = node_info
-                # try to run 'hostname' in the node
-                command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a")
-                # don't spam logs - show the command only after the grace period 
-                success = utils.system ( command, silent=datetime.datetime.now() < graceout)
-                if success==0:
-                    utils.header('Successfully entered root@%s (%s)'%(hostname,message))
-                    # refresh node_infos
-                    node_infos.remove(node_info)
-                else:
-                    # we will have tried real nodes once, in case they're up - but if not, just skip
-                    (site_spec,node_spec)=self.locate_hostname(hostname)
-                    if TestNode.is_real_model(node_spec['node_fields']['model']):
-                        utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
-                       node_infos.remove(node_info)
-            if  not node_infos:
-                return True
-            if datetime.datetime.now() > timeout:
-                for (hostname,qemuname) in node_infos:
-                    utils.header("FAILURE to ssh into %s (on %s)"%(hostname,qemuname))
-                return False
-            # otherwise, sleep for a while
-            time.sleep(period)
-        # only useful in empty plcs
-        return True
+        tasks = [ CompleterTaskNodeSsh (nodename, qemuname, message, local_key) \
+                      for (nodename,qemuname) in node_infos ]
+        return Completer (tasks).run (timeout, graceout, period)
         
     def ssh_node_debug(self):
         "Tries to ssh into nodes in debug mode with the debug ssh key"
@@ -1027,6 +1003,10 @@ class TestPlc:
         return self.check_nodes_ssh(debug=False,
                                     timeout_minutes=self.ssh_node_boot_timeout,
                                     silent_minutes=self.ssh_node_boot_silent)
+
+    def node_bmlogs(self):
+        "Checks that there's a non-empty dir. /var/log/bm/raw"
+        return utils.system(self.actual_command_in_guest("ls /var/log/bm/raw"))==0
     
     @node_mapper
     def qemu_local_init (self): pass
@@ -1057,21 +1037,33 @@ class TestPlc:
 
     ### initscripts
     def do_check_initscripts(self):
-        overall = True
+        class CompleterTaskInitscript (CompleterTask):
+            def __init__ (self, test_sliver, stamp):
+                self.test_sliver=test_sliver
+                self.stamp=stamp
+            def actual_run (self):
+                return self.test_sliver.check_initscript_stamp (self.stamp)
+            def message (self):
+                return "initscript checker for %s"%self.test_sliver.name()
+            def failure_message (self):
+                return "initscript stamp %s not found in sliver %s"%(self.stamp,self.test_sliver.name())
+            
+        tasks=[]
         for slice_spec in self.plc_spec['slices']:
             if not slice_spec.has_key('initscriptstamp'):
                 continue
             stamp=slice_spec['initscriptstamp']
+            slicename=slice_spec['slice_fields']['name']
             for nodename in slice_spec['nodenames']:
+                print 'nodename',nodename,'slicename',slicename,'stamp',stamp
                 (site,node) = self.locate_node (nodename)
                 # xxx - passing the wrong site - probably harmless
                 test_site = TestSite (self,site)
                 test_slice = TestSlice (self,test_site,slice_spec)
                 test_node = TestNode (self,test_site,node)
                 test_sliver = TestSliver (self, test_node, test_slice)
-                if not test_sliver.check_initscript_stamp(stamp):
-                    overall = False
-        return overall
+                tasks.append ( CompleterTaskInitscript (test_sliver, stamp))
+        return Completer (tasks).run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10))
            
     def check_initscripts(self):
         "check that the initscripts have triggered"
@@ -1132,6 +1124,8 @@ class TestPlc:
     def ssh_slice(self): pass
     @slice_mapper
     def ssh_slice_off (self): pass
+    @slice_mapper
+    def ssh_slice_basics(self): pass
 
     @slice_mapper
     def check_vsys_defaults(self): pass
@@ -1139,6 +1133,9 @@ class TestPlc:
     @node_mapper
     def keys_clear_known_hosts (self): pass
     
+    def plcapi_urls (self):
+        return PlcapiUrlScanner (self.auth_root(),ip=self.vserverip).scan()
+
     def speed_up_slices (self):
         "tweak nodemanager settings on all nodes using a conf file"
         # create the template on the server-side 
@@ -1223,24 +1220,23 @@ class TestPlc:
     def check_drl (self): return self._check_system_slice ('drl')
 
     # we have the slices up already here, so it should not take too long
-    def _check_system_slice (self, slicename, timeout_minutes=5, period=15):
-        timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
-        test_nodes=self.all_nodes()
-        while test_nodes:
-            for test_node in test_nodes:
-                if test_node._check_system_slice (slicename,dry_run=self.options.dry_run):
-                    utils.header ("ok")
-                    test_nodes.remove(test_node)
-                else:
-                    print '.',
-            if not test_nodes:
-                return True
-            if datetime.datetime.now () > timeout:
-                for test_node in test_nodes:
-                    utils.header ("can't find system slice %s in %s"%(slicename,test_node.name()))
-                return False
-            time.sleep(period)
-        return True
+    def _check_system_slice (self, slicename, timeout_minutes=5, period_seconds=15):
+        class CompleterTaskSystemSlice (CompleterTask):
+            def __init__ (self, test_node, dry_run): 
+                self.test_node=test_node
+                self.dry_run=dry_run
+            def actual_run (self): 
+                return self.test_node._check_system_slice (slicename, dry_run=self.dry_run)
+            def message (self): 
+                return "System slice %s @ %s"%(slicename, self.test_node.name())
+            def failure_message (self): 
+                return "COULD not find system slice %s @ %s"%(slicename, self.test_node.name())
+        timeout = timedelta(minutes=timeout_minutes)
+        silent  = timedelta (0)
+        period  = timedelta (seconds=period_seconds)
+        tasks = [ CompleterTaskSystemSlice (test_node, self.options.dry_run) \
+                      for test_node in self.all_nodes() ]
+        return Completer (tasks) . run (timeout, silent, period)
 
     def plcsh_stress_test (self):
         "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents"
@@ -1623,7 +1619,7 @@ class TestPlc:
             if not isinstance(name,StringTypes):
                 raise Exception
         except:
-            t=datetime.datetime.now()
+            t=datetime.now()
             d=t.date()
             name=str(d)
         return "/root/%s-%s.sql"%(database,name)
index eef5ee8..25912cc 100644 (file)
@@ -3,13 +3,35 @@
 #
 import utils
 import os, os.path
-import datetime
+from datetime import datetime, timedelta
 import time
 
 from TestKey import TestKey
 from TestUser import TestUser
 from TestNode import TestNode
 from TestSsh import TestSsh
+from Completer import Completer, CompleterTask
+
+class CompleterTaskSshSlice (CompleterTask):
+
+    def __init__ (self, test_plc, hostname, slicename, private_key,command, expected, dry_run):
+        self.test_plc=test_plc
+        self.hostname=hostname
+        self.slicename=slicename
+        self.private_key=private_key
+        self.command=command
+        self.dry_run=dry_run
+        self.expected=expected
+    def run (self, silent): 
+        (site_spec,node_spec) = self.test_plc.locate_hostname(self.hostname)
+        test_ssh = TestSsh (self.hostname,key=self.private_key,username=self.slicename)
+        full_command = test_ssh.actual_command(self.command)
+        retcod = utils.system (full_command, silent=silent)
+        if self.dry_run: return True
+        if self.expected:       return retcod==0
+        else:                   return retcod!=0
+    def failure_message (self): 
+        return "Could not ssh into slice %s @ %s"%(self.slicename,self.hostname)
 
 class TestSlice:
 
@@ -122,9 +144,15 @@ class TestSlice:
         "tries to ssh-enter the slice with the user key, expecting it to be unreachable"
         return self.do_ssh_slice(options, expected=False, *args, **kwds)
 
-    def do_ssh_slice(self,options,expected=True,timeout_minutes=20,silent_minutes=10,period=15):
-        timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
-        graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
+    def do_ssh_slice(self,options,expected=True,
+                     timeout_minutes=20,silent_minutes=10,period_seconds=15,command=None):
+        "tries to enter a slice"
+
+        timeout  = timedelta(minutes=timeout_minutes)
+        graceout = timedelta(minutes=silent_minutes)
+        period   = timedelta(seconds=period_seconds)
+        if not command:
+            command="echo hostname ; hostname; echo id; id; echo uname -a ; uname -a"
         # locate a key
         private_key=self.locate_private_key()
         if not private_key :
@@ -132,55 +160,52 @@ class TestSlice:
             return False
 
         # convert nodenames to real hostnames
-        slice_spec = self.slice_spec
-        restarted=[]
-        tocheck=[]
-        for nodename in slice_spec['nodenames']:
-            (site_spec,node_spec) = self.test_plc.locate_node(nodename)
-            tocheck.append(node_spec['node_fields']['hostname'])
-
         if expected:    msg="ssh slice access enabled"
         else:           msg="ssh slice access disabled"
+        utils.header("checking for %s -- slice %s"%(msg,self.name()))
+
+        tasks=[]
+        slicename=self.name()
+        dry_run = getattr(options,'dry_run',False)
+        for nodename in self.slice_spec['nodenames']:
+            (site_spec,node_spec) = self.test_plc.locate_node(nodename)
+            tasks.append( CompleterTaskSshSlice(self.test_plc,node_spec['node_fields']['hostname'],
+                                                slicename,private_key,command,expected,dry_run))
+        return Completer (tasks).run (timeout, graceout, period)
+
+    def ssh_slice_basics (self, options, *args, **kwds):
+        "the slice is expected to be UP and we just check a few simple sanity commands, including 'ps' to check for /proc"
+        overall=True
+        if not self.do_ssh_slice_once(options,expected=True,  command='true'): overall=False
+        if not self.do_ssh_slice_once(options,expected=False, command='false'): overall=False
+        if not self.do_ssh_slice_once(options,expected=False, command='someimprobablecommandname'): overall=False
+        if not self.do_ssh_slice_once(options,expected=True,  command='ps'): overall=False
+        return overall
+
+    # picks just one nodename (the first one in the slice spec) and runs the ssh command once
+    def do_ssh_slice_once(self,options,command,expected):
+        # locate a key
+        private_key=self.locate_private_key()
+        if not private_key :
+            utils.header("WARNING: Cannot find a valid key for slice %s"%self.name())
+            return False
+
+        # convert nodenames to real hostnames
+        slice_spec = self.slice_spec
+        nodename=slice_spec['nodenames'][0]
+        (site_spec,node_spec) = self.test_plc.locate_node(nodename)
+        hostname=node_spec['node_fields']['hostname']
+
+        if expected:    msg="%s to return TRUE from ssh"%command
+        else:           msg="%s to return FALSE from ssh"%command
             
-        utils.header("checking for %s -- slice %s on nodes %r"%(msg,self.name(),tocheck))
-        utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
-                         (timeout_minutes,silent_minutes,period))
-        while tocheck:
-            for hostname in tocheck:
-                (site_spec,node_spec) = self.test_plc.locate_hostname(hostname)
-                date_test_ssh = TestSsh (hostname,key=private_key,username=self.name())
-                command = date_test_ssh.actual_command("echo hostname ; hostname; echo id; id; echo uname -a ; uname -a")
-                date = utils.system (command, silent=datetime.datetime.now() < graceout)
-                if getattr(options,'dry_run',None): return True
-                if expected:    success = date==0
-                else:           success = date!=0
-                    
-                if success:
-                    utils.header("OK %s - slice=%s@%s"%(msg,self.name(),hostname))
-                    tocheck.remove(hostname)
-                else:
-                    # real nodes will have been checked once in case they're up - skip if not
-                    if TestNode.is_real_model(node_spec['node_fields']['model']):
-                        utils.header("WARNING : Checking slice %s on real node %s skipped"%(self.name(),hostname))
-                        tocheck.remove(hostname)
-                    # nm restart after first failure, if requested 
-                    if options.forcenm and hostname not in restarted:
-                        utils.header ("forcenm option : restarting nm on %s"%hostname)
-                        restart_test_ssh=TestSsh(hostname,key="keys/key_admin.rsa")
-                        access=restart_test_ssh.actual_command('service nm restart')
-                        if (access==0):
-                            utils.header('nm restarted on %s'%hostname)
-                        else:
-                            utils.header('Failed to restart nm on %s'%(hostname))
-                        restarted.append(hostname)
-            if not tocheck:
-                # we're done
-                return True
-            if datetime.datetime.now() > timeout:
-                for hostname in tocheck:
-                    utils.header("FAILED %s slice=%s@%s"%(msg,self.name(),hostname))
-                return False
-            # wait for the period
-            time.sleep (period)
-        # for an empty slice
-        return True
+        utils.header("checking %s -- slice %s on node %s"%(msg,self.name(),hostname))
+        (site_spec,node_spec) = self.test_plc.locate_hostname(hostname)
+        test_ssh = TestSsh (hostname,key=private_key,username=self.name())
+        full_command = test_ssh.actual_command(command)
+        retcod = utils.system (full_command,silent=True)
+        if getattr(options,'dry_run',None): return True
+        if expected:    success = retcod==0
+        else:           success = retcod!=0
+        if not success: utils.header ("WRONG RESULT for %s"%msg)
+        return success
index c8fa055..7731f86 100644 (file)
@@ -3,7 +3,7 @@
 #
 
 import time
-import datetime
+from datetime import datetime, timedelta
 
 import utils
 from TestNode import TestNode
@@ -11,6 +11,8 @@ from TestUser import TestUser
 from TestBoxQemu import TestBoxQemu
 from TestSsh import TestSsh
 
+from Completer import Completer, CompleterTask
+from TestSlice import CompleterTaskSshSlice
 
 class TestSliceSfa:
 
@@ -47,8 +49,8 @@ class TestSliceSfa:
 
     def sfa_renew_slice(self, options):
         "run sfi renew (on Aggregates)"
-        too_late = datetime.datetime.now()+datetime.timedelta(weeks=52)
-        one_month = datetime.datetime.now()+datetime.timedelta(weeks=4)
+        too_late =  datetime.now() + timedelta(weeks=52)
+        one_month = datetime.now() + timedelta(weeks=4)
         # we expect this to fail on too long term attemps, but to succeed otherwise
         overall=True
         for ( renew_until, expected) in [ (too_late, False), (one_month, True) ] :
@@ -123,60 +125,23 @@ class TestSliceSfa:
         return self.test_plc.locate_private_key_from_key_names ( [ self.slice_spec['key_name'] ] )
 
     # check the resulting sliver
-    def ssh_slice_sfa(self,options,timeout_minutes=40,silent_minutes=30,period=15):
+    def ssh_slice_sfa(self,options,timeout_minutes=40,silent_minutes=0,period_seconds=15):
        "tries to ssh-enter the SFA slice"
-        timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
-        graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
+        timeout  = timedelta(minutes=timeout_minutes)
+        graceout = timedelta(minutes=silent_minutes)
+        period   = timedelta(seconds=period_seconds)
         # locate a key
         private_key=self.locate_private_key()
         if not private_key :
             utils.header("WARNING: Cannot find a valid key for slice %s"%self.name())
             return False
-
-        # convert nodenames to real hostnames
-        restarted=[]
-        tocheck=[]
+        command="echo hostname ; hostname; echo id; id; echo uname -a ; uname -a"
+        
+        tasks=[]
+        slicename=self.plc_name()
+        dry_run = getattr(options,'dry_run',False)
         for nodename in self.slice_spec['nodenames']:
             (site_spec,node_spec) = self.test_plc.locate_node(nodename)
-            tocheck.append(node_spec['node_fields']['hostname'])
-
-        utils.header("checking ssh access into slice %s on nodes %r"%(self.plc_name(),tocheck))
-        utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
-                         (timeout_minutes,silent_minutes,period))
-        while tocheck:
-            for hostname in tocheck:
-                (site_spec,node_spec) = self.test_plc.locate_hostname(hostname)
-                date_test_ssh = TestSsh (hostname,key=private_key,username=self.plc_name())
-                command = date_test_ssh.actual_command("echo hostname ; hostname; echo id; id; echo uname -a ; uname -a")
-                date = utils.system (command, silent=datetime.datetime.now() < graceout)
-                if date==0:
-                    utils.header("Successfuly entered slice %s on %s"%(self.plc_name(),hostname))
-                    tocheck.remove(hostname)
-                else:
-                    # real nodes will have been checked once in case they're up - skip if not
-                    if TestNode.is_real_model(node_spec['node_fields']['model']):
-                        utils.header("WARNING : Checking slice %s on real node %s skipped"%(self.plc_name(),hostname))
-                        tocheck.remove(hostname)
-                    # nm restart after first failure, if requested 
-                    if options.forcenm and hostname not in restarted:
-                        utils.header ("forcenm option : restarting nm on %s"%hostname)
-                        restart_test_ssh=TestSsh(hostname,key="keys/key_admin.rsa")
-                        access=restart_test_ssh.actual_command('service nm restart')
-                        if (access==0):
-                            utils.header('nm restarted on %s'%hostname)
-                        else:
-                            utils.header('Failed to restart nm on %s'%(hostname))
-                        restarted.append(hostname)
-            if not tocheck:
-                # we're done
-                return True
-            if datetime.datetime.now() > timeout:
-                for hostname in tocheck:
-                    utils.header("FAILURE to ssh into %s@%s"%(self.plc_name(),hostname))
-                return False
-            # wait for the period
-            time.sleep (period)
-        # for an empty slice
-        return True
-
-    
+            tasks.append( CompleterTaskSshSlice(self.test_plc,node_spec['node_fields']['hostname'],
+                                                slicename,private_key,command,expected=True,dry_run=dry_run))
+        return Completer (tasks).run (timeout, graceout, period)
index 1f70b40..c9d1e25 100644 (file)
@@ -48,12 +48,16 @@ class TestSsh:
             utils.header("WARNING : something wrong in is_local_hostname with hostname=%s"%hostname)
             return False
 
-    def __init__(self,hostname,buildname=None,key=None, username=None,unknown_host=True):
+    # some boxes have their working space in user's homedir (/root), 
+    # some others in a dedicated area with max. space (/vservers)
+    # when root is not specified we use the homedir
+    def __init__(self,hostname,buildname=None,key=None, username=None,unknown_host=True, root=None):
         self.hostname=hostname
         self.buildname=buildname
         self.key=key
         self.username=username
         self.unknown_host=unknown_host
+        self.root=root
 
     def is_local(self):
         return TestSsh.is_local_hostname(self.hostname)
@@ -73,27 +77,29 @@ class TestSsh:
             return "%s@%s"%(self.username,self.hostname)
     
     # command gets run on the right box
-    def actual_command (self, command,keep_stdin=False):
+    def actual_command (self, command, keep_stdin=False, dry_run=False):
         if self.is_local():
             return command
         ssh_command = "ssh "
-        if not keep_stdin:
-            ssh_command += "-n "
-        ssh_command += TestSsh.std_options
-        if self.unknown_host: ssh_command += TestSsh.unknown_option
+        if not dry_run:
+            if not keep_stdin:
+                ssh_command += "-n "
+            ssh_command += TestSsh.std_options
+            if self.unknown_host: ssh_command += TestSsh.unknown_option
         ssh_command += self.key_part()
         ssh_command += "%s %s" %(self.hostname_part(),TestSsh.backslash_shell_specials(command))
         return ssh_command
 
     # same in argv form
-    def actual_argv (self, argv,keep_stdin=False):
+    def actual_argv (self, argv,keep_stdin=False, dry_run=False):
         if self.is_local():
             return argv
         ssh_argv=[]
         ssh_argv.append('ssh')
-        if not keep_stdin: ssh_argv.append('-n')
-        ssh_argv += TestSsh.std_options.split()
-        if self.unknown_host: ssh_argv += TestSsh.unknown_option.split()
+        if not dry_run:
+            if not keep_stdin: ssh_argv.append('-n')
+            ssh_argv += TestSsh.std_options.split()
+            if self.unknown_host: ssh_argv += TestSsh.unknown_option.split()
         ssh_argv += self.key_part().split()
         ssh_argv.append(self.hostname_part())
         ssh_argv += argv
@@ -105,7 +111,7 @@ class TestSsh:
         sys.stdout.flush()
 
     def run(self, command,message=None,background=False,dry_run=False):
-        local_command = self.actual_command(command)
+        local_command = self.actual_command(command, dry_run=dry_run)
         if dry_run:
             utils.header("DRY RUN " + local_command)
             return 0
@@ -113,25 +119,33 @@ class TestSsh:
             self.header(message)
             return utils.system(local_command,background)
 
-    def clean_dir (self,dirname):
+    def run_in_buildname (self,command, background=False, dry_run=False):
         if self.is_local():
-            return 0
-        return self.run("rm -rf %s"%dirname)
+            return utils.system(command,background)
+        self.create_buildname_once(dry_run)
+        return self.run("cd %s ; %s"%(self.fullname(self.buildname),command),
+                        background=background, dry_run=dry_run)
 
-    def mkdir (self,dirname=None,abs=False):
+    def fullname (self,dirname):
+        if self.root==None:     return dirname
+        else:                   return os.path.join(self.root,dirname)
+        
+    def mkdir (self,dirname=None,abs=False,dry_run=False):
         if self.is_local():
             if dirname:
-                return os.path.mkdir(dirname)
+                # os.path has no mkdir(); behave like the remote 'mkdir -p' and report success as 0
+                if not os.path.isdir(dirname): os.makedirs(dirname)
             return 0
+        # absolute paths remain as-is
         if not abs:
             if dirname:
                 dirname="%s/%s"%(self.buildname,dirname)
             else:
                 dirname=self.buildname
+            dirname=self.fullname(dirname)
         if dirname=='.': return
-        return self.run("mkdir -p %s"%dirname)
+        return self.run("mkdir -p %s"%dirname,dry_run=dry_run)
 
-    def rmdir (self,dirname=None):
+    def rmdir (self,dirname=None, dry_run=False):
         if self.is_local():
             if dirname:
                 return shutil.rmtree(dirname)
@@ -140,34 +154,33 @@ class TestSsh:
             dirname="%s/%s"%(self.buildname,dirname)
         else:
             dirname=self.buildname
-        return self.run("rm -rf %s"%dirname)
+        dirname=self.fullname(dirname)
+        return self.run("rm -rf %s"%dirname, dry_run=dry_run)
 
-    def create_buildname_once (self):
+    def create_buildname_once (self, dry_run=False):
+        # dry_run is optional, like in the other methods, so that callers
+        # predating this parameter keep working
         if self.is_local():
             return
         # create remote buildname on demand
         try:
             self.buildname_created
         except:
-            self.mkdir()
+            self.mkdir(dry_run=dry_run)
             self.buildname_created=True
 
-    def run_in_buildname (self,command, background=False):
-        if self.is_local():
-            return utils.system(command,background)
-        self.create_buildname_once()
-        return self.run("cd %s ; %s"%(self.buildname,command),background)
-
-    def copy (self,local_file,recursive=False):
+    def copy (self,local_file,recursive=False,dry_run=False):
         if self.is_local():
             return 0
-        self.create_buildname_once()
+        self.create_buildname_once(dry_run)
         scp_command="scp "
-        scp_command += TestSsh.std_options
+        if not dry_run:
+            scp_command += TestSsh.std_options
         if recursive: scp_command += "-r "
         scp_command += self.key_part()
         scp_command += "%s %s:%s/%s"%(local_file,self.hostname_part(),
-                                      self.buildname,os.path.basename(local_file) or ".")
+                                      self.fullname(self.buildname),os.path.basename(local_file) or ".")
+        if dry_run:
+            utils.header ("DRY RUN TestSsh.copy %s"%scp_command)
+            # every other path returns an exit status where 0 means success
+            return 0
         return utils.system(scp_command)
 
     def copy_abs (self,local_file,remote_file,recursive=False):
@@ -185,14 +198,15 @@ class TestSsh:
     def copy_home (self, local_file, recursive=False):
         return self.copy_abs(local_file,os.path.basename(local_file),recursive)
 
-    def fetch (self, remote_file, local_file, recursive=False):
+    def fetch (self, remote_file, local_file, recursive=False, dry_run=False):
         if self.is_local():
             command="cp "
             if recursive: command += "-r "
             command += "%s %s"%(remote_file,local_file)
         else:
             command="scp "
-            command += TestSsh.std_options
+            if not dry_run:
+                command += TestSsh.std_options
             if recursive: command += "-r "
             command += self.key_part()
             # absolute path - do not preprend buildname
@@ -200,6 +214,7 @@ class TestSsh:
                 remote_path=remote_file
             else:
                 remote_path="%s/%s"%(self.buildname,remote_file)
+                remote_path=self.fullname(remote_path)
             command += "%s:%s %s"%(self.hostname_part(),remote_path,local_file)
         return utils.system(command)
 
index ae4debd..36d4f98 100755 (executable)
@@ -1,50 +1,54 @@
 #!/bin/bash
 
-function sense_all () {
+path=/vservers
 
-    for i in $(lxc-ls -1|sort|uniq); do 
-       [ "$(lxc-info -n $i | grep state| awk '{print $2;}' )" == "RUNNING" ] && echo "$i;$(lxc-info -n $i | grep pid | awk '{print $2;}');$(cat /var/lib/lxc/$i/$i.timestamp)" || :
-    done    
+function sense_all () {
+    virsh -c lxc:// list | grep running | while read line; do
+        pid=$(echo $line | cut -d' ' -f1)
+        lxc_name=$(echo $line | cut -d' ' -f2)
+        timestamp=$(cat $path/$lxc_name/$lxc_name.timestamp)
+        echo "$lxc_name;$pid;$timestamp" 
+    done  
 }
 
 function start_all () {
-
-    for i in $(lxc-ls -1|sort|uniq); do 
-        [ "$(lxc-info -n $i | grep state| awk '{print $2;}' )" != "RUNNING" ] && lxc-start -d -n $i || :
-    done
-   
-    #sense_all
+    virsh -c lxc:// list --inactive | grep " - "| while read line; do
+        lxc_name=$(echo $line | cut -d' ' -f2)
+        virsh -c lxc:// start $lxc_name
+    done    
 }
 
 function stop_all () {
-   
-    for i in $(lxc-ls -1|sort|uniq); do
-        [ "$(lxc-info -n $i | grep state| awk '{print $2;}' )" != "STOPPED" ] && lxc-stop -n $i
-    done
-    
-    #sense_all
+    virsh -c lxc:// list | grep running | while read line; do
+        lxc_name=$(echo $line | cut -d' ' -f2)
+        virsh -c lxc:// destroy $lxc_name
+    done   
 }
 
 function sense_lxc () {
 
-    lxc=$1; shift
-    [ "$(lxc-info -n $lxc | grep state | awk '{print $2;}')" == "RUNNING" ] && echo "$lxc;$(lxc-info -n $lxc | grep pid | awk '{print $2;}');$(cat /var/lib/lxc/$lxc/$lxc.timestamp)" || :
+    lxc_name=$1; shift
+    if [ "$(virsh -c lxc:// dominfo $lxc_name | grep State| cut -d' ' -f11)" == "running" ] ; then
+       pid=$(virsh -c lxc:// dominfo $lxc_name| grep Id | cut -d' ' -f14)
+       timestamp=$(cat $path/$lxc_name/$lxc_name.timestamp)
+       echo "$lxc_name;$pid;$timestamp"
+    fi
 }
 
 function start_lxc () {
 
-    lxc=$1; shift
-    [ "$(lxc-info -n $lxc | grep state| awk '{print $2;}' )" != "RUNNING" ] && lxc-start -d -n $lxc ||:
-    
-    #sense_lxc $lxc
+    lxc_name=$1; shift
+    if [ "$(virsh -c lxc:// dominfo $lxc_name | grep State| cut -d' ' -f11)" != "running" ] ; then
+       virsh -c lxc:// start $lxc_name
+    fi
 }
 
 function stop_lxc () {
 
-    lxc=$1; shift
-    [ "$(lxc-info -n $lxc | grep state| awk '{print $2;}' )" != "STOPPED" ] && lxc-stop -n $lxc
-
-    #sense_lxc $lxc
+    lxc_name=$1; shift
+    if [ "$(virsh -c lxc:// dominfo $lxc_name | grep State| cut -d' ' -f11)" != "shut off" ] ; then
+       virsh -c lxc:// destroy $lxc_name
+    fi
 }
 
 function restart_all () {
@@ -55,25 +59,27 @@ function restart_all () {
 
 function restart_lxc () {
 
-    lxc=$1; shift
-    stop_lxc $lxc
-    start_lxc $lxc
+    lxc_name=$1; shift
+    stop_lxc $lxc_name
+    start_lxc $lxc_name
 }
 
 function destroy_all () {
     
     stop_all
-    for i in $(lxc-ls -1|sort|uniq); do
-        lxc-destroy -n $i
+    virsh -c lxc:// list --all | while read line; do
+        lxc_name=$(echo $line | cut -d' ' -f2)
+        virsh -c lxc:// undefine $lxc_name
+        rm -fr $path/$lxc_name 
     done
-
 }
 
 function destroy_lxc () {
 
-    lxc=$1; shift
-    stop_lxc $lxc
-    lxc-destroy -n $lxc
+    lxc_name=$1; shift
+    stop_lxc $lxc_name
+    virsh -c lxc:// undefine $lxc_name
+    rm -fr $path/$lxc_name
 }
 
 function usage () {
@@ -81,7 +87,7 @@ function usage () {
     echo "Description:"
     echo "   This command is used to manage and retreive information on existing lxc containers "
     echo "lxc-driver.sh -c <COMMAND>_all"
-    echo "lxc-driver.sh -c <COMMAND>_lxc -l <LXCNAME>"
+    echo "lxc-driver.sh -c <COMMAND>_lxc -n <LXCNAME>"
     echo "<COMMAND> in {sense,start,stop,restart,destroy}"
 
 }
index 1749d6f..d182557 100755 (executable)
@@ -17,19 +17,32 @@ set -x
 # constant
 INTERFACE_BRIDGE=br0
 
-# Default Value for INTERFACE_LAN
-# let's try to figure out the interface to use - try these in order
-IFNAMES="eth0 eth1 eth2 eth3"
+#################### compute INTERFACE_LAN
+# use /proc/net/dev instead of a hard-wired list
+function gather_interfaces () {
+    python <<EOF
+for line in file("/proc/net/dev"):
+    if ':' not in line: continue
+    ifname=line.replace(" ","").split(":")[0]
+    if ifname.find("lo")==0: continue
+    if ifname.find("br")==0: continue
+    if ifname.find("virbr")==0: continue
+    if ifname.find("tap")==0: continue
+    print ifname
+EOF
+}
+    
 function discover_interface () {
-    for ifname in $IFNAMES; do
-       ip link show $ifname | grep -q UP && { INTERFACE_LAN=$ifname; return; }
+    for ifname in $(gather_interfaces); do
+       ip link show $ifname | grep -qi 'state UP' && { echo $ifname; return; }
     done
     # still not found ? that's bad
-    INTERFACE_LAN=unknown
+    echo unknown
 }
-discover_interface
-echo $INTERFACE_LAN
+INTERFACE_LAN=$(discover_interface)
+echo Using physical interface $INTERFACE_LAN
 
+####################
 # Fonction de mise en place du pont
 function start () {
 
index 2a87537..c529dfc 100644 (file)
@@ -1,3 +1,3 @@
 # for use by module-tools only
-%define version 5.1
-%define taglevel 9
+%define version 5.2
+%define taglevel 1