introduces the TestBox class - review how qemu gets stopped
[tests.git] / system / TestPlc.py
index 183d013..4694d1c 100644 (file)
@@ -14,9 +14,32 @@ from TestNode import TestNode
 from TestUser import TestUser
 from TestKey import TestKey
 from TestSlice import TestSlice
+from TestBox import TestBox
+
+# inserts a backslash before each occurrence of the following chars
+# \ " ' < > & | ; ( ) $ * ~ 
+def backslash_shell_specials (command):
+    result=''  # escaped copy of command, built one character at a time
+    for char in command:
+        if char in "\\\"'<>&|;()$*~":  # whitespace is not in this set, so it is left unescaped
+            result +='\\'+char
+        else:
+            result +=char
+    return result
 
 # step methods must take (self, options) and return a boolean
 
+def standby(minutes):
+        utils.header('Entering StandBy for %d mn'%minutes)
+        time.sleep(60*minutes)  # blocks the caller for the whole duration
+        return True  # always True, which is what a step is expected to return on success
+
+def standby_generic (func):
+    def actual(self,options):  # step signature; func itself is never called
+        minutes=int(func.__name__.split("_")[1])  # e.g. 'standby_5' -> 5
+        return standby(minutes)
+    return actual
+
 class TestPlc:
 
     def __init__ (self,plc_spec):
@@ -48,31 +71,49 @@ class TestPlc:
     def connect (self):
        pass
     
-    # build the full command so command gets run in the chroot/vserver
-    def run_command(self,command):
+    # command gets run in the chroot/vserver
+    def host_to_guest(self,command):  # returns the command line that runs command inside the vserver or chroot
         if self.vserver:
             return "vserver %s exec %s"%(self.vservername,command)
         else:
-            return "chroot /plc/root sh -c \\\"%s\\\""%command
+            return "chroot /plc/root %s"%backslash_shell_specials(command)  # NOTE(review): the vserver branch passes command through unescaped
 
-    def ssh_command(self,command):
+    # command gets run on the right box
+    def to_host(self,command):  # returns command unchanged when local, else an ssh invocation on plc_spec['hostname']
         if self.is_local():
             return command
         else:
-            return "ssh %s sh -c '\"%s\"'"%(self.plc_spec['hostname'],command)
+            return "ssh %s %s"%(self.plc_spec['hostname'],backslash_shell_specials(command))  # specials get backslash-escaped before being handed to ssh
 
     def full_command(self,command):
-        return self.ssh_command(self.run_command(command))
+        return self.to_host(self.host_to_guest(command))
 
     def run_in_guest (self,command):
         return utils.system(self.full_command(command))
     def run_in_host (self,command):
-        return utils.system(self.ssh_command(command))
+        return utils.system(self.to_host(command))
 
     # xxx quick n dirty
     def run_in_guest_piped (self,local,remote):
         return utils.system(local+" | "+self.full_command(remote))
 
+    # copy a file to the myplc root image - pass in_data=True if the file must go in /plc/data
+    def copy_in_guest (self, localfile, remotefile, in_data=False):  # returns None; the cp/scp result is discarded
+        if in_data:
+            chroot_dest="/plc/data"
+        else:
+            chroot_dest="/plc/root"
+        if self.is_local():
+            if not self.vserver:
+                utils.system("cp %s %s/%s"%(localfile,chroot_dest,remotefile))
+            else:
+                utils.system("cp %s /vservers/%s/%s"%(localfile,self.vservername,remotefile))  # chroot_dest, hence in_data, is not used for vservers
+        else:
+            if not self.vserver:
+                utils.system("scp %s %s:%s/%s"%(localfile,self.plc_spec['hostname'],chroot_dest,remotefile))
+            else:
+                utils.system("scp %s %s@/vservers/%s/%s"%(localfile,self.plc_spec['hostname'],self.vservername,remotefile))  # NOTE(review): '@' looks wrong here, scp expects host:path as in the branch above - confirm
+
     def auth_root (self):
        return {'Username':self.plc_spec['PLC_ROOT_USER'],
                'AuthMethod':'password',
@@ -87,32 +128,59 @@ class TestPlc:
                 return site
         raise Exception,"Cannot locate site %s"%sitename
         
+    def locate_node (self,nodename):  # looks nodename up among the nodes of every site in plc_spec
+        for site in self.plc_spec['sites']:
+            for node in site['nodes']:
+                if node['node_fields']['hostname'] == nodename:
+                    return (site,node)  # the first match wins
+        raise Exception,"Cannot locate node %s"%nodename  # no site declares that hostname
+        
     def locate_key (self,keyname):
         for key in self.plc_spec['keys']:
             if key['name'] == keyname:
                 return key
         raise Exception,"Cannot locate key %s"%keyname
-        
-    def kill_all_vmwares(self):
-        utils.header('Killing any running vmware or vmplayer instance')
-        utils.system('pgrep vmware | xargs -r kill')
-        utils.system('pgrep vmplayer | xargs -r kill ')
-        utils.system('pgrep vmware | xargs -r kill -9')
-        utils.system('pgrep vmplayer | xargs -r kill -9')
 
-    def kill_all_qemus(self):
+    # all different hostboxes used in this plc
+    def gather_hostBoxes(self):
+        # walk all sites and nodes, collecting [ (host_box,hostname) ]
+        tuples=[]
         for site_spec in self.plc_spec['sites']:
             test_site = TestSite (self,site_spec)
             for node_spec in site_spec['nodes']:
-                TestNode (self,test_site,node_spec).stop_qemu()
+                test_node = TestNode (self, test_site, node_spec)
+                if not test_node.is_real():  # real nodes are left out of the result
+                    tuples.append( (test_node.host_box(),node_spec['node_fields']['hostname']) )
+        # transform into a dict { 'host_box' -> [ hostnames .. ] }
+        result = {}
+        for (box,hostname) in tuples:
+            if not result.has_key(box):  # first hostname seen for this box
+                result[box]=[hostname]
+            else:
+                result[box].append(hostname)
+        return result
                     
-    def clear_ssh_config (self):
-        # using ssh -o "BatchMode yes" is too tricky due to quoting - let's use the config
-        utils.header("Setting BatchMode and StrictHostKeyChecking in ssh config")
-        self.run_in_guest("sed -i -e '/BatchMode/d' /root/.ssh/config 2> /dev/null")
-        self.run_in_guest_piped("echo BatchMode yes", "cat >> /root/.ssh/config")
-        self.run_in_guest("sed -i -e '/StrictHostKeyChecking/d' /root/.ssh/config 2> /dev/null")
-        self.run_in_guest_piped("echo StrictHostKeyChecking no", "cat >> /root/.ssh/config")
+    # a step for checking this stuff
+    def showboxes (self,options):  # options is not used
+        print 'showboxes'
+        for (box,hosts) in self.gather_hostBoxes().iteritems():
+            print box,":"," + ".join(hosts)  # one line per box: its name, then its hostnames joined by ' + '
+        return True
+
+    def kill_all_qemus(self):
+        for (box,hosts) in self.gather_hostBoxes().iteritems():  # hosts is not used, only the box matters here
+            # this is the brute force version, kill all qemus on that host box
+            TestBox(box).kill_all_qemus()
+
+    def clear_ssh_config (self,options):
+        # install the local ssh_config file as root's .ssh/config - ssh should be quiet
+        # the directory might need to be created first
+        self.run_in_guest("mkdir /root/.ssh")  # the return value is not checked
+        self.run_in_guest("chmod 700 /root/.ssh")
+        # this does not work - the > redirection somehow ends up as an argument to cat
+        #self.run_in_guest_piped("cat ssh_config","cat > /root/.ssh/config")
+        self.copy_in_guest("ssh_config","/root/.ssh/config",True)  # NOTE(review): True selects /plc/data as the chroot destination - confirm root's home lives there
+        return True
             
     #################### step methods
 
@@ -124,7 +192,7 @@ class TestPlc:
         ##### Clean up the /plc directory
         self.run_in_host('rm -rf  /plc/data')
         ##### stop any running vservers
-        self.run_in_host('for vserver in $(cd /vservers ; ls) ; do vserver $vserver stop ; done')
+        self.run_in_host('for vserver in $(ls /vservers/* | sed -e s,/vservers/,,) ; do vserver $vserver stop ; done')
         return True
 
     def uninstall_vserver(self,options):
@@ -312,36 +380,109 @@ class TestPlc:
                 self.server.AddNodeToNodeGroup(auth,node,nodegroupname)
         return True
 
-    def check_nodes(self,options):
-        time.sleep(10)#Wait for the qemu to mount. Only  matter of display
-        status=True
-        start_time = datetime.datetime.now()
-        dead_time=datetime.datetime.now()+ datetime.timedelta(minutes=5)
-        booted_nodes=[]
+    def all_hostnames (self) :
+        hostnames = []
         for site_spec in self.plc_spec['sites']:
-            test_site = TestSite (self,site_spec)
-            utils.header("Starting checking for nodes in site %s"%self.name())
-            notfullybooted_nodes=[ node_spec['node_fields']['hostname'] for node_spec in site_spec['nodes'] ]
-            nbr_nodes= len(notfullybooted_nodes)
-            while (status):
-                for node_spec in site_spec['nodes']:
-                    hostname=node_spec['node_fields']['hostname']
-                    if (hostname in notfullybooted_nodes): #to avoid requesting already booted node
-                        test_node=TestNode (self,test_site,node_spec)
-                        host_box=test_node.host_box()
-                        node_status=test_node.get_node_status(hostname)
-                        if (node_status):
-                            booted_nodes.append(hostname)
-                            del notfullybooted_nodes[notfullybooted_nodes.index(hostname)]
-                if ( not notfullybooted_nodes): break
-                elif ( start_time  <= dead_time ) :
-                    start_time=datetime.datetime.now()+ datetime.timedelta(minutes=2)
-                    time.sleep(15)
-                else: status=False
-            for nodeup in booted_nodes : utils.header("Node %s correctly installed and booted"%nodeup)
-            for nodedown  in notfullybooted_nodes : utils.header("Node %s not fully booted"%nodedown)
-            return status
+            hostnames += [ node_spec['node_fields']['hostname'] \
+                           for node_spec in site_spec['nodes'] ]
+        return hostnames
+
+    # gracetime : during the first <gracetime> minutes nothing gets printed
+    def do_nodes_booted (self, minutes, gracetime=2):  # True once no node is left outside 'boot', False after <minutes> minutes
+        # compute timeout
+        timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
+        graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
+        # the nodes that have not reached 'boot' yet - start with the full list and shrink over time
+        tocheck = self.all_hostnames()
+        utils.header("checking nodes %r"%tocheck)
+        # create a dict hostname -> status
+        status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
+        while tocheck:
+            # get their status
+            tocheck_status=self.server.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
+            # update status
+            for array in tocheck_status:  # each entry is indexed by the two requested field names
+                hostname=array['hostname']
+                boot_state=array['boot_state']
+                if boot_state == 'boot':
+                    utils.header ("%s has reached the 'boot' state"%hostname)
+                else:
+                    # if it's a real node, never mind
+                    (site_spec,node_spec)=self.locate_node(hostname)
+                    if TestNode.is_real_model(node_spec['node_fields']['model']):
+                        utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
+                        # record it as booted so that it drops out of tocheck
+                        boot_state = 'boot'
+                    if datetime.datetime.now() > graceout:
+                        utils.header ("%s still in '%s' state"%(hostname,boot_state))
+                        graceout=datetime.datetime.now()+datetime.timedelta(1)  # NOTE(review): timedelta(1) is one day, so the next such message is a day away - minutes intended?
+                status[hostname] = boot_state
+            # refresh tocheck
+            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
+            if not tocheck:
+                return True
+            if datetime.datetime.now() > timeout:
+                for hostname in tocheck:
+                    utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
+                return False
+            # otherwise, sleep for a while
+            time.sleep(15)
+        # only useful in empty plcs
+        return True
+
+    def nodes_booted(self,options):  # step: options is not used
+        return self.do_nodes_booted(minutes=5)  # 5 minutes overall, default gracetime
     
+    # scan and store the nodes' public keys so that ssh does not ask for confirmation
+    def scan_publicKeys(self,hostnames):  # returns None in every case
+        try:
+            temp_knownhosts="/root/known_hosts"  # scratch file, written through run_in_host
+            remote_knownhosts="/root/.ssh/known_hosts"
+            self.run_in_host("touch %s"%temp_knownhosts )
+            for hostname in hostnames:
+                utils.header("Scan Public %s key and store it in the known_host file(under the root image) "%hostname)
+                scan=self.run_in_host('ssh-keyscan -t rsa %s >> %s '%(hostname,temp_knownhosts))  # scan is never read
+            # store the public keys in the right root image
+            self.copy_in_guest(temp_knownhosts,remote_knownhosts,True)  # NOTE(review): when the plc is not local the scratch file was written over ssh, yet this copies a local path - confirm
+            # clean up the temporary keys file
+            self.run_in_host('rm -f  %s '%temp_knownhosts )
+        except Exception, err:  # best effort: any error is printed and swallowed
+            print err
+            
+    def do_check_nodesSsh(self,minutes):  # True when tocheck empties, False when <minutes> minutes elapse first
+        # compute timeout
+        timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
+        #graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
+        tocheck = self.all_hostnames()
+        self.scan_publicKeys(tocheck)
+        utils.header("checking Connectivity on nodes %r"%tocheck)
+        while tocheck:
+            for hostname in tocheck:  # NOTE(review): tocheck is mutated below while being iterated, so entries get skipped in this pass
+                # try to ssh in nodes
+                access=self.run_in_guest('ssh -i /etc/planetlab/root_ssh_key.rsa root@%s date'%hostname )
+                if (not access):  # a falsy result is treated as success, presumably a zero exit status
+                    utils.header('The node %s is sshable -->'%hostname)
+                    # refresh tocheck
+                    tocheck.remove(hostname)
+                else:
+                    (site_spec,node_spec)=self.locate_node(hostname)
+                    if TestNode.is_real_model(node_spec['node_fields']['model']):
+                        utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
+                    tocheck.remove(hostname)  # NOTE(review): runs for every failed host, real or not, so a failed host is dropped without retry - should this sit under the if?
+            if not tocheck:
+                return True
+            if datetime.datetime.now() > timeout:
+                for hostname in tocheck:
+                    utils.header("FAILURE to ssh into %s"%hostname)
+                return False
+            # otherwise, sleep for a while
+            time.sleep(15)
+        # only useful in empty plcs
+        return True
+        
+    def nodes_ssh(self, options):  # step: options is not used
+        return  self.do_check_nodesSsh(minutes=2)  # 2 minutes overall
+            
     def bootcd (self, options):
         for site_spec in self.plc_spec['sites']:
             test_site = TestSite (self,site_spec)
@@ -381,19 +522,19 @@ class TestPlc:
             site_spec = self.locate_site (slice_spec['sitename'])
             test_site = TestSite(self,site_spec)
             test_slice=TestSlice(self,test_site,slice_spec)
-            status=test_slice.do_check_slices()
-            return status
+            status=test_slice.do_check_slice(options)
+            if (not status):
+                return False
+        return status
     
     def start_nodes (self, options):
-        self.kill_all_vmwares()
         self.kill_all_qemus()
-        utils.header("Starting vmware nodes")
+        utils.header("Starting  nodes")
         for site_spec in self.plc_spec['sites']:
             TestSite(self,site_spec).start_nodes (options)
         return True
 
     def stop_nodes (self, options):
-        self.kill_all_vmwares ()
         self.kill_all_qemus()
         return True
 
@@ -429,3 +570,45 @@ class TestPlc:
         self.run_in_guest('service httpd start')
 
         utils.header('Database restored from ' + dump)
+
+    @standby_generic  # standby_<N> becomes a step that sleeps N minutes, N being read from the name
+    def standby_1(): pass  # placeholder: the decorator never calls it, only its name matters
+    @standby_generic 
+    def standby_2(): pass
+    @standby_generic 
+    def standby_3(): pass
+    @standby_generic 
+    def standby_4(): pass
+    @standby_generic 
+    def standby_5(): pass
+    @standby_generic 
+    def standby_6(): pass
+    @standby_generic 
+    def standby_7(): pass
+    @standby_generic 
+    def standby_8(): pass
+    @standby_generic 
+    def standby_9(): pass
+    @standby_generic 
+    def standby_10(): pass
+    @standby_generic 
+    def standby_11(): pass
+    @standby_generic 
+    def standby_12(): pass
+    @standby_generic 
+    def standby_13(): pass
+    @standby_generic 
+    def standby_14(): pass
+    @standby_generic 
+    def standby_15(): pass
+    @standby_generic 
+    def standby_16(): pass
+    @standby_generic 
+    def standby_17(): pass
+    @standby_generic 
+    def standby_18(): pass
+    @standby_generic 
+    def standby_19(): pass
+    @standby_generic 
+    def standby_20(): pass
+