since we run ssh-node-boot@1 we have to only run node-bmlogs on @1
[tests.git] / system / TestPlc.py
index aced650..98bddff 100644 (file)
@@ -19,7 +19,8 @@ from TestSliver import TestSliver
 from TestBoxQemu import TestBoxQemu
 from TestSsh import TestSsh
 from TestApiserver import TestApiserver
-from TestSliceSfa import TestSliceSfa
+from TestAuthSfa import TestAuthSfa
+from PlcapiUrlScanner import PlcapiUrlScanner
 
 # step methods must take (self) and return a boolean (options is a member of the class)
 
@@ -62,16 +63,16 @@ def slice_mapper (method):
     actual.__doc__=TestSlice.__dict__[method.__name__].__doc__
     return actual
 
-def slice_sfa_mapper (method):
+def auth_sfa_mapper (method):
     def actual(self):
         overall=True
-        slice_method = TestSliceSfa.__dict__[method.__name__]
-        for slice_spec in self.plc_spec['sfa']['sfa_slice_specs']:
-            test_slice=TestSliceSfa(self,slice_spec)
-            if not slice_method(test_slice,self.options): overall=False
+        auth_method = TestAuthSfa.__dict__[method.__name__]
+        for auth_spec in self.plc_spec['sfa']['auth_sfa_specs']:
+            test_auth=TestAuthSfa(self,auth_spec)
+            if not auth_method(test_auth,self.options): overall=False
         return overall
     # restore the doc text
-    actual.__doc__=TestSliceSfa.__dict__[method.__name__].__doc__
+    actual.__doc__=TestAuthSfa.__dict__[method.__name__].__doc__
     return actual
 
 SEP='<sep>'
@@ -83,22 +84,25 @@ class TestPlc:
         'show', SEP,
         'vs_delete','timestamp_vs','vs_create', SEP,
         'plc_install', 'plc_configure', 'plc_start', SEP,
-        'keys_fetch', 'keys_store', 'keys_clear_known_hosts', 'speed_up_slices', SEP,
+        'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP,
+        'plcapi_urls','speed_up_slices', SEP,
         'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP,
+# slices created interactively under plcsh seem to be fine, but these ones don't have the tags
+# keep this out of the way for now
+#        'check_vsys_defaults', SEP,
         'nodestate_reinstall', 'qemu_local_init','bootcd', 'qemu_local_config', SEP,
         'qemu_export', 'qemu_kill_mine', 'qemu_start', 'timestamp_qemu', SEP,
         'sfa_install_all', 'sfa_configure', 'cross_sfa_configure', 'sfa_start', 'sfa_import', SEPSFA,
         'sfi_configure@1', 'sfa_add_site@1','sfa_add_pi@1', SEPSFA,
-        'sfa_add_user@1', 'sfa_add_slice@1', 'sfa_discover@1', SEPSFA,
-        'sfa_create_slice@1', 'sfa_check_slice_plc@1', SEPSFA, 
-        'sfa_update_user@1', 'sfa_update_slice@1', SEPSFA,
+        'sfa_add_user@1', 'sfa_update_user@1', 'sfa_add_slice@1', 'sfa_renew_slice@1', SEPSFA,
+        'sfa_discover@1', 'sfa_create_slice@1', 'sfa_check_slice_plc@1', 'sfa_update_slice@1', SEPSFA,
         'sfi_list@1', 'sfi_show@1', 'sfi_slices@1', 'sfa_utest@1', SEPSFA,
         # we used to run plcsh_stress_test, and then ssh_node_debug and ssh_node_boot
         # but as the stress test might take a while, we sometimes missed the debug mode..
         'ssh_node_debug@1', 'plcsh_stress_test@1', SEP,
-        'ssh_node_boot@1', 'ssh_slice', 'check_initscripts', SEP,
+        'ssh_node_boot@1', 'node_bmlogs@1', 'ssh_slice', 'ssh_slice_basics', 'check_initscripts', SEP,
         'ssh_slice_sfa@1', 'sfa_delete_slice@1', 'sfa_delete_user@1', SEPSFA,
-        'check_tcp', 'check_sys_slice', SEP,
+        'cross_check_tcp@1', 'check_system_slice', SEP,
         'empty_slices', 'ssh_slice_off', 'fill_slices', SEP,
         'force_gather_logs', SEP,
         ]
@@ -114,6 +118,8 @@ class TestPlc:
        'sfa_install_core', 'sfa_install_sfatables', 'sfa_install_plc', 'sfa_install_client', SEPSFA,
         'sfa_plcclean', 'sfa_dbclean', 'sfa_stop','sfa_uninstall', 'sfi_clean', SEPSFA,
         'plc_db_dump' , 'plc_db_restore', SEP,
+        'check_netflow','check_drl', SEP,
+        'debug_nodemanager', SEP,
         'standby_1_through_20',SEP,
         ]
 
@@ -151,6 +157,8 @@ class TestPlc:
         self.vservername=plc_spec['vservername']
         self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
        self.apiserver=TestApiserver(self.url,options.dry_run)
+        (self.ssh_node_boot_timeout,self.ssh_node_boot_silent)=plc_spec['ssh_node_boot_timers']
+        (self.ssh_node_debug_timeout,self.ssh_node_debug_silent)=plc_spec['ssh_node_debug_timers']
         
     def has_addresses_api (self):
         return self.apiserver.has_method('AddIpAddress')
@@ -263,11 +271,24 @@ class TestPlc:
                     return (site,node)
         raise Exception,"Cannot locate hostname %s"%hostname
         
-    def locate_key (self,keyname):
+    def locate_key (self,key_name):
         for key in self.plc_spec['keys']:
-            if key['name'] == keyname:
+            if key['key_name'] == key_name:
                 return key
-        raise Exception,"Cannot locate key %s"%keyname
+        raise Exception,"Cannot locate key %s"%key_name
+
+    def locate_private_key_from_key_names (self, key_names):
+        "returns the private key path of the first key whose 2 files both exist, or None"
+        for key_name in key_names:
+            key_spec=self.locate_key(key_name)
+            test_key=TestKey(self,key_spec)
+            publickey=test_key.publicpath()
+            privatekey=test_key.privatepath()
+            # stop at the first key fully available on disk, so that a later
+            # key (possibly missing its files) cannot override the result
+            if os.path.isfile(publickey) and os.path.isfile(privatekey):
+                return privatekey
+        return None
 
     def locate_slice (self, slicename):
         for slice in self.plc_spec['slices']:
@@ -375,9 +396,16 @@ class TestPlc:
         self.show_pass (2)
         return True
 
+    # ugly hack to make sure 'run export' only reports about the 1st plc 
+    # to avoid confusion - also we use 'inri_slice1' in various aliases..
+    exported_id=1
     def export (self):
         "print cut'n paste-able stuff to export env variables to your shell"
         # guess local domain from hostname
+        if TestPlc.exported_id>1: 
+            print "export GUESTHOSTNAME%d=%s"%(TestPlc.exported_id,self.plc_spec['vservername'])
+            return True
+        TestPlc.exported_id+=1
         domain=socket.gethostname().split('.',1)[1]
         fqdn="%s.%s"%(self.plc_spec['host_box'],domain)
         print "export BUILD=%s"%self.options.buildname
@@ -446,7 +474,7 @@ class TestPlc:
         print '+ ======== initscript',initscript['initscript_fields']['name']
 
     def display_key_spec (self,key):
-        print '+ ======== key',key['name']
+        print '+ ======== key',key['key_name']
 
     def display_slice_spec (self,slice):
         print '+ ======== slice',slice['slice_fields']['name']
@@ -616,6 +644,7 @@ class TestPlc:
                      'PLC_RESERVATION_GRANULARITY',
                      'PLC_OMF_ENABLED',
                      'PLC_OMF_XMPP_SERVER',
+                     'PLC_VSYS_DEFAULTS',
                      ]:
             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
         fileconf.write('w\n')
@@ -701,7 +730,7 @@ class TestPlc:
     def delete_all_sites (self):
         "Delete all sites in PLC, and related objects"
         print 'auth_root',self.auth_root()
-        sites = self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])
+        sites = self.apiserver.GetSites(self.auth_root(), {}, ['site_id','login_base'])
         for site in sites:
             # keep automatic site - otherwise we shoot in our own foot, root_auth is not valid anymore
             if site['login_base']==self.plc_spec['PLC_SLICE_PREFIX']: continue
@@ -954,7 +983,7 @@ class TestPlc:
             local_key = "keys/%(vservername)s-debug.rsa"%locals()
         else: 
             message="boot"
-           local_key = "keys/key1.rsa"
+           local_key = "keys/key_admin.rsa"
         node_infos = self.all_node_infos()
         utils.header("checking ssh access (expected in %s mode) to nodes:"%message)
         for (nodename,qemuname) in node_infos:
@@ -991,11 +1020,19 @@ class TestPlc:
         
     def ssh_node_debug(self):
         "Tries to ssh into nodes in debug mode with the debug ssh key"
-        return self.check_nodes_ssh(debug=True,timeout_minutes=10,silent_minutes=8)
+        return self.check_nodes_ssh(debug=True,
+                                    timeout_minutes=self.ssh_node_debug_timeout,
+                                    silent_minutes=self.ssh_node_debug_silent)
     
     def ssh_node_boot(self):
         "Tries to ssh into nodes in production mode with the root ssh key"
-        return self.check_nodes_ssh(debug=False,timeout_minutes=40,silent_minutes=38)
+        return self.check_nodes_ssh(debug=False,
+                                    timeout_minutes=self.ssh_node_boot_timeout,
+                                    silent_minutes=self.ssh_node_boot_silent)
+
+    def node_bmlogs(self):
+        "Checks that there's a non-empty dir. /var/log/bm/raw"
+        return utils.system(self.actual_command_in_guest("ls /var/log/bm/raw"))==0
     
     @node_mapper
     def qemu_local_init (self): pass
@@ -1101,10 +1138,18 @@ class TestPlc:
     def ssh_slice(self): pass
     @slice_mapper
     def ssh_slice_off (self): pass
+    @slice_mapper
+    def ssh_slice_basics(self): pass
+
+    @slice_mapper
+    def check_vsys_defaults(self): pass
 
     @node_mapper
     def keys_clear_known_hosts (self): pass
     
+    def plcapi_urls (self):
+        return PlcapiUrlScanner (self.auth_root(),ip=self.vserverip).scan()
+
     def speed_up_slices (self):
         "tweak nodemanager settings on all nodes using a conf file"
         # create the template on the server-side 
@@ -1122,45 +1167,79 @@ class TestPlc:
                                      'postinstall_cmd':'service nm restart',})
         return True
 
+    def debug_nodemanager (self):
+        "sets verbose mode for nodemanager, and speeds up cycle even more (needs speed_up_slices first)"
+        template="%s.nodemanager"%self.name()
+        template_file = open (template,"w")
+        template_file.write('OPTIONS="-p 10 -r 6 -v -d"\n')
+        template_file.close()
+        in_vm="/var/www/html/PlanetLabConf/nodemanager"
+        remote="%s/%s"%(self.vm_root_in_host(),in_vm)
+        self.test_ssh.copy_abs(template,remote)
+        return True
+
     @node_mapper
     def qemu_start (self) : pass
 
     @node_mapper
     def timestamp_qemu (self) : pass
 
-    def check_tcp (self):
+    # when a spec refers to a node possibly on another plc
+    def locate_sliver_obj_cross (self, nodename, slicename, other_plcs):
+        for plc in [ self ] + other_plcs:
+            try:
+                return plc.locate_sliver_obj (nodename, slicename)
+            except:
+                pass
+        raise Exception, "Cannot locate sliver %s@%s among all PLCs"%(nodename,slicename)
+
+    # implement this one as a cross step so that we can take advantage of different nodes
+    # in multi-plcs mode
+    def cross_check_tcp (self, other_plcs):
         "check TCP connectivity between 2 slices (or in loopback if only one is defined)"
-        specs = self.plc_spec['tcp_test']
+        if 'tcp_specs' not in self.plc_spec or not self.plc_spec['tcp_specs']: 
+            utils.header ("check_tcp: no/empty config found")
+            return True
+        specs = self.plc_spec['tcp_specs']
         overall=True
         for spec in specs:
             port = spec['port']
             # server side
-            s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
-            if not s_test_sliver.run_tcp_server(port,timeout=10):
+            s_test_sliver = self.locate_sliver_obj_cross (spec['server_node'],spec['server_slice'],other_plcs)
+            if not s_test_sliver.run_tcp_server(port,timeout=20):
                 overall=False
                 break
 
             # idem for the client side
-            c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
-            if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
+            c_test_sliver = self.locate_sliver_obj_cross (spec['client_node'],spec['client_slice'],other_plcs)
+            # use nodename from located sliver, unless 'client_connect' is set
+            if 'client_connect' in spec:
+                destination = spec['client_connect']
+            else:
+                destination=s_test_sliver.test_node.name()
+            if not c_test_sliver.run_tcp_client(destination,port):
                 overall=False
         return overall
 
     # painfully enough, we need to allow for some time as netflow might show up last
-    def check_sys_slice (self): 
+    def check_system_slice (self): 
         "all nodes: check that a system slice is alive"
-# would probably make more sense to check for netflow, 
-# but that one is currently not working in the lxc distro        
-#        return self.check_systemslice ('netflow')
-        return self.check_systemslice ('drl')
+        # netflow currently not working in the lxc distro
+        # drl not built at all in the wtx distro
+        # if we find either of them we're happy
+        return self.check_netflow() or self.check_drl()
     
+    # expose these
+    def check_netflow (self): return self._check_system_slice ('netflow')
+    def check_drl (self): return self._check_system_slice ('drl')
+
     # we have the slices up already here, so it should not take too long
-    def check_systemslice (self, slicename, timeout_minutes=5, period=15):
+    def _check_system_slice (self, slicename, timeout_minutes=5, period=15):
         timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
         test_nodes=self.all_nodes()
         while test_nodes:
             for test_node in test_nodes:
-                if test_node.check_systemslice (slicename,dry_run=self.options.dry_run):
+                if test_node._check_system_slice (slicename,dry_run=self.options.dry_run):
                     utils.header ("ok")
                     test_nodes.remove(test_node)
                 else:
@@ -1238,22 +1317,28 @@ class TestPlc:
 
     def sfa_dbclean(self):
         "thoroughly wipes off the SFA database"
-        return self.run_in_guest("sfaadmin.py registry nuke")==0 or \
+        return self.run_in_guest("sfaadmin reg nuke")==0 or \
             self.run_in_guest("sfa-nuke.py")==0 or \
             self.run_in_guest("sfa-nuke-plc.py")==0
 
+    def sfa_fsclean(self):
+        "cleanup /etc/sfa/trusted_roots and /var/lib/sfa"
+        self.run_in_guest("rm -rf /etc/sfa/trusted_roots /var/lib/sfa/authorities")
+        return True
+
     def sfa_plcclean(self):
         "cleans the PLC entries that were created as a side effect of running the script"
         # ignore result 
         sfa_spec=self.plc_spec['sfa']
 
-        for sfa_slice_spec in sfa_spec['sfa_slice_specs']:
-            login_base=sfa_slice_spec['login_base']
+        for auth_sfa_spec in sfa_spec['auth_sfa_specs']:
+            login_base=auth_sfa_spec['login_base']
             try: self.apiserver.DeleteSite (self.auth_root(),login_base)
             except: print "Site %s already absent from PLC db"%login_base
 
-            for key in ['piuser','regularuser']:
-                username="%s@%s"%(sfa_slice_spec[key],sfa_slice_spec['domain'])
+            for spec_name in ['pi_spec','user_spec']:
+                user_spec=auth_sfa_spec[spec_name]
+                username=user_spec['email']
                 try: self.apiserver.DeletePerson(self.auth_root(),username)
                 except: 
                     # this in fact is expected as sites delete their members
@@ -1298,7 +1383,7 @@ class TestPlc:
         if not os.path.isdir(dirname):
             utils.system("mkdir -p %s"%dirname)
         if not os.path.isdir(dirname):
-            raise "Cannot create config dir for plc %s"%self.name()
+            raise Exception,"Cannot create config dir for plc %s"%self.name()
         return dirname
 
     def conffile(self,filename):
@@ -1382,10 +1467,10 @@ class TestPlc:
             and  self.test_ssh.copy_abs(reg_fname,'/%s/etc/sfa/registries.xml'%self.vm_root_in_host())==0
 
     def sfa_import(self):
-        "sfa-import-plc"
+        "use sfaadmin to import from plc"
         auth=self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH']
         return \
-            self.run_in_guest('sfaadmin.py reg import_registry')==0 
+            self.run_in_guest('sfaadmin reg import_registry')==0 
 # not needed anymore
 #        self.run_in_guest('cp /etc/sfa/authorities/%s/%s.pkey /etc/sfa/authorities/server.key'%(auth,auth))
 
@@ -1399,9 +1484,9 @@ class TestPlc:
             utils.header("DRY RUN - skipping step")
             return True
         sfa_spec=self.plc_spec['sfa']
-        # cannot use slice_sfa_mapper to pass dir_name
-        for slice_spec in self.plc_spec['sfa']['sfa_slice_specs']:
-            test_slice=TestSliceSfa(self,slice_spec)
+        # cannot use auth_sfa_mapper to pass dir_name
+        for slice_spec in self.plc_spec['sfa']['auth_sfa_specs']:
+            test_slice=TestAuthSfa(self,slice_spec)
             dir_basename=os.path.basename(test_slice.sfi_path())
             dir_name=self.confsubdir("dot-sfi/%s"%dir_basename,clean=True,dry_run=self.options.dry_run)
             test_slice.sfi_configure(dir_name)
@@ -1419,35 +1504,37 @@ class TestPlc:
         self.run_in_guest("rm -rf /root/sfi")
         return True
 
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_add_site (self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_add_pi (self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_add_user(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_update_user(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_add_slice(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
+    def sfa_renew_slice(self): pass
+    @auth_sfa_mapper
     def sfa_discover(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_create_slice(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_check_slice_plc(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_update_slice(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfi_list(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfi_show(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfi_slices(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def ssh_slice_sfa(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_delete_user(self): pass
-    @slice_sfa_mapper
+    @auth_sfa_mapper
     def sfa_delete_slice(self): pass
 
     def sfa_stop(self):
@@ -1534,7 +1621,7 @@ class TestPlc:
             test_site = TestSite (self,site_spec)
             for node_spec in site_spec['nodes']:
                 test_node=TestNode(self,test_site,node_spec)
-                test_ssh = TestSsh (test_node.name(),key="keys/key1.rsa")
+                test_ssh = TestSsh (test_node.name(),key="keys/key_admin.rsa")
                 command = test_ssh.actual_command("tar -C /var/log -cf - .")
                 command = command + "| tar -C logs/node.var-log.%s -xf -"%test_node.name()
                 utils.system("mkdir -p logs/node.var-log.%s"%test_node.name())