X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=system%2FTestPlc.py;h=5d6268bc9882beb41bf1f47b5882e11f6b6d6c99;hb=29add6ace27b5debf81984cb9df0c1e4ced7942f;hp=48f938ceef96d50a9953084e8468ae8c61d2f5ba;hpb=12d5d34e42cbabd96f2104e9a98eeebffb521a3d;p=tests.git diff --git a/system/TestPlc.py b/system/TestPlc.py index 48f938c..5d6268b 100644 --- a/system/TestPlc.py +++ b/system/TestPlc.py @@ -1,17 +1,18 @@ # Thierry Parmentelat # Copyright (C) 2010 INRIA # -import os, os.path -import datetime -import time import sys +import time +import os, os.path import traceback -from types import StringTypes import socket +from datetime import datetime, timedelta +from types import StringTypes import utils +from Completer import Completer, CompleterTask from TestSite import TestSite -from TestNode import TestNode +from TestNode import TestNode, CompleterTaskNodeSsh from TestUser import TestUser from TestKey import TestKey from TestSlice import TestSlice @@ -20,6 +21,9 @@ from TestBoxQemu import TestBoxQemu from TestSsh import TestSsh from TestApiserver import TestApiserver from TestAuthSfa import TestAuthSfa +from PlcapiUrlScanner import PlcapiUrlScanner + +has_sfa_cache_filename="sfa-cache" # step methods must take (self) and return a boolean (options is a member of the class) @@ -38,18 +42,20 @@ def standby_generic (func): return actual def node_mapper (method): - def actual(self,*args, **kwds): + def map_on_nodes(self,*args, **kwds): overall=True node_method = TestNode.__dict__[method.__name__] for test_node in self.all_nodes(): if not node_method(test_node, *args, **kwds): overall=False return overall + # maintain __name__ for ignore_result + map_on_nodes.__name__=method.__name__ # restore the doc text - actual.__doc__=TestNode.__dict__[method.__name__].__doc__ - return actual + map_on_nodes.__doc__=TestNode.__dict__[method.__name__].__doc__ + return map_on_nodes def slice_mapper (method): - def actual(self): + def map_on_slices(self): overall=True slice_method = TestSlice.__dict__[method.__name__] for slice_spec in self.plc_spec['slices']: @@ -58,22 +64,71 @@ def slice_mapper (method): test_slice=TestSlice(self,test_site,slice_spec) if not slice_method(test_slice,self.options): overall=False return overall + # maintain __name__ for ignore_result + map_on_slices.__name__=method.__name__ # restore the doc text - actual.__doc__=TestSlice.__dict__[method.__name__].__doc__ - return actual + map_on_slices.__doc__=TestSlice.__dict__[method.__name__].__doc__ + return map_on_slices + +# run a step but return True so that we can go on +def ignore_result (method): + def ignoring (self): + # ssh_slice_ignore->ssh_slice + ref_name=method.__name__.replace('_ignore','').replace('force_','') + ref_method=TestPlc.__dict__[ref_name] + result=ref_method(self) + print "Actual (but ignored) result for %(ref_name)s is %(result)s"%locals() + return Ignored (result) + name=method.__name__.replace('_ignore','').replace('force_','') + ignoring.__name__=name + ignoring.__doc__="ignored version of " + name + return ignoring + +# a variant that expects the TestSlice method to return a list of CompleterTasks that +# are then merged into a single Completer run to avoid wating for all the slices +# esp. useful when a test fails of course +# because we need to pass arguments we use a class instead.. +class slice_mapper__tasks (object): + # could not get this to work with named arguments + def __init__ (self,timeout_minutes,silent_minutes,period_seconds): + self.timeout=timedelta(minutes=timeout_minutes) + self.silent=timedelta(minutes=silent_minutes) + self.period=timedelta(seconds=period_seconds) + def __call__ (self, method): + decorator_self=self + # compute augmented method name + method_name = method.__name__ + "__tasks" + # locate in TestSlice + slice_method = TestSlice.__dict__[ method_name ] + def wrappee(self): + tasks=[] + for slice_spec in self.plc_spec['slices']: + site_spec = self.locate_site (slice_spec['sitename']) + test_site = TestSite(self,site_spec) + test_slice=TestSlice(self,test_site,slice_spec) + tasks += slice_method (test_slice, self.options) + return Completer (tasks).run (decorator_self.timeout, decorator_self.silent, decorator_self.period) + # restore the doc text from the TestSlice method even if a bit odd + wrappee.__name__ = method.__name__ + wrappee.__doc__ = slice_method.__doc__ + return wrappee def auth_sfa_mapper (method): def actual(self): overall=True - slice_method = TestAuthSfa.__dict__[method.__name__] - for slice_spec in self.plc_spec['sfa']['auth_sfa_specs']: - test_slice=TestAuthSfa(self,slice_spec) - if not slice_method(test_slice,self.options): overall=False + auth_method = TestAuthSfa.__dict__[method.__name__] + for auth_spec in self.plc_spec['sfa']['auth_sfa_specs']: + test_auth=TestAuthSfa(self,auth_spec) + if not auth_method(test_auth,self.options): overall=False return overall # restore the doc text actual.__doc__=TestAuthSfa.__dict__[method.__name__].__doc__ return actual +class Ignored: + def __init__ (self,result): + self.result=result + SEP='' SEPSFA='' @@ -81,29 +136,41 @@ class TestPlc: default_steps = [ 'show', SEP, - 'vs_delete','timestamp_vs','vs_create', SEP, + 'plcvm_delete','plcvm_timestamp','plcvm_create', SEP, 'plc_install', 'plc_configure', 'plc_start', SEP, - 'keys_fetch', 'keys_store', 'keys_clear_known_hosts', 'speed_up_slices', SEP, + 'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP, + 'plcapi_urls','speed_up_slices', SEP, 'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP, +# slices created under plcsh interactively seem to be fine but these ones don't have the tags +# keep this our of the way for now + 'check_vsys_defaults_ignore', SEP, +# run this first off so it's easier to re-run on another qemu box + 'qemu_kill_mine', SEP, 'nodestate_reinstall', 'qemu_local_init','bootcd', 'qemu_local_config', SEP, - 'qemu_export', 'qemu_kill_mine', 'qemu_start', 'timestamp_qemu', SEP, + 'qemu_clean_mine', 'qemu_export', 'qemu_start', 'qemu_timestamp', SEP, 'sfa_install_all', 'sfa_configure', 'cross_sfa_configure', 'sfa_start', 'sfa_import', SEPSFA, - 'sfi_configure@1', 'sfa_add_site@1','sfa_add_pi@1', SEPSFA, - 'sfa_add_user@1', 'sfa_update_user@1', 'sfa_add_slice@1', 'sfa_discover@1', SEPSFA, - 'sfa_create_slice@1', 'sfa_check_slice_plc@1', 'sfa_update_slice@1', SEPSFA, - 'sfi_list@1', 'sfi_show@1', 'sfi_slices@1', 'sfa_utest@1', SEPSFA, + 'sfi_configure@1', 'sfa_register_site@1','sfa_register_pi@1', SEPSFA, + 'sfa_register_user@1', 'sfa_update_user@1', 'sfa_register_slice@1', 'sfa_renew_slice@1', SEPSFA, + 'sfa_remove_user_from_slice@1','sfi_show_slice_researchers@1', + 'sfa_insert_user_in_slice@1','sfi_show_slice_researchers@1', SEPSFA, + 'sfa_discover@1', 'sfa_create_slice@1', 'sfa_check_slice_plc@1', 'sfa_update_slice@1', SEPSFA, + 'sfi_list@1', 'sfi_show_site@1', 'sfa_utest@1', SEPSFA, # we used to run plcsh_stress_test, and then ssh_node_debug and ssh_node_boot # but as the stress test might take a while, we sometimes missed the debug mode.. - 'ssh_node_debug@1', 'plcsh_stress_test@1', SEP, - 'ssh_node_boot@1', 'ssh_slice', 'check_initscripts', SEP, + 'probe_kvm_iptables', + 'ping_node', 'ssh_node_debug', 'plcsh_stress_test@1', SEP, + 'ssh_node_boot', 'node_bmlogs', 'ssh_slice', 'ssh_slice_basics', 'check_initscripts_ignore', SEP, 'ssh_slice_sfa@1', 'sfa_delete_slice@1', 'sfa_delete_user@1', SEPSFA, 'cross_check_tcp@1', 'check_system_slice', SEP, - 'empty_slices', 'ssh_slice_off', 'fill_slices', SEP, - 'force_gather_logs', SEP, + # check slices are turned off properly + 'empty_slices', 'ssh_slice_off', 'slice_fs_deleted_ignore', SEP, + # check they are properly re-created with the same name + 'fill_slices', 'ssh_slice_again', SEP, + 'gather_logs_force', SEP, ] other_steps = [ - 'export', 'show_boxes', SEP, - 'check_hooks', 'plc_stop', 'vs_start', 'vs_stop', SEP, + 'export', 'show_boxes', 'super_speed_up_slices', SEP, + 'check_hooks', 'plc_stop', 'plcvm_start', 'plcvm_stop', SEP, 'delete_initscripts', 'delete_nodegroups','delete_all_sites', SEP, 'delete_sites', 'delete_nodes', 'delete_slices', 'keys_clean', SEP, 'delete_leases', 'list_leases', SEP, @@ -114,8 +181,8 @@ class TestPlc: 'sfa_plcclean', 'sfa_dbclean', 'sfa_stop','sfa_uninstall', 'sfi_clean', SEPSFA, 'plc_db_dump' , 'plc_db_restore', SEP, 'check_netflow','check_drl', SEP, - 'debug_nodemanager', SEP, - 'standby_1_through_20',SEP, + 'debug_nodemanager', 'slice_fs_present', SEP, + 'standby_1_through_20','yes','no',SEP, ] @staticmethod @@ -130,12 +197,23 @@ class TestPlc: # this was originally for centos5 but is still valid # for up to f12 as recent SFAs with sqlalchemy won't build before f14 @staticmethod - def check_whether_build_has_sfa (rpms_url): - utils.header ("Checking if build provides SFA package...") + def _has_sfa_cached (rpms_url): + if os.path.isfile(has_sfa_cache_filename): + cached=file(has_sfa_cache_filename).read()=="yes" + utils.header("build provides SFA (cached):%s"%cached) + return cached # warning, we're now building 'sface' so let's be a bit more picky - retcod=os.system ("curl --silent %s/ | grep -q sfa-"%rpms_url) # full builds are expected to return with 0 here - if retcod==0: + utils.header ("Checking if build provides SFA package...") + retcod=os.system ("curl --silent %s/ | grep -q sfa-"%rpms_url)==0 + encoded='yes' if retcod else 'no' + file(has_sfa_cache_filename,'w').write(encoded) + return retcod + + @staticmethod + def check_whether_build_has_sfa (rpms_url): + has_sfa=TestPlc._has_sfa_cached(rpms_url) + if has_sfa: utils.header("build does provide SFA") else: # move all steps containing 'sfa' from default_steps to other_steps @@ -152,6 +230,8 @@ class TestPlc: self.vservername=plc_spec['vservername'] self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip'] self.apiserver=TestApiserver(self.url,options.dry_run) + (self.ssh_node_boot_timeout,self.ssh_node_boot_silent)=plc_spec['ssh_node_boot_timers'] + (self.ssh_node_debug_timeout,self.ssh_node_debug_silent)=plc_spec['ssh_node_debug_timers'] def has_addresses_api (self): return self.apiserver.has_method('AddIpAddress') @@ -171,52 +251,48 @@ class TestPlc: def connect (self): pass - def actual_command_in_guest (self,command): - return self.test_ssh.actual_command(self.host_to_guest(command)) + def actual_command_in_guest (self,command, backslash=False): + raw1=self.host_to_guest(command) + raw2=self.test_ssh.actual_command(raw1,dry_run=self.options.dry_run, backslash=backslash) + return raw2 def start_guest (self): - return utils.system(self.test_ssh.actual_command(self.start_guest_in_host())) + return utils.system(self.test_ssh.actual_command(self.start_guest_in_host(),dry_run=self.options.dry_run)) def stop_guest (self): - return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host())) + return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host(),dry_run=self.options.dry_run)) - def run_in_guest (self,command): - return utils.system(self.actual_command_in_guest(command)) + def run_in_guest (self,command,backslash=False): + raw=self.actual_command_in_guest(command,backslash) + return utils.system(raw) def run_in_host (self,command): - return self.test_ssh.run_in_buildname(command) + return self.test_ssh.run_in_buildname(command, dry_run=self.options.dry_run) + # backslashing turned out so awful at some point that I've turned off auto-backslashing + # see e.g. plc_start esp. the version for f14 #command gets run in the plc's vm def host_to_guest(self,command): - if self.options.plcs_use_lxc: - return "ssh -o StrictHostKeyChecking=no %s %s"%(self.vserverip,command) + # f14 still needs some extra help + if self.options.fcdistro == 'f14': + raw="virsh -c lxc:/// lxc-enter-namespace %s -- /usr/bin/env PATH=/bin:/sbin:/usr/bin:/usr/sbin %s" %(self.vservername,command) else: - return "vserver %s exec %s"%(self.vservername,command) + raw="virsh -c lxc:/// lxc-enter-namespace %s -- /usr/bin/env %s" %(self.vservername,command) + return raw + # this /vservers thing is legacy... def vm_root_in_host(self): - if self.options.plcs_use_lxc: - return "/var/lib/lxc/%s/rootfs/"%(self.vservername) - else: - return "/vservers/%s"%(self.vservername) + return "/vservers/%s/"%(self.vservername) def vm_timestamp_path (self): - if self.options.plcs_use_lxc: - return "/var/lib/lxc/%s/%s.timestamp"%(self.vservername,self.vservername) - else: - return "/vservers/%s.timestamp"%(self.vservername) + return "/vservers/%s/%s.timestamp"%(self.vservername,self.vservername) #start/stop the vserver def start_guest_in_host(self): - if self.options.plcs_use_lxc: - return "lxc-start --daemon --name=%s"%(self.vservername) - else: - return "vserver %s start"%(self.vservername) + return "virsh -c lxc:/// start %s"%(self.vservername) def stop_guest_in_host(self): - if self.options.plcs_use_lxc: - return "lxc-stop --name=%s"%(self.vservername) - else: - return "vserver %s stop"%(self.vservername) + return "virsh -c lxc:/// destroy %s"%(self.vservername) # xxx quick n dirty def run_in_guest_piped (self,local,remote): @@ -237,9 +313,9 @@ class TestPlc: return self.yum_check_installed (rpms) def auth_root (self): - return {'Username':self.plc_spec['PLC_ROOT_USER'], + return {'Username':self.plc_spec['settings']['PLC_ROOT_USER'], 'AuthMethod':'password', - 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'], + 'AuthString':self.plc_spec['settings']['PLC_ROOT_PASSWORD'], 'Role' : self.plc_spec['role'] } def locate_site (self,sitename): @@ -321,7 +397,7 @@ class TestPlc: return self.locate_sliver_obj(nodename,slicename) # all different hostboxes used in this plc - def gather_hostBoxes(self): + def get_BoxNodes(self): # maps on sites and nodes, return [ (host_box,test_node) ] tuples=[] for site_spec in self.plc_spec['sites']: @@ -342,7 +418,7 @@ class TestPlc: # a step for checking this stuff def show_boxes (self): 'print summary of nodes location' - for (box,nodes) in self.gather_hostBoxes().iteritems(): + for (box,nodes) in self.get_BoxNodes().iteritems(): print box,":"," + ".join( [ node.name() for node in nodes ] ) return True @@ -350,7 +426,7 @@ class TestPlc: def qemu_kill_all(self): 'kill all qemu instances on the qemu boxes involved by this setup' # this is the brute force version, kill all qemus on that host box - for (box,nodes) in self.gather_hostBoxes().iteritems(): + for (box,nodes) in self.get_BoxNodes().iteritems(): # pass the first nodename, as we don't push template-qemu on testboxes nodedir=nodes[0].nodedir() TestBoxQemu(box,self.options.buildname).qemu_kill_all(nodedir) @@ -359,24 +435,33 @@ class TestPlc: # make this a valid step def qemu_list_all(self): 'list all qemu instances on the qemu boxes involved by this setup' - for (box,nodes) in self.gather_hostBoxes().iteritems(): + for (box,nodes) in self.get_BoxNodes().iteritems(): # this is the brute force version, kill all qemus on that host box TestBoxQemu(box,self.options.buildname).qemu_list_all() return True - # kill only the right qemus + # kill only the qemus related to this test def qemu_list_mine(self): 'list qemu instances for our nodes' - for (box,nodes) in self.gather_hostBoxes().iteritems(): + for (box,nodes) in self.get_BoxNodes().iteritems(): # the fine-grain version for node in nodes: node.list_qemu() return True + # kill only the qemus related to this test + def qemu_clean_mine(self): + 'cleanup (rm -rf) qemu instances for our nodes' + for (box,nodes) in self.get_BoxNodes().iteritems(): + # the fine-grain version + for node in nodes: + node.qemu_clean() + return True + # kill only the right qemus def qemu_kill_mine(self): 'kill the qemu instances for our nodes' - for (box,nodes) in self.gather_hostBoxes().iteritems(): + for (box,nodes) in self.get_BoxNodes().iteritems(): # the fine-grain version for node in nodes: node.kill_qemu() @@ -389,16 +474,20 @@ class TestPlc: self.show_pass (2) return True + # uggly hack to make sure 'run export' only reports about the 1st plc + # to avoid confusion - also we use 'inri_slice1' in various aliases.. + exported_id=1 def export (self): "print cut'n paste-able stuff to export env variables to your shell" # guess local domain from hostname + if TestPlc.exported_id>1: + print "export GUESTHOSTNAME%d=%s"%(TestPlc.exported_id,self.plc_spec['vservername']) + return True + TestPlc.exported_id+=1 domain=socket.gethostname().split('.',1)[1] fqdn="%s.%s"%(self.plc_spec['host_box'],domain) print "export BUILD=%s"%self.options.buildname - if self.options.plcs_use_lxc: - print "export PLCHOSTLXC=%s"%fqdn - else: - print "export PLCHOSTVS=%s"%fqdn + print "export PLCHOSTLXC=%s"%fqdn print "export GUESTNAME=%s"%self.plc_spec['vservername'] vplcname=self.plc_spec['vservername'].split('-')[-1] print "export GUESTHOSTNAME=%s.%s"%(vplcname,domain) @@ -429,7 +518,7 @@ class TestPlc: for key in val: self.display_key_spec (key) elif passno == 1: - if key not in ['sites','initscripts','slices','keys', 'sfa']: + if key not in ['sites','initscripts','slices','keys']: print '+ ',key,':',val def display_site_spec (self,site): @@ -501,7 +590,7 @@ class TestPlc: print '+ MyPLC',plc_spec['name'] # WARNING this would not be right for lxc-based PLC's - should be harmless though print '+\tvserver address = root@%s:/vservers/%s'%(plc_spec['host_box'],plc_spec['vservername']) - print '+\tIP = %s/%s'%(plc_spec['PLC_API_HOST'],plc_spec['vserverip']) + print '+\tIP = %s/%s'%(plc_spec['settings']['PLC_API_HOST'],plc_spec['vserverip']) for site_spec in plc_spec['sites']: for node_spec in site_spec['nodes']: TestPlc.display_mapping_node(node_spec) @@ -514,7 +603,7 @@ class TestPlc: # write a timestamp in /vservers/<>.timestamp # cannot be inside the vserver, that causes vserver .. build to cough - def timestamp_vs (self): + def plcvm_timestamp (self): "Create a timestamp to remember creation date for this plc" now=int(time.time()) # TODO-lxc check this one @@ -526,23 +615,20 @@ class TestPlc: # this is called inconditionnally at the beginning of the test sequence # just in case this is a rerun, so if the vm is not running it's fine - def vs_delete(self): + def plcvm_delete(self): "vserver delete the test myplc" stamp_path=self.vm_timestamp_path() self.run_in_host("rm -f %s"%stamp_path) - if self.options.plcs_use_lxc: - self.run_in_host("lxc-stop --name %s"%self.vservername) - self.run_in_host("lxc-destroy --name %s"%self.vservername) - return True - else: - self.run_in_host("vserver --silent %s delete"%self.vservername) - return True + self.run_in_host("virsh -c lxc:// destroy %s"%self.vservername) + self.run_in_host("virsh -c lxc:// undefine %s"%self.vservername) + self.run_in_host("rm -fr /vservers/%s"%self.vservername) + return True ### install # historically the build was being fetched by the tests # now the build pushes itself as a subdir of the tests workdir # so that the tests do not have to worry about extracting the build (svn, git, or whatever) - def vs_create (self): + def plcvm_create (self): "vserver creation (no install done)" # push the local build/ dir to the testplc box if self.is_local(): @@ -562,25 +648,24 @@ class TestPlc: repo_url = self.options.arch_rpms_url for level in [ 'arch' ]: repo_url = os.path.dirname(repo_url) - # pass the vbuild-nightly options to vtest-init-vserver - test_env_options="" - test_env_options += " -p %s"%self.options.personality - test_env_options += " -d %s"%self.options.pldistro - test_env_options += " -f %s"%self.options.fcdistro - if self.options.plcs_use_lxc: - script="vtest-init-lxc.sh" - else: - script="vtest-init-vserver.sh" + + # invoke initvm (drop support for vs) + script="lbuild-initvm.sh" + script_options="" + # pass the vbuild-nightly options to [lv]test-initvm + script_options += " -p %s"%self.options.personality + script_options += " -d %s"%self.options.pldistro + script_options += " -f %s"%self.options.fcdistro + script_options += " -r %s"%repo_url vserver_name = self.vservername - vserver_options="--netdev eth0 --interface %s"%self.vserverip try: vserver_hostname=socket.gethostbyaddr(self.vserverip)[0] - vserver_options += " --hostname %s"%vserver_hostname + script_options += " -n %s"%vserver_hostname except: print "Cannot reverse lookup %s"%self.vserverip print "This is considered fatal, as this might pollute the test results" return False - create_vserver="%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s"%locals() + create_vserver="%(build_dir)s/%(script)s %(script_options)s %(vserver_name)s"%locals() return self.run_in_host(create_vserver) == 0 ### install_rpm @@ -608,30 +693,18 @@ class TestPlc: pkgs_string=" ".join(pkgs_list) return self.yum_install (pkgs_list) + ### + def mod_python(self): + """yum install mod_python, useful on f18 and above so as to avoid broken wsgi""" + return self.yum_install ( [ 'mod_python' ] ) + ### def plc_configure(self): "run plc-config-tty" tmpname='%s.plc-config-tty'%(self.name()) fileconf=open(tmpname,'w') - for var in [ 'PLC_NAME', - 'PLC_ROOT_USER', - 'PLC_ROOT_PASSWORD', - 'PLC_SLICE_PREFIX', - 'PLC_MAIL_ENABLED', - 'PLC_MAIL_SUPPORT_ADDRESS', - 'PLC_DB_HOST', -# 'PLC_DB_PASSWORD', - # Above line was added for integrating SFA Testing - 'PLC_API_HOST', - 'PLC_WWW_HOST', - 'PLC_BOOT_HOST', - 'PLC_NET_DNS1', - 'PLC_NET_DNS2', - 'PLC_RESERVATION_GRANULARITY', - 'PLC_OMF_ENABLED', - 'PLC_OMF_XMPP_SERVER', - ]: - fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var])) + for (var,value) in self.plc_spec['settings'].iteritems(): + fileconf.write ('e %s\n%s\n'%(var,value)) fileconf.write('w\n') fileconf.write('q\n') fileconf.close() @@ -640,22 +713,40 @@ class TestPlc: utils.system('rm %s'%tmpname) return True +# f14 is a bit odd in this respect, although this worked fine in guests up to f18 +# however using a vplc guest under f20 requires this trick +# the symptom is this: service plc start +# Starting plc (via systemctl): Failed to get D-Bus connection: \ +# Failed to connect to socket /org/freedesktop/systemd1/private: Connection refused +# weird thing is the doc says f14 uses upstart by default and not systemd +# so this sounds kind of harmless + def start_service (self,service): return self.start_stop_service (service,'start') + def stop_service (self,service): return self.start_stop_service (service,'stop') + + def start_stop_service (self, service,start_or_stop): + "utility to start/stop a service with the special trick for f14" + if self.options.fcdistro != 'f14': + return self.run_in_guest ("service %s %s"%(service,start_or_stop))==0 + else: + # patch /sbin/service so it does not reset environment + self.run_in_guest ('sed -i -e \\"s,env -i,env,\\" /sbin/service') + # this is because our own scripts in turn call service + return self.run_in_guest("SYSTEMCTL_SKIP_REDIRECT=true service %s %s"%(service,start_or_stop))==0 + def plc_start(self): "service plc start" - self.run_in_guest('service plc start') - return True + return self.start_service ('plc') def plc_stop(self): "service plc stop" - self.run_in_guest('service plc stop') - return True - - def vs_start (self): + return self.stop_service ('plc') + + def plcvm_start (self): "start the PLC vserver" self.start_guest() return True - def vs_stop (self): + def plcvm_stop (self): "stop the PLC vserver" self.stop_guest() return True @@ -715,10 +806,10 @@ class TestPlc: def delete_all_sites (self): "Delete all sites in PLC, and related objects" print 'auth_root',self.auth_root() - sites = self.apiserver.GetSites(self.auth_root(), {}, ['site_id']) + sites = self.apiserver.GetSites(self.auth_root(), {}, ['site_id','login_base']) for site in sites: # keep automatic site - otherwise we shoot in our own foot, root_auth is not valid anymore - if site['login_base']==self.plc_spec['PLC_SLICE_PREFIX']: continue + if site['login_base']==self.plc_spec['settings']['PLC_SLICE_PREFIX']: continue site_id=site['site_id'] print 'Deleting site_id',site_id self.apiserver.DeleteSite(self.auth_root(),site_id) @@ -910,58 +1001,73 @@ class TestPlc: return res # silent_minutes : during the first minutes nothing gets printed - def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15): + def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period_seconds=15): if self.options.dry_run: print 'dry_run' return True - # compute timeout - timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes) - graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes) + + class CompleterTaskBootState (CompleterTask): + def __init__ (self, test_plc,hostname): + self.test_plc=test_plc + self.hostname=hostname + self.last_boot_state='undef' + def actual_run (self): + try: + node = self.test_plc.apiserver.GetNodes(self.test_plc.auth_root(), [ self.hostname ], + ['boot_state'])[0] + self.last_boot_state = node['boot_state'] + return self.last_boot_state == target_boot_state + except: + return False + def message (self): + return "CompleterTaskBootState with node %s"%self.hostname + def failure_epilogue (self): + print "node %s in state %s - expected %s"%(self.hostname,self.last_boot_state,target_boot_state) + + timeout = timedelta(minutes=timeout_minutes) + graceout = timedelta(minutes=silent_minutes) + period = timedelta(seconds=period_seconds) # the nodes that haven't checked yet - start with a full list and shrink over time - tocheck = self.all_hostnames() - utils.header("checking nodes %r"%tocheck) - # create a dict hostname -> status - status = dict ( [ (hostname,'undef') for hostname in tocheck ] ) - while tocheck: - # get their status - tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] ) - # update status - for array in tocheck_status: - hostname=array['hostname'] - boot_state=array['boot_state'] - if boot_state == target_boot_state: - utils.header ("%s has reached the %s state"%(hostname,target_boot_state)) - else: - # if it's a real node, never mind - (site_spec,node_spec)=self.locate_hostname(hostname) - if TestNode.is_real_model(node_spec['node_fields']['model']): - utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state)) - # let's cheat - boot_state = target_boot_state - elif datetime.datetime.now() > graceout: - utils.header ("%s still in '%s' state"%(hostname,boot_state)) - graceout=datetime.datetime.now()+datetime.timedelta(1) - status[hostname] = boot_state - # refresh tocheck - tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ] - if not tocheck: - return True - if datetime.datetime.now() > timeout: - for hostname in tocheck: - utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname])) - return False - # otherwise, sleep for a while - time.sleep(period) - # only useful in empty plcs - return True + utils.header("checking nodes boot state (expected %s)"%target_boot_state) + tasks = [ CompleterTaskBootState (self,hostname) \ + for (hostname,_) in self.all_node_infos() ] + return Completer (tasks).run (timeout, graceout, period) def nodes_booted(self): return self.nodes_check_boot_state('boot',timeout_minutes=30,silent_minutes=28) - def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=15): - # compute timeout - timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes) - graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes) + def probe_kvm_iptables (self): + (_,kvmbox) = self.all_node_infos()[0] + TestSsh(kvmbox).run("iptables-save") + return True + + # probing nodes + def check_nodes_ping(self,timeout_seconds=120,period_seconds=10): + class CompleterTaskPingNode (CompleterTask): + def __init__ (self, hostname): + self.hostname=hostname + def run(self,silent): + command="ping -c 1 -w 1 %s >& /dev/null"%self.hostname + return utils.system (command, silent=silent)==0 + def failure_epilogue (self): + print "Cannot ping node with name %s"%self.hostname + timeout=timedelta (seconds=timeout_seconds) + graceout=timeout + period=timedelta (seconds=period_seconds) + node_infos = self.all_node_infos() + tasks = [ CompleterTaskPingNode (h) for (h,_) in node_infos ] + return Completer (tasks).run (timeout, graceout, period) + + # ping node before we try to reach ssh, helpful for troubleshooting failing bootCDs + def ping_node (self): + "Ping nodes" + return self.check_nodes_ping () + + def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period_seconds=15): + # various delays + timeout = timedelta(minutes=timeout_minutes) + graceout = timedelta(minutes=silent_minutes) + period = timedelta(seconds=period_seconds) vservername=self.vservername if debug: message="debug" @@ -969,47 +1075,27 @@ class TestPlc: else: message="boot" local_key = "keys/key_admin.rsa" + utils.header("checking ssh access to nodes (expected in %s mode)"%message) node_infos = self.all_node_infos() - utils.header("checking ssh access (expected in %s mode) to nodes:"%message) - for (nodename,qemuname) in node_infos: - utils.header("hostname=%s -- qemubox=%s"%(nodename,qemuname)) - utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\ - (timeout_minutes,silent_minutes,period)) - while node_infos: - for node_info in node_infos: - (hostname,qemuname) = node_info - # try to run 'hostname' in the node - command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a") - # don't spam logs - show the command only after the grace period - success = utils.system ( command, silent=datetime.datetime.now() < graceout) - if success==0: - utils.header('Successfully entered root@%s (%s)'%(hostname,message)) - # refresh node_infos - node_infos.remove(node_info) - else: - # we will have tried real nodes once, in case they're up - but if not, just skip - (site_spec,node_spec)=self.locate_hostname(hostname) - if TestNode.is_real_model(node_spec['node_fields']['model']): - utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname) - node_infos.remove(node_info) - if not node_infos: - return True - if datetime.datetime.now() > timeout: - for (hostname,qemuname) in node_infos: - utils.header("FAILURE to ssh into %s (on %s)"%(hostname,qemuname)) - return False - # otherwise, sleep for a while - time.sleep(period) - # only useful in empty plcs - return True + tasks = [ CompleterTaskNodeSsh (nodename, qemuname, local_key, boot_state=message) \ + for (nodename,qemuname) in node_infos ] + return Completer (tasks).run (timeout, graceout, period) def ssh_node_debug(self): "Tries to ssh into nodes in debug mode with the debug ssh key" - return self.check_nodes_ssh(debug=True,timeout_minutes=10,silent_minutes=8) + return self.check_nodes_ssh(debug=True, + timeout_minutes=self.ssh_node_debug_timeout, + silent_minutes=self.ssh_node_debug_silent) def ssh_node_boot(self): "Tries to ssh into nodes in production mode with the root ssh key" - return self.check_nodes_ssh(debug=False,timeout_minutes=40,silent_minutes=38) + return self.check_nodes_ssh(debug=False, + timeout_minutes=self.ssh_node_boot_timeout, + silent_minutes=self.ssh_node_boot_silent) + + def node_bmlogs(self): + "Checks that there's a non-empty dir. /var/log/bm/raw" + return utils.system(self.actual_command_in_guest("ls /var/log/bm/raw"))==0 @node_mapper def qemu_local_init (self): pass @@ -1040,21 +1126,33 @@ class TestPlc: ### initscripts def do_check_initscripts(self): - overall = True + class CompleterTaskInitscript (CompleterTask): + def __init__ (self, test_sliver, stamp): + self.test_sliver=test_sliver + self.stamp=stamp + def actual_run (self): + return self.test_sliver.check_initscript_stamp (self.stamp) + def message (self): + return "initscript checker for %s"%self.test_sliver.name() + def failure_epilogue (self): + print "initscript stamp %s not found in sliver %s"%(self.stamp,self.test_sliver.name()) + + tasks=[] for slice_spec in self.plc_spec['slices']: if not slice_spec.has_key('initscriptstamp'): continue stamp=slice_spec['initscriptstamp'] + slicename=slice_spec['slice_fields']['name'] for nodename in slice_spec['nodenames']: + print 'nodename',nodename,'slicename',slicename,'stamp',stamp (site,node) = self.locate_node (nodename) # xxx - passing the wrong site - probably harmless test_site = TestSite (self,site) test_slice = TestSlice (self,test_site,slice_spec) test_node = TestNode (self,test_site,node) test_sliver = TestSliver (self, test_node, test_slice) - if not test_sliver.check_initscript_stamp(stamp): - overall = False - return overall + tasks.append ( CompleterTaskInitscript (test_sliver, stamp)) + return Completer (tasks).run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10)) def check_initscripts(self): "check that the initscripts have triggered" @@ -1111,29 +1209,54 @@ class TestPlc: test_slice.create_slice() return True - @slice_mapper + @slice_mapper__tasks(20,10,15) def ssh_slice(self): pass - @slice_mapper + @slice_mapper__tasks(20,19,15) def ssh_slice_off (self): pass + @slice_mapper__tasks(1,1,15) + def slice_fs_present(self): pass + @slice_mapper__tasks(1,1,15) + def slice_fs_deleted(self): pass + + # use another name so we can exclude/ignore it from the tests on the nightly command line + def ssh_slice_again(self): return self.ssh_slice() + # note that simply doing ssh_slice_again=ssh_slice would kind of work too + # but for some reason the ignore-wrapping thing would not + + @slice_mapper + def ssh_slice_basics(self): pass + @slice_mapper + def check_vsys_defaults(self): pass @node_mapper def keys_clear_known_hosts (self): pass + def plcapi_urls (self): + return PlcapiUrlScanner (self.auth_root(),ip=self.vserverip).scan() + def speed_up_slices (self): - "tweak nodemanager settings on all nodes using a conf file" + "tweak nodemanager cycle (wait time) to 30+/-10 s" + return self._speed_up_slices (30,10) + def super_speed_up_slices (self): + "dev mode: tweak nodemanager cycle (wait time) to 5+/-1 s" + return self._speed_up_slices (5,1) + + def _speed_up_slices (self, p, r): # create the template on the server-side template="%s.nodemanager"%self.name() template_file = open (template,"w") - template_file.write('OPTIONS="-p 30 -r 11 -d"\n') + template_file.write('OPTIONS="-p %s -r %s -d"\n'%(p,r)) template_file.close() in_vm="/var/www/html/PlanetLabConf/nodemanager" remote="%s/%s"%(self.vm_root_in_host(),in_vm) self.test_ssh.copy_abs(template,remote) # Add a conf file - self.apiserver.AddConfFile (self.auth_root(), - {'dest':'/etc/sysconfig/nodemanager', - 'source':'PlanetLabConf/nodemanager', - 'postinstall_cmd':'service nm restart',}) + if not self.apiserver.GetConfFiles (self.auth_root(), + {'dest':'/etc/sysconfig/nodemanager'}): + self.apiserver.AddConfFile (self.auth_root(), + {'dest':'/etc/sysconfig/nodemanager', + 'source':'PlanetLabConf/nodemanager', + 'postinstall_cmd':'service nm restart',}) return True def debug_nodemanager (self): @@ -1151,7 +1274,7 @@ class TestPlc: def qemu_start (self) : pass @node_mapper - def timestamp_qemu (self) : pass + def qemu_timestamp (self) : pass # when a spec refers to a node possibly on another plc def locate_sliver_obj_cross (self, nodename, slicename, other_plcs): @@ -1175,13 +1298,18 @@ class TestPlc: port = spec['port'] # server side s_test_sliver = self.locate_sliver_obj_cross (spec['server_node'],spec['server_slice'],other_plcs) - if not s_test_sliver.run_tcp_server(port,timeout=10): + if not s_test_sliver.run_tcp_server(port,timeout=20): overall=False break # idem for the client side - c_test_sliver = self.locate_sliver_obj_cross (spec['server_node'],spec['client_slice'],other_plcs) - if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port): + c_test_sliver = self.locate_sliver_obj_cross (spec['client_node'],spec['client_slice'],other_plcs) + # use nodename from locatesd sliver, unless 'client_connect' is set + if 'client_connect' in spec: + destination = spec['client_connect'] + else: + destination=s_test_sliver.test_node.name() + if not c_test_sliver.run_tcp_client(destination,port): overall=False return overall @@ -1198,24 +1326,23 @@ class TestPlc: def check_drl (self): return self._check_system_slice ('drl') # we have the slices up already here, so it should not take too long - def _check_system_slice (self, slicename, timeout_minutes=5, period=15): - timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes) - test_nodes=self.all_nodes() - while test_nodes: - for test_node in test_nodes: - if test_node._check_system_slice (slicename,dry_run=self.options.dry_run): - utils.header ("ok") - test_nodes.remove(test_node) - else: - print '.', - if not test_nodes: - return True - if datetime.datetime.now () > timeout: - for test_node in test_nodes: - utils.header ("can't find system slice %s in %s"%(slicename,test_node.name())) - return False - time.sleep(period) - return True + def _check_system_slice (self, slicename, timeout_minutes=5, period_seconds=15): + class CompleterTaskSystemSlice (CompleterTask): + def __init__ (self, test_node, dry_run): + self.test_node=test_node + self.dry_run=dry_run + def actual_run (self): + return self.test_node._check_system_slice (slicename, dry_run=self.dry_run) + def message (self): + return "System slice %s @ %s"%(slicename, self.test_node.name()) + def failure_epilogue (self): + print "COULD not find system slice %s @ %s"%(slicename, self.test_node.name()) + timeout = timedelta(minutes=timeout_minutes) + silent = timedelta (0) + period = timedelta (seconds=period_seconds) + tasks = [ CompleterTaskSystemSlice (test_node, self.options.dry_run) \ + for test_node in self.all_nodes() ] + return Completer (tasks) . run (timeout, silent, period) def plcsh_stress_test (self): "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents" @@ -1281,10 +1408,15 @@ class TestPlc: def sfa_dbclean(self): "thoroughly wipes off the SFA database" - return self.run_in_guest("sfaadmin.py registry nuke")==0 or \ + return self.run_in_guest("sfaadmin reg nuke")==0 or \ self.run_in_guest("sfa-nuke.py")==0 or \ self.run_in_guest("sfa-nuke-plc.py")==0 + def sfa_fsclean(self): + "cleanup /etc/sfa/trusted_roots and /var/lib/sfa" + self.run_in_guest("rm -rf /etc/sfa/trusted_roots /var/lib/sfa/authorities") + return True + def sfa_plcclean(self): "cleans the PLC entries that were created as a side effect of running the script" # ignore result @@ -1342,7 +1474,7 @@ class TestPlc: if not os.path.isdir(dirname): utils.system("mkdir -p %s"%dirname) if not os.path.isdir(dirname): - raise "Cannot create config dir for plc %s"%self.name() + raise Exception,"Cannot create config dir for plc %s"%self.name() return dirname def conffile(self,filename): @@ -1366,30 +1498,13 @@ class TestPlc: "run sfa-config-tty" tmpname=self.conffile("sfa-config-tty") fileconf=open(tmpname,'w') - for var in [ 'SFA_REGISTRY_ROOT_AUTH', - 'SFA_INTERFACE_HRN', - 'SFA_REGISTRY_LEVEL1_AUTH', - 'SFA_REGISTRY_HOST', - 'SFA_AGGREGATE_HOST', - 'SFA_SM_HOST', - 'SFA_PLC_URL', - 'SFA_PLC_USER', - 'SFA_PLC_PASSWORD', - 'SFA_DB_HOST', - 'SFA_DB_USER', - 'SFA_DB_PASSWORD', - 'SFA_DB_NAME', - 'SFA_API_LOGLEVEL', - 'SFA_GENERIC_FLAVOUR', - 'SFA_AGGREGATE_ENABLED', - ]: - if self.plc_spec['sfa'].has_key(var): - fileconf.write ('e %s\n%s\n'%(var,self.plc_spec['sfa'][var])) - # the way plc_config handles booleans just sucks.. - for var in []: - val='false' - if self.plc_spec['sfa'][var]: val='true' - fileconf.write ('e %s\n%s\n'%(var,val)) + for (var,value) in self.plc_spec['sfa']['settings'].iteritems(): + fileconf.write ('e %s\n%s\n'%(var,value)) +# # the way plc_config handles booleans just sucks.. +# for var in []: +# val='false' +# if self.plc_spec['sfa'][var]: val='true' +# fileconf.write ('e %s\n%s\n'%(var,val)) fileconf.write('w\n') fileconf.write('R\n') fileconf.write('q\n') @@ -1401,11 +1516,11 @@ class TestPlc: def aggregate_xml_line(self): port=self.plc_spec['sfa']['neighbours-port'] return '' % \ - (self.vserverip,self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH'],port) + (self.vserverip,self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH'],port) def registry_xml_line(self): return '' % \ - (self.vserverip,self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH']) + (self.vserverip,self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH']) # a cross step that takes all other plcs in argument @@ -1426,16 +1541,14 @@ class TestPlc: and self.test_ssh.copy_abs(reg_fname,'/%s/etc/sfa/registries.xml'%self.vm_root_in_host())==0 def sfa_import(self): - "sfa-import-plc" - auth=self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH'] - return \ - self.run_in_guest('sfaadmin.py reg import_registry')==0 -# not needed anymore -# self.run_in_guest('cp /etc/sfa/authorities/%s/%s.pkey /etc/sfa/authorities/server.key'%(auth,auth)) + "use sfaadmin to import from plc" + auth=self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH'] + return self.run_in_guest('sfaadmin reg import_registry')==0 def sfa_start(self): "service sfa start" - return self.run_in_guest('service sfa start')==0 + return self.start_service('sfa') + def sfi_configure(self): "Create /root/sfi on the plc side for sfi client configuration" @@ -1464,15 +1577,17 @@ class TestPlc: return True @auth_sfa_mapper - def sfa_add_site (self): pass + def sfa_register_site (self): pass @auth_sfa_mapper - def sfa_add_pi (self): pass + def sfa_register_pi (self): pass @auth_sfa_mapper - def sfa_add_user(self): pass + def sfa_register_user(self): pass @auth_sfa_mapper def sfa_update_user(self): pass @auth_sfa_mapper - def sfa_add_slice(self): pass + def sfa_register_slice(self): pass + @auth_sfa_mapper + def sfa_renew_slice(self): pass @auth_sfa_mapper def sfa_discover(self): pass @auth_sfa_mapper @@ -1482,11 +1597,17 @@ class TestPlc: @auth_sfa_mapper def sfa_update_slice(self): pass @auth_sfa_mapper + def sfa_remove_user_from_slice(self): pass + @auth_sfa_mapper + def sfa_insert_user_in_slice(self): pass + @auth_sfa_mapper def sfi_list(self): pass @auth_sfa_mapper - def sfi_show(self): pass + def sfi_show_site(self): pass @auth_sfa_mapper - def sfi_slices(self): pass + def sfi_show_slice(self): pass + @auth_sfa_mapper + def sfi_show_slice_researchers(self): pass @auth_sfa_mapper def ssh_slice_sfa(self): pass @auth_sfa_mapper @@ -1496,8 +1617,7 @@ class TestPlc: def sfa_stop(self): "service sfa stop" - self.run_in_guest('service sfa stop')==0 - return True + return self.stop_service ('sfa') def populate (self): "creates random entries in the PLCAPI" @@ -1593,7 +1713,7 @@ class TestPlc: if not isinstance(name,StringTypes): raise Exception except: - t=datetime.datetime.now() + t=datetime.now() d=t.date() name=str(d) return "/root/%s-%s.sql"%(database,name) @@ -1619,6 +1739,26 @@ class TestPlc: utils.header('Database restored from ' + dump) + @staticmethod + def create_ignore_steps (): + for step in TestPlc.default_steps + TestPlc.other_steps: + # default step can have a plc qualifier + if '@' in step: (step,qualifier)=step.split('@') + # or be defined as forced or ignored by default + for keyword in ['_ignore','_force']: + if step.endswith (keyword): step=step.replace(keyword,'') + if step == SEP or step == SEPSFA : continue + method=getattr(TestPlc,step) + name=step+'_ignore' + wrapped=ignore_result(method) +# wrapped.__doc__ = method.__doc__ + " (run in ignore-result mode)" + setattr(TestPlc, name, wrapped) + +# @ignore_result +# def ssh_slice_again_ignore (self): pass +# @ignore_result +# def check_initscripts_ignore (self): pass + def standby_1_through_20(self): """convenience function to wait for a specified number of minutes""" pass @@ -1662,3 +1802,7 @@ class TestPlc: def standby_19(): pass @standby_generic def standby_20(): pass + + # convenience for debugging the test logic + def yes (self): return True + def no (self): return False