X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=system%2FTestPlc.py;h=93b5981b4158312f5dec0399f307695c0bfeb005;hb=40ab0cf39e29752c4abf2217bbc8fc382e438d11;hp=ffd8f5bbc32c70dfc5311b96096f56dfb98d238b;hpb=6385ce35811043697567e7a1ebb1c49ecd0fde8e;p=tests.git diff --git a/system/TestPlc.py b/system/TestPlc.py index ffd8f5b..93b5981 100644 --- a/system/TestPlc.py +++ b/system/TestPlc.py @@ -10,8 +10,9 @@ from datetime import datetime, timedelta from types import StringTypes import utils +from Completer import Completer, CompleterTask from TestSite import TestSite -from TestNode import TestNode +from TestNode import TestNode, CompleterTaskNodeSsh from TestUser import TestUser from TestKey import TestKey from TestSlice import TestSlice @@ -21,7 +22,8 @@ from TestSsh import TestSsh from TestApiserver import TestApiserver from TestAuthSfa import TestAuthSfa from PlcapiUrlScanner import PlcapiUrlScanner -from Completer import Completer, CompleterTask + +has_sfa_cache_filename="sfa-cache" # step methods must take (self) and return a boolean (options is a member of the class) @@ -40,18 +42,20 @@ def standby_generic (func): return actual def node_mapper (method): - def actual(self,*args, **kwds): + def map_on_nodes(self,*args, **kwds): overall=True node_method = TestNode.__dict__[method.__name__] for test_node in self.all_nodes(): if not node_method(test_node, *args, **kwds): overall=False return overall + # maintain __name__ for ignore_result + map_on_nodes.__name__=method.__name__ # restore the doc text - actual.__doc__=TestNode.__dict__[method.__name__].__doc__ - return actual + map_on_nodes.__doc__=TestNode.__dict__[method.__name__].__doc__ + return map_on_nodes def slice_mapper (method): - def actual(self): + def map_on_slices(self): overall=True slice_method = TestSlice.__dict__[method.__name__] for slice_spec in self.plc_spec['slices']: @@ -60,21 +64,25 @@ def slice_mapper (method): test_slice=TestSlice(self,test_site,slice_spec) if not slice_method(test_slice,self.options): overall=False return overall + # maintain __name__ for ignore_result + map_on_slices.__name__=method.__name__ # restore the doc text - actual.__doc__=TestSlice.__dict__[method.__name__].__doc__ - return actual + map_on_slices.__doc__=TestSlice.__dict__[method.__name__].__doc__ + return map_on_slices # run a step but return True so that we can go on def ignore_result (method): - def wrappee (self): + def ignoring (self): # ssh_slice_ignore->ssh_slice ref_name=method.__name__.replace('_ignore','').replace('force_','') ref_method=TestPlc.__dict__[ref_name] result=ref_method(self) - print "Actual - but ignored - result for %(ref_name)s is %(result)s"%locals() + print "Actual (but ignored) result for %(ref_name)s is %(result)s"%locals() return Ignored (result) - wrappee.__doc__="ignored version of " + method.__name__.replace('_ignore','').replace('ignore_','') - return wrappee + name=method.__name__.replace('_ignore','').replace('force_','') + ignoring.__name__=name + ignoring.__doc__="ignored version of " + name + return ignoring # a variant that expects the TestSlice method to return a list of CompleterTasks that # are then merged into a single Completer run to avoid wating for all the slices @@ -83,7 +91,6 @@ def ignore_result (method): class slice_mapper__tasks (object): # could not get this to work with named arguments def __init__ (self,timeout_minutes,silent_minutes,period_seconds): - print "timeout_minutes,silent_minutes,period_seconds",timeout_minutes,silent_minutes,period_seconds self.timeout=timedelta(minutes=timeout_minutes) self.silent=timedelta(minutes=silent_minutes) self.period=timedelta(seconds=period_seconds) @@ -102,6 +109,7 @@ class slice_mapper__tasks (object): tasks += slice_method (test_slice, self.options) return Completer (tasks).run (decorator_self.timeout, decorator_self.silent, decorator_self.period) # restore the doc text from the TestSlice method even if a bit odd + wrappee.__name__ = method.__name__ wrappee.__doc__ = slice_method.__doc__ return wrappee @@ -128,37 +136,41 @@ class TestPlc: default_steps = [ 'show', SEP, - 'vs_delete','timestamp_vs','vs_create', SEP, -# 'plc_install', 'mod_python', 'plc_configure', 'plc_start', SEP, + 'plcvm_delete','plcvm_timestamp','plcvm_create', SEP, 'plc_install', 'plc_configure', 'plc_start', SEP, 'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP, 'plcapi_urls','speed_up_slices', SEP, 'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP, # slices created under plcsh interactively seem to be fine but these ones don't have the tags # keep this our of the way for now -# 'check_vsys_defaults', SEP, + 'check_vsys_defaults_ignore', SEP, +# run this first off so it's easier to re-run on another qemu box + 'qemu_kill_mine', SEP, 'nodestate_reinstall', 'qemu_local_init','bootcd', 'qemu_local_config', SEP, - 'qemu_kill_mine','qemu_clean_mine', 'qemu_export', 'qemu_start', 'timestamp_qemu', SEP, + 'qemu_clean_mine', 'qemu_export', 'qemu_start', 'qemu_timestamp', SEP, 'sfa_install_all', 'sfa_configure', 'cross_sfa_configure', 'sfa_start', 'sfa_import', SEPSFA, - 'sfi_configure@1', 'sfa_add_site@1','sfa_add_pi@1', SEPSFA, - 'sfa_add_user@1', 'sfa_update_user@1', 'sfa_add_slice@1', 'sfa_renew_slice@1', SEPSFA, + 'sfi_configure@1', 'sfa_register_site@1','sfa_register_pi@1', SEPSFA, + 'sfa_register_user@1', 'sfa_update_user@1', 'sfa_register_slice@1', 'sfa_renew_slice@1', SEPSFA, + 'sfa_remove_user_from_slice@1','sfi_show_slice_researchers@1', + 'sfa_insert_user_in_slice@1','sfi_show_slice_researchers@1', SEPSFA, 'sfa_discover@1', 'sfa_create_slice@1', 'sfa_check_slice_plc@1', 'sfa_update_slice@1', SEPSFA, - 'sfi_list@1', 'sfi_show@1', 'sfa_utest@1', SEPSFA, + 'sfi_list@1', 'sfi_show_site@1', 'sfa_utest@1', SEPSFA, # we used to run plcsh_stress_test, and then ssh_node_debug and ssh_node_boot # but as the stress test might take a while, we sometimes missed the debug mode.. - 'ssh_node_debug@1', 'plcsh_stress_test@1', SEP, - 'ssh_node_boot@1', 'node_bmlogs@1', 'ssh_slice', 'ssh_slice_basics', 'check_initscripts_ignore', SEP, + 'probe_kvm_iptables', + 'ping_node', 'ssh_node_debug', 'plcsh_stress_test@1', SEP, + 'ssh_node_boot', 'node_bmlogs', 'ssh_slice', 'ssh_slice_basics', 'check_initscripts_ignore', SEP, 'ssh_slice_sfa@1', 'sfa_delete_slice@1', 'sfa_delete_user@1', SEPSFA, 'cross_check_tcp@1', 'check_system_slice', SEP, # check slices are turned off properly - 'empty_slices', 'ssh_slice_off', SEP, + 'empty_slices', 'ssh_slice_off', 'slice_fs_deleted_ignore', SEP, # check they are properly re-created with the same name 'fill_slices', 'ssh_slice_again_ignore', SEP, 'gather_logs_force', SEP, ] other_steps = [ 'export', 'show_boxes', SEP, - 'check_hooks', 'plc_stop', 'vs_start', 'vs_stop', SEP, + 'check_hooks', 'plc_stop', 'plcvm_start', 'plcvm_stop', SEP, 'delete_initscripts', 'delete_nodegroups','delete_all_sites', SEP, 'delete_sites', 'delete_nodes', 'delete_slices', 'keys_clean', SEP, 'delete_leases', 'list_leases', SEP, @@ -169,8 +181,8 @@ class TestPlc: 'sfa_plcclean', 'sfa_dbclean', 'sfa_stop','sfa_uninstall', 'sfi_clean', SEPSFA, 'plc_db_dump' , 'plc_db_restore', SEP, 'check_netflow','check_drl', SEP, - 'debug_nodemanager', SEP, - 'standby_1_through_20',SEP, + 'debug_nodemanager', 'slice_fs_present', SEP, + 'standby_1_through_20','yes','no',SEP, ] @staticmethod @@ -185,12 +197,23 @@ class TestPlc: # this was originally for centos5 but is still valid # for up to f12 as recent SFAs with sqlalchemy won't build before f14 @staticmethod - def check_whether_build_has_sfa (rpms_url): - utils.header ("Checking if build provides SFA package...") + def _has_sfa_cached (rpms_url): + if os.path.isfile(has_sfa_cache_filename): + cached=file(has_sfa_cache_filename).read()=="yes" + utils.header("build provides SFA (cached):%s"%cached) + return cached # warning, we're now building 'sface' so let's be a bit more picky - retcod=os.system ("curl --silent %s/ | grep -q sfa-"%rpms_url) # full builds are expected to return with 0 here - if retcod==0: + utils.header ("Checking if build provides SFA package...") + retcod=os.system ("curl --silent %s/ | grep -q sfa-"%rpms_url)==0 + encoded='yes' if retcod else 'no' + file(has_sfa_cache_filename,'w').write(encoded) + return retcod + + @staticmethod + def check_whether_build_has_sfa (rpms_url): + has_sfa=TestPlc._has_sfa_cached(rpms_url) + if has_sfa: utils.header("build does provide SFA") else: # move all steps containing 'sfa' from default_steps to other_steps @@ -228,8 +251,10 @@ class TestPlc: def connect (self): pass - def actual_command_in_guest (self,command): - return self.test_ssh.actual_command(self.host_to_guest(command),dry_run=self.options.dry_run) + def actual_command_in_guest (self,command, backslash=False): + raw1=self.host_to_guest(command) + raw2=self.test_ssh.actual_command(raw1,dry_run=self.options.dry_run, backslash=backslash) + return raw2 def start_guest (self): return utils.system(self.test_ssh.actual_command(self.start_guest_in_host(),dry_run=self.options.dry_run)) @@ -237,43 +262,37 @@ class TestPlc: def stop_guest (self): return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host(),dry_run=self.options.dry_run)) - def run_in_guest (self,command): - return utils.system(self.actual_command_in_guest(command)) + def run_in_guest (self,command,backslash=False): + raw=self.actual_command_in_guest(command,backslash) + return utils.system(raw) def run_in_host (self,command): return self.test_ssh.run_in_buildname(command, dry_run=self.options.dry_run) + # backslashing turned out so awful at some point that I've turned off auto-backslashing + # see e.g. plc_start esp. the version for f14 #command gets run in the plc's vm def host_to_guest(self,command): - if self.options.plcs_use_lxc: - return "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null %s %s"%(self.vserverip,command) + # f14 still needs some extra help + if self.options.fcdistro == 'f14': + raw="virsh -c lxc:/// lxc-enter-namespace %s -- /usr/bin/env PATH=/bin:/sbin:/usr/bin:/usr/sbin %s" %(self.vservername,command) else: - return "vserver %s exec %s"%(self.vservername,command) + raw="virsh -c lxc:/// lxc-enter-namespace %s -- /usr/bin/env %s" %(self.vservername,command) + return raw + # this /vservers thing is legacy... def vm_root_in_host(self): - if self.options.plcs_use_lxc: - return "/vservers/%s/"%(self.vservername) - else: - return "/vservers/%s"%(self.vservername) + return "/vservers/%s/"%(self.vservername) def vm_timestamp_path (self): - if self.options.plcs_use_lxc: - return "/vservers/%s/%s.timestamp"%(self.vservername,self.vservername) - else: - return "/vservers/%s.timestamp"%(self.vservername) + return "/vservers/%s/%s.timestamp"%(self.vservername,self.vservername) #start/stop the vserver def start_guest_in_host(self): - if self.options.plcs_use_lxc: - return "virsh -c lxc:// start %s"%(self.vservername) - else: - return "vserver %s start"%(self.vservername) + return "virsh -c lxc:/// start %s"%(self.vservername) def stop_guest_in_host(self): - if self.options.plcs_use_lxc: - return "virsh -c lxc:// destroy %s"%(self.vservername) - else: - return "vserver %s stop"%(self.vservername) + return "virsh -c lxc:/// destroy %s"%(self.vservername) # xxx quick n dirty def run_in_guest_piped (self,local,remote): @@ -468,10 +487,7 @@ class TestPlc: domain=socket.gethostname().split('.',1)[1] fqdn="%s.%s"%(self.plc_spec['host_box'],domain) print "export BUILD=%s"%self.options.buildname - if self.options.plcs_use_lxc: - print "export PLCHOSTLXC=%s"%fqdn - else: - print "export PLCHOSTVS=%s"%fqdn + print "export PLCHOSTLXC=%s"%fqdn print "export GUESTNAME=%s"%self.plc_spec['vservername'] vplcname=self.plc_spec['vservername'].split('-')[-1] print "export GUESTHOSTNAME=%s.%s"%(vplcname,domain) @@ -502,7 +518,7 @@ class TestPlc: for key in val: self.display_key_spec (key) elif passno == 1: - if key not in ['sites','initscripts','slices','keys', 'sfa']: + if key not in ['sites','initscripts','slices','keys']: print '+ ',key,':',val def display_site_spec (self,site): @@ -587,7 +603,7 @@ class TestPlc: # write a timestamp in /vservers/<>.timestamp # cannot be inside the vserver, that causes vserver .. build to cough - def timestamp_vs (self): + def plcvm_timestamp (self): "Create a timestamp to remember creation date for this plc" now=int(time.time()) # TODO-lxc check this one @@ -599,24 +615,20 @@ class TestPlc: # this is called inconditionnally at the beginning of the test sequence # just in case this is a rerun, so if the vm is not running it's fine - def vs_delete(self): + def plcvm_delete(self): "vserver delete the test myplc" stamp_path=self.vm_timestamp_path() self.run_in_host("rm -f %s"%stamp_path) - if self.options.plcs_use_lxc: - self.run_in_host("virsh -c lxc:// destroy %s"%self.vservername) - self.run_in_host("virsh -c lxc:// undefine %s"%self.vservername) - self.run_in_host("rm -fr /vservers/%s"%self.vservername) - return True - else: - self.run_in_host("vserver --silent %s delete"%self.vservername) - return True + self.run_in_host("virsh -c lxc:// destroy %s"%self.vservername) + self.run_in_host("virsh -c lxc:// undefine %s"%self.vservername) + self.run_in_host("rm -fr /vservers/%s"%self.vservername) + return True ### install # historically the build was being fetched by the tests # now the build pushes itself as a subdir of the tests workdir # so that the tests do not have to worry about extracting the build (svn, git, or whatever) - def vs_create (self): + def plcvm_create (self): "vserver creation (no install done)" # push the local build/ dir to the testplc box if self.is_local(): @@ -638,7 +650,7 @@ class TestPlc: repo_url = os.path.dirname(repo_url) # invoke initvm (drop support for vs) - script="ltest-initvm.sh" + script="lbuild-initvm.sh" script_options="" # pass the vbuild-nightly options to [lv]test-initvm script_options += " -p %s"%self.options.personality @@ -719,22 +731,40 @@ class TestPlc: utils.system('rm %s'%tmpname) return True +# f14 is a bit odd in this respect, although this worked fine in guests up to f18 +# however using a vplc guest under f20 requires this trick +# the symptom is this: service plc start +# Starting plc (via systemctl): Failed to get D-Bus connection: \ +# Failed to connect to socket /org/freedesktop/systemd1/private: Connection refused +# weird thing is the doc says f14 uses upstart by default and not systemd +# so this sounds kind of harmless + def start_service (self,service): return self.start_stop_service (service,'start') + def stop_service (self,service): return self.start_stop_service (service,'stop') + + def start_stop_service (self, service,start_or_stop): + "utility to start/stop a service with the special trick for f14" + if self.options.fcdistro != 'f14': + return self.run_in_guest ("service %s %s"%(service,start_or_stop))==0 + else: + # patch /sbin/service so it does not reset environment + self.run_in_guest ('sed -i -e \\"s,env -i,env,\\" /sbin/service') + # this is because our own scripts in turn call service + return self.run_in_guest("SYSTEMCTL_SKIP_REDIRECT=true service %s %s"%(service,start_or_stop))==0 + def plc_start(self): "service plc start" - self.run_in_guest('service plc start') - return True + return self.start_service ('plc') def plc_stop(self): "service plc stop" - self.run_in_guest('service plc stop') - return True - - def vs_start (self): + return self.stop_service ('plc') + + def plcvm_start (self): "start the PLC vserver" self.start_guest() return True - def vs_stop (self): + def plcvm_stop (self): "stop the PLC vserver" self.stop_guest() return True @@ -1024,19 +1054,34 @@ class TestPlc: def nodes_booted(self): return self.nodes_check_boot_state('boot',timeout_minutes=30,silent_minutes=28) - def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period_seconds=15): - class CompleterTaskNodeSsh (CompleterTask): - def __init__ (self, hostname, qemuname, boot_state, local_key): + def probe_kvm_iptables (self): + (_,kvmbox) = self.all_node_infos()[0] + TestSsh(kvmbox).run("iptables-save") + return True + + # probing nodes + def check_nodes_ping(self,timeout_seconds=120,period_seconds=10): + class CompleterTaskPingNode (CompleterTask): + def __init__ (self, hostname): self.hostname=hostname - self.qemuname=qemuname - self.boot_state=boot_state - self.local_key=local_key - def run (self, silent): - command = TestSsh (self.hostname,key=self.local_key).actual_command("hostname;uname -a") + def run(self,silent): + command="ping -c 1 -w 1 %s >& /dev/null"%self.hostname return utils.system (command, silent=silent)==0 def failure_message (self): - return "Cannot reach %s @ %s in %s mode"%(self.hostname, self.qemuname, self.boot_state) + return "Cannot ping node with name %s"%self.hostname + timeout=timedelta (seconds=timeout_seconds) + graceout=timeout + period=timedelta (seconds=period_seconds) + node_infos = self.all_node_infos() + tasks = [ CompleterTaskPingNode (h) for (h,_) in node_infos ] + return Completer (tasks).run (timeout, graceout, period) + # ping node before we try to reach ssh, helpful for troubleshooting failing bootCDs + def ping_node (self): + "Ping nodes" + return self.check_nodes_ping () + + def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period_seconds=15): # various delays timeout = timedelta(minutes=timeout_minutes) graceout = timedelta(minutes=silent_minutes) @@ -1050,7 +1095,7 @@ class TestPlc: local_key = "keys/key_admin.rsa" utils.header("checking ssh access to nodes (expected in %s mode)"%message) node_infos = self.all_node_infos() - tasks = [ CompleterTaskNodeSsh (nodename, qemuname, message, local_key) \ + tasks = [ CompleterTaskNodeSsh (nodename, qemuname, local_key, boot_state=message) \ for (nodename,qemuname) in node_infos ] return Completer (tasks).run (timeout, graceout, period) @@ -1186,15 +1231,18 @@ class TestPlc: def ssh_slice(self): pass @slice_mapper__tasks(20,19,15) def ssh_slice_off (self): pass + @slice_mapper__tasks(2,1,15) + def slice_fs_present(self): pass + @slice_mapper__tasks(2,1,15) + def slice_fs_deleted(self): pass # use another name so we can exclude/ignore it from the tests on the nightly command line def ssh_slice_again(self): return self.ssh_slice() - # note that simply doing ssh_slice_again=ssh_slice would kind od work too + # note that simply doing ssh_slice_again=ssh_slice would kind of work too # but for some reason the ignore-wrapping thing would not @slice_mapper def ssh_slice_basics(self): pass - @slice_mapper def check_vsys_defaults(self): pass @@ -1236,7 +1284,7 @@ class TestPlc: def qemu_start (self) : pass @node_mapper - def timestamp_qemu (self) : pass + def qemu_timestamp (self) : pass # when a spec refers to a node possibly on another plc def locate_sliver_obj_cross (self, nodename, slicename, other_plcs): @@ -1522,14 +1570,12 @@ class TestPlc: def sfa_import(self): "use sfaadmin to import from plc" auth=self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH'] - return \ - self.run_in_guest('sfaadmin reg import_registry')==0 -# not needed anymore -# self.run_in_guest('cp /etc/sfa/authorities/%s/%s.pkey /etc/sfa/authorities/server.key'%(auth,auth)) + return self.run_in_guest('sfaadmin reg import_registry')==0 def sfa_start(self): "service sfa start" - return self.run_in_guest('service sfa start')==0 + return self.start_service('sfa') + def sfi_configure(self): "Create /root/sfi on the plc side for sfi client configuration" @@ -1558,15 +1604,15 @@ class TestPlc: return True @auth_sfa_mapper - def sfa_add_site (self): pass + def sfa_register_site (self): pass @auth_sfa_mapper - def sfa_add_pi (self): pass + def sfa_register_pi (self): pass @auth_sfa_mapper - def sfa_add_user(self): pass + def sfa_register_user(self): pass @auth_sfa_mapper def sfa_update_user(self): pass @auth_sfa_mapper - def sfa_add_slice(self): pass + def sfa_register_slice(self): pass @auth_sfa_mapper def sfa_renew_slice(self): pass @auth_sfa_mapper @@ -1578,9 +1624,17 @@ class TestPlc: @auth_sfa_mapper def sfa_update_slice(self): pass @auth_sfa_mapper + def sfa_remove_user_from_slice(self): pass + @auth_sfa_mapper + def sfa_insert_user_in_slice(self): pass + @auth_sfa_mapper def sfi_list(self): pass @auth_sfa_mapper - def sfi_show(self): pass + def sfi_show_site(self): pass + @auth_sfa_mapper + def sfi_show_slice(self): pass + @auth_sfa_mapper + def sfi_show_slice_researchers(self): pass @auth_sfa_mapper def ssh_slice_sfa(self): pass @auth_sfa_mapper @@ -1590,8 +1644,7 @@ class TestPlc: def sfa_stop(self): "service sfa stop" - self.run_in_guest('service sfa stop')==0 - return True + return self.stop_service ('sfa') def populate (self): "creates random entries in the PLCAPI" @@ -1776,3 +1829,7 @@ class TestPlc: def standby_19(): pass @standby_generic def standby_20(): pass + + # convenience for debugging the test logic + def yes (self): return True + def no (self): return False