From: Thierry Parmentelat Date: Wed, 17 Apr 2013 07:29:10 +0000 (+0200) Subject: Merge branch 'master' into sfa-geni-v3 X-Git-Tag: tests-5.3-1~13 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=70d525866c311347e2ee193e5679cfcf6dc60607;hp=-c;p=tests.git Merge branch 'master' into sfa-geni-v3 --- 70d525866c311347e2ee193e5679cfcf6dc60607 diff --combined system/TestPlc.py index a58ae04,296109d..e001526 --- a/system/TestPlc.py +++ b/system/TestPlc.py @@@ -1,13 -1,13 +1,13 @@@ # Thierry Parmentelat # Copyright (C) 2010 INRIA # - import os, os.path - import datetime - import time import sys + import time + import os, os.path import traceback - from types import StringTypes import socket + from datetime import datetime, timedelta + from types import StringTypes import utils from TestSite import TestSite @@@ -20,6 -20,8 +20,8 @@@ from TestBoxQemu import TestBoxQem from TestSsh import TestSsh from TestApiserver import TestApiserver from TestAuthSfa import TestAuthSfa + from PlcapiUrlScanner import PlcapiUrlScanner + from Completer import Completer, CompleterTask # step methods must take (self) and return a boolean (options is a member of the class) @@@ -83,7 -85,8 +85,8 @@@ class TestPlc 'show', SEP, 'vs_delete','timestamp_vs','vs_create', SEP, 'plc_install', 'plc_configure', 'plc_start', SEP, - 'keys_fetch', 'keys_store', 'keys_clear_known_hosts', 'speed_up_slices', SEP, + 'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP, + 'plcapi_urls','speed_up_slices', SEP, 'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP, # slices created under plcsh interactively seem to be fine but these ones don't have the tags # keep this our of the way for now @@@ -94,11 -97,11 +97,11 @@@ 'sfi_configure@1', 'sfa_add_site@1','sfa_add_pi@1', SEPSFA, 'sfa_add_user@1', 'sfa_update_user@1', 'sfa_add_slice@1', 'sfa_renew_slice@1', SEPSFA, 'sfa_discover@1', 'sfa_create_slice@1', 'sfa_check_slice_plc@1', 'sfa_update_slice@1', SEPSFA, - 'sfi_list@1', 'sfi_show@1', 'sfi_slices@1', 'sfa_utest@1', SEPSFA, + 'sfi_list@1', 'sfi_show@1', 'sfa_utest@1', SEPSFA, # we used to run plcsh_stress_test, and then ssh_node_debug and ssh_node_boot # but as the stress test might take a while, we sometimes missed the debug mode.. 'ssh_node_debug@1', 'plcsh_stress_test@1', SEP, - 'ssh_node_boot@1', 'ssh_slice', 'check_initscripts', SEP, + 'ssh_node_boot@1', 'node_bmlogs@1', 'ssh_slice', 'ssh_slice_basics', 'check_initscripts', SEP, 'ssh_slice_sfa@1', 'sfa_delete_slice@1', 'sfa_delete_user@1', SEPSFA, 'cross_check_tcp@1', 'check_system_slice', SEP, 'empty_slices', 'ssh_slice_off', 'fill_slices', SEP, @@@ -177,49 -180,49 +180,49 @@@ pass def actual_command_in_guest (self,command): - return self.test_ssh.actual_command(self.host_to_guest(command)) + return self.test_ssh.actual_command(self.host_to_guest(command),dry_run=self.options.dry_run) def start_guest (self): - return utils.system(self.test_ssh.actual_command(self.start_guest_in_host())) + return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()),dry_run=self.options.dry_run) def stop_guest (self): - return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host())) + return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host()),dry_run=self.options.dry_run) def run_in_guest (self,command): - return utils.system(self.actual_command_in_guest(command)) + return utils.system(self.actual_command_in_guest(command),dry_run=self.options.dry_run) def run_in_host (self,command): - return self.test_ssh.run_in_buildname(command) + return self.test_ssh.run_in_buildname(command, dry_run=self.options.dry_run) #command gets run in the plc's vm def host_to_guest(self,command): if self.options.plcs_use_lxc: - return "ssh -o StrictHostKeyChecking=no %s %s"%(self.vserverip,command) + return "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null %s %s"%(self.vserverip,command) else: return "vserver %s exec %s"%(self.vservername,command) def vm_root_in_host(self): if self.options.plcs_use_lxc: - return "/var/lib/lxc/%s/rootfs/"%(self.vservername) + return "/vservers/%s/rootfs/"%(self.vservername) else: return "/vservers/%s"%(self.vservername) def vm_timestamp_path (self): if self.options.plcs_use_lxc: - return "/var/lib/lxc/%s/%s.timestamp"%(self.vservername,self.vservername) + return "/vservers/%s/%s.timestamp"%(self.vservername,self.vservername) else: return "/vservers/%s.timestamp"%(self.vservername) #start/stop the vserver def start_guest_in_host(self): if self.options.plcs_use_lxc: - return "lxc-start --daemon --name=%s"%(self.vservername) + return "virsh -c lxc:// start %s"%(self.vservername) else: return "vserver %s start"%(self.vservername) def stop_guest_in_host(self): if self.options.plcs_use_lxc: - return "lxc-stop --name=%s"%(self.vservername) + return "virsh -c lxc:// destroy %s"%(self.vservername) else: return "vserver %s stop"%(self.vservername) @@@ -543,8 -546,9 +546,9 @@@ stamp_path=self.vm_timestamp_path() self.run_in_host("rm -f %s"%stamp_path) if self.options.plcs_use_lxc: - self.run_in_host("lxc-stop --name %s"%self.vservername) - self.run_in_host("lxc-destroy --name %s"%self.vservername) + self.run_in_host("virsh -c lxc:// destroy %s"%self.vservername) + self.run_in_host("virsh -c lxc:// undefine %s"%self.vservername) + self.run_in_host("rm -fr /vservers/%s"%self.vservername) return True else: self.run_in_host("vserver --silent %s delete"%self.vservername) @@@ -923,58 -927,58 +927,58 @@@ return res # silent_minutes : during the first minutes nothing gets printed - def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15): + def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period_seconds=15): if self.options.dry_run: print 'dry_run' return True - # compute timeout - timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes) - graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes) + + class CompleterTaskBootState (CompleterTask): + def __init__ (self, test_plc,hostname): + self.test_plc=test_plc + self.hostname=hostname + self.last_boot_state='undef' + def actual_run (self): + try: + node = self.test_plc.apiserver.GetNodes(self.test_plc.auth_root(), [ self.hostname ], + ['boot_state'])[0] + self.last_boot_state = node['boot_state'] + return self.last_boot_state == target_boot_state + except: + return False + def message (self): + return "CompleterTaskBootState with node %s"%self.hostname + def failure_message (self): + return "node %s in state %s - expected %s"%(self.hostname,self.last_boot_state,target_boot_state) + + timeout = timedelta(minutes=timeout_minutes) + graceout = timedelta(minutes=silent_minutes) + period = timedelta(seconds=period_seconds) # the nodes that haven't checked yet - start with a full list and shrink over time - tocheck = self.all_hostnames() - utils.header("checking nodes %r"%tocheck) - # create a dict hostname -> status - status = dict ( [ (hostname,'undef') for hostname in tocheck ] ) - while tocheck: - # get their status - tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] ) - # update status - for array in tocheck_status: - hostname=array['hostname'] - boot_state=array['boot_state'] - if boot_state == target_boot_state: - utils.header ("%s has reached the %s state"%(hostname,target_boot_state)) - else: - # if it's a real node, never mind - (site_spec,node_spec)=self.locate_hostname(hostname) - if TestNode.is_real_model(node_spec['node_fields']['model']): - utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state)) - # let's cheat - boot_state = target_boot_state - elif datetime.datetime.now() > graceout: - utils.header ("%s still in '%s' state"%(hostname,boot_state)) - graceout=datetime.datetime.now()+datetime.timedelta(1) - status[hostname] = boot_state - # refresh tocheck - tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ] - if not tocheck: - return True - if datetime.datetime.now() > timeout: - for hostname in tocheck: - utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname])) - return False - # otherwise, sleep for a while - time.sleep(period) - # only useful in empty plcs - return True + utils.header("checking nodes boot state (expected %s)"%target_boot_state) + tasks = [ CompleterTaskBootState (self,hostname) \ + for (hostname,_) in self.all_node_infos() ] + return Completer (tasks).run (timeout, graceout, period) def nodes_booted(self): return self.nodes_check_boot_state('boot',timeout_minutes=30,silent_minutes=28) - def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=15): - # compute timeout - timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes) - graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes) + def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period_seconds=15): + class CompleterTaskNodeSsh (CompleterTask): + def __init__ (self, hostname, qemuname, boot_state, local_key): + self.hostname=hostname + self.qemuname=qemuname + self.boot_state=boot_state + self.local_key=local_key + def run (self, silent): + command = TestSsh (self.hostname,key=self.local_key).actual_command("hostname;uname -a") + return utils.system (command, silent=silent)==0 + def failure_message (self): + return "Cannot reach %s @ %s in %s mode"%(self.hostname, self.qemuname, self.boot_state) + + # various delays + timeout = timedelta(minutes=timeout_minutes) + graceout = timedelta(minutes=silent_minutes) + period = timedelta(seconds=period_seconds) vservername=self.vservername if debug: message="debug" @@@ -982,39 -986,11 +986,11 @@@ else: message="boot" local_key = "keys/key_admin.rsa" + utils.header("checking ssh access to nodes (expected in %s mode)"%message) node_infos = self.all_node_infos() - utils.header("checking ssh access (expected in %s mode) to nodes:"%message) - for (nodename,qemuname) in node_infos: - utils.header("hostname=%s -- qemubox=%s"%(nodename,qemuname)) - utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\ - (timeout_minutes,silent_minutes,period)) - while node_infos: - for node_info in node_infos: - (hostname,qemuname) = node_info - # try to run 'hostname' in the node - command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a") - # don't spam logs - show the command only after the grace period - success = utils.system ( command, silent=datetime.datetime.now() < graceout) - if success==0: - utils.header('Successfully entered root@%s (%s)'%(hostname,message)) - # refresh node_infos - node_infos.remove(node_info) - else: - # we will have tried real nodes once, in case they're up - but if not, just skip - (site_spec,node_spec)=self.locate_hostname(hostname) - if TestNode.is_real_model(node_spec['node_fields']['model']): - utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname) - node_infos.remove(node_info) - if not node_infos: - return True - if datetime.datetime.now() > timeout: - for (hostname,qemuname) in node_infos: - utils.header("FAILURE to ssh into %s (on %s)"%(hostname,qemuname)) - return False - # otherwise, sleep for a while - time.sleep(period) - # only useful in empty plcs - return True + tasks = [ CompleterTaskNodeSsh (nodename, qemuname, message, local_key) \ + for (nodename,qemuname) in node_infos ] + return Completer (tasks).run (timeout, graceout, period) def ssh_node_debug(self): "Tries to ssh into nodes in debug mode with the debug ssh key" @@@ -1027,6 -1003,10 +1003,10 @@@ return self.check_nodes_ssh(debug=False, timeout_minutes=self.ssh_node_boot_timeout, silent_minutes=self.ssh_node_boot_silent) + + def node_bmlogs(self): + "Checks that there's a non-empty dir. /var/log/bm/raw" + return utils.system(self.actual_command_in_guest("ls /var/log/bm/raw"))==0 @node_mapper def qemu_local_init (self): pass @@@ -1057,21 -1037,33 +1037,33 @@@ ### initscripts def do_check_initscripts(self): - overall = True + class CompleterTaskInitscript (CompleterTask): + def __init__ (self, test_sliver, stamp): + self.test_sliver=test_sliver + self.stamp=stamp + def actual_run (self): + return self.test_sliver.check_initscript_stamp (self.stamp) + def message (self): + return "initscript checker for %s"%self.test_sliver.name() + def failure_message (self): + return "initscript stamp %s not found in sliver %s"%(self.stamp,self.test_sliver.name()) + + tasks=[] for slice_spec in self.plc_spec['slices']: if not slice_spec.has_key('initscriptstamp'): continue stamp=slice_spec['initscriptstamp'] + slicename=slice_spec['slice_fields']['name'] for nodename in slice_spec['nodenames']: + print 'nodename',nodename,'slicename',slicename,'stamp',stamp (site,node) = self.locate_node (nodename) # xxx - passing the wrong site - probably harmless test_site = TestSite (self,site) test_slice = TestSlice (self,test_site,slice_spec) test_node = TestNode (self,test_site,node) test_sliver = TestSliver (self, test_node, test_slice) - if not test_sliver.check_initscript_stamp(stamp): - overall = False - return overall + tasks.append ( CompleterTaskInitscript (test_sliver, stamp)) + return Completer (tasks).run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10)) def check_initscripts(self): "check that the initscripts have triggered" @@@ -1132,6 -1124,8 +1124,8 @@@ def ssh_slice(self): pass @slice_mapper def ssh_slice_off (self): pass + @slice_mapper + def ssh_slice_basics(self): pass @slice_mapper def check_vsys_defaults(self): pass @@@ -1139,6 -1133,9 +1133,9 @@@ @node_mapper def keys_clear_known_hosts (self): pass + def plcapi_urls (self): + return PlcapiUrlScanner (self.auth_root(),ip=self.vserverip).scan() + def speed_up_slices (self): "tweak nodemanager settings on all nodes using a conf file" # create the template on the server-side @@@ -1223,24 -1220,23 +1220,23 @@@ def check_drl (self): return self._check_system_slice ('drl') # we have the slices up already here, so it should not take too long - def _check_system_slice (self, slicename, timeout_minutes=5, period=15): - timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes) - test_nodes=self.all_nodes() - while test_nodes: - for test_node in test_nodes: - if test_node._check_system_slice (slicename,dry_run=self.options.dry_run): - utils.header ("ok") - test_nodes.remove(test_node) - else: - print '.', - if not test_nodes: - return True - if datetime.datetime.now () > timeout: - for test_node in test_nodes: - utils.header ("can't find system slice %s in %s"%(slicename,test_node.name())) - return False - time.sleep(period) - return True + def _check_system_slice (self, slicename, timeout_minutes=5, period_seconds=15): + class CompleterTaskSystemSlice (CompleterTask): + def __init__ (self, test_node, dry_run): + self.test_node=test_node + self.dry_run=dry_run + def actual_run (self): + return self.test_node._check_system_slice (slicename, dry_run=self.dry_run) + def message (self): + return "System slice %s @ %s"%(slicename, self.test_node.name()) + def failure_message (self): + return "COULD not find system slice %s @ %s"%(slicename, self.test_node.name()) + timeout = timedelta(minutes=timeout_minutes) + silent = timedelta (0) + period = timedelta (seconds=period_seconds) + tasks = [ CompleterTaskSystemSlice (test_node, self.options.dry_run) \ + for test_node in self.all_nodes() ] + return Completer (tasks) . run (timeout, silent, period) def plcsh_stress_test (self): "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents" @@@ -1518,6 -1514,8 +1514,6 @@@ @auth_sfa_mapper def sfi_show(self): pass @auth_sfa_mapper - def sfi_slices(self): pass - @auth_sfa_mapper def ssh_slice_sfa(self): pass @auth_sfa_mapper def sfa_delete_user(self): pass @@@ -1623,7 -1621,7 +1619,7 @@@ if not isinstance(name,StringTypes): raise Exception except: - t=datetime.datetime.now() + t=datetime.now() d=t.date() name=str(d) return "/root/%s-%s.sql"%(database,name) diff --combined system/TestSliceSfa.py index c8fa055,7de388d..7731f86 --- a/system/TestSliceSfa.py +++ b/system/TestSliceSfa.py @@@ -3,7 -3,7 +3,7 @@@ # import time - import datetime + from datetime import datetime, timedelta import utils from TestNode import TestNode @@@ -11,6 -11,8 +11,8 @@@ from TestUser import TestUse from TestBoxQemu import TestBoxQemu from TestSsh import TestSsh + from Completer import Completer, CompleterTask + from TestSlice import CompleterTaskSshSlice class TestSliceSfa: @@@ -30,7 -32,7 +32,7 @@@ def sfi_user(self,*args,**kwds): return self.test_auth_sfa.sfi_user(*args, **kwds) def discover_option(self): - if self.rspec_style()=='pg': return "-r protogeni" + if self.rspec_style()=='pg': return "-r GENI" else: return "-r sfa" # those are step names exposed as methods of TestPlc, hence the _sfa @@@ -47,8 -49,8 +49,8 @@@ def sfa_renew_slice(self, options): "run sfi renew (on Aggregates)" - too_late = datetime.datetime.now()+datetime.timedelta(weeks=52) - one_month = datetime.datetime.now()+datetime.timedelta(weeks=4) + too_late = datetime.now() + timedelta(weeks=52) + one_month = datetime.now() + timedelta(weeks=4) # we expect this to fail on too long term attemps, but to succeed otherwise overall=True for ( renew_until, expected) in [ (too_late, False), (one_month, True) ] : @@@ -84,8 -86,7 +86,8 @@@ "sfiListNodes.py -i %s/%s -o %s/%s"%(self.sfi_path(),self.adfile(),self.sfi_path(),self.nodefile()), "sfiAddSliver.py -i %s/%s -n %s/%s -o %s/%s"%\ (self.sfi_path(),self.adfile(),self.sfi_path(),self.nodefile(),self.sfi_path(),self.reqfile()), - self.sfi_user("create %s %s"%(self.hrn(),self.reqfile())), + self.sfi_user("allocate %s %s"%(self.hrn(),self.reqfile())), + self.sfi_user("provision %s"%(self.hrn())), ] for command in commands: if self.test_plc.run_in_guest(command)!=0: return False @@@ -123,60 -124,23 +125,23 @@@ return self.test_plc.locate_private_key_from_key_names ( [ self.slice_spec['key_name'] ] ) # check the resulting sliver - def ssh_slice_sfa(self,options,timeout_minutes=40,silent_minutes=30,period=15): + def ssh_slice_sfa(self,options,timeout_minutes=40,silent_minutes=0,period_seconds=15): "tries to ssh-enter the SFA slice" - timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes) - graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes) + timeout = timedelta(minutes=timeout_minutes) + graceout = timedelta(minutes=silent_minutes) + period = timedelta(seconds=period_seconds) # locate a key private_key=self.locate_private_key() if not private_key : utils.header("WARNING: Cannot find a valid key for slice %s"%self.name()) return False - - # convert nodenames to real hostnames - restarted=[] - tocheck=[] + command="echo hostname ; hostname; echo id; id; echo uname -a ; uname -a" + + tasks=[] + slicename=self.plc_name() + dry_run = getattr(options,'dry_run',False) for nodename in self.slice_spec['nodenames']: (site_spec,node_spec) = self.test_plc.locate_node(nodename) - tocheck.append(node_spec['node_fields']['hostname']) - - utils.header("checking ssh access into slice %s on nodes %r"%(self.plc_name(),tocheck)) - utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\ - (timeout_minutes,silent_minutes,period)) - while tocheck: - for hostname in tocheck: - (site_spec,node_spec) = self.test_plc.locate_hostname(hostname) - date_test_ssh = TestSsh (hostname,key=private_key,username=self.plc_name()) - command = date_test_ssh.actual_command("echo hostname ; hostname; echo id; id; echo uname -a ; uname -a") - date = utils.system (command, silent=datetime.datetime.now() < graceout) - if date==0: - utils.header("Successfuly entered slice %s on %s"%(self.plc_name(),hostname)) - tocheck.remove(hostname) - else: - # real nodes will have been checked once in case they're up - skip if not - if TestNode.is_real_model(node_spec['node_fields']['model']): - utils.header("WARNING : Checking slice %s on real node %s skipped"%(self.plc_name(),hostname)) - tocheck.remove(hostname) - # nm restart after first failure, if requested - if options.forcenm and hostname not in restarted: - utils.header ("forcenm option : restarting nm on %s"%hostname) - restart_test_ssh=TestSsh(hostname,key="keys/key_admin.rsa") - access=restart_test_ssh.actual_command('service nm restart') - if (access==0): - utils.header('nm restarted on %s'%hostname) - else: - utils.header('Failed to restart nm on %s'%(hostname)) - restarted.append(hostname) - if not tocheck: - # we're done - return True - if datetime.datetime.now() > timeout: - for hostname in tocheck: - utils.header("FAILURE to ssh into %s@%s"%(self.plc_name(),hostname)) - return False - # wait for the period - time.sleep (period) - # for an empty slice - return True - - + tasks.append( CompleterTaskSshSlice(self.test_plc,node_spec['node_fields']['hostname'], + slicename,private_key,command,expected=True,dry_run=dry_run)) + return Completer (tasks).run (timeout, graceout, period)