X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=system%2FTestPlc.py;h=88a4a4146bbf884173fc4e168969da71f6ca2f2d;hb=6466c59ed2ca5112c7915b1a98eeb4525b4bc993;hp=0e2dc2848967a98dd3e96cf521fd38c118e96f70;hpb=e6b4cacd1f888316a0c360e86105d2ca855fdc24;p=tests.git

diff --git a/system/TestPlc.py b/system/TestPlc.py
index 0e2dc28..88a4a41 100644
--- a/system/TestPlc.py
+++ b/system/TestPlc.py
@@ -62,23 +62,29 @@ SEP=''
 
 class TestPlc:
 
-    default_steps = ['uninstall','install','install_rpm',
-                     'configure', 'start', 'fetch_keys', SEP,
-                     'store_keys', 'clear_known_hosts', 'initscripts', SEP,
-                     'sites', 'nodes', 'slices', 'nodegroups', SEP,
-                     'init_node','bootcd', 'configure_qemu', 'export_qemu',
-                     'kill_all_qemus', 'reinstall_node','start_node', SEP,
-                     'nodes_booted', 'nodes_ssh', 'check_slice', 'check_initscripts', SEP,
-                     'check_sanity', 'check_tcp', 'plcsh_stress_test', SEP,
-                     'force_gather_logs', 'force_kill_qemus', 'force_record_tracker','force_free_tracker' ]
-    other_steps = [ 'stop_all_vservers','fresh_install', 'cache_rpm', 'stop', 'vs_start', SEP,
-                    'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
-                    'clean_sites', 'clean_nodes',
-                    'clean_slices', 'clean_keys', SEP,
-                    'show_boxes', 'list_all_qemus', 'list_qemus', SEP,
-                    'db_dump' , 'db_restore', ' cleanup_tracker',
-                    'standby_1 through 20'
-                    ]
+    default_steps = [
+        'display','uninstall','install','install_rpm',
+        'configure', 'start', 'fetch_keys', SEP,
+        'store_keys', 'clear_known_hosts', 'initscripts', SEP,
+        'sites', 'nodes', 'slices', 'nodegroups', SEP,
+        'init_node','bootcd', 'configure_qemu', 'export_qemu',
+        'kill_all_qemus', 'reinstall_node','start_node', SEP,
+        # better use of time: do this now that the nodes are taking off
+        'plcsh_stress_test', SEP,
+        'nodes_ssh_debug', 'nodes_ssh_boot', 'check_slice', 'check_initscripts', SEP,
+        'check_tcp', SEP,
+        'force_gather_logs', 'force_kill_qemus', 'force_record_tracker','force_free_tracker',
+        ]
+    other_steps = [
+        'stop_all_vservers','fresh_install', 'cache_rpm', 'stop', 'vs_start', SEP,
+        'check_sanity', SEP,
+        'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
+        'clean_sites', 'clean_nodes',
+        'clean_slices', 'clean_keys', SEP,
+        'show_boxes', 'list_all_qemus', 'list_qemus', SEP,
+        'db_dump' , 'db_restore', 'cleanup_trackers', 'cleanup_all_trackers',
+        'standby_1 through 20',
+        ]
 
     @staticmethod
     def printable_steps (list):
@@ -267,6 +273,110 @@ class TestPlc:
             node.kill_qemu()
         return True
 
+    #################### display config
+    def display (self):
+        self.display_pass (1)
+        self.display_pass (2)
+        return True
+
+    # entry point
+    def display_pass (self,passno):
+        for (key,val) in self.plc_spec.iteritems():
+            if passno == 2:
+                if key == 'sites':
+                    for site in val:
+                        self.display_site_spec(site)
+                        for node in site['nodes']:
+                            self.display_node_spec(node)
+                elif key=='initscripts':
+                    for initscript in val:
+                        self.display_initscript_spec (initscript)
+                elif key=='slices':
+                    for slice in val:
+                        self.display_slice_spec (slice)
+                elif key=='keys':
+                    for key_spec in val:    # distinct name: do not rebind the outer loop variable 'key'
+                        self.display_key_spec (key_spec)
+            elif passno == 1:
+                if key not in ['sites','initscripts','slices','keys']:
+                    print '* ',key,':',val
+
+    def display_site_spec (self,site):
+        print '* ======== site',site['site_fields']['name']
+        for (k,v) in site.iteritems():
+            if k=='nodes':
+                if v:
+                    print '* ','nodes : ',
+                    for node in v:
+                        print node['node_fields']['hostname'],'',
+                    print ''
+            elif k=='users':
+                if v:
+                    print '* users : ',
+                    for user in v:
+                        print user['name'],'',
+                    print ''
+            elif k == 'site_fields':
+                print '* login_base',':',v['login_base']
+            elif k == 'address_fields':
+                pass
+            else:
+                print '* ',k,
+                PrettyPrinter(indent=8,depth=2).pprint(v)
+
+    def display_initscript_spec (self,initscript):
+        print '* ======== initscript',initscript['initscript_fields']['name']
+
+    def display_key_spec (self,key):
+        print '* ======== key',key['name']
+
+    def display_slice_spec (self,slice):
+        print '* ======== slice',slice['slice_fields']['name']
+        for (k,v) in slice.iteritems():
+            if k=='nodenames':
+                if v:
+                    print '* nodes : ',
+                    for nodename in v:
+                        print nodename,'',
+                    print ''
+            elif k=='usernames':
+                if v:
+                    print '* users : ',
+                    for username in v:
+                        print username,'',
+                    print ''
+            elif k=='slice_fields':
+                print '* fields',':',
+                print 'max_nodes=',v['max_nodes'],
+                print ''
+            else:
+                print '* ',k,v
+
+    def display_node_spec (self,node):
+        print "* node",node['name'],"host_box=",node['host_box'],
+        print "hostname=",node['node_fields']['hostname'],
+        print "ip=",node['interface_fields']['ip']
+
+
+    # another entry point for just showing the boxes involved
+    def display_mapping (self):
+        TestPlc.display_mapping_plc(self.plc_spec)
+        return True
+
+    @staticmethod
+    def display_mapping_plc (plc_spec):
+        print '* MyPLC',plc_spec['name']
+        print '*\tvserver address = root@%s:/vservers/%s'%(plc_spec['hostname'],plc_spec['vservername'])
+        print '*\tIP = %s/%s'%(plc_spec['PLC_API_HOST'],plc_spec['vserverip'])
+        for site_spec in plc_spec['sites']:
+            for node_spec in site_spec['nodes']:
+                TestPlc.display_mapping_node(node_spec)
+
+    @staticmethod
+    def display_mapping_node (node_spec):
+        print '* NODE %s'%(node_spec['name'])
+        print '*\tqemu box %s'%node_spec['host_box']
+        print '*\thostname=%s'%node_spec['node_fields']['hostname']
 
     ### utility methods for handling the pool of IP addresses allocated to plcs
     # Logic
@@ -277,41 +387,83 @@ class TestPlc:
     # (*) the record_tracker method adds an entry at the bottom of the file
     # (*) the cleanup_tracker method stops all known vservers and removes the tracker file
 
-    TRACKER_FILE="~/running-test-plcs"
+    TRACKER_FILE=os.environ['HOME']+"/running-test-plcs"
+    # how many concurrent plcs are we keeping alive - adjust with the IP pool size
+    TRACKER_KEEP_VSERVERS = 12
 
     def record_tracker (self):
-        command="echo %s %s >> %s"%(self.vservername,self.test_ssh.hostname,TestPlc.TRACKER_FILE)
-        (code,output) = utils.output_of (self.test_ssh.actual_command(command))
-        if code != 0:
-            print "WARNING : COULD NOT record_tracker %s as a running plc on %s"%(self.vservername,self.test_ssh.hostname)
-            return False
+        try:
+            lines=file(TestPlc.TRACKER_FILE).readlines()
+        except IOError:    # no tracker file yet - start from an empty list
+            lines=[]
+
+        this_line="%s %s\n"%(self.vservername,self.test_ssh.hostname)
+        for line in lines:
+            if line==this_line:
+                print 'this vserver is already included in %s'%TestPlc.TRACKER_FILE
+                return True
+        if self.options.dry_run:
+            print 'dry_run: record_tracker - skipping tracker update'
+            return True
+        tracker=file(TestPlc.TRACKER_FILE,"w")
+        for line in lines+[this_line]:
+            tracker.write(line)
+        tracker.close()
         print "Recorded %s in running plcs on host %s"%(self.vservername,self.test_ssh.hostname)
         return True
 
-    def free_tracker (self):
-        command="head -1 %s"%TestPlc.TRACKER_FILE
-        (code,line) = utils.output_of(self.test_ssh.actual_command(command))
-        if code != 0:
-            print "No entry found in %s on %s"%(TestPlc.TRACKER_FILE,self.test_ssh.hostname)
-            return False
+    def free_tracker (self, keep_vservers=None):
+        if not keep_vservers: keep_vservers=TestPlc.TRACKER_KEEP_VSERVERS
         try:
-            [vserver_to_stop,hostname] = line.split()
+            lines=file(TestPlc.TRACKER_FILE).readlines()
         except:
-            print "WARNING: free_tracker: Could not parse %s - skipped"%TestPlc.TRACKER_FILE
-            return False
-        stop_command = "vserver --silent %s stop"%vserver_to_stop
-        utils.system(self.test_ssh.actual_command(stop_command))
-        x=TestPlc.TRACKER_FILE
-        flush_command = "tail --lines=+2 %s > %s.tmp ; mv %s.tmp %s"%(x,x,x,x)
-        utils.system(self.test_ssh.actual_command(flush_command))
+            print 'free_tracker : could not read %s - nothing to free'%TestPlc.TRACKER_FILE
+            return True
+        how_many = len(lines) - keep_vservers
+        # nothing todo until we have more than keep_vservers in the tracker
+        if how_many <= 0:
+            print 'free_tracker : limit %d not reached'%keep_vservers
+            return True
+        to_stop = lines[:how_many]
+        to_keep = lines[how_many:]
+        for line in to_stop:
+            print '>%s<'%line
+            [vname,hostname]=line.split()
+            command=TestSsh(hostname).actual_command("vserver --silent %s stop"%vname)
+            utils.system(command)    # NOTE(review): runs before the dry_run check below - confirm utils.system honors dry_run
+        if self.options.dry_run:
+            print 'dry_run: free_tracker would stop %d vservers'%len(to_stop)
+            for line in to_stop: print line,
+            print 'dry_run: free_tracker would keep %d vservers'%len(to_keep)
+            for line in to_keep: print line,
+            return True
+        print "Storing %d remaining vservers in %s"%(len(to_keep),TestPlc.TRACKER_FILE)
+        tracker=open(TestPlc.TRACKER_FILE,"w")
+        for line in to_keep:
+            tracker.write(line)
+        tracker.close()
         return True
 
     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
-    def cleanup_tracker (self):
+    def cleanup_trackers (self):
+        try:
+            for line in file(TestPlc.TRACKER_FILE).readlines():    # TRACKER_FILE is a path (str): it must be opened first
+                [vname,hostname]=line.split()
+                stop="vserver --silent %s stop"%vname
+                command=TestSsh(hostname).actual_command(stop)
+                utils.system(command)
+            clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
+            utils.system(self.test_ssh.actual_command(clean_tracker))
+        except (IOError,ValueError): pass    # best effort: missing tracker file or malformed entry
+        return True
+
+    # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
+    def cleanup_all_trackers (self):
         stop_all = "cd /vservers ; for i in * ; do vserver --silent $i stop ; done"
         utils.system(self.test_ssh.actual_command(stop_all))
         clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
         utils.system(self.test_ssh.actual_command(clean_tracker))
+        return True
 
     def uninstall(self):
         self.run_in_host("vserver --silent %s delete"%self.vservername)
@@ -355,7 +507,8 @@ class TestPlc:
 
     ### install_rpm
     def install_rpm(self):
-        return self.run_in_guest("yum -y install myplc-native")==0
+        return self.run_in_guest("yum -y install myplc-native")==0 \
+            and self.run_in_guest("yum -y install noderepo-%s-%s"%(self.options.pldistro,self.options.arch))==0
 
     ###
     def configure(self):
@@ -408,13 +561,18 @@ class TestPlc:
         dir="./keys"
         if not os.path.isdir(dir):
             os.mkdir(dir)
-        prefix = 'root_ssh_key'
         vservername=self.vservername
         overall=True
+        prefix = 'root_ssh_key'
         for ext in [ 'pub', 'rsa' ] :
             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
             dst="keys/%(vservername)s.%(ext)s"%locals()
             if self.test_ssh.fetch(src,dst) != 0: overall=False
+        prefix = 'debug_ssh_key'
+        for ext in [ 'pub', 'rsa' ] :
+            src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
+            dst="keys/%(vservername)s-debug.%(ext)s"%locals()
+            if self.test_ssh.fetch(src,dst) != 0: overall=False
         return overall
 
     def sites (self):
@@ -545,14 +703,14 @@ class TestPlc:
                       for node_spec in site_spec['nodes'] ]
         return hostnames
 
-    # gracetime : during the first minutes nothing gets printed
-    def do_nodes_booted (self, minutes, gracetime,period=15):
+    # silent_minutes : during the first minutes nothing gets printed
+    def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15):
         if self.options.dry_run:
             print 'dry_run'
             return True
         # compute timeout
-        timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
-        graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
+        timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
+        graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
         # the nodes that haven't checked yet - start with a full list and shrink over time
         tocheck = self.all_hostnames()
         utils.header("checking nodes %r"%tocheck)
@@ -565,21 +723,21 @@ class TestPlc:
             for array in tocheck_status:
                 hostname=array['hostname']
                 boot_state=array['boot_state']
-                if boot_state == 'boot':
-                    utils.header ("%s has reached the 'boot' state"%hostname)
+                if boot_state == target_boot_state:
+                    utils.header ("%s has reached the %s state"%(hostname,target_boot_state))
                 else:
                     # if it's a real node, never mind
                     (site_spec,node_spec)=self.locate_hostname(hostname)
                     if TestNode.is_real_model(node_spec['node_fields']['model']):
                         utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
                         # let's cheat
-                        boot_state = 'boot'
+                        boot_state = target_boot_state
                     elif datetime.datetime.now() > graceout:
                         utils.header ("%s still in '%s' state"%(hostname,boot_state))
                         graceout=datetime.datetime.now()+datetime.timedelta(1)
                 status[hostname] = boot_state
             # refresh tocheck
-            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
+            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ]
             if not tocheck:
                 return True
             if datetime.datetime.now() > timeout:
@@ -592,22 +750,42 @@ class TestPlc:
         return True
 
     def nodes_booted(self):
-        return self.do_nodes_booted(minutes=20,gracetime=15)
+        return self.nodes_check_boot_state('boot',timeout_minutes=20,silent_minutes=15)
 
-    def do_nodes_ssh(self,minutes,gracetime,period=15):
+    def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=20):
         # compute timeout
-        timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
-        graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
+        timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
+        graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
+        vservername=self.vservername
+        if debug:
+            message="debug"
+            local_key = "keys/%(vservername)s-debug.rsa"%locals()
+        else:
+            message="boot"
+            local_key = "keys/%(vservername)s.rsa"%locals()
         tocheck = self.all_hostnames()
-#        self.scan_publicKeys(tocheck)
-        utils.header("checking Connectivity on nodes %r"%tocheck)
+        utils.header("checking ssh access (expected in %s mode) to nodes %r"%(message,tocheck))
+        utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
+                         (timeout_minutes,silent_minutes,period))
         while tocheck:
             for hostname in tocheck:
-                # try to ssh in nodes
-                node_test_ssh = TestSsh (hostname,key="/etc/planetlab/root_ssh_key.rsa")
-                success=self.run_in_guest(node_test_ssh.actual_command("hostname"))==0
-                if success:
-                    utils.header('The node %s is sshable -->'%hostname)
+                # try to run 'hostname' in the node
+                command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a")
+                # don't spam logs - show the command only after the grace period
+                if datetime.datetime.now() > graceout:
+                    success=utils.system(command)
+                else:
+                    # truly silent, just print out a dot to show we're alive
+                    print '.',
+                    sys.stdout.flush()
+                    command += " 2>/dev/null"
+                    if self.options.dry_run:
+                        print 'dry_run',command
+                        success=0
+                    else:
+                        success=os.system(command)
+                if success==0:
+                    utils.header('Successfully entered root@%s (%s)'%(hostname,message))
                     # refresh tocheck
                     tocheck.remove(hostname)
                 else:
@@ -616,8 +794,6 @@ class TestPlc:
                     if TestNode.is_real_model(node_spec['node_fields']['model']):
                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
                         tocheck.remove(hostname)
-                    elif datetime.datetime.now() > graceout:
-                        utils.header("Could not ssh-enter root context on %s"%hostname)
             if not tocheck:
                 return True
             if datetime.datetime.now() > timeout:
@@ -629,8 +805,11 @@ class TestPlc:
         # only useful in empty plcs
         return True
 
-    def nodes_ssh(self):
-        return self.do_nodes_ssh(minutes=10,gracetime=5)
+    def nodes_ssh_debug(self):
+        return self.check_nodes_ssh(debug=True,timeout_minutes=30,silent_minutes=10)
+
+    def nodes_ssh_boot(self):
+        return self.check_nodes_ssh(debug=False,timeout_minutes=30,silent_minutes=10)
 
     @node_mapper
     def init_node (self): pass
@@ -744,7 +923,7 @@ class TestPlc:
         self.test_ssh.copy_abs("plcsh-stress-test.py",remote)
         command = location
         command += " -- --check"
-        if self.options.small_test:
+        if self.options.size == 1:
             command += " --tiny"
         return ( self.run_in_guest(command) == 0)
 