- test_site = TestSite (self,site_spec)
- utils.header("Starting checking for nodes in site %s"%self.name())
- notfullybooted_nodes=[ node_spec['node_fields']['hostname'] for node_spec in site_spec['nodes'] ]
- nbr_nodes= len(notfullybooted_nodes)
- while (status):
- for node_spec in site_spec['nodes']:
- hostname=node_spec['node_fields']['hostname']
- if (hostname in notfullybooted_nodes): #to avoid requesting already booted node
- test_node=TestNode (self,test_site,node_spec)
- host_box=node_spec['node_fields']['host_box']
- node_status=test_node.get_node_status(hostname,host_box)
- if (node_status):
- booted_nodes.append(hostname)
- del notfullybooted_nodes[notfullybooted_nodes.index(hostname)]
- if ( not notfullybooted_nodes): break
- elif ( start_time <= dead_time ) :
- start_time=datetime.datetime.now()+ datetime.timedelta(minutes=2)
- time.sleep(15)
- else: status=False
- for nodeup in booted_nodes : utils.header("Node %s correctly installed and booted"%nodeup)
- for nodedown in notfullybooted_nodes : utils.header("Node %s not fully booted"%nodedown)
- return status
+ hostnames += [ node_spec['node_fields']['hostname'] \
+ for node_spec in site_spec['nodes'] ]
+ return hostnames
+
+ # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
+ def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15):
+ if self.options.dry_run:
+ print 'dry_run'
+ return True
+ # compute timeout
+ timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
+ graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
+ # the nodes that haven't checked yet - start with a full list and shrink over time
+ tocheck = self.all_hostnames()
+ utils.header("checking nodes %r"%tocheck)
+ # create a dict hostname -> status
+ status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
+ while tocheck:
+ # get their status
+ tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
+ # update status
+ for array in tocheck_status:
+ hostname=array['hostname']
+ boot_state=array['boot_state']
+ if boot_state == target_boot_state:
+ utils.header ("%s has reached the %s state"%(hostname,target_boot_state))
+ else:
+ # if it's a real node, never mind
+ (site_spec,node_spec)=self.locate_hostname(hostname)
+ if TestNode.is_real_model(node_spec['node_fields']['model']):
+ utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
+ # let's cheat
+ boot_state = target_boot_state
+ elif datetime.datetime.now() > graceout:
+ utils.header ("%s still in '%s' state"%(hostname,boot_state))
+ graceout=datetime.datetime.now()+datetime.timedelta(1)
+ status[hostname] = boot_state
+ # refresh tocheck
+ tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ]
+ if not tocheck:
+ return True
+ if datetime.datetime.now() > timeout:
+ for hostname in tocheck:
+ utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
+ return False
+ # otherwise, sleep for a while
+ time.sleep(period)
+ # only useful in empty plcs
+ return True
+
+ def nodes_booted(self):
+ return self.nodes_check_boot_state('boot',timeout_minutes=20,silent_minutes=15)
+
+ def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=20):
+ # compute timeout
+ timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
+ graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
+ vservername=self.vservername
+ if debug:
+ message="debug"
+ local_key = "keys/%(vservername)s-debug.rsa"%locals()
+ else:
+ message="boot"
+ local_key = "keys/%(vservername)s.rsa"%locals()
+ tocheck = self.all_hostnames()
+ utils.header("checking ssh access (expected in %s mode) to nodes %r"%(message,tocheck))
+ utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
+ (timeout_minutes,silent_minutes,period))
+ while tocheck:
+ for hostname in tocheck:
+ # try to run 'hostname' in the node
+ command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a")
+ # don't spam logs - show the command only after the grace period
+ if datetime.datetime.now() > graceout:
+ success=utils.system(command)
+ else:
+ # truly silent, just print out a dot to show we're alive
+ print '.',
+ sys.stdout.flush()
+ command += " 2>/dev/null"
+ if self.options.dry_run:
+ print 'dry_run',command
+ success=0
+ else:
+ success=os.system(command)
+ if success==0:
+ utils.header('Successfully entered root@%s (%s)'%(hostname,message))
+ # refresh tocheck
+ tocheck.remove(hostname)
+ else:
+ # we will have tried real nodes once, in case they're up - but if not, just skip
+ (site_spec,node_spec)=self.locate_hostname(hostname)
+ if TestNode.is_real_model(node_spec['node_fields']['model']):
+ utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
+ tocheck.remove(hostname)
+ if not tocheck:
+ return True
+ if datetime.datetime.now() > timeout:
+ for hostname in tocheck:
+ utils.header("FAILURE to ssh into %s"%hostname)
+ return False
+ # otherwise, sleep for a while
+ time.sleep(period)
+ # only useful in empty plcs
+ return True
+
+ def nodes_ssh_debug(self):
+ return self.check_nodes_ssh(debug=True,timeout_minutes=30,silent_minutes=10)