- ###the logic is quit wrong, must be rewritten
- def do_check_slices(self):
- # Do not wait here, as this step can be run directly in which case you don't want to wait
- # just add the 5 minutes to the overall timeout
- #utils.header("Waiting for the nodes to fully boot")
- #time.sleep(300)
- bool=bool1=True
- secondes=15
- self.clear_known_hosts()
- start_time = datetime.datetime.now()
- dead_time=start_time + datetime.timedelta(minutes=11)
- for slice_spec in self.test_plc.plc_spec['slices']:
- for hostname in slice_spec['nodenames']:
- slicename=slice_spec['slice_fields']['name']
- # locate the first avail. key
- found=False
- for username in slice_spec['usernames']:
- user_spec=self.test_site.locate_user(username)
- for keyname in user_spec['keynames']:
- key_spec=self.test_plc.locate_key(keyname)
- test_key=TestKey(self.test_plc,key_spec)
- publickey=test_key.publicpath()
- privatekey=test_key.privatepath()
- keyname=test_key.name()
- if os.path.isfile(publickey) and os.path.isfile(privatekey):
- found=True
- break
- if not found:
- raise Exception,"Cannot find a valid key for slice %s"%slicename
-
- # create dir in plc root image
- self.test_plc.run_in_guest("mkdir /root/keys")
- remote_privatekey="/root/keys/%s.rsa"%keyname
- self.test_plc.copy_in_guest(privatekey,remote_privatekey,True)
- while(bool):
- utils.header('restarting nm on %s'%hostname)
- access=self.test_plc.run_in_guest('ssh -i /etc/planetlab/root_ssh_key.rsa root@%s service nm restart'%hostname )
- if (access==0):
- utils.header('nm restarted on %s'%hostname)
- while(bool1):
- utils.header('trying to connect to %s@%s'%(slicename,hostname))
- Date=self.test_plc.run_in_guest('ssh -i %s %s@%s date'%(remote_privatekey,slicename,hostname))
- if (Date==0):
- break
- elif ( start_time <= dead_time ) :
- start_time=datetime.datetime.now()+ datetime.timedelta(seconds=30)
- time.sleep(secondes)
- else:
- bool1=False
- if(bool1):
- utils.header('connected to %s@%s -->'%(slicename,hostname))
+ def check_slice(self,options,timeout_minutes=20,silent_minutes=10,period=15):
+ timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
+ graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
+ # locate a key
+ (found,remote_privatekey)=self.locate_key()
+ if not found :
+ utils.header("WARNING: Cannot find a valid key for slice %s"%self.name())
+ return False
+
+ # convert nodenames to real hostnames
+ slice_spec = self.slice_spec
+ restarted=[]
+ tocheck=[]
+ for nodename in slice_spec['nodenames']:
+ (site_spec,node_spec) = self.test_plc.locate_node(nodename)
+ tocheck.append(node_spec['node_fields']['hostname'])
+
+ utils.header("checking ssh access into slice %s on nodes %r"%(self.name(),tocheck))
+ utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
+ (timeout_minutes,silent_minutes,period))
+ while tocheck:
+ for hostname in tocheck:
+ (site_spec,node_spec) = self.test_plc.locate_hostname(hostname)
+ date_test_ssh = TestSsh (hostname,key=remote_privatekey,username=self.name())
+ command = date_test_ssh.actual_command("echo hostname ; hostname; echo id; id; echo uname -a ; uname -a")
+ date = utils.system (command, silent=datetime.datetime.now() < graceout)
+ if date==0:
+ utils.header("Successfuly entered slice %s on %s"%(self.name(),hostname))
+ tocheck.remove(hostname)
+ else:
+ # real nodes will have been checked once in case they're up - skip if not
+ if TestNode.is_real_model(node_spec['node_fields']['model']):
+ utils.header("WARNING : Checking slice %s on real node %s skipped"%(self.name(),hostname))
+ tocheck.remove(hostname)
+ # nm restart after first failure, if requested
+ if options.forcenm and hostname not in restarted:
+ utils.header ("forcenm option : restarting nm on %s"%hostname)
+ restart_test_ssh=TestSsh(hostname,key="keys/key1.rsa")
+ access=restart_test_ssh.actual_command('service nm restart')
+ if (access==0):
+ utils.header('nm restarted on %s'%hostname)