# Thierry Parmentelat <thierry.parmentelat@inria.fr>
# Copyright (C) 2010 INRIA
#
import os, os.path
import sys
import time
import datetime
import socket
import traceback
from types import StringTypes

import utils
from TestSite import TestSite
from TestNode import TestNode
from TestUser import TestUser
from TestKey import TestKey
from TestSlice import TestSlice
from TestSliver import TestSliver
from TestBox import TestBox
from TestSsh import TestSsh
from TestApiserver import TestApiserver
from TestSliceSfa import TestSliceSfa
from TestUserSfa import TestUserSfa
# step methods must take (self) and return a boolean (options is a member of the class)

def standby(minutes, dry_run):
    utils.header('Entering StandBy for %d mn' % minutes)
    if not dry_run:
        time.sleep(60 * minutes)
    return True

def standby_generic(func):
    def actual(self):
        minutes = int(func.__name__.split("_")[1])
        return standby(minutes, self.options.dry_run)
    return actual
def node_mapper(method):
    def actual(self):
        overall = True
        node_method = TestNode.__dict__[method.__name__]
        for site_spec in self.plc_spec['sites']:
            test_site = TestSite(self, site_spec)
            for node_spec in site_spec['nodes']:
                test_node = TestNode(self, test_site, node_spec)
                if not node_method(test_node): overall = False
        return overall
    # restore the doc text
    actual.__doc__ = method.__doc__
    return actual
def slice_mapper_options(method):
    def actual(self):
        overall = True
        slice_method = TestSlice.__dict__[method.__name__]
        for slice_spec in self.plc_spec['slices']:
            site_spec = self.locate_site(slice_spec['sitename'])
            test_site = TestSite(self, site_spec)
            test_slice = TestSlice(self, test_site, slice_spec)
            if not slice_method(test_slice, self.options): overall = False
        return overall
    # restore the doc text
    actual.__doc__ = method.__doc__
    return actual
def slice_mapper_options_sfa(method):
    def actual(self):
        test_plc = self
        overall = True
        slice_method = TestSliceSfa.__dict__[method.__name__]
        for slice_spec in self.plc_spec['sfa']['slices_sfa']:
            site_spec = self.locate_site(slice_spec['sitename'])
            test_site = TestSite(self, site_spec)
            test_slice = TestSliceSfa(test_plc, test_site, slice_spec)
            if not slice_method(test_slice, self.options): overall = False
        return overall
    # restore the doc text
    actual.__doc__ = method.__doc__
    return actual
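
# Illustrative sketch, not executed: this is how the mappers above are meant
# to be used inside TestPlc. The stub body is empty; the decorator replaces it
# with a loop that calls the same-named method on every TestNode (or
# TestSlice/TestSliceSfa) of the spec and ands the boolean results:
#
#   @node_mapper
#   def export_qemu (self):
#       "all nodes: push local node-dep directory on the qemu box"
#       pass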
# separators, only used when displaying the list of steps
SEP = '<sep>'
SEPSFA = '<sep-sfa>'

class TestPlc:

    default_steps = [
        'display', 'resources_pre', SEP,
        'delete_vs', 'create_vs', 'install', 'configure', 'start', SEP,
        'fetch_keys', 'store_keys', 'clear_known_hosts', SEP,
        'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP,
        'reinstall_node', 'init_node', 'bootcd', 'configure_qemu', 'export_qemu',
        'kill_all_qemus', 'start_node', SEP,
        # better use of time: do this now that the nodes are taking off
        'plcsh_stress_test', SEP,
        'install_sfa', 'configure_sfa', 'import_sfa', 'start_sfa', SEPSFA,
        'setup_sfa', 'add_sfa', 'update_sfa', 'view_sfa', SEPSFA,
        'nodes_ssh_debug', 'nodes_ssh_boot', 'check_slice', 'check_initscripts', SEPSFA,
        'check_slice_sfa', 'delete_sfa', 'stop_sfa', SEPSFA,
        'check_tcp', 'check_hooks', SEP,
        'force_gather_logs', 'force_resources_post', SEP,
        ]
    other_steps = [
        'show_boxes', 'resources_list', 'resources_release', 'resources_release_plc', 'resources_release_qemu', SEP,
        'stop', 'vs_start', SEP,
        'clean_initscripts', 'clean_nodegroups', 'clean_all_sites', SEP,
        'clean_sites', 'clean_nodes', 'clean_slices', 'clean_keys', SEP,
        'clean_leases', 'list_leases', SEP,
        'list_all_qemus', 'list_qemus', 'kill_qemus', SEP,
        'db_dump', 'db_restore', SEP,
        'standby_1 through 20', SEP,
        ]
    @staticmethod
    def printable_steps(steps):
        single_line = " ".join(steps) + " "
        return single_line.replace(" "+SEP+" ", " \\\n").replace(" "+SEPSFA+" ", " \\\n")

    @staticmethod
    def valid_step(step):
        return step != SEP and step != SEPSFA
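
    # Illustrative example: with steps ['display','resources_pre',SEP,'delete_vs'],
    # printable_steps returns
    #   "display resources_pre \
    #   delete_vs "
    # i.e. the SEP/SEPSFA markers render as backslash-newline breaks for display,
    # while valid_step filters them out before the steps actually run.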
    # turn off the sfa-related steps when the build has skipped SFA
    # this is originally for centos5 as recent SFAs won't build on this platform
    @staticmethod
    def check_whether_build_has_sfa(rpms_url):
        retcod = os.system("curl --silent %s/ | grep -q sfa" % rpms_url)
        # full builds are expected to return with 0 here
        if retcod != 0:
            # move all steps containing 'sfa' from default_steps to other_steps
            sfa_steps = [ step for step in TestPlc.default_steps if step.find('sfa') >= 0 ]
            TestPlc.other_steps += sfa_steps
            for step in sfa_steps: TestPlc.default_steps.remove(step)
    def __init__(self, plc_spec, options):
        self.plc_spec = plc_spec
        self.options = options
        self.test_ssh = TestSsh(self.plc_spec['hostname'], self.options.buildname)
        try:
            self.vserverip = plc_spec['vserverip']
            self.vservername = plc_spec['vservername']
            self.url = "https://%s:443/PLCAPI/" % plc_spec['vserverip']
        except:
            raise Exception,'chroot-based myplc testing is deprecated'
        self.apiserver = TestApiserver(self.url, options.dry_run)
    def name(self):
        name = self.plc_spec['name']
        return "%s.%s" % (name, self.vservername)

    def hostname(self):
        return self.plc_spec['hostname']

    def is_local(self):
        return self.test_ssh.is_local()
    # defining the API methods on this object through xmlrpc
    # would help, but is not strictly necessary
    def actual_command_in_guest(self, command):
        return self.test_ssh.actual_command(self.host_to_guest(command))

    def start_guest(self):
        return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))

    def run_in_guest(self, command):
        return utils.system(self.actual_command_in_guest(command))

    def run_in_host(self, command):
        return self.test_ssh.run_in_buildname(command)

    # command gets run in the vserver
    def host_to_guest(self, command):
        return "vserver %s exec %s" % (self.vservername, command)

    # command gets run in the vserver
    def start_guest_in_host(self):
        return "vserver %s start" % (self.vservername)

    def run_in_guest_piped(self, local, remote):
        return utils.system(local + " | " + self.test_ssh.actual_command(self.host_to_guest(remote), keep_stdin=True))
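
    # Illustrative composition (hypothetical names): with vservername 'vplc01'
    # on test box 'plcbox.example.org', run_in_guest("service plc start")
    # builds roughly:
    #   ssh root@plcbox.example.org vserver vplc01 exec service plc start
    # (modulo the exact quoting that TestSsh.actual_command applies), and
    # run_in_guest_piped feeds local stdout into such a command over stdin.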
    def auth_root(self):
        return {'Username': self.plc_spec['PLC_ROOT_USER'],
                'AuthMethod': 'password',
                'AuthString': self.plc_spec['PLC_ROOT_PASSWORD'],
                'Role': self.plc_spec['role'],
                }
    def locate_site(self, sitename):
        for site in self.plc_spec['sites']:
            if site['site_fields']['name'] == sitename:
                return site
            if site['site_fields']['login_base'] == sitename:
                return site
        raise Exception,"Cannot locate site %s"%sitename

    def locate_node(self, nodename):
        for site in self.plc_spec['sites']:
            for node in site['nodes']:
                if node['name'] == nodename:
                    return (site, node)
        raise Exception,"Cannot locate node %s"%nodename

    def locate_hostname(self, hostname):
        for site in self.plc_spec['sites']:
            for node in site['nodes']:
                if node['node_fields']['hostname'] == hostname:
                    return (site, node)
        raise Exception,"Cannot locate hostname %s"%hostname

    def locate_key(self, keyname):
        for key in self.plc_spec['keys']:
            if key['name'] == keyname:
                return key
        raise Exception,"Cannot locate key %s"%keyname

    def locate_slice(self, slicename):
        for slice in self.plc_spec['slices']:
            if slice['slice_fields']['name'] == slicename:
                return slice
        raise Exception,"Cannot locate slice %s"%slicename
    def all_sliver_objs(self):
        result = []
        for slice_spec in self.plc_spec['slices']:
            slicename = slice_spec['slice_fields']['name']
            for nodename in slice_spec['nodenames']:
                result.append(self.locate_sliver_obj(nodename, slicename))
        return result
    def locate_sliver_obj(self, nodename, slicename):
        (site, node) = self.locate_node(nodename)
        slice = self.locate_slice(slicename)
        test_site = TestSite(self, site)
        test_node = TestNode(self, test_site, node)
        # xxx the slice site is assumed to be the node site - mhh - probably harmless
        test_slice = TestSlice(self, test_site, slice)
        return TestSliver(self, test_node, test_slice)
    def locate_first_node(self):
        nodename = self.plc_spec['slices'][0]['nodenames'][0]
        (site, node) = self.locate_node(nodename)
        test_site = TestSite(self, site)
        test_node = TestNode(self, test_site, node)
        return test_node
    def locate_first_sliver(self):
        slice_spec = self.plc_spec['slices'][0]
        slicename = slice_spec['slice_fields']['name']
        nodename = slice_spec['nodenames'][0]
        return self.locate_sliver_obj(nodename, slicename)
    # all different hostboxes used in this plc
    def gather_hostBoxes(self):
        # maps on sites and nodes, return [ (host_box,test_node) ]
        tuples = []
        for site_spec in self.plc_spec['sites']:
            test_site = TestSite(self, site_spec)
            for node_spec in site_spec['nodes']:
                test_node = TestNode(self, test_site, node_spec)
                if not test_node.is_real():
                    tuples.append((test_node.host_box(), test_node))
        # transform into a dict { 'host_box' -> [ test_node .. ] }
        result = {}
        for (box, node) in tuples:
            if not result.has_key(box):
                result[box] = []
            result[box].append(node)
        return result
    # a step for checking this stuff
    def show_boxes(self):
        'print summary of nodes location'
        for (box, nodes) in self.gather_hostBoxes().iteritems():
            print box, ":", " + ".join([ node.name() for node in nodes ])
        return True
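
    # Sample output (hypothetical boxes and nodes):
    #   qemubox1.example.org : node1.example.org + node2.example.org
    #   qemubox2.example.org : node3.example.org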
    # make this a valid step
    def kill_all_qemus(self):
        'kill all qemu instances on the qemu boxes involved in this setup'
        # this is the brute force version, kill all qemus on that host box
        for (box, nodes) in self.gather_hostBoxes().iteritems():
            # pass the first nodename, as we don't push template-qemu on testboxes
            nodedir = nodes[0].nodedir()
            TestBox(box, self.options.buildname).kill_all_qemus(nodedir)
        return True
    # make this a valid step
    def list_all_qemus(self):
        'list all qemu instances on the qemu boxes involved in this setup'
        for (box, nodes) in self.gather_hostBoxes().iteritems():
            # this is the brute force version, list all qemus on that host box
            TestBox(box, self.options.buildname).list_all_qemus()
        return True
    # list only the right qemus
    def list_qemus(self):
        'list qemu instances for our nodes'
        for (box, nodes) in self.gather_hostBoxes().iteritems():
            # the fine-grain version
            for node in nodes:
                node.list_qemu()
        return True
    # kill only the right qemus
    def kill_qemus(self):
        'kill the qemu instances for our nodes'
        for (box, nodes) in self.gather_hostBoxes().iteritems():
            # the fine-grain version
            for node in nodes:
                node.kill_qemu()
        return True
    #################### display config
    def display(self):
        "show test configuration after localization"
        self.display_pass(1)
        self.display_pass(2)
        return True
    always_display_keys = ['PLC_WWW_HOST', 'nodes', 'sites',]
    def display_pass(self, passno):
        for (key, val) in self.plc_spec.iteritems():
            if not self.options.verbose and key not in TestPlc.always_display_keys: continue
            if passno == 2:
                if key == 'sites':
                    for site in val:
                        self.display_site_spec(site)
                        for node in site['nodes']:
                            self.display_node_spec(node)
                elif key == 'initscripts':
                    for initscript in val:
                        self.display_initscript_spec(initscript)
                elif key == 'slices':
                    for slice in val:
                        self.display_slice_spec(slice)
                elif key == 'keys':
                    for key_spec in val:
                        self.display_key_spec(key_spec)
            elif passno == 1:
                if key not in ['sites', 'initscripts', 'slices', 'keys', 'sfa']:
                    print '+ ', key, ':', val
    def display_site_spec(self, site):
        print '+ ======== site', site['site_fields']['name']
        for (k, v) in site.iteritems():
            if not self.options.verbose and k not in TestPlc.always_display_keys: continue
            if k == 'nodes':
                if v:
                    print '+ ', 'nodes : ',
                    for node in v:
                        print node['node_fields']['hostname'], '',
                    print ''
            elif k == 'users':
                if v:
                    print '+ ', 'users : ',
                    for user in v:
                        print user['name'], '',
                    print ''
            elif k == 'site_fields':
                print '+ login_base', ':', v['login_base']
            elif k == 'address_fields':
                pass
    def display_initscript_spec(self, initscript):
        print '+ ======== initscript', initscript['initscript_fields']['name']

    def display_key_spec(self, key):
        print '+ ======== key', key['name']
    def display_slice_spec(self, slice):
        print '+ ======== slice', slice['slice_fields']['name']
        for (k, v) in slice.iteritems():
            if k == 'nodenames':
                if v:
                    print '+ nodes : ',
                    for nodename in v:
                        print nodename, '',
                    print ''
            elif k == 'usernames':
                if v:
                    print '+ users : ',
                    for username in v:
                        print username, '',
                    print ''
            elif k == 'slice_fields':
                print '+ fields', ':',
                print 'max_nodes=', v['max_nodes'],
                print ''
    def display_node_spec(self, node):
        print "+ node=%s host_box=%s" % (node['name'], node['host_box']),
        print "hostname=", node['node_fields']['hostname'],
        print "ip=", node['interface_fields']['ip']
        if self.options.verbose:
            utils.pprint("node details", node, depth=3)
    # another entry point for just showing the boxes involved
    def display_mapping(self):
        TestPlc.display_mapping_plc(self.plc_spec)
        return True

    @staticmethod
    def display_mapping_plc(plc_spec):
        print '+ MyPLC', plc_spec['name']
        print '+\tvserver address = root@%s:/vservers/%s' % (plc_spec['hostname'], plc_spec['vservername'])
        print '+\tIP = %s/%s' % (plc_spec['PLC_API_HOST'], plc_spec['vserverip'])
        for site_spec in plc_spec['sites']:
            for node_spec in site_spec['nodes']:
                TestPlc.display_mapping_node(node_spec)
    @staticmethod
    def display_mapping_node(node_spec):
        print '+ NODE %s' % (node_spec['name'])
        print '+\tqemu box %s' % node_spec['host_box']
        print '+\thostname=%s' % node_spec['node_fields']['hostname']
    def resources_pre(self):
        "run site-dependent pre-test script as defined in LocalTestResources"
        from LocalTestResources import local_resources
        return local_resources.step_pre(self)

    def resources_post(self):
        "run site-dependent post-test script as defined in LocalTestResources"
        from LocalTestResources import local_resources
        return local_resources.step_post(self)

    def resources_list(self):
        "run site-dependent list script as defined in LocalTestResources"
        from LocalTestResources import local_resources
        return local_resources.step_list(self)

    def resources_release(self):
        "run site-dependent release script as defined in LocalTestResources"
        from LocalTestResources import local_resources
        return local_resources.step_release(self)

    def resources_release_plc(self):
        "run site-dependent release script as defined in LocalTestResources"
        from LocalTestResources import local_resources
        return local_resources.step_release_plc(self)

    def resources_release_qemu(self):
        "run site-dependent release script as defined in LocalTestResources"
        from LocalTestResources import local_resources
        return local_resources.step_release_qemu(self)
463 "vserver delete the test myplc"
464 self.run_in_host("vserver --silent %s delete"%self.vservername)
    # historically the build was being fetched by the tests
    # now the build pushes itself as a subdir of the tests workdir
    # so that the tests do not have to worry about extracting the build (svn, git, or whatever)
    def create_vs(self):
        "vserver creation (no install done)"
        # push the local build/ dir to the testplc box
        if self.is_local():
            # a full path for the local calls
            build_dir = os.path.dirname(sys.argv[0])
            # sometimes this is empty - set to "." in such a case
            if not build_dir: build_dir = "."
            build_dir += "/build"
        else:
            # use a standard name - will be relative to remote buildname
            build_dir = "build"
        # remove for safety; do *not* mkdir first, otherwise we end up with build/build/
        self.test_ssh.rmdir(build_dir)
        self.test_ssh.copy(build_dir, recursive=True)
        # the repo url is taken from arch-rpms-url
        # with the last step (i386) removed
        repo_url = self.options.arch_rpms_url
        for level in [ 'arch' ]:
            repo_url = os.path.dirname(repo_url)
        # pass the vbuild-nightly options to vtest-init-vserver
        test_env_options = ""
        test_env_options += " -p %s" % self.options.personality
        test_env_options += " -d %s" % self.options.pldistro
        test_env_options += " -f %s" % self.options.fcdistro
        script = "vtest-init-vserver.sh"
        vserver_name = self.vservername
        vserver_options = "--netdev eth0 --interface %s" % self.vserverip
        try:
            vserver_hostname = socket.gethostbyaddr(self.vserverip)[0]
            vserver_options += " --hostname %s" % vserver_hostname
        except:
            print "Cannot reverse lookup %s" % self.vserverip
            print "This is considered fatal, as this might pollute the test results"
            return False
        create_vserver = "%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s" % locals()
        return self.run_in_host(create_vserver) == 0
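
    # For illustration (hypothetical values): with personality=linux32,
    # pldistro=planetlab, fcdistro=f8, vservername=vplc01 and an rpms url of
    # http://build.example.org/f8/RPMS/i386, the command assembled above is:
    #   build/vtest-init-vserver.sh -p linux32 -d planetlab -f f8 vplc01 \
    #       http://build.example.org/f8/RPMS -- --netdev eth0 --interface <vserverip>
    # plus --hostname <reverse-lookup> when the IP resolves.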
511 "yum install myplc, noderepo, and the plain bootstrapfs"
513 # workaround for getting pgsql8.2 on centos5
514 if self.options.fcdistro == "centos5":
515 self.run_in_guest("rpm -Uvh http://download.fedora.redhat.com/pub/epel/5/i386/epel-release-5-3.noarch.rpm")
517 if self.options.personality == "linux32":
519 elif self.options.personality == "linux64":
522 raise Exception, "Unsupported personality %r"%self.options.personality
524 nodefamily="%s-%s-%s"%(self.options.pldistro,self.options.fcdistro,arch)
526 # try to install slicerepo - not fatal yet
527 self.run_in_guest("yum -y install slicerepo-%s"%nodefamily)
530 self.run_in_guest("yum -y install myplc")==0 and \
531 self.run_in_guest("yum -y install noderepo-%s"%nodefamily)==0 and \
532 self.run_in_guest("yum -y install bootstrapfs-%s-plain"%nodefamily)==0
    def configure(self):
        "run plc-config-tty"
        tmpname = '%s.plc-config-tty' % (self.name())
        fileconf = open(tmpname, 'w')
        for var in [ 'PLC_NAME',
                     'PLC_MAIL_SUPPORT_ADDRESS',
                     # Above line was added for integrating SFA Testing
                     'PLC_RESERVATION_GRANULARITY',
                     ]:
            fileconf.write('e %s\n%s\n' % (var, self.plc_spec[var]))
        fileconf.write('w\n')
        fileconf.write('q\n')
        fileconf.close()
        utils.system('cat %s' % tmpname)
        self.run_in_guest_piped('cat %s' % tmpname, 'plc-config-tty')
        utils.system('rm %s' % tmpname)
        return True
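
    # The temp file feeds plc-config-tty's interactive prompt: for each
    # variable an 'e <var>' line followed by its value, then 'w' (write)
    # and 'q' (quit). E.g. (hypothetical value):
    #   e PLC_NAME
    #   TestLab
    #   w
    #   q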
    def start(self):
        "service plc start"
        self.run_in_guest('service plc start')
        return True

    def stop(self):
        "service plc stop"
        self.run_in_guest('service plc stop')
        return True

    def vs_start(self):
        "start the PLC vserver"
        self.start_guest()
        return True
    # stores the keys from the config for further use
    def store_keys(self):
        "stores test users ssh keys in keys/"
        for key_spec in self.plc_spec['keys']:
            TestKey(self, key_spec).store_key()
        return True

    def clean_keys(self):
        "removes keys cached in keys/"
        utils.system("rm -rf %s/keys/" % os.path.dirname(sys.argv[0]))
        return True
    # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
    # for later direct access to the nodes
    def fetch_keys(self):
        "gets ssh keys in /etc/planetlab/ and stores them locally in keys/"
        dir = "./keys"
        if not os.path.isdir(dir):
            os.mkdir(dir)
        vservername = self.vservername
        overall = True
        prefix = 'debug_ssh_key'
        for ext in [ 'pub', 'rsa' ]:
            src = "/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s" % locals()
            dst = "keys/%(vservername)s-debug.%(ext)s" % locals()
            if self.test_ssh.fetch(src, dst) != 0: overall = False
        return overall
608 "create sites with PLCAPI"
609 return self.do_sites()
611 def clean_sites (self):
612 "delete sites with PLCAPI"
613 return self.do_sites(action="delete")
615 def do_sites (self,action="add"):
616 for site_spec in self.plc_spec['sites']:
617 test_site = TestSite (self,site_spec)
618 if (action != "add"):
619 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
620 test_site.delete_site()
621 # deleted with the site
622 #test_site.delete_users()
625 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
626 test_site.create_site()
627 test_site.create_users()
    def clean_all_sites(self):
        "Delete all sites in PLC, and related objects"
        print 'auth_root', self.auth_root()
        site_ids = [s['site_id'] for s in self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])]
        for site_id in site_ids:
            print 'Deleting site_id', site_id
            self.apiserver.DeleteSite(self.auth_root(), site_id)
        return True
639 "create nodes with PLCAPI"
640 return self.do_nodes()
641 def clean_nodes (self):
642 "delete nodes with PLCAPI"
643 return self.do_nodes(action="delete")
645 def do_nodes (self,action="add"):
646 for site_spec in self.plc_spec['sites']:
647 test_site = TestSite (self,site_spec)
649 utils.header("Deleting nodes in site %s"%test_site.name())
650 for node_spec in site_spec['nodes']:
651 test_node=TestNode(self,test_site,node_spec)
652 utils.header("Deleting %s"%test_node.name())
653 test_node.delete_node()
655 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
656 for node_spec in site_spec['nodes']:
657 utils.pprint('Creating node %s'%node_spec,node_spec)
658 test_node = TestNode (self,test_site,node_spec)
659 test_node.create_node ()
    def nodegroups(self):
        "create nodegroups with PLCAPI"
        return self.do_nodegroups("add")

    def clean_nodegroups(self):
        "delete nodegroups with PLCAPI"
        return self.do_nodegroups("delete")
    # time utilities for leases: a timestamp below one year is interpreted
    # as relative to 'start', in units of 'grain'
    YEAR = 365*24*3600
    @staticmethod
    def translate_timestamp(start, grain, timestamp):
        if timestamp < TestPlc.YEAR: return start + timestamp*grain
        else: return timestamp

    @staticmethod
    def timestamp_printable(timestamp):
        return time.strftime('%m-%d %H:%M:%S UTC', time.gmtime(timestamp))
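
    # Worked example (hypothetical numbers): with grain=1800 (30 mn) and a
    # grain-aligned start of 1262304000, a spec value of 2 is below YEAR so
    # it is read as relative: 1262304000 + 2*1800 = 1262307600. A value like
    # 1262307600 itself exceeds YEAR and is kept as an absolute UNIX timestamp.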
680 "create leases (on reservable nodes only, use e.g. run -c default -c resa)"
682 grain=self.apiserver.GetLeaseGranularity(self.auth_root())
683 print 'API answered grain=',grain
684 start=(now/grain)*grain
686 # find out all nodes that are reservable
687 nodes=self.all_reservable_nodenames()
689 utils.header ("No reservable node found - proceeding without leases")
692 # attach them to the leases as specified in plc_specs
693 # this is where the 'leases' field gets interpreted as relative of absolute
694 for lease_spec in self.plc_spec['leases']:
695 # skip the ones that come with a null slice id
696 if not lease_spec['slice']: continue
697 lease_spec['t_from']=TestPlc.translate_timestamp(start,grain,lease_spec['t_from'])
698 lease_spec['t_until']=TestPlc.translate_timestamp(start,grain,lease_spec['t_until'])
699 lease_addition=self.apiserver.AddLeases(self.auth_root(),nodes,
700 lease_spec['slice'],lease_spec['t_from'],lease_spec['t_until'])
701 if lease_addition['errors']:
702 utils.header("Cannot create leases, %s"%lease_addition['errors'])
705 utils.header('Leases on nodes %r for %s from %d (%s) until %d (%s)'%\
706 (nodes,lease_spec['slice'],
707 lease_spec['t_from'],TestPlc.timestamp_printable(lease_spec['t_from']),
708 lease_spec['t_until'],TestPlc.timestamp_printable(lease_spec['t_until'])))
    def clean_leases(self):
        "remove all leases on the myplc side"
        lease_ids = [ l['lease_id'] for l in self.apiserver.GetLeases(self.auth_root()) ]
        utils.header("Cleaning leases %r" % lease_ids)
        self.apiserver.DeleteLeases(self.auth_root(), lease_ids)
        return True
    def list_leases(self):
        "list all leases known to the myplc"
        leases = self.apiserver.GetLeases(self.auth_root())
        now = int(time.time())
        for l in leases:
            current = l['t_until'] >= now
            if self.options.verbose or current:
                utils.header("%s %s from %s until %s" % (l['hostname'], l['name'],
                                                         TestPlc.timestamp_printable(l['t_from']),
                                                         TestPlc.timestamp_printable(l['t_until'])))
        return True
    # create nodegroups if needed, and populate
    def do_nodegroups(self, action="add"):
        # 1st pass to scan contents
        groups_dict = {}
        for site_spec in self.plc_spec['sites']:
            test_site = TestSite(self, site_spec)
            for node_spec in site_spec['nodes']:
                test_node = TestNode(self, test_site, node_spec)
                if node_spec.has_key('nodegroups'):
                    nodegroupnames = node_spec['nodegroups']
                    if isinstance(nodegroupnames, StringTypes):
                        nodegroupnames = [ nodegroupnames ]
                    for nodegroupname in nodegroupnames:
                        if not groups_dict.has_key(nodegroupname):
                            groups_dict[nodegroupname] = []
                        groups_dict[nodegroupname].append(test_node.name())
        auth = self.auth_root()
        overall = True
        for (nodegroupname, group_nodes) in groups_dict.iteritems():
            if action == "add":
                print 'nodegroups:', 'dealing with nodegroup', nodegroupname, 'on nodes', group_nodes
                # first, check if the nodetagtype is here
                tag_types = self.apiserver.GetTagTypes(auth, {'tagname': nodegroupname})
                if tag_types:
                    tag_type_id = tag_types[0]['tag_type_id']
                else:
                    tag_type_id = self.apiserver.AddTagType(auth,
                                                            {'tagname': nodegroupname,
                                                             'description': 'for nodegroup %s' % nodegroupname,
                                                             })
                print 'located tag (type)', nodegroupname, 'as', tag_type_id
                # then create the nodegroup itself if needed
                nodegroups = self.apiserver.GetNodeGroups(auth, {'groupname': nodegroupname})
                if not nodegroups:
                    self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
                    print 'created nodegroup', nodegroupname, 'from tagname', nodegroupname, 'and value', 'yes'
                # set node tag on all nodes, value='yes'
                for nodename in group_nodes:
                    try:
                        self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
                    except:
                        traceback.print_exc()
                        print 'node', nodename, 'seems to already have tag', nodegroupname
                    # check it anyway
                    try:
                        expect_yes = self.apiserver.GetNodeTags(auth,
                                                                {'hostname': nodename,
                                                                 'tagname': nodegroupname},
                                                                ['value'])[0]['value']
                        if expect_yes != "yes":
                            print 'Mismatch node tag on node', nodename, 'got', expect_yes
                            overall = False
                    except:
                        if not self.options.dry_run:
                            print 'Cannot find tag', nodegroupname, 'on node', nodename
                            overall = False
            else:
                try:
                    print 'cleaning nodegroup', nodegroupname
                    self.apiserver.DeleteNodeGroup(auth, nodegroupname)
                except:
                    traceback.print_exc()
                    overall = False
        return overall
    # return a list of tuples (nodename,qemuname)
    def all_node_infos(self):
        node_infos = []
        for site_spec in self.plc_spec['sites']:
            node_infos += [ (node_spec['node_fields']['hostname'], node_spec['host_box']) \
                            for node_spec in site_spec['nodes'] ]
        return node_infos

    def all_nodenames(self): return [ x[0] for x in self.all_node_infos() ]

    def all_reservable_nodenames(self):
        res = []
        for site_spec in self.plc_spec['sites']:
            for node_spec in site_spec['nodes']:
                node_fields = node_spec['node_fields']
                if 'node_type' in node_fields and node_fields['node_type'] == 'reservable':
                    res.append(node_fields['hostname'])
        return res
    # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
    def nodes_check_boot_state(self, target_boot_state, timeout_minutes, silent_minutes, period=15):
        if self.options.dry_run:
            print 'dry_run'
            return True
        # compute timeout
        timeout = datetime.datetime.now() + datetime.timedelta(minutes=timeout_minutes)
        graceout = datetime.datetime.now() + datetime.timedelta(minutes=silent_minutes)
        # the nodes that haven't checked yet - start with a full list and shrink over time
        tocheck = self.all_nodenames()
        utils.header("checking nodes %r" % tocheck)
        # create a dict hostname -> status
        status = dict([ (hostname, 'undef') for hostname in tocheck ])
        while tocheck:
            # get their status
            tocheck_status = self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname', 'boot_state'])
            # update status
            for array in tocheck_status:
                hostname = array['hostname']
                boot_state = array['boot_state']
                if boot_state == target_boot_state:
                    utils.header("%s has reached the %s state" % (hostname, target_boot_state))
                else:
                    # if it's a real node, never mind
                    (site_spec, node_spec) = self.locate_hostname(hostname)
                    if TestNode.is_real_model(node_spec['node_fields']['model']):
                        utils.header("WARNING - Real node %s in %s - ignored" % (hostname, boot_state))
                        # let's pretend it has reached the target state
                        boot_state = target_boot_state
                    elif datetime.datetime.now() > graceout:
                        utils.header("%s still in '%s' state" % (hostname, boot_state))
                        graceout = datetime.datetime.now() + datetime.timedelta(1)
                status[hostname] = boot_state
            # refresh tocheck
            tocheck = [ hostname for (hostname, boot_state) in status.iteritems() if boot_state != target_boot_state ]
            if not tocheck:
                return True
            if datetime.datetime.now() > timeout:
                for hostname in tocheck:
                    utils.header("FAILURE due to %s in '%s' state" % (hostname, status[hostname]))
                return False
            # otherwise, sleep for a while
            time.sleep(period)
        # only useful in empty plcs
        return True

    def nodes_booted(self):
        return self.nodes_check_boot_state('boot', timeout_minutes=30, silent_minutes=20)
    def check_nodes_ssh(self, debug, timeout_minutes, silent_minutes, period=15):
        # compute timeout
        timeout = datetime.datetime.now() + datetime.timedelta(minutes=timeout_minutes)
        graceout = datetime.datetime.now() + datetime.timedelta(minutes=silent_minutes)
        vservername = self.vservername
        if debug:
            message = "debug"
            local_key = "keys/%(vservername)s-debug.rsa" % locals()
        else:
            message = "boot"
            local_key = "keys/key1.rsa"
        node_infos = self.all_node_infos()
        utils.header("checking ssh access (expected in %s mode) to nodes:" % message)
        for (nodename, qemuname) in node_infos:
            utils.header("hostname=%s -- qemubox=%s" % (nodename, qemuname))
        utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
                     (timeout_minutes, silent_minutes, period))
        while node_infos:
            for node_info in node_infos:
                (hostname, qemuname) = node_info
                # try to run 'hostname' in the node
                command = TestSsh(hostname, key=local_key).actual_command("hostname;uname -a")
                # don't spam logs - show the command only after the grace period
                success = utils.system(command, silent=datetime.datetime.now() < graceout)
                if success == 0:
                    utils.header('Successfully entered root@%s (%s)' % (hostname, message))
                    # no need to check this node again
                    node_infos.remove(node_info)
                else:
                    # we will have tried real nodes once, in case they're up - but if not, just skip
                    (site_spec, node_spec) = self.locate_hostname(hostname)
                    if TestNode.is_real_model(node_spec['node_fields']['model']):
                        utils.header("WARNING : check ssh access into real node %s - skipped" % hostname)
                        node_infos.remove(node_info)
            if not node_infos:
                return True
            if datetime.datetime.now() > timeout:
                for (hostname, qemuname) in node_infos:
                    utils.header("FAILURE to ssh into %s (on %s)" % (hostname, qemuname))
                return False
            # otherwise, sleep for a while
            time.sleep(period)
        # only useful in empty plcs
        return True

    def nodes_ssh_debug(self):
        "Tries to ssh into nodes in debug mode with the debug ssh key"
        return self.check_nodes_ssh(debug=True, timeout_minutes=10, silent_minutes=5)

    def nodes_ssh_boot(self):
        "Tries to ssh into nodes in production mode with the root ssh key"
        return self.check_nodes_ssh(debug=False, timeout_minutes=40, silent_minutes=15)
    @node_mapper
    def init_node(self):
        "all nodes : init a clean local directory for holding node-dep stuff like iso image..."
        pass

    @node_mapper
    def bootcd(self):
        "all nodes: invoke GetBootMedium and store result locally"
        pass

    @node_mapper
    def configure_qemu(self):
        "all nodes: compute qemu config qemu.conf and store it locally"
        pass

    @node_mapper
    def reinstall_node(self):
        "all nodes: mark PLCAPI boot_state as reinstall"
        pass

    @node_mapper
    def export_qemu(self):
        "all nodes: push local node-dep directory on the qemu box"
        pass
    ### check hooks : invoke scripts from hooks/{node,slice}
    def check_hooks_node(self):
        return self.locate_first_node().check_hooks()

    def check_hooks_sliver(self):
        return self.locate_first_sliver().check_hooks()

    def check_hooks(self):
        "runs unit tests in the node and slice contexts - see hooks/{node,slice}"
        return self.check_hooks_node() and self.check_hooks_sliver()
    def do_check_initscripts(self):
        overall = True
        for slice_spec in self.plc_spec['slices']:
            if not slice_spec.has_key('initscriptname'):
                continue
            initscript = slice_spec['initscriptname']
            for nodename in slice_spec['nodenames']:
                (site, node) = self.locate_node(nodename)
                # xxx - passing the wrong site - probably harmless
                test_site = TestSite(self, site)
                test_slice = TestSlice(self, test_site, slice_spec)
                test_node = TestNode(self, test_site, node)
                test_sliver = TestSliver(self, test_node, test_slice)
                if not test_sliver.check_initscript(initscript):
                    overall = False
        return overall

    def check_initscripts(self):
        "check that the initscripts have triggered"
        return self.do_check_initscripts()
    def initscripts(self):
        "create initscripts with PLCAPI"
        for initscript in self.plc_spec['initscripts']:
            utils.pprint('Adding Initscript in plc %s' % self.plc_spec['name'], initscript)
            self.apiserver.AddInitScript(self.auth_root(), initscript['initscript_fields'])
        return True

    def clean_initscripts(self):
        "delete initscripts with PLCAPI"
        for initscript in self.plc_spec['initscripts']:
            initscript_name = initscript['initscript_fields']['name']
            print('Attempting to delete %s in plc %s' % (initscript_name, self.plc_spec['name']))
            try:
                self.apiserver.DeleteInitScript(self.auth_root(), initscript_name)
                print initscript_name, 'deleted'
            except:
                print 'deletion went wrong - probably did not exist'
        return True
991 "create slices with PLCAPI"
992 return self.do_slices()
994 def clean_slices (self):
995 "delete slices with PLCAPI"
996 return self.do_slices("delete")
998 def do_slices (self, action="add"):
999 for slice in self.plc_spec['slices']:
1000 site_spec = self.locate_site (slice['sitename'])
1001 test_site = TestSite(self,site_spec)
1002 test_slice=TestSlice(self,test_site,slice)
1004 utils.header("Deleting slices in site %s"%test_site.name())
1005 test_slice.delete_slice()
1007 utils.pprint("Creating slice",slice)
1008 test_slice.create_slice()
1009 utils.header('Created Slice %s'%slice['slice_fields']['name'])
    @slice_mapper_options
    def check_slice(self):
        "tries to ssh-enter the slice with the user key, to ensure slice creation"
        pass

    @node_mapper
    def clear_known_hosts(self):
        "remove test nodes entries from the local known_hosts file"
        pass

    @node_mapper
    def start_node(self):
        "all nodes: start the qemu instance (also runs qemu-bridge-init start)"
        pass
    def check_tcp(self):
        "check TCP connectivity between 2 slices (or in loopback if only one is defined)"
        specs = self.plc_spec['tcp_test']
        overall = True
        for spec in specs:
            port = spec['port']
            # server side
            s_test_sliver = self.locate_sliver_obj(spec['server_node'], spec['server_slice'])
            if not s_test_sliver.run_tcp_server(port, timeout=10):
                overall = False
                break
            # idem for the client side
            c_test_sliver = self.locate_sliver_obj(spec['client_node'], spec['client_slice'])
            if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(), port):
                overall = False
        return overall
    def plcsh_stress_test(self):
        "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents"
        # install the stress-test in the plc image
        location = "/usr/share/plc_api/plcsh_stress_test.py"
        remote = "/vservers/%s/%s" % (self.vservername, location)
        self.test_ssh.copy_abs("plcsh_stress_test.py", remote)
        command = location
        command += " -- --check"
        if self.options.size == 1:
            command += " --tiny"
        return (self.run_in_guest(command) == 0)

    # populate runs the same utility with slightly different options
    # in particular it runs with --preserve (don't cleanup) and without --check
    # also it gets run twice, once with the --foreign option for creating fake foreign entries
    def install_sfa(self):
        "yum install sfa, sfa-plc and sfa-client"
        if self.options.personality == "linux32":
            arch = "i386"
        elif self.options.personality == "linux64":
            arch = "x86_64"
        else:
            raise Exception, "Unsupported personality %r"%self.options.personality
        return self.run_in_guest("yum -y install sfa sfa-client sfa-plc sfa-sfatables") == 0
    def configure_sfa(self):
        "run sfa-config-tty"
        tmpname = '%s.sfa-config-tty' % (self.name())
        fileconf = open(tmpname, 'w')
        for var in [ 'SFA_REGISTRY_ROOT_AUTH',
                     # 'SFA_REGISTRY_LEVEL1_AUTH',
                     'SFA_REGISTRY_HOST',
                     'SFA_AGGREGATE_HOST',
                     'SFA_PLC_DB_PASSWORD',
                     ]:
            fileconf.write('e %s\n%s\n' % (var, self.plc_spec['sfa'][var]))
        fileconf.write('w\n')
        fileconf.write('R\n')
        fileconf.write('q\n')
        fileconf.close()
        utils.system('cat %s' % tmpname)
        self.run_in_guest_piped('cat %s' % tmpname, 'sfa-config-tty')
        utils.system('rm %s' % tmpname)
        return True
    def import_sfa(self):
        "run sfa-import-plc.py"
        auth = self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH']
        return self.run_in_guest('sfa-import-plc.py') == 0
        # not needed anymore
        # self.run_in_guest('cp /etc/sfa/authorities/%s/%s.pkey /etc/sfa/authorities/server.key'%(auth,auth))

    def start_sfa(self):
        "service sfa start"
        return self.run_in_guest('service sfa start') == 0
    def setup_sfa(self):
        "sfi client configuration"
        dir_name = ".sfi"
        if os.path.exists(dir_name):
            utils.system('rm -rf %s' % dir_name)
        utils.system('mkdir %s' % dir_name)
        file_name = dir_name + os.sep + 'fake-pi1.pkey'
        fileconf = open(file_name, 'w')
        fileconf.write(self.plc_spec['keys'][0]['private'])
        fileconf.close()

        file_name = dir_name + os.sep + 'sfi_config'
        fileconf = open(file_name, 'w')
        SFI_AUTH = self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH'] + ".main"
        fileconf.write("SFI_AUTH='%s'" % SFI_AUTH)
        fileconf.write('\n')
        SFI_USER = SFI_AUTH + '.fake-pi1'
        fileconf.write("SFI_USER='%s'" % SFI_USER)
        fileconf.write('\n')
        SFI_REGISTRY = 'http://' + self.plc_spec['sfa']['SFA_PLC_DB_HOST'] + ':12345/'
        fileconf.write("SFI_REGISTRY='%s'" % SFI_REGISTRY)
        fileconf.write('\n')
        SFI_SM = 'http://' + self.plc_spec['sfa']['SFA_PLC_DB_HOST'] + ':12347/'
        fileconf.write("SFI_SM='%s'" % SFI_SM)
        fileconf.write('\n')
        fileconf.close()

        file_name = dir_name + os.sep + 'person.xml'
        fileconf = open(file_name, 'w')
        for record in self.plc_spec['sfa']['sfa_person_xml']:
            person_record = record
        fileconf.write(person_record)
        fileconf.write('\n')
        fileconf.close()

        file_name = dir_name + os.sep + 'slice.xml'
        fileconf = open(file_name, 'w')
        for record in self.plc_spec['sfa']['sfa_slice_xml']:
            slice_record = record
        #slice_record=self.plc_spec['sfa']['sfa_slice_xml']
        fileconf.write(slice_record)
        fileconf.write('\n')
        fileconf.close()

        file_name = dir_name + os.sep + 'slice.rspec'
        fileconf = open(file_name, 'w')
        slice_rspec = ''
        for (key, value) in self.plc_spec['sfa']['sfa_slice_rspec'].items():
            slice_rspec += value
        fileconf.write(slice_rspec)
        fileconf.write('\n')
        fileconf.close()

        location = "root/"
        remote = "/vservers/%s/%s" % (self.vservername, location)
        self.test_ssh.copy_abs(dir_name, remote, recursive=True)

        #utils.system('cat %s'%tmpname)
        utils.system('rm -rf %s' % dir_name)
        return True
1171 "run sfi.py add (on Registry) and sfi.py create (on SM) to form new objects"
1173 test_user_sfa=TestUserSfa(test_plc,self.plc_spec['sfa'])
1174 success=test_user_sfa.add_user()
1176 for slice_spec in self.plc_spec['sfa']['slices_sfa']:
1177 site_spec = self.locate_site (slice_spec['sitename'])
1178 test_site = TestSite(self,site_spec)
1179 test_slice_sfa=TestSliceSfa(test_plc,test_site,slice_spec)
1180 success1=test_slice_sfa.add_slice()
1181 success2=test_slice_sfa.create_slice()
1182 return success and success1 and success2
    def update_sfa(self):
        "run sfi.py update (on Registry) and sfi.py create (on SM) on existing objects"
        test_plc = self
        test_user_sfa = TestUserSfa(test_plc, self.plc_spec['sfa'])
        success1 = test_user_sfa.update_user()

        for slice_spec in self.plc_spec['sfa']['slices_sfa']:
            site_spec = self.locate_site(slice_spec['sitename'])
            test_site = TestSite(self, site_spec)
            test_slice_sfa = TestSliceSfa(test_plc, test_site, slice_spec)
            success2 = test_slice_sfa.update_slice()
        return success1 and success2
1198 "run sfi.py list and sfi.py show (both on Registry) and sfi.py slices and sfi.py resources (both on SM)"
1199 auth=self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH']
1201 self.run_in_guest("sfi.py -d /root/.sfi/ list %s.main"%auth)==0 and \
1202 self.run_in_guest("sfi.py -d /root/.sfi/ show %s.main"%auth)==0 and \
1203 self.run_in_guest("sfi.py -d /root/.sfi/ slices")==0 and \
1204 self.run_in_guest("sfi.py -d /root/.sfi/ resources -o resources")==0
    @slice_mapper_options_sfa
    def check_slice_sfa(self):
        "tries to ssh-enter the SFA slice"
        pass

    def delete_sfa(self):
        "run sfi.py delete (on SM), sfi.py remove (on Registry)"
        test_plc = self
        test_user_sfa = TestUserSfa(test_plc, self.plc_spec['sfa'])
        success1 = test_user_sfa.delete_user()
        for slice_spec in self.plc_spec['sfa']['slices_sfa']:
            site_spec = self.locate_site(slice_spec['sitename'])
            test_site = TestSite(self, site_spec)
            test_slice_sfa = TestSliceSfa(test_plc, test_site, slice_spec)
            success2 = test_slice_sfa.delete_slice()
        return success1 and success2
    def stop_sfa(self):
        "service sfa stop"
        return self.run_in_guest('service sfa stop') == 0
    def populate(self):
        "creates random entries in the PLCAPI"
        # install the stress-test in the plc image
        location = "/usr/share/plc_api/plcsh_stress_test.py"
        remote = "/vservers/%s/%s" % (self.vservername, location)
        self.test_ssh.copy_abs("plcsh_stress_test.py", remote)
        command = location
        command += " -- --preserve --short-names"
        local = (self.run_in_guest(command) == 0)
        # second run with --foreign
        command += ' --foreign'
        remote = (self.run_in_guest(command) == 0)
        return (local and remote)
    def gather_logs(self):
        "gets all possible logs from plc's/qemu node's/slice's for future reference"
        # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
        # (1.b) get the plc's /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
        # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
        # (3) get the nodes /var/log and store it as logs/node.var-log.<node>/*
        # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
        # (1.a)
        print "-------------------- TestPlc.gather_logs : PLC's /var/log"
        self.gather_var_logs()
        # (1.b)
        print "-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/"
        self.gather_pgsql_logs()
        # (2)
        print "-------------------- TestPlc.gather_logs : nodes's QEMU logs"
        for site_spec in self.plc_spec['sites']:
            test_site = TestSite(self, site_spec)
            for node_spec in site_spec['nodes']:
                test_node = TestNode(self, test_site, node_spec)
                test_node.gather_qemu_logs()
        # (3)
        print "-------------------- TestPlc.gather_logs : nodes's /var/log"
        self.gather_nodes_var_logs()
        # (4)
        print "-------------------- TestPlc.gather_logs : sample sliver's /var/log"
        self.gather_slivers_var_logs()
        return True
    def gather_slivers_var_logs(self):
        for test_sliver in self.all_sliver_objs():
            remote = test_sliver.tar_var_logs()
            utils.system("mkdir -p logs/sliver.var-log.%s" % test_sliver.name())
            command = remote + " | tar -C logs/sliver.var-log.%s -xf -" % test_sliver.name()
            utils.system(command)
        return True
    def gather_var_logs(self):
        utils.system("mkdir -p logs/myplc.var-log.%s" % self.name())
        to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
        command = to_plc + "| tar -C logs/myplc.var-log.%s -xf -" % self.name()
        utils.system(command)
        command = "chmod a+r,a+x logs/myplc.var-log.%s/httpd" % self.name()
        utils.system(command)
    def gather_pgsql_logs(self):
        utils.system("mkdir -p logs/myplc.pgsql-log.%s" % self.name())
        to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
        command = to_plc + "| tar -C logs/myplc.pgsql-log.%s -xf -" % self.name()
        utils.system(command)
    def gather_nodes_var_logs(self):
        for site_spec in self.plc_spec['sites']:
            test_site = TestSite(self, site_spec)
            for node_spec in site_spec['nodes']:
                test_node = TestNode(self, test_site, node_spec)
                test_ssh = TestSsh(test_node.name(), key="keys/key1.rsa")
                command = test_ssh.actual_command("tar -C /var/log -cf - .")
                command = command + "| tar -C logs/node.var-log.%s -xf -" % test_node.name()
                utils.system("mkdir -p logs/node.var-log.%s" % test_node.name())
                utils.system(command)
    # returns the filename to use for sql dump/restore, using options.dbname if set
    def dbfile(self, database):
        # uses options.dbname if it is found
        try:
            name = self.options.dbname
            if not isinstance(name, StringTypes):
                raise Exception
        except:
            t = datetime.datetime.now()
            d = t.date()
            name = str(d)
        return "/root/%s-%s.sql" % (database, name)
    def db_dump(self):
        'dump the planetlab5 DB in /root in the PLC - filename has time'
        dump = self.dbfile("planetlab5")
        self.run_in_guest('pg_dump -U pgsqluser planetlab5 -f ' + dump)
        utils.header('Dumped planetlab5 database in %s' % dump)
        return True
    def db_restore(self):
        'restore the planetlab5 DB - looks broken, but run -n might help'
        dump = self.dbfile("planetlab5")
        ## stop httpd service
        self.run_in_guest('service httpd stop')
        # xxx - need another wrapper
        self.run_in_guest_piped('echo drop database planetlab5', 'psql --user=pgsqluser template1')
        self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab5')
        self.run_in_guest('psql -U pgsqluser planetlab5 -f ' + dump)
        ## restart httpd service
        self.run_in_guest('service httpd start')

        utils.header('Database restored from ' + dump)
        return True
    @standby_generic
    def standby_1(): pass
    @standby_generic
    def standby_2(): pass
    @standby_generic
    def standby_3(): pass
    @standby_generic
    def standby_4(): pass
    @standby_generic
    def standby_5(): pass
    @standby_generic
    def standby_6(): pass
    @standby_generic
    def standby_7(): pass
    @standby_generic
    def standby_8(): pass
    @standby_generic
    def standby_9(): pass
    @standby_generic
    def standby_10(): pass
    @standby_generic
    def standby_11(): pass
    @standby_generic
    def standby_12(): pass
    @standby_generic
    def standby_13(): pass
    @standby_generic
    def standby_14(): pass
    @standby_generic
    def standby_15(): pass
    @standby_generic
    def standby_16(): pass
    @standby_generic
    def standby_17(): pass
    @standby_generic
    def standby_18(): pass
    @standby_generic
    def standby_19(): pass
    @standby_generic
    def standby_20(): pass