1 # Thierry Parmentelat <thierry.parmentelat@inria.fr>
2 # Copyright (C) 2010 INRIA
9 from types import StringTypes
13 from TestSite import TestSite
14 from TestNode import TestNode
15 from TestUser import TestUser
16 from TestKey import TestKey
17 from TestSlice import TestSlice
18 from TestSliver import TestSliver
19 from TestBox import TestBox
20 from TestSsh import TestSsh
21 from TestApiserver import TestApiserver
22 from TestSliceSfa import TestSliceSfa
23 from TestUserSfa import TestUserSfa
25 # step methods must take (self) and return a boolean (options is a member of the class)
27 def standby(minutes,dry_run):
28 utils.header('Entering StandBy for %d mn'%minutes)
32 time.sleep(60*minutes)
35 def standby_generic (func):
37 minutes=int(func.__name__.split("_")[1])
38 return standby(minutes,self.options.dry_run)
41 def node_mapper (method):
44 node_method = TestNode.__dict__[method.__name__]
45 for site_spec in self.plc_spec['sites']:
46 test_site = TestSite (self,site_spec)
47 for node_spec in site_spec['nodes']:
48 test_node = TestNode (self,test_site,node_spec)
49 if not node_method(test_node): overall=False
51 # restore the doc text
52 actual.__doc__=method.__doc__
55 def slice_mapper_options (method):
58 slice_method = TestSlice.__dict__[method.__name__]
59 for slice_spec in self.plc_spec['slices']:
60 site_spec = self.locate_site (slice_spec['sitename'])
61 test_site = TestSite(self,site_spec)
62 test_slice=TestSlice(self,test_site,slice_spec)
63 if not slice_method(test_slice,self.options): overall=False
65 # restore the doc text
66 actual.__doc__=method.__doc__
69 def slice_mapper_options_sfa (method):
73 slice_method = TestSliceSfa.__dict__[method.__name__]
74 for slice_spec in self.plc_spec['sfa']['slices_sfa']:
75 site_spec = self.locate_site (slice_spec['sitename'])
76 test_site = TestSite(self,site_spec)
77 test_slice=TestSliceSfa(test_plc,test_site,slice_spec)
78 if not slice_method(test_slice,self.options): overall=False
80 # restore the doc text
81 actual.__doc__=method.__doc__
90 'display', 'resources_pre', SEP,
91 'delete_vs','create_vs','install', 'configure', 'start', SEP,
92 'fetch_keys', 'store_keys', 'clear_known_hosts', SEP,
93 'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP,
94 'reinstall_node', 'init_node','bootcd', 'configure_qemu', 'export_qemu',
95 'kill_all_qemus', 'start_node', SEP,
96 # better use of time: do this now that the nodes are taking off
97 'plcsh_stress_test', SEP,
98 'install_sfa', 'configure_sfa', 'import_sfa', 'start_sfa', SEPSFA,
99 'setup_sfa', 'add_sfa', 'update_sfa', 'view_sfa', SEPSFA,
100 'nodes_ssh_debug', 'nodes_ssh_boot', 'check_slice', 'check_initscripts', SEPSFA,
101 'check_slice_sfa', 'delete_sfa', 'stop_sfa', SEPSFA,
102 'check_tcp', 'check_hooks', SEP,
103 'force_gather_logs', 'force_resources_post', SEP,
106 'show_boxes', 'resources_list','resources_release','resources_release_plc','resources_release_qemu',SEP,
107 'stop', 'vs_start', SEP,
108 'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
109 'clean_sites', 'clean_nodes', 'clean_slices', 'clean_keys', SEP,
110 'clean_leases', 'list_leases', SEP,
112 'list_all_qemus', 'list_qemus', 'kill_qemus', SEP,
113 'db_dump' , 'db_restore', SEP,
114 'standby_1 through 20',SEP,
def printable_steps (list):
    # render the step list as one space-separated string, turning each
    # separator pseudo-step into a backslash-newline for nicer display
    flat = " ".join(list) + " "
    for separator in (SEP, SEPSFA):
        flat = flat.replace(" " + separator + " ", " \\\n")
    return flat
def valid_step (step):
    # the separator pseudo-steps are rendering artifacts, not real steps
    return step not in (SEP, SEPSFA)
125 # turn off the sfa-related steps when build has skipped SFA
# this is originally for centos5 as recent SFAs won't build on this platform
128 def check_whether_build_has_sfa (rpms_url):
129 retcod=os.system ("curl --silent %s/ | grep -q sfa"%rpms_url)
130 # full builds are expected to return with 0 here
132 # move all steps containing 'sfa' from default_steps to other_steps
133 sfa_steps= [ step for step in TestPlc.default_steps if step.find('sfa')>=0 ]
134 TestPlc.other_steps += sfa_steps
135 for step in sfa_steps: TestPlc.default_steps.remove(step)
137 def __init__ (self,plc_spec,options):
138 self.plc_spec=plc_spec
140 self.test_ssh=TestSsh(self.plc_spec['hostname'],self.options.buildname)
142 self.vserverip=plc_spec['vserverip']
143 self.vservername=plc_spec['vservername']
144 self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
147 raise Exception,'chroot-based myplc testing is deprecated'
148 self.apiserver=TestApiserver(self.url,options.dry_run)
151 name=self.plc_spec['name']
152 return "%s.%s"%(name,self.vservername)
155 return self.plc_spec['hostname']
158 return self.test_ssh.is_local()
160 # define the API methods on this object through xmlrpc
161 # would help, but not strictly necessary
def actual_command_in_guest (self,command):
    # full ssh command line that, run locally, executes command inside the plc guest
    return self.test_ssh.actual_command(self.host_to_guest(command))
def start_guest (self):
    # run the guest-start command on the host box; returns the shell exit code
    return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))
def run_in_guest (self,command):
    # run command inside the plc vserver; returns the shell exit code
    return utils.system(self.actual_command_in_guest(command))
def run_in_host (self,command):
    # run command on the host box itself (not inside the guest)
    return self.test_ssh.run_in_buildname(command)
# command gets run inside the vserver guest
def host_to_guest(self,command):
    # the host-side prefix needed to execute a command in the guest
    wrapped = "vserver %s exec %s" % (self.vservername, command)
    return wrapped
# command gets run inside the vserver guest
def start_guest_in_host(self):
    # the host-side command that boots this plc's vserver guest
    start_command = "vserver %s start" % (self.vservername)
    return start_command
def run_in_guest_piped (self,local,remote):
    # pipe the stdout of a locally-run command into a command running in the guest
    # NOTE(review): keep_stdin=True presumably keeps ssh's stdin open for the pipe -- see TestSsh
    return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),keep_stdin=True))
189 def auth_root (self):
190 return {'Username':self.plc_spec['PLC_ROOT_USER'],
191 'AuthMethod':'password',
192 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
193 'Role' : self.plc_spec['role']
195 def locate_site (self,sitename):
196 for site in self.plc_spec['sites']:
197 if site['site_fields']['name'] == sitename:
199 if site['site_fields']['login_base'] == sitename:
201 raise Exception,"Cannot locate site %s"%sitename
203 def locate_node (self,nodename):
204 for site in self.plc_spec['sites']:
205 for node in site['nodes']:
206 if node['name'] == nodename:
208 raise Exception,"Cannot locate node %s"%nodename
210 def locate_hostname (self,hostname):
211 for site in self.plc_spec['sites']:
212 for node in site['nodes']:
213 if node['node_fields']['hostname'] == hostname:
215 raise Exception,"Cannot locate hostname %s"%hostname
217 def locate_key (self,keyname):
218 for key in self.plc_spec['keys']:
219 if key['name'] == keyname:
221 raise Exception,"Cannot locate key %s"%keyname
223 def locate_slice (self, slicename):
224 for slice in self.plc_spec['slices']:
225 if slice['slice_fields']['name'] == slicename:
227 raise Exception,"Cannot locate slice %s"%slicename
229 def all_sliver_objs (self):
231 for slice_spec in self.plc_spec['slices']:
232 slicename = slice_spec['slice_fields']['name']
233 for nodename in slice_spec['nodenames']:
234 result.append(self.locate_sliver_obj (nodename,slicename))
237 def locate_sliver_obj (self,nodename,slicename):
238 (site,node) = self.locate_node(nodename)
239 slice = self.locate_slice (slicename)
241 test_site = TestSite (self, site)
242 test_node = TestNode (self, test_site,node)
243 # xxx the slice site is assumed to be the node site - mhh - probably harmless
244 test_slice = TestSlice (self, test_site, slice)
245 return TestSliver (self, test_node, test_slice)
247 def locate_first_node(self):
248 nodename=self.plc_spec['slices'][0]['nodenames'][0]
249 (site,node) = self.locate_node(nodename)
250 test_site = TestSite (self, site)
251 test_node = TestNode (self, test_site,node)
254 def locate_first_sliver (self):
255 slice_spec=self.plc_spec['slices'][0]
256 slicename=slice_spec['slice_fields']['name']
257 nodename=slice_spec['nodenames'][0]
258 return self.locate_sliver_obj(nodename,slicename)
260 # all different hostboxes used in this plc
261 def gather_hostBoxes(self):
262 # maps on sites and nodes, return [ (host_box,test_node) ]
264 for site_spec in self.plc_spec['sites']:
265 test_site = TestSite (self,site_spec)
266 for node_spec in site_spec['nodes']:
267 test_node = TestNode (self, test_site, node_spec)
268 if not test_node.is_real():
269 tuples.append( (test_node.host_box(),test_node) )
270 # transform into a dict { 'host_box' -> [ test_node .. ] }
272 for (box,node) in tuples:
273 if not result.has_key(box):
276 result[box].append(node)
279 # a step for checking this stuff
280 def show_boxes (self):
281 'print summary of nodes location'
282 for (box,nodes) in self.gather_hostBoxes().iteritems():
283 print box,":"," + ".join( [ node.name() for node in nodes ] )
286 # make this a valid step
287 def kill_all_qemus(self):
288 'kill all qemu instances on the qemu boxes involved by this setup'
289 # this is the brute force version, kill all qemus on that host box
290 for (box,nodes) in self.gather_hostBoxes().iteritems():
291 # pass the first nodename, as we don't push template-qemu on testboxes
292 nodedir=nodes[0].nodedir()
293 TestBox(box,self.options.buildname).kill_all_qemus(nodedir)
296 # make this a valid step
297 def list_all_qemus(self):
298 'list all qemu instances on the qemu boxes involved by this setup'
299 for (box,nodes) in self.gather_hostBoxes().iteritems():
300 # this is the brute force version, kill all qemus on that host box
301 TestBox(box,self.options.buildname).list_all_qemus()
304 # kill only the right qemus
305 def list_qemus(self):
306 'list qemu instances for our nodes'
307 for (box,nodes) in self.gather_hostBoxes().iteritems():
308 # the fine-grain version
313 # kill only the right qemus
314 def kill_qemus(self):
315 'kill the qemu instances for our nodes'
316 for (box,nodes) in self.gather_hostBoxes().iteritems():
317 # the fine-grain version
322 #################### display config
324 "show test configuration after localization"
325 self.display_pass (1)
326 self.display_pass (2)
330 always_display_keys=['PLC_WWW_HOST','nodes','sites',]
331 def display_pass (self,passno):
332 for (key,val) in self.plc_spec.iteritems():
333 if not self.options.verbose and key not in TestPlc.always_display_keys: continue
337 self.display_site_spec(site)
338 for node in site['nodes']:
339 self.display_node_spec(node)
340 elif key=='initscripts':
341 for initscript in val:
342 self.display_initscript_spec (initscript)
345 self.display_slice_spec (slice)
348 self.display_key_spec (key)
350 if key not in ['sites','initscripts','slices','keys', 'sfa']:
351 print '+ ',key,':',val
353 def display_site_spec (self,site):
354 print '+ ======== site',site['site_fields']['name']
355 for (k,v) in site.iteritems():
356 if not self.options.verbose and k not in TestPlc.always_display_keys: continue
359 print '+ ','nodes : ',
361 print node['node_fields']['hostname'],'',
367 print user['name'],'',
369 elif k == 'site_fields':
370 print '+ login_base',':',v['login_base']
371 elif k == 'address_fields':
def display_initscript_spec (self,initscript):
    # one-line summary of an initscript spec
    print '+ ======== initscript',initscript['initscript_fields']['name']
def display_key_spec (self,key):
    # one-line summary of a key spec
    print '+ ======== key',key['name']
383 def display_slice_spec (self,slice):
384 print '+ ======== slice',slice['slice_fields']['name']
385 for (k,v) in slice.iteritems():
398 elif k=='slice_fields':
399 print '+ fields',':',
400 print 'max_nodes=',v['max_nodes'],
def display_node_spec (self,node):
    # one-line summary of a node spec (trailing commas keep everything on one line)
    print "+ node=%s host_box=%s"%(node['name'],node['host_box']),
    print "hostname=",node['node_fields']['hostname'],
    print "ip=",node['interface_fields']['ip']
    # full dump only when running verbose
    if self.options.verbose:
        utils.pprint("node details",node,depth=3)
412 # another entry point for just showing the boxes involved
413 def display_mapping (self):
414 TestPlc.display_mapping_plc(self.plc_spec)
def display_mapping_plc (plc_spec):
    # print where the plc guest lives and where each of its nodes is hosted
    print '+ MyPLC',plc_spec['name']
    print '+\tvserver address = root@%s:/vservers/%s'%(plc_spec['hostname'],plc_spec['vservername'])
    print '+\tIP = %s/%s'%(plc_spec['PLC_API_HOST'],plc_spec['vserverip'])
    for site_spec in plc_spec['sites']:
        for node_spec in site_spec['nodes']:
            TestPlc.display_mapping_node(node_spec)
def display_mapping_node (node_spec):
    # print a single node's name, its qemu host box, and its hostname
    print '+ NODE %s'%(node_spec['name'])
    print '+\tqemu box %s'%node_spec['host_box']
    print '+\thostname=%s'%node_spec['node_fields']['hostname']
def resources_pre (self):
    "run site-dependent pre-test script as defined in LocalTestResources"
    # deferred import: LocalTestResources is provided per-site and may not exist everywhere
    from LocalTestResources import local_resources
    return local_resources.step_pre(self)
def resources_post (self):
    "run site-dependent post-test script as defined in LocalTestResources"
    # deferred import: LocalTestResources is provided per-site and may not exist everywhere
    from LocalTestResources import local_resources
    return local_resources.step_post(self)
def resources_list (self):
    "run site-dependent list script as defined in LocalTestResources"
    # deferred import: LocalTestResources is provided per-site and may not exist everywhere
    from LocalTestResources import local_resources
    return local_resources.step_list(self)
def resources_release (self):
    "run site-dependent release script as defined in LocalTestResources"
    # deferred import: LocalTestResources is provided per-site and may not exist everywhere
    from LocalTestResources import local_resources
    return local_resources.step_release(self)
def resources_release_plc (self):
    "run site-dependent release script as defined in LocalTestResources"
    # deferred import: LocalTestResources is provided per-site and may not exist everywhere
    from LocalTestResources import local_resources
    return local_resources.step_release_plc(self)
def resources_release_qemu (self):
    "run site-dependent release script as defined in LocalTestResources"
    # deferred import: LocalTestResources is provided per-site and may not exist everywhere
    from LocalTestResources import local_resources
    return local_resources.step_release_qemu(self)
463 "vserver delete the test myplc"
464 self.run_in_host("vserver --silent %s delete"%self.vservername)
468 # historically the build was being fetched by the tests
469 # now the build pushes itself as a subdir of the tests workdir
470 # so that the tests do not have to worry about extracting the build (svn, git, or whatever)
471 def create_vs (self):
472 "vserver creation (no install done)"
473 # push the local build/ dir to the testplc box
475 # a full path for the local calls
476 build_dir=os.path.dirname(sys.argv[0])
477 # sometimes this is empty - set to "." in such a case
478 if not build_dir: build_dir="."
479 build_dir += "/build"
481 # use a standard name - will be relative to remote buildname
483 # remove for safety; do *not* mkdir first, otherwise we end up with build/build/
484 self.test_ssh.rmdir(build_dir)
485 self.test_ssh.copy(build_dir,recursive=True)
486 # the repo url is taken from arch-rpms-url
487 # with the last step (i386) removed
488 repo_url = self.options.arch_rpms_url
489 for level in [ 'arch' ]:
490 repo_url = os.path.dirname(repo_url)
491 # pass the vbuild-nightly options to vtest-init-vserver
493 test_env_options += " -p %s"%self.options.personality
494 test_env_options += " -d %s"%self.options.pldistro
495 test_env_options += " -f %s"%self.options.fcdistro
496 script="vtest-init-vserver.sh"
497 vserver_name = self.vservername
498 vserver_options="--netdev eth0 --interface %s"%self.vserverip
500 vserver_hostname=socket.gethostbyaddr(self.vserverip)[0]
501 vserver_options += " --hostname %s"%vserver_hostname
503 print "Cannot reverse lookup %s"%self.vserverip
504 print "This is considered fatal, as this might pollute the test results"
506 create_vserver="%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s"%locals()
507 return self.run_in_host(create_vserver) == 0
511 "yum install myplc, noderepo, and the plain bootstrapfs"
513 # workaround for getting pgsql8.2 on centos5
514 if self.options.fcdistro == "centos5":
515 self.run_in_guest("rpm -Uvh http://download.fedora.redhat.com/pub/epel/5/i386/epel-release-5-3.noarch.rpm")
517 if self.options.personality == "linux32":
519 elif self.options.personality == "linux64":
522 raise Exception, "Unsupported personality %r"%self.options.personality
524 nodefamily="%s-%s-%s"%(self.options.pldistro,self.options.fcdistro,arch)
526 # try to install slicerepo - not fatal yet
527 self.run_in_guest("yum -y install slicerepo-%s"%nodefamily)
530 self.run_in_guest("yum -y install myplc")==0 and \
531 self.run_in_guest("yum -y install noderepo-%s"%nodefamily)==0 and \
532 self.run_in_guest("yum -y install bootstrapfs-%s-plain"%nodefamily)==0
537 tmpname='%s.plc-config-tty'%(self.name())
538 fileconf=open(tmpname,'w')
539 for var in [ 'PLC_NAME',
544 'PLC_MAIL_SUPPORT_ADDRESS',
547 # Above line was added for integrating SFA Testing
553 'PLC_RESERVATION_GRANULARITY',
556 fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
557 fileconf.write('w\n')
558 fileconf.write('q\n')
560 utils.system('cat %s'%tmpname)
561 self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
562 utils.system('rm %s'%tmpname)
567 self.run_in_guest('service plc start')
572 self.run_in_guest('service plc stop')
576 "start the PLC vserver"
580 # stores the keys from the config for further use
581 def store_keys(self):
582 "stores test users ssh keys in keys/"
583 for key_spec in self.plc_spec['keys']:
584 TestKey(self,key_spec).store_key()
def clean_keys(self):
    "removes keys cached in keys/"
    # bug fix: os.path is a module, not a callable -- the original
    # os.path(sys.argv[0]) raised TypeError at runtime.  Use dirname to
    # locate the directory this script runs from, as create_vs does.
    utils.system("rm -rf %s/keys/"%os.path.dirname(sys.argv[0]))
591 # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
592 # for later direct access to the nodes
593 def fetch_keys(self):
594 "gets ssh keys in /etc/planetlab/ and stores them locally in keys/"
596 if not os.path.isdir(dir):
598 vservername=self.vservername
600 prefix = 'debug_ssh_key'
601 for ext in [ 'pub', 'rsa' ] :
602 src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
603 dst="keys/%(vservername)s-debug.%(ext)s"%locals()
604 if self.test_ssh.fetch(src,dst) != 0: overall=False
608 "create sites with PLCAPI"
609 return self.do_sites()
def clean_sites (self):
    "delete sites with PLCAPI"
    # step entry point; the shared worker handles both add and delete
    return self.do_sites(action="delete")
615 def do_sites (self,action="add"):
616 for site_spec in self.plc_spec['sites']:
617 test_site = TestSite (self,site_spec)
618 if (action != "add"):
619 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
620 test_site.delete_site()
621 # deleted with the site
622 #test_site.delete_users()
625 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
626 test_site.create_site()
627 test_site.create_users()
630 def clean_all_sites (self):
631 "Delete all sites in PLC, and related objects"
632 print 'auth_root',self.auth_root()
633 site_ids = [s['site_id'] for s in self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])]
634 for site_id in site_ids:
635 print 'Deleting site_id',site_id
636 self.apiserver.DeleteSite(self.auth_root(),site_id)
639 "create nodes with PLCAPI"
640 return self.do_nodes()
def clean_nodes (self):
    "delete nodes with PLCAPI"
    # step entry point; the shared worker handles both add and delete
    return self.do_nodes(action="delete")
645 def do_nodes (self,action="add"):
646 for site_spec in self.plc_spec['sites']:
647 test_site = TestSite (self,site_spec)
649 utils.header("Deleting nodes in site %s"%test_site.name())
650 for node_spec in site_spec['nodes']:
651 test_node=TestNode(self,test_site,node_spec)
652 utils.header("Deleting %s"%test_node.name())
653 test_node.delete_node()
655 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
656 for node_spec in site_spec['nodes']:
657 utils.pprint('Creating node %s'%node_spec,node_spec)
658 test_node = TestNode (self,test_site,node_spec)
659 test_node.create_node ()
def nodegroups (self):
    "create nodegroups with PLCAPI"
    # step entry point; the shared worker handles both add and delete
    return self.do_nodegroups("add")
def clean_nodegroups (self):
    "delete nodegroups with PLCAPI"
    # step entry point; the shared worker handles both add and delete
    return self.do_nodegroups("delete")
def translate_timestamp (start,grain,timestamp):
    # values below TestPlc.YEAR are relative: a number of grains after start;
    # larger values are taken as absolute unix timestamps and kept as-is
    if timestamp >= TestPlc.YEAR:
        return timestamp
    return start + timestamp*grain
def timestamp_printable (timestamp):
    # short human-readable UTC rendering of a unix timestamp
    time_struct = time.gmtime(timestamp)
    return time.strftime('%m-%d %H:%M:%S UTC',time_struct)
680 "create leases (on reservable nodes only, use e.g. run -c default -c resa)"
682 grain=self.apiserver.GetLeaseGranularity(self.auth_root())
683 print 'API answered grain=',grain
684 start=(now/grain)*grain
686 # find out all nodes that are reservable
687 nodes=self.all_reservable_nodenames()
689 utils.header ("No reservable node found - proceeding without leases")
692 # attach them to the leases as specified in plc_specs
# this is where the 'leases' field gets interpreted as relative or absolute
694 for lease_spec in self.plc_spec['leases']:
695 # skip the ones that come with a null slice id
696 if not lease_spec['slice']: continue
697 lease_spec['t_from']=TestPlc.translate_timestamp(start,grain,lease_spec['t_from'])
698 lease_spec['t_until']=TestPlc.translate_timestamp(start,grain,lease_spec['t_until'])
699 lease_addition=self.apiserver.AddLeases(self.auth_root(),nodes,
700 lease_spec['slice'],lease_spec['t_from'],lease_spec['t_until'])
701 if lease_addition['errors']:
702 utils.header("Cannot create leases, %s"%lease_addition['errors'])
705 utils.header('Leases on nodes %r for %s from %d (%s) until %d (%s)'%\
706 (nodes,lease_spec['slice'],
707 lease_spec['t_from'],TestPlc.timestamp_printable(lease_spec['t_from']),
708 lease_spec['t_until'],TestPlc.timestamp_printable(lease_spec['t_until'])))
712 def clean_leases (self):
713 "remove all leases in the myplc side"
714 lease_ids= [ l['lease_id'] for l in self.apiserver.GetLeases(self.auth_root())]
715 utils.header("Cleaning leases %r"%lease_ids)
716 self.apiserver.DeleteLeases(self.auth_root(),lease_ids)
719 def list_leases (self):
720 "list all leases known to the myplc"
721 leases = self.apiserver.GetLeases(self.auth_root())
724 current=l['t_until']>=now
725 if self.options.verbose or current:
726 utils.header("%s %s from %s until %s"%(l['hostname'],l['name'],
727 TestPlc.timestamp_printable(l['t_from']),
728 TestPlc.timestamp_printable(l['t_until'])))
731 # create nodegroups if needed, and populate
732 def do_nodegroups (self, action="add"):
733 # 1st pass to scan contents
735 for site_spec in self.plc_spec['sites']:
736 test_site = TestSite (self,site_spec)
737 for node_spec in site_spec['nodes']:
738 test_node=TestNode (self,test_site,node_spec)
739 if node_spec.has_key('nodegroups'):
740 nodegroupnames=node_spec['nodegroups']
741 if isinstance(nodegroupnames,StringTypes):
742 nodegroupnames = [ nodegroupnames ]
743 for nodegroupname in nodegroupnames:
744 if not groups_dict.has_key(nodegroupname):
745 groups_dict[nodegroupname]=[]
746 groups_dict[nodegroupname].append(test_node.name())
747 auth=self.auth_root()
749 for (nodegroupname,group_nodes) in groups_dict.iteritems():
751 print 'nodegroups:','dealing with nodegroup',nodegroupname,'on nodes',group_nodes
752 # first, check if the nodetagtype is here
753 tag_types = self.apiserver.GetTagTypes(auth,{'tagname':nodegroupname})
755 tag_type_id = tag_types[0]['tag_type_id']
757 tag_type_id = self.apiserver.AddTagType(auth,
758 {'tagname':nodegroupname,
759 'description': 'for nodegroup %s'%nodegroupname,
762 print 'located tag (type)',nodegroupname,'as',tag_type_id
764 nodegroups = self.apiserver.GetNodeGroups (auth, {'groupname':nodegroupname})
766 self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
767 print 'created nodegroup',nodegroupname,'from tagname',nodegroupname,'and value','yes'
768 # set node tag on all nodes, value='yes'
769 for nodename in group_nodes:
771 self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
773 traceback.print_exc()
774 print 'node',nodename,'seems to already have tag',nodegroupname
777 expect_yes = self.apiserver.GetNodeTags(auth,
778 {'hostname':nodename,
779 'tagname':nodegroupname},
780 ['value'])[0]['value']
781 if expect_yes != "yes":
782 print 'Mismatch node tag on node',nodename,'got',expect_yes
785 if not self.options.dry_run:
786 print 'Cannot find tag',nodegroupname,'on node',nodename
790 print 'cleaning nodegroup',nodegroupname
791 self.apiserver.DeleteNodeGroup(auth,nodegroupname)
793 traceback.print_exc()
797 # return a list of tuples (nodename,qemuname)
798 def all_node_infos (self) :
800 for site_spec in self.plc_spec['sites']:
801 node_infos += [ (node_spec['node_fields']['hostname'],node_spec['host_box']) \
802 for node_spec in site_spec['nodes'] ]
def all_nodenames (self):
    # hostnames only, out of the (hostname,qemubox) pairs from all_node_infos
    return [ hostname for (hostname,_qemubox) in self.all_node_infos() ]
806 def all_reservable_nodenames (self):
808 for site_spec in self.plc_spec['sites']:
809 for node_spec in site_spec['nodes']:
810 node_fields=node_spec['node_fields']
811 if 'node_type' in node_fields and node_fields['node_type']=='reservable':
812 res.append(node_fields['hostname'])
815 # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
816 def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15):
817 if self.options.dry_run:
821 timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
822 graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
823 # the nodes that haven't checked yet - start with a full list and shrink over time
824 tocheck = self.all_hostnames()
825 utils.header("checking nodes %r"%tocheck)
826 # create a dict hostname -> status
827 status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
830 tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
832 for array in tocheck_status:
833 hostname=array['hostname']
834 boot_state=array['boot_state']
835 if boot_state == target_boot_state:
836 utils.header ("%s has reached the %s state"%(hostname,target_boot_state))
838 # if it's a real node, never mind
839 (site_spec,node_spec)=self.locate_hostname(hostname)
840 if TestNode.is_real_model(node_spec['node_fields']['model']):
841 utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
843 boot_state = target_boot_state
844 elif datetime.datetime.now() > graceout:
845 utils.header ("%s still in '%s' state"%(hostname,boot_state))
846 graceout=datetime.datetime.now()+datetime.timedelta(1)
847 status[hostname] = boot_state
849 tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ]
852 if datetime.datetime.now() > timeout:
853 for hostname in tocheck:
854 utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
856 # otherwise, sleep for a while
858 # only useful in empty plcs
def nodes_booted(self):
    # poll for up to 30 minutes until all nodes reach 'boot', quiet for the first 20
    return self.nodes_check_boot_state('boot',timeout_minutes=30,silent_minutes=20)
864 def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=15):
866 timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
867 graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
868 vservername=self.vservername
871 local_key = "keys/%(vservername)s-debug.rsa"%locals()
874 local_key = "keys/key1.rsa"
875 node_infos = self.all_node_infos()
876 utils.header("checking ssh access (expected in %s mode) to nodes:"%message)
877 for (nodename,qemuname) in node_infos:
878 utils.header("hostname=%s -- qemubox=%s"%(nodename,qemuname))
879 utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
880 (timeout_minutes,silent_minutes,period))
882 for node_info in node_infos:
883 (hostname,qemuname) = node_info
884 # try to run 'hostname' in the node
885 command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a")
886 # don't spam logs - show the command only after the grace period
887 success = utils.system ( command, silent=datetime.datetime.now() < graceout)
889 utils.header('Successfully entered root@%s (%s)'%(hostname,message))
891 node_infos.remove(node_info)
893 # we will have tried real nodes once, in case they're up - but if not, just skip
894 (site_spec,node_spec)=self.locate_hostname(hostname)
895 if TestNode.is_real_model(node_spec['node_fields']['model']):
896 utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
897 node_infos.remove(node_info)
900 if datetime.datetime.now() > timeout:
901 for (hostname,qemuname) in node_infos:
902 utils.header("FAILURE to ssh into %s (on %s)"%(hostname,qemuname))
904 # otherwise, sleep for a while
906 # only useful in empty plcs
def nodes_ssh_debug(self):
    "Tries to ssh into nodes in debug mode with the debug ssh key"
    # 10 minutes max, silent for the first 5
    return self.check_nodes_ssh(debug=True,timeout_minutes=10,silent_minutes=5)
def nodes_ssh_boot(self):
    "Tries to ssh into nodes in production mode with the root ssh key"
    # 40 minutes max, silent for the first 15
    return self.check_nodes_ssh(debug=False,timeout_minutes=40,silent_minutes=15)
918 def init_node (self):
919 "all nodes : init a clean local directory for holding node-dep stuff like iso image..."
923 "all nodes: invoke GetBootMedium and store result locally"
926 def configure_qemu (self):
927 "all nodes: compute qemu config qemu.conf and store it locally"
930 def reinstall_node (self):
931 "all nodes: mark PLCAPI boot_state as reinstall"
934 def export_qemu (self):
935 "all nodes: push local node-dep directory on the qemu box"
938 ### check hooks : invoke scripts from hooks/{node,slice}
def check_hooks_node (self):
    # run the node-context hook scripts on the first node of the spec
    return self.locate_first_node().check_hooks()
def check_hooks_sliver (self) :
    # run the slice-context hook scripts on the first sliver of the spec
    return self.locate_first_sliver().check_hooks()
def check_hooks (self):
    "runs unit tests in the node and slice contexts - see hooks/{node,slice}"
    # note: short-circuits, so sliver hooks only run if node hooks pass
    return self.check_hooks_node() and self.check_hooks_sliver()
949 def do_check_initscripts(self):
951 for slice_spec in self.plc_spec['slices']:
952 if not slice_spec.has_key('initscriptname'):
954 initscript=slice_spec['initscriptname']
955 for nodename in slice_spec['nodenames']:
956 (site,node) = self.locate_node (nodename)
957 # xxx - passing the wrong site - probably harmless
958 test_site = TestSite (self,site)
959 test_slice = TestSlice (self,test_site,slice_spec)
960 test_node = TestNode (self,test_site,node)
961 test_sliver = TestSliver (self, test_node, test_slice)
962 if not test_sliver.check_initscript(initscript):
def check_initscripts(self):
    "check that the initscripts have triggered"
    # step entry point; delegates to the per-sliver worker
    return self.do_check_initscripts()
970 def initscripts (self):
971 "create initscripts with PLCAPI"
972 for initscript in self.plc_spec['initscripts']:
973 utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
974 self.apiserver.AddInitScript(self.auth_root(),initscript['initscript_fields'])
977 def clean_initscripts (self):
978 "delete initscripts with PLCAPI"
979 for initscript in self.plc_spec['initscripts']:
980 initscript_name = initscript['initscript_fields']['name']
981 print('Attempting to delete %s in plc %s'%(initscript_name,self.plc_spec['name']))
983 self.apiserver.DeleteInitScript(self.auth_root(),initscript_name)
984 print initscript_name,'deleted'
986 print 'deletion went wrong - probably did not exist'
991 "create slices with PLCAPI"
992 return self.do_slices()
def clean_slices (self):
    "delete slices with PLCAPI"
    # step entry point; the shared worker handles both add and delete
    return self.do_slices("delete")
998 def do_slices (self, action="add"):
999 for slice in self.plc_spec['slices']:
1000 site_spec = self.locate_site (slice['sitename'])
1001 test_site = TestSite(self,site_spec)
1002 test_slice=TestSlice(self,test_site,slice)
1004 utils.header("Deleting slices in site %s"%test_site.name())
1005 test_slice.delete_slice()
1007 utils.pprint("Creating slice",slice)
1008 test_slice.create_slice()
1009 utils.header('Created Slice %s'%slice['slice_fields']['name'])
1012 @slice_mapper_options
1013 def check_slice(self):
1014 "tries to ssh-enter the slice with the user key, to ensure slice creation"
1018 def clear_known_hosts (self):
1019 "remove test nodes entries from the local known_hosts file"
1023 def start_node (self) :
1024 "all nodes: start the qemu instance (also runs qemu-bridge-init start)"
# Step: for each entry of plc_spec['tcp_test'], start a TCP server in one
# sliver and connect to it from another (loopback when only one is defined).
# NOTE(review): the loop header over 'specs', the 'port' setup, and the
# failure/return lines are missing from this extract.
1027 def check_tcp (self):
1028 "check TCP connectivity between 2 slices (or in loopback if only one is defined)"
1029 specs = self.plc_spec['tcp_test']
# server side: locate the sliver and launch a tcp server on 'port'
1034 s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
1035 if not s_test_sliver.run_tcp_server(port,timeout=10):
1039 # idem for the client side
# NOTE(review): the client sliver is located with the *server* spec keys --
# this looks like a bug; spec['client_node'],spec['client_slice'] expected.
1040 c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
1041 if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
# Step: copy the PLCAPI stress-test script into the plc image and run it
# in --check mode; success is the script's zero exit status.
# NOTE(review): line 1051 is missing from this extract -- 'command' is
# augmented below without a visible initializer (presumably
# 'command = location'); confirm against the original.
1045 def plcsh_stress_test (self):
1046 "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents"
1047 # install the stress-test in the plc image
1048 location = "/usr/share/plc_api/plcsh_stress_test.py"
1049 remote="/vservers/%s/%s"%(self.vservername,location)
1050 self.test_ssh.copy_abs("plcsh_stress_test.py",remote)
1052 command += " -- --check"
# --tiny keeps the run short when the test size is minimal
1053 if self.options.size == 1:
1054 command += " --tiny"
1055 return ( self.run_in_guest(command) == 0)
1057 # populate runs the same utility with slightly different options
1058 # in particular runs with --preserve (dont cleanup) and without --check
1059 # also it gets run twice, once with the --foreign option for creating fake foreign entries
# Step: yum-install the SFA packages inside the guest; boolean success.
# NOTE(review): the bodies of the linux32/linux64 branches (lines 1065,
# 1067-1068) are missing from this extract, and the python2-style raise
# below presumably sits in a final else branch -- confirm against the
# original before editing.
1062 def install_sfa(self):
1063 "yum install sfa, sfa-plc and sfa-client"
1064 if self.options.personality == "linux32":
1066 elif self.options.personality == "linux64":
1069 raise Exception, "Unsupported personality %r"%self.options.personality
1070 return self.run_in_guest("yum -y install sfa sfa-client sfa-plc sfa-sfatables")==0
# Step: build an sfa-config-tty answer file locally, pipe it into
# sfa-config-tty inside the guest, then clean up the temp file.
# NOTE(review): several entries of the variable list (lines 1081-1085,
# 1087-1088) and what is presumably fileconf.close() (line 1093) are
# missing from this extract.
1073 def configure_sfa(self):
1074 "run sfa-config-tty"
1075 tmpname='%s.sfa-config-tty'%(self.name())
1076 fileconf=open(tmpname,'w')
1077 for var in [ 'SFA_REGISTRY_ROOT_AUTH',
1078 # 'SFA_REGISTRY_LEVEL1_AUTH',
1079 'SFA_REGISTRY_HOST',
1080 'SFA_AGGREGATE_HOST',
1086 'SFA_PLC_DB_PASSWORD',
# 'e VAR\nVALUE\n' is sfa-config-tty's edit command for each variable
1089 fileconf.write ('e %s\n%s\n'%(var,self.plc_spec['sfa'][var]))
# w: write config, R: restart/reload, q: quit the tty tool
1090 fileconf.write('w\n')
1091 fileconf.write('R\n')
1092 fileconf.write('q\n')
1094 utils.system('cat %s'%tmpname)
1095 self.run_in_guest_piped('cat %s'%tmpname,'sfa-config-tty')
1096 utils.system('rm %s'%tmpname)
# Step: run the sfa-import-plc.py script inside the guest; boolean success.
# NOTE(review): line 1100 (presumably the step docstring) is missing from
# this extract; 'auth' is only consumed by the commented-out line below, so
# the assignment is effectively dead code.
1099 def import_sfa(self):
1101 auth=self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH']
1102 return self.run_in_guest('sfa-import-plc.py')==0
1103 # not needed anymore
1104 # self.run_in_guest('cp /etc/sfa/authorities/%s/%s.pkey /etc/sfa/authorities/server.key'%(auth,auth))
# Step: start the sfa service inside the guest; boolean success.
# NOTE(review): line 1107 (presumably the step docstring) is missing from
# this extract.
1106 def start_sfa(self):
1108 return self.run_in_guest('service sfa start')==0
# Step: build a local sfi client configuration directory (private key,
# sfi_config, person/slice xml, slice rspec) and copy it into the guest.
# NOTE(review): this extract is heavily gapped -- 'dir_name' (line 1113),
# the assignments of 'slice_record' (1149), 'slice_rspec' (1159) and
# 'location' (1163), and the fileconf.close() calls are all missing;
# confirm against the original before editing.
1110 def setup_sfa(self):
1111 sfa_spec=self.plc_spec['sfa']
# NOTE(review): this string sits after an assignment, so it is a no-op
# expression, not the method docstring -- likely misplaced.
1112 "sfi client configuration"
# recreate the working directory from scratch
1114 if os.path.exists(dir_name):
1115 utils.system('rm -rf %s'%dir_name)
1116 utils.system('mkdir %s'%dir_name)
# 1) fake PI private key
1117 file_name=dir_name + os.sep + 'fake-pi1.pkey'
1118 fileconf=open(file_name,'w')
1119 fileconf.write (self.plc_spec['keys'][0]['private'])
# 2) sfi_config: authority, user, registry and slice-manager endpoints
1122 file_name=dir_name + os.sep + 'sfi_config'
1123 fileconf=open(file_name,'w')
1124 SFI_AUTH="%s.%s"%(sfa_spec['SFA_REGISTRY_ROOT_AUTH'],sfa_spec['login_base'])
1125 fileconf.write ("SFI_AUTH='%s'"%SFI_AUTH)
1126 fileconf.write('\n')
1127 SFI_USER=SFI_AUTH+'.fake-pi1'
1128 fileconf.write ("SFI_USER='%s'"%SFI_USER)
1129 fileconf.write('\n')
1130 SFI_REGISTRY='http://' + sfa_spec['SFA_PLC_DB_HOST'] + ':12345/'
1131 fileconf.write ("SFI_REGISTRY='%s'"%SFI_REGISTRY)
1132 fileconf.write('\n')
1133 SFI_SM='http://' + sfa_spec['SFA_PLC_DB_HOST'] + ':12347/'
1134 fileconf.write ("SFI_SM='%s'"%SFI_SM)
1135 fileconf.write('\n')
# 3) person record xml
1138 file_name=dir_name + os.sep + 'person.xml'
1139 fileconf=open(file_name,'w')
1140 for record in sfa_spec['sfa_person_xml']:
1141 person_record=record
1142 fileconf.write(person_record)
1143 fileconf.write('\n')
# 4) slice record xml -- NOTE(review): 'slice_record' has no visible
# assignment in this extract (line 1149 missing)
1146 file_name=dir_name + os.sep + 'slice.xml'
1147 fileconf=open(file_name,'w')
1148 for record in sfa_spec['sfa_slice_xml']:
1150 #slice_record=sfa_spec['sfa_slice_xml']
1151 fileconf.write(slice_record)
1152 fileconf.write('\n')
# 5) slice rspec -- NOTE(review): 'slice_rspec' built from (key,value)
# pairs on a missing line (1159)
1155 file_name=dir_name + os.sep + 'slice.rspec'
1156 fileconf=open(file_name,'w')
1158 for (key, value) in sfa_spec['sfa_slice_rspec'].items():
1160 fileconf.write(slice_rspec)
1161 fileconf.write('\n')
# finally ship the whole directory into the guest and clean up locally
1164 remote="/vservers/%s/%s"%(self.vservername,location)
1165 self.test_ssh.copy_abs(dir_name, remote, recursive=True)
1167 #utils.system('cat %s'%tmpname)
1168 utils.system('rm -rf %s'%dir_name)
# Body of the SFA-object-creation step (its 'def' line, 1171, is missing
# from this extract).
# NOTE(review): 'test_plc' is not defined in method scope -- it should
# presumably be 'self' (same NameError as in update_sfa/delete_sfa).
# Also: success1/success2 stay unbound when the slice list is empty, and
# only the *last* slice's results reach the return value.
1172 "run sfi.py add (on Registry) and sfi.py create (on SM) to form new objects"
1174 test_user_sfa=TestUserSfa(test_plc,self.plc_spec['sfa'])
1175 success=test_user_sfa.add_user()
1177 for slice_spec in self.plc_spec['sfa']['slices_sfa']:
1178 site_spec = self.locate_site (slice_spec['sitename'])
1179 test_site = TestSite(self,site_spec)
1180 test_slice_sfa=TestSliceSfa(test_plc,test_site,slice_spec)
1181 success1=test_slice_sfa.add_slice()
1182 success2=test_slice_sfa.create_slice()
1183 return success and success1 and success2
def update_sfa(self):
    "run sfi.py update (on Registry) and sfi.py create (on SM) on existing objects"
    # 'self' is the plc under test; the original passed an undefined global
    # 'test_plc' here, which raised NameError at run time.
    test_user_sfa = TestUserSfa(self, self.plc_spec['sfa'])
    success1 = test_user_sfa.update_user()
    # start at True so an empty slice list cannot leave success2 unbound,
    # and accumulate failures instead of keeping only the last slice's result
    success2 = True
    for slice_spec in self.plc_spec['sfa']['slices_sfa']:
        site_spec = self.locate_site(slice_spec['sitename'])
        test_site = TestSite(self, site_spec)
        test_slice_sfa = TestSliceSfa(self, test_site, slice_spec)
        if not test_slice_sfa.update_slice():
            success2 = False
    # step methods report success as a boolean (see file-header comment)
    return success1 and success2
# Body of the sfa read-only inspection step (its 'def' line, 1198, is
# missing from this extract, as is line 1201 -- presumably the 'return \'
# that opens the chained-and expression below).
1199 "run sfi.py list and sfi.py show (both on Registry) and sfi.py slices and sfi.py resources (both on SM)"
1200 auth=self.plc_spec['sfa']['SFA_REGISTRY_ROOT_AUTH']
# all four sfi.py invocations must exit 0 for the step to succeed
1202 self.run_in_guest("sfi.py -d /root/.sfi/ list %s.main"%auth)==0 and \
1203 self.run_in_guest("sfi.py -d /root/.sfi/ show %s.main"%auth)==0 and \
1204 self.run_in_guest("sfi.py -d /root/.sfi/ slices")==0 and \
1205 self.run_in_guest("sfi.py -d /root/.sfi/ resources -o resources")==0
# Mapped over every SFA slice in the spec by slice_mapper_options_sfa (see
# file head), which dispatches to TestSliceSfa's method of the same name;
# the def here only carries the step's docstring.
1207 @slice_mapper_options_sfa
1208 def check_slice_sfa(self):
1209 "tries to ssh-enter the SFA slice"
def delete_sfa(self):
    "run sfi.py delete (on SM), sfi.py remove (on Registry)"
    # 'self' is the plc under test; the original passed an undefined global
    # 'test_plc' here, which raised NameError at run time.
    test_user_sfa = TestUserSfa(self, self.plc_spec['sfa'])
    success1 = test_user_sfa.delete_user()
    # start at True so an empty slice list cannot leave success2 unbound,
    # and accumulate failures instead of keeping only the last slice's result
    success2 = True
    for slice_spec in self.plc_spec['sfa']['slices_sfa']:
        site_spec = self.locate_site(slice_spec['sitename'])
        test_site = TestSite(self, site_spec)
        test_slice_sfa = TestSliceSfa(self, test_site, slice_spec)
        if not test_slice_sfa.delete_slice():
            success2 = False
    return success1 and success2
# Body of stop_sfa (its 'def' line and docstring, lines 1225-1226, are
# missing from this extract): stop the sfa service in the guest, boolean success.
1227 return self.run_in_guest('service sfa stop')==0
# Step: run the plcsh stress-test script in populate mode -- once with
# --preserve (keep entries) and once more with --foreign for fake foreign
# entries; both runs must exit 0.
# NOTE(review): line 1235 is missing from this extract -- 'command' is
# augmented below without a visible initializer (presumably
# 'command = location'); also note 'remote' is reused: first the target
# path, then a boolean result -- confusing shadowing but harmless.
1229 def populate (self):
1230 "creates random entries in the PLCAPI"
1231 # install the stress-test in the plc image
1232 location = "/usr/share/plc_api/plcsh_stress_test.py"
1233 remote="/vservers/%s/%s"%(self.vservername,location)
1234 self.test_ssh.copy_abs("plcsh_stress_test.py",remote)
1236 command += " -- --preserve --short-names"
1237 local = (self.run_in_guest(command) == 0);
1238 # second run with --foreign
1239 command += ' --foreign'
1240 remote = (self.run_in_guest(command) == 0);
1241 return ( local and remote)
def gather_logs (self):
    "gets all possible logs from plc's/qemu node's/slice's for future reference"
    # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
    # (1.b) get the plc's /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
    # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
    # (3) get the nodes /var/log and store it as logs/node.var-log.<node>/*
    # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
    # (1.a)
    print("-------------------- TestPlc.gather_logs : PLC's /var/log")
    self.gather_var_logs ()
    # (1.b) -- original banner said 'psql', fixed to the actual pgsql path
    print("-------------------- TestPlc.gather_logs : PLC's /var/lib/pgsql/data/pg_log/")
    self.gather_pgsql_logs ()
    # (2)
    print("-------------------- TestPlc.gather_logs : nodes's QEMU logs")
    for site_spec in self.plc_spec['sites']:
        test_site = TestSite (self,site_spec)
        for node_spec in site_spec['nodes']:
            test_node=TestNode(self,test_site,node_spec)
            test_node.gather_qemu_logs()
    # (3)
    print("-------------------- TestPlc.gather_logs : nodes's /var/log")
    self.gather_nodes_var_logs()
    # (4)
    print("-------------------- TestPlc.gather_logs : sample sliver's /var/log")
    self.gather_slivers_var_logs()
    # step methods must return a boolean (see file-header comment); the
    # original fell off the end and implicitly returned None
    return True
def gather_slivers_var_logs(self):
    "fetch each sliver's /var/log into logs/sliver.var-log.<sliver>/"
    for sliver in self.all_sliver_objs():
        # remote side: command that tars up the sliver's /var/log and streams it
        tar_stream = sliver.tar_var_logs()
        utils.system("mkdir -p logs/sliver.var-log.%s"%sliver.name())
        # local side: unpack the stream under logs/
        unpack = tar_stream + " | tar -C logs/sliver.var-log.%s -xf -"%sliver.name()
        utils.system(unpack)
def gather_var_logs (self):
    "pull the plc guest's /var/log into logs/myplc.var-log.<name>/"
    utils.system("mkdir -p logs/myplc.var-log.%s"%self.name())
    # stream a tarball of /var/log out of the guest and unpack it locally
    tar_stream = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
    utils.system(tar_stream + "| tar -C logs/myplc.var-log.%s -xf -"%self.name())
    # loosen permissions on the httpd logs so they can be browsed
    utils.system("chmod a+r,a+x logs/myplc.var-log.%s/httpd"%self.name())
def gather_pgsql_logs (self):
    "pull the guest's postgresql logs into logs/myplc.pgsql-log.<name>/"
    utils.system("mkdir -p logs/myplc.pgsql-log.%s"%self.name())
    # same tar-pipe pattern as gather_var_logs, aimed at pg_log
    tar_stream = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
    utils.system(tar_stream + "| tar -C logs/myplc.pgsql-log.%s -xf -"%self.name())
def gather_nodes_var_logs (self):
    "pull /var/log from every node into logs/node.var-log.<node>/"
    for site_spec in self.plc_spec['sites']:
        site = TestSite (self,site_spec)
        for node_spec in site_spec['nodes']:
            node = TestNode(self,site,node_spec)
            # nodes are reached over ssh with the test key
            ssh = TestSsh (node.name(),key="keys/key1.rsa")
            pipeline = ssh.actual_command("tar -C /var/log -cf - .")
            pipeline = pipeline + "| tar -C logs/node.var-log.%s -xf -"%node.name()
            utils.system("mkdir -p logs/node.var-log.%s"%node.name())
            utils.system(pipeline)
# returns the filename to use for sql dump/restore, using options.dbname if set
# NOTE(review): lines 1308, 1311-1312 and 1314-1315 are missing from this
# extract -- presumably the fallback that turns the timestamp 't' into
# 'name' when options.dbname is not a usable string; confirm against the
# original. 'StringTypes' comes from the python2-only 'types' module.
1306 def dbfile (self, database):
1307 # uses options.dbname if it is found
1309 name=self.options.dbname
1310 if not isinstance(name,StringTypes):
1313 t=datetime.datetime.now()
1316 return "/root/%s-%s.sql"%(database,name)
# Body of db_dump (its 'def' line, 1318, is missing from this extract):
# pg_dump the planetlab5 database into a file under the guest's /root.
# NOTE(review): the dbfile label "planetab5" looks like a typo for
# "planetlab5"; db_restore uses the same spelling, so the dump/restore
# pair stays consistent -- fix both together or neither.
1319 'dump the planetlab5 DB in /root in the PLC - filename has time'
1320 dump=self.dbfile("planetab5")
1321 self.run_in_guest('pg_dump -U pgsqluser planetlab5 -f '+ dump)
1322 utils.header('Dumped planetlab5 database in %s'%dump)
def db_restore(self):
    'restore the planetlab5 DB - looks broken, but run -n might help'
    dumpfile=self.dbfile("planetab5")
    # stop httpd first: it holds connections that would block the drop
    self.run_in_guest('service httpd stop')
    # xxx - need another wrapper
    self.run_in_guest_piped('echo drop database planetlab5','psql --user=pgsqluser template1')
    # recreate an empty planetlab5 db, then feed the dump back in
    self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab5')
    self.run_in_guest('psql -U pgsqluser planetlab5 -f '+dumpfile)
    # bring httpd back up now that the db is in place
    self.run_in_guest('service httpd start')
    utils.header('Database restored from ' + dumpfile)
# Twenty placeholder steps: each is turned into a real 'wait <n> minutes'
# step by the standby_generic decorator (see file head), which derives <n>
# from the trailing number in the method name.
# NOTE(review): the '@standby_generic' decorator lines (the odd-numbered
# source lines between these defs) are missing from this extract.
1340 def standby_1(): pass
1342 def standby_2(): pass
1344 def standby_3(): pass
1346 def standby_4(): pass
1348 def standby_5(): pass
1350 def standby_6(): pass
1352 def standby_7(): pass
1354 def standby_8(): pass
1356 def standby_9(): pass
1358 def standby_10(): pass
1360 def standby_11(): pass
1362 def standby_12(): pass
1364 def standby_13(): pass
1366 def standby_14(): pass
1368 def standby_15(): pass
1370 def standby_16(): pass
1372 def standby_17(): pass
1374 def standby_18(): pass
1376 def standby_19(): pass
1378 def standby_20(): pass