starts untangling; testssh should be built out of a hostname and possibly a key,...
[tests.git] / system / TestPlc.py
1 # $Id$
2 import os, os.path
3 import datetime
4 import time
5 import sys
6 import xmlrpclib
7 import datetime
8 import traceback
9 from types import StringTypes
10
11 import utils
12 from TestSite import TestSite
13 from TestNode import TestNode
14 from TestUser import TestUser
15 from TestKey import TestKey
16 from TestSlice import TestSlice
17 from TestSliver import TestSliver
18 from TestBox import TestBox
19 from TestSsh import TestSsh
20
21 # step methods must take (self, options) and return a boolean
22
def standby(minutes):
    """Pause the test run for the given number of minutes.

    The pause is announced through utils.header first; the function always
    returns True so that its result can serve as a step outcome.
    """
    utils.header('Entering StandBy for %d mn'%minutes)
    seconds=60*minutes
    time.sleep(seconds)
    return True
27
def standby_generic (func):
    """Decorator turning a stub named standby_<N> into a step that sleeps N minutes.

    The decorated function is never called; only its name is used, and the
    number of minutes is parsed from it each time the step runs.
    """
    def actual(self,options):
        # the duration is the second '_'-separated field of the stub's name
        duration=func.__name__.split("_")[1]
        return standby(int(duration))
    return actual
33
34 class TestPlc:
35
36     def __init__ (self,plc_spec):
37         self.plc_spec=plc_spec
38         self.path=os.path.dirname(sys.argv[0])
39         self.test_ssh=TestSsh(self)
40         try:
41             self.vserverip=plc_spec['vserverip']
42             self.vservername=plc_spec['vservername']
43             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
44             self.vserver=True
45         except:
46             self.vserver=False
47             self.url="https://%s:443/PLCAPI/"%plc_spec['hostname']
48         utils.header('Using API url %s'%self.url)
49         self.server=xmlrpclib.Server(self.url,allow_none=True)
50         
51     def name(self):
52         name=self.plc_spec['name']
53         if self.vserver:
54             return name+"[%s]"%self.vservername
55         else:
56             return name+"[chroot]"
57
58     def hostname(self):
59         return self.plc_spec['hostname']
60
61     def is_local (self):
62         return self.test_ssh.is_local()
63
64     # define the API methods on this object through xmlrpc
65     # would help, but not strictly necessary
66     def connect (self):
67         pass
68     
69     #command gets run in the chroot/vserver
70     def host_to_guest(self,command):
71         if self.vserver:
72             return "vserver %s exec %s"%(self.vservername,command)
73         else:
74             return "chroot /plc/root %s"%TestSsh.backslash_shell_specials(command)
75     
76     # copy a file to the myplc root image - pass in_data=True if the file must go in /plc/data
77     def copy_in_guest (self, localfile, remotefile, in_data=False):
78         if in_data:
79             chroot_dest="/plc/data"
80         else:
81             chroot_dest="/plc/root"
82         if self.is_local():
83             if not self.vserver:
84                 utils.system("cp %s %s/%s"%(localfile,chroot_dest,remotefile))
85             else:
86                 utils.system("cp %s /vservers/%s/%s"%(localfile,self.vservername,remotefile))
87         else:
88             if not self.vserver:
89                 utils.system("scp %s %s:%s/%s"%(localfile,self.hostname(),chroot_dest,remotefile))
90             else:
91                 utils.system("scp %s %s@/vservers/%s/%s"%(localfile,self.hostname(),self.vservername,remotefile))
92
93     def auth_root (self):
94         return {'Username':self.plc_spec['PLC_ROOT_USER'],
95                 'AuthMethod':'password',
96                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
97                 'Role' : self.plc_spec['role']
98                 }
99     def locate_site (self,sitename):
100         for site in self.plc_spec['sites']:
101             if site['site_fields']['name'] == sitename:
102                 return site
103             if site['site_fields']['login_base'] == sitename:
104                 return site
105         raise Exception,"Cannot locate site %s"%sitename
106         
107     def locate_node (self,nodename):
108         for site in self.plc_spec['sites']:
109             for node in site['nodes']:
110                 if node['node_fields']['hostname'] == nodename:
111                     return (site,node)
112         raise Exception,"Cannot locate node %s"%nodename
113         
114     def locate_key (self,keyname):
115         for key in self.plc_spec['keys']:
116             if key['name'] == keyname:
117                 return key
118         raise Exception,"Cannot locate key %s"%keyname
119
120     # all different hostboxes used in this plc
121     def gather_hostBoxes(self):
122         # maps on sites and nodes, return [ (host_box,test_node) ]
123         tuples=[]
124         for site_spec in self.plc_spec['sites']:
125             test_site = TestSite (self,site_spec)
126             for node_spec in site_spec['nodes']:
127                 test_node = TestNode (self, test_site, node_spec)
128                 if not test_node.is_real():
129                     tuples.append( (test_node.host_box(),test_node) )
130         # transform into a dict { 'host_box' -> [ hostnames .. ] }
131         result = {}
132         for (box,node) in tuples:
133             if not result.has_key(box):
134                 result[box]=[node]
135             else:
136                 result[box].append(node)
137         return result
138                     
139     # a step for checking this stuff
140     def showboxes (self,options):
141         print 'showboxes'
142         for (box,nodes) in self.gather_hostBoxes().iteritems():
143             print box,":"," + ".join( [ node.name() for node in nodes ] )
144         return True
145
146     # make this a valid step
147     def kill_all_qemus(self,options):
148         for (box,nodes) in self.gather_hostBoxes().iteritems():
149             # this is the brute force version, kill all qemus on that host box
150             TestBox(box,options.buildname).kill_all_qemus()
151         return True
152
153     # make this a valid step
154     def list_all_qemus(self,options):
155         for (box,nodes) in self.gather_hostBoxes().iteritems():
156             # push the script
157             TestBox(box,options.buildname).copy("qemu_kill.sh") 
158             # this is the brute force version, kill all qemus on that host box
159             TestBox(box,options.buildname).run_in_buildname("qemu_kill.sh -l")
160         return True
161
162     # kill only the right qemus
163     def force_kill_qemus(self,options):
164         for (box,nodes) in self.gather_hostBoxes().iteritems():
165             # push the script
166             TestBox(box,options.buildname).copy("qemu_kill.sh") 
167             # the fine-grain version
168             for node in nodes:
169                 node.kill_qemu()
170         return True
171
172     def clear_ssh_config (self,options):
173         # install local ssh_config file as root's .ssh/config - ssh should be quiet
174         # dir might need creation first
175         self.test_ssh.run_in_guest("mkdir /root/.ssh")
176         self.test_ssh.run_in_guest("chmod 700 /root/.ssh")
177         # this does not work - > redirection somehow makes it until an argument to cat
178         #self.run_in_guest_piped("cat ssh_config","cat > /root/.ssh/config")
179         self.copy_in_guest("ssh_config","/root/.ssh/config",True)
180         return True
181             
182     #################### step methods
183
184     ### uninstall
185     def uninstall_chroot(self,options):
186         self.test_ssh.run_in_host('service plc safestop')
187         #####detecting the last myplc version installed and remove it
188         self.test_ssh.run_in_host('rpm -e myplc')
189         ##### Clean up the /plc directory
190         self.test_ssh.run_in_host('rm -rf  /plc/data')
191         ##### stop any running vservers
192         self.test_ssh.run_in_host('for vserver in $(ls /vservers/* | sed -e s,/vservers/,,) ; do vserver $vserver stop ; done')
193         return True
194
195     def uninstall_vserver(self,options):
196         self.test_ssh.run_in_host("vserver --silent %s delete"%self.vservername)
197         return True
198
199     def uninstall(self,options):
200         # if there's a chroot-based myplc running, and then a native-based myplc is being deployed
201         # it sounds safer to have the former uninstalled too
202         # now the vserver method cannot be invoked for chroot instances as vservername is required
203         if self.vserver:
204             self.uninstall_vserver(options)
205             self.uninstall_chroot(options)
206         else:
207             self.uninstall_chroot(options)
208         return True
209
210     ### install
211     def install_chroot(self,options):
212         # nothing to do
213         return True
214
215     # xxx this would not work with hostname != localhost as mylc-init-vserver was extracted locally
216     def install_vserver(self,options):
217         # we need build dir for vtest-init-vserver
218         if self.is_local():
219             # a full path for the local calls
220             build_dir=self.path+"/build"
221         else:
222             # use a standard name - will be relative to HOME 
223             build_dir="tests-system-build"
224         build_checkout = "svn checkout %s %s"%(options.build_url,build_dir)
225         if self.test_ssh.run_in_host(build_checkout) != 0:
226             raise Exception,"Cannot checkout build dir"
227         # the repo url is taken from myplc-url 
228         # with the last two steps (i386/myplc...) removed
229         repo_url = options.myplc_url
230         repo_url = os.path.dirname(repo_url)
231         create_vserver="%s/vtest-init-vserver.sh %s %s -- --interface eth0:%s"%\
232             (build_dir,self.vservername,repo_url,self.vserverip)
233         if self.test_ssh.run_in_host(create_vserver) != 0:
234             raise Exception,"Could not create vserver for %s"%self.vservername
235         return True
236
237     def install(self,options):
238         if self.vserver:
239             return self.install_vserver(options)
240         else:
241             return self.install_chroot(options)
242     
243     ### install_rpm
244     def cache_rpm(self,url):
245         self.test_ssh.run_in_host('rm -rf *.rpm')
246         utils.header('Curling rpm from %s'%url)
247         id= self.test_ssh.run_in_host('curl -O '+url)
248         if (id != 0):
249                 raise Exception,"Could not get rpm from  %s"%url
250                 return False
251         return True
252
253     def install_rpm_chroot(self,options):
254         rpm = os.path.basename(options.myplc_url)
255         if (not os.path.isfile(rpm)):
256                 self.cache_rpm(options.myplc_url)
257         utils.header('Installing the :  %s'%rpm)
258         self.test_ssh.run_in_host('rpm -Uvh '+rpm)
259         self.test_ssh.run_in_host('service plc mount')
260         return True
261
262     def install_rpm_vserver(self,options):
263         self.test_ssh.run_in_guest("yum -y install myplc-native")
264         return True
265
266     def install_rpm(self,options):
267         if self.vserver:
268             return self.install_rpm_vserver(options)
269         else:
270             return self.install_rpm_chroot(options)
271
272     ### 
273     def configure(self,options):
274         tmpname='%s.plc-config-tty'%(self.name())
275         fileconf=open(tmpname,'w')
276         for var in [ 'PLC_NAME',
277                      'PLC_ROOT_PASSWORD',
278                      'PLC_ROOT_USER',
279                      'PLC_MAIL_ENABLED',
280                      'PLC_MAIL_SUPPORT_ADDRESS',
281                      'PLC_DB_HOST',
282                      'PLC_API_HOST',
283                      'PLC_WWW_HOST',
284                      'PLC_BOOT_HOST',
285                      'PLC_NET_DNS1',
286                      'PLC_NET_DNS2']:
287             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
288         fileconf.write('w\n')
289         fileconf.write('q\n')
290         fileconf.close()
291         utils.system('cat %s'%tmpname)
292         self.test_ssh.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
293         utils.system('rm %s'%tmpname)
294         return True
295
296     # the chroot install is slightly different to this respect
297     def start(self, options):
298         if self.vserver:
299             self.test_ssh.run_in_guest('service plc start')
300         else:
301             self.test_ssh.run_in_host('service plc start')
302         return True
303         
304     def stop(self, options):
305         if self.vserver:
306             self.test_ssh.run_in_guest('service plc stop')
307         else:
308             self.test_ssh.run_in_host('service plc stop')
309         return True
310         
311     # could use a TestKey class
312     def store_keys(self, options):
313         for key_spec in self.plc_spec['keys']:
314             TestKey(self,key_spec).store_key()
315         return True
316
317     def clean_keys(self, options):
318         utils.system("rm -rf %s/keys/"%self.path)
319
320     def sites (self,options):
321         return self.do_sites(options)
322     
323     def clean_sites (self,options):
324         return self.do_sites(options,action="delete")
325     
326     def do_sites (self,options,action="add"):
327         for site_spec in self.plc_spec['sites']:
328             test_site = TestSite (self,site_spec)
329             if (action != "add"):
330                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
331                 test_site.delete_site()
332                 # deleted with the site
333                 #test_site.delete_users()
334                 continue
335             else:
336                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
337                 test_site.create_site()
338                 test_site.create_users()
339         return True
340
341     def nodes (self, options):
342         return self.do_nodes(options)
343     def clean_nodes (self, options):
344         return self.do_nodes(options,action="delete")
345
346     def do_nodes (self, options,action="add"):
347         for site_spec in self.plc_spec['sites']:
348             test_site = TestSite (self,site_spec)
349             if action != "add":
350                 utils.header("Deleting nodes in site %s"%test_site.name())
351                 for node_spec in site_spec['nodes']:
352                     test_node=TestNode(self,test_site,node_spec)
353                     utils.header("Deleting %s"%test_node.name())
354                     test_node.delete_node()
355             else:
356                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
357                 for node_spec in site_spec['nodes']:
358                     utils.pprint('Creating node %s'%node_spec,node_spec)
359                     test_node = TestNode (self,test_site,node_spec)
360                     test_node.create_node ()
361         return True
362
363     # create nodegroups if needed, and populate
364     # no need for a clean_nodegroups if we are careful enough
365     def nodegroups (self, options):
366         # 1st pass to scan contents
367         groups_dict = {}
368         for site_spec in self.plc_spec['sites']:
369             test_site = TestSite (self,site_spec)
370             for node_spec in site_spec['nodes']:
371                 test_node=TestNode (self,test_site,node_spec)
372                 if node_spec.has_key('nodegroups'):
373                     nodegroupnames=node_spec['nodegroups']
374                     if isinstance(nodegroupnames,StringTypes):
375                         nodegroupnames = [ nodegroupnames ]
376                     for nodegroupname in nodegroupnames:
377                         if not groups_dict.has_key(nodegroupname):
378                             groups_dict[nodegroupname]=[]
379                         groups_dict[nodegroupname].append(test_node.name())
380         auth=self.auth_root()
381         for (nodegroupname,group_nodes) in groups_dict.iteritems():
382             try:
383                 self.server.GetNodeGroups(auth,{'name':nodegroupname})[0]
384             except:
385                 self.server.AddNodeGroup(auth,{'name':nodegroupname})
386             for node in group_nodes:
387                 self.server.AddNodeToNodeGroup(auth,node,nodegroupname)
388         return True
389
390     def all_hostnames (self) :
391         hostnames = []
392         for site_spec in self.plc_spec['sites']:
393             hostnames += [ node_spec['node_fields']['hostname'] \
394                            for node_spec in site_spec['nodes'] ]
395         return hostnames
396
397     # gracetime : during the first <gracetime> minutes nothing gets printed
    def do_nodes_booted (self, minutes, gracetime=2):
        """Poll the API until all nodes are in the 'boot' state, or the timeout expires.

        minutes   -- overall timeout
        gracetime -- number of minutes before the first 'still in' message may show up

        Returns True once no node is left to check, False on timeout. Nodes
        whose model is a real one according to TestNode.is_real_model are
        counted as booted whatever their state. The API is polled every 15 seconds.
        """
        # compute timeout
        timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
        graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
        # the nodes that haven't checked yet - start with a full list and shrink over time
        tocheck = self.all_hostnames()
        utils.header("checking nodes %r"%tocheck)
        # create a dict hostname -> status
        status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
        while tocheck:
            # get their status
            tocheck_status=self.server.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
            # update status
            for array in tocheck_status:
                hostname=array['hostname']
                boot_state=array['boot_state']
                if boot_state == 'boot':
                    utils.header ("%s has reached the 'boot' state"%hostname)
                else:
                    # if it's a real node, never mind
                    (site_spec,node_spec)=self.locate_node(hostname)
                    if TestNode.is_real_model(node_spec['node_fields']['model']):
                        utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
                        # let's cheat
                        boot_state = 'boot'
                    if datetime.datetime.now() > graceout:
                        utils.header ("%s still in '%s' state"%(hostname,boot_state))
                        # NOTE(review): a positional timedelta argument is a number of days,
                        # so this silences the message for a whole day - confirm that
                        # one minute was not the intent
                        graceout=datetime.datetime.now()+datetime.timedelta(1)
                status[hostname] = boot_state
            # refresh tocheck
            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
            if not tocheck:
                return True
            if datetime.datetime.now() > timeout:
                for hostname in tocheck:
                    utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
                return False
            # otherwise, sleep for a while
            time.sleep(15)
        # only useful in empty plcs
        return True
439
440     def nodes_booted(self,options):
441         return self.do_nodes_booted(minutes=5)
442     
443     #to scan and store the nodes's public keys and avoid to ask for confirmation when  ssh 
444     def scan_publicKeys(self,hostnames):
445         try:
446             temp_knownhosts="/root/known_hosts"
447             remote_knownhosts="/root/.ssh/known_hosts"
448             self.test_ssh.run_in_host("touch %s"%temp_knownhosts )
449             for hostname in hostnames:
450                 utils.header("Scan Public %s key and store it in the known_host file(under the root image) "%hostname)
451                 scan=self.test_ssh.run_in_host('ssh-keyscan -t rsa %s >> %s '%(hostname,temp_knownhosts))
452             #Store the public keys in the right root image
453             self.copy_in_guest(temp_knownhosts,remote_knownhosts,True)
454             #clean the temp keys file used
455             self.test_ssh.run_in_host('rm -f  %s '%temp_knownhosts )
456         except Exception, err:
457             print err
458             
459     def do_check_nodesSsh(self,minutes):
460         # compute timeout
461         timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
462         tocheck = self.all_hostnames()
463         self.scan_publicKeys(tocheck)
464         utils.header("checking Connectivity on nodes %r"%tocheck)
465         while tocheck:
466             for hostname in tocheck:
467                 # try to ssh in nodes
468                 access=self.test_ssh.run_in_guest('ssh -i /etc/planetlab/root_ssh_key.rsa root@%s date'%hostname )
469                 if (not access):
470                     utils.header('The node %s is sshable -->'%hostname)
471                     # refresh tocheck
472                     tocheck.remove(hostname)
473                 else:
474                     (site_spec,node_spec)=self.locate_node(hostname)
475                     if TestNode.is_real_model(node_spec['node_fields']['model']):
476                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
477                         tocheck.remove(hostname)
478             if  not tocheck:
479                 return True
480             if datetime.datetime.now() > timeout:
481                 for hostname in tocheck:
482                     utils.header("FAILURE to ssh into %s"%hostname)
483                 return False
484             # otherwise, sleep for a while
485             time.sleep(15)
486         # only useful in empty plcs
487         return True
488         
489     def nodes_ssh(self, options):
490         return  self.do_check_nodesSsh(minutes=2)
491     
492     def bootcd (self, options):
493         for site_spec in self.plc_spec['sites']:
494             test_site = TestSite (self,site_spec)
495             for node_spec in site_spec['nodes']:
496                 test_node=TestNode (self,test_site,node_spec)
497                 test_node.prepare_area()
498                 test_node.create_boot_cd()
499                 test_node.configure_qemu()
500         return True
501
502     def do_check_intiscripts(self):
503         for site_spec in self.plc_spec['sites']:
504                 test_site = TestSite (self,site_spec)
505                 test_node = TestNode (self,test_site,site_spec['nodes'])
506                 for slice_spec in self.plc_spec['slices']:
507                         test_slice=TestSlice (self,test_site,slice_spec)
508                         test_sliver=TestSliver(self,test_node,test_slice)
509                         init_status=test_sliver.get_initscript(slice_spec)
510                         if (not init_status):
511                                 return False
512                 return init_status
513             
514     def check_initscripts(self, options):
515             return self.do_check_intiscripts()
516                     
517     def initscripts (self, options):
518         for initscript in self.plc_spec['initscripts']:
519             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
520             self.server.AddInitScript(self.auth_root(),initscript['initscript_fields'])
521         return True
522
523     def slices (self, options):
524         return self.do_slices()
525
526     def clean_slices (self, options):
527         return self.do_slices("delete")
528
529     def do_slices (self,  action="add"):
530         for slice in self.plc_spec['slices']:
531             site_spec = self.locate_site (slice['sitename'])
532             test_site = TestSite(self,site_spec)
533             test_slice=TestSlice(self,test_site,slice)
534             if action != "add":
535                 utils.header("Deleting slices in site %s"%test_site.name())
536                 test_slice.delete_slice()
537             else:    
538                 utils.pprint("Creating slice",slice)
539                 test_slice.create_slice()
540                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
541         return True
542         
543     def check_slices(self, options):
544         for slice_spec in self.plc_spec['slices']:
545             site_spec = self.locate_site (slice_spec['sitename'])
546             test_site = TestSite(self,site_spec)
547             test_slice=TestSlice(self,test_site,slice_spec)
548             status=test_slice.do_check_slice(options)
549             if (not status):
550                 return False
551         return status
552     
553     def start_nodes (self, options):
554         utils.header("Starting  nodes")
555         for site_spec in self.plc_spec['sites']:
556             TestSite(self,site_spec).start_nodes (options)
557         return True
558
559     def stop_nodes (self, options):
560         self.kill_all_qemus(options)
561         return True
562
563     def check_tcp (self, options):
564             #we just need to create a sliver object nothing else
565             test_sliver=TestSliver(self,
566                                    TestNode(self, TestSite(self,self.plc_spec['sites'][0]),
567                                             self.plc_spec['sites'][0]['nodes'][0]),
568                                    TestSlice(self,TestSite(self,self.plc_spec['sites'][0]),
569                                              self.plc_spec['slices']))
570             return test_sliver.do_check_tcp(self.plc_spec['tcp_param'],options)
571
572     # returns the filename to use for sql dump/restore, using options.dbname if set
573     def dbfile (self, database, options):
574         # uses options.dbname if it is found
575         try:
576             name=options.dbname
577             if not isinstance(name,StringTypes):
578                 raise Exception
579         except:
580             t=datetime.datetime.now()
581             d=t.date()
582             name=str(d)
583         return "/root/%s-%s.sql"%(database,name)
584
585     def db_dump(self, options):
586         
587         dump=self.dbfile("planetab4",options)
588         self.test_ssh.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
589         utils.header('Dumped planetlab4 database in %s'%dump)
590         return True
591
592     def db_restore(self, options):
593         dump=self.dbfile("planetab4",options)
594         ##stop httpd service
595         self.test_ssh.run_in_guest('service httpd stop')
596         # xxx - need another wrapper
597         self.test_ssh.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
598         self.test_ssh.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
599         self.test_ssh.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
600         ##starting httpd service
601         self.test_ssh.run_in_guest('service httpd start')
602
603         utils.header('Database restored from ' + dump)
604
    # the standby_<N> steps : each one sleeps for N minutes
    # the stub bodies never run - standby_generic replaces each stub with a
    # (self,options) step that reads N off the stub's name
    @standby_generic 
    def standby_1(): pass
    @standby_generic 
    def standby_2(): pass
    @standby_generic 
    def standby_3(): pass
    @standby_generic 
    def standby_4(): pass
    @standby_generic 
    def standby_5(): pass
    @standby_generic 
    def standby_6(): pass
    @standby_generic 
    def standby_7(): pass
    @standby_generic 
    def standby_8(): pass
    @standby_generic 
    def standby_9(): pass
    @standby_generic 
    def standby_10(): pass
    @standby_generic 
    def standby_11(): pass
    @standby_generic 
    def standby_12(): pass
    @standby_generic 
    def standby_13(): pass
    @standby_generic 
    def standby_14(): pass
    @standby_generic 
    def standby_15(): pass
    @standby_generic 
    def standby_16(): pass
    @standby_generic 
    def standby_17(): pass
    @standby_generic 
    def standby_18(): pass
    @standby_generic 
    def standby_19(): pass
    @standby_generic 
    def standby_20(): pass
645