repairing
[tests.git] / system / TestPlc.py
1 # $Id$
2 import os, os.path
3 import datetime
4 import time
5 import sys
6 import xmlrpclib
7 import datetime
8 import traceback
9 from types import StringTypes
10
11 import utils
12 from TestSite import TestSite
13 from TestNode import TestNode
14 from TestUser import TestUser
15 from TestKey import TestKey
16 from TestSlice import TestSlice
17 from TestSliver import TestSliver
18 from TestBox import TestBox
19
20 # step methods must take (self, options) and return a boolean
21
def standby(minutes):
    """Announce a standby period, sleep for <minutes> minutes, succeed."""
    utils.header('Entering StandBy for %d mn'%minutes)
    time.sleep(60*minutes)
    return True
26
def standby_generic (func):
    """Decorator factory: turn a stub named standby_<N> into a step method
    (self,options) that sleeps N minutes.  N is parsed from the stub's name."""
    from functools import wraps
    # wraps preserves the stub's __name__ so the generated steps stay
    # distinguishable (the original left them all named 'actual')
    @wraps(func)
    def actual(self,options):
        # e.g. 'standby_5' -> 5
        minutes=int(func.__name__.split("_")[1])
        return standby(minutes)
    return actual
32
class TestPlc:

    def __init__ (self,plc_spec):
        """Build a test driver for <plc_spec>.

        The deployment flavour is inferred from the spec: when the
        'vserverip'/'vservername' keys are present we drive a vserver-based
        myplc, otherwise a chroot-based one on the spec's 'hostname'.
        """
        self.plc_spec=plc_spec
        self.path=os.path.dirname(sys.argv[0])
        try:
            self.vserverip=plc_spec['vserverip']
            self.vservername=plc_spec['vservername']
            self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
            self.vserver=True
        # only the missing-key case means "chroot flavour"; the original
        # bare except: would also have masked unrelated errors
        except KeyError:
            self.vserver=False
            self.url="https://%s:443/PLCAPI/"%plc_spec['hostname']
        utils.header('Using API url %s'%self.url)
        self.server=xmlrpclib.Server(self.url,allow_none=True)
48         
49     def name(self):
50         name=self.plc_spec['name']
51         if self.vserver:
52             return name+"[%s]"%self.vservername
53         else:
54             return name+"[chroot]"
55
    def hostname(self):
        # the host box this plc runs on, as declared in the spec
        return self.plc_spec['hostname']

    def is_local (self):
        # True when the plc's host box is the local machine
        return utils.is_local(self.hostname())

    # define the API methods on this object through xmlrpc
    # would help, but not strictly necessary
    def connect (self):
        # no-op: the xmlrpclib server proxy needs no explicit connection
        pass
66     
67     # command gets run in the chroot/vserver
68     def host_to_guest(self,command):
69         if self.vserver:
70             return "vserver %s exec %s"%(self.vservername,command)
71         else:
72             return "chroot /plc/root %s"%utils.backslash_shell_specials(command)
73
74     # command gets run on the right box
75     def to_host(self,command):
76         if self.is_local():
77             return command
78         else:
79             return "ssh %s %s"%(self.hostname(),utils.backslash_shell_specials(command))
80
81     def full_command(self,command):
82         return self.to_host(self.host_to_guest(command))
83
    def run_in_guest (self,command):
        # run <command> inside the guest on the host box; returns the shell status
        return utils.system(self.full_command(command))
    def run_in_host (self,command):
        # run <command> on the host box itself (outside the guest)
        return utils.system(self.to_host(command))

    # xxx quick n dirty
    def run_in_guest_piped (self,local,remote):
        # pipe the output of a local command into a command run in the guest
        return utils.system(local+" | "+self.full_command(remote))
92
93     # copy a file to the myplc root image - pass in_data=True if the file must go in /plc/data
94     def copy_in_guest (self, localfile, remotefile, in_data=False):
95         if in_data:
96             chroot_dest="/plc/data"
97         else:
98             chroot_dest="/plc/root"
99         if self.is_local():
100             if not self.vserver:
101                 utils.system("cp %s %s/%s"%(localfile,chroot_dest,remotefile))
102             else:
103                 utils.system("cp %s /vservers/%s/%s"%(localfile,self.vservername,remotefile))
104         else:
105             if not self.vserver:
106                 utils.system("scp %s %s:%s/%s"%(localfile,self.hostname(),chroot_dest,remotefile))
107             else:
108                 utils.system("scp %s %s@/vservers/%s/%s"%(localfile,self.hostname(),self.vservername,remotefile))
109
    def auth_root (self):
        # build the API auth struct for the root account, straight from the spec
        return {'Username':self.plc_spec['PLC_ROOT_USER'],
                'AuthMethod':'password',
                'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
                'Role' : self.plc_spec['role']
                }
116     def locate_site (self,sitename):
117         for site in self.plc_spec['sites']:
118             if site['site_fields']['name'] == sitename:
119                 return site
120             if site['site_fields']['login_base'] == sitename:
121                 return site
122         raise Exception,"Cannot locate site %s"%sitename
123         
124     def locate_node (self,nodename):
125         for site in self.plc_spec['sites']:
126             for node in site['nodes']:
127                 if node['node_fields']['hostname'] == nodename:
128                     return (site,node)
129         raise Exception,"Cannot locate node %s"%nodename
130         
131     def locate_key (self,keyname):
132         for key in self.plc_spec['keys']:
133             if key['name'] == keyname:
134                 return key
135         raise Exception,"Cannot locate key %s"%keyname
136
137     # all different hostboxes used in this plc
138     def gather_hostBoxes(self):
139         # maps on sites and nodes, return [ (host_box,test_node) ]
140         tuples=[]
141         for site_spec in self.plc_spec['sites']:
142             test_site = TestSite (self,site_spec)
143             for node_spec in site_spec['nodes']:
144                 test_node = TestNode (self, test_site, node_spec)
145                 if not test_node.is_real():
146                     tuples.append( (test_node.host_box(),test_node) )
147         # transform into a dict { 'host_box' -> [ hostnames .. ] }
148         result = {}
149         for (box,node) in tuples:
150             if not result.has_key(box):
151                 result[box]=[node]
152             else:
153                 result[box].append(node)
154         return result
155                     
156     # a step for checking this stuff
157     def showboxes (self,options):
158         print 'showboxes'
159         for (box,nodes) in self.gather_hostBoxes().iteritems():
160             print box,":"," + ".join( [ node.name() for node in nodes ] )
161         return True
162
163     # make this a valid step
164     def kill_all_qemus(self,options):
165         for (box,nodes) in self.gather_hostBoxes().iteritems():
166             # this is the brute force version, kill all qemus on that host box
167             TestBox(box,options.buildname).kill_all_qemus()
168         return True
169
170     # make this a valid step
171     def list_all_qemus(self,options):
172         for (box,nodes) in self.gather_hostBoxes().iteritems():
173             # push the script
174             TestBox(box,options.buildname).copy("qemu_kill.sh") 
175             # this is the brute force version, kill all qemus on that host box
176             TestBox(box,options.buildname).run_in_buildname("qemu_kill.sh -l")
177         return True
178
179     # kill only the right qemus
180     def kill_qemus(self,options):
181         for (box,nodes) in self.gather_hostBoxes().iteritems():
182             # push the script
183             TestBox(box,options.buildname).copy("qemu_kill.sh") 
184             # the fine-grain version
185             for node in nodes:
186                 node.kill_qemu()
187         return True
188
189     def clear_ssh_config (self,options):
190         # install local ssh_config file as root's .ssh/config - ssh should be quiet
191         # dir might need creation first
192         self.run_in_guest("mkdir /root/.ssh")
193         self.run_in_guest("chmod 700 /root/.ssh")
194         # this does not work - > redirection somehow makes it until an argument to cat
195         #self.run_in_guest_piped("cat ssh_config","cat > /root/.ssh/config")
196         self.copy_in_guest("ssh_config","/root/.ssh/config",True)
197         return True
198             
199     #################### step methods
200
201     ### uninstall
202     def uninstall_chroot(self,options):
203         self.run_in_host('service plc safestop')
204         #####detecting the last myplc version installed and remove it
205         self.run_in_host('rpm -e myplc')
206         ##### Clean up the /plc directory
207         self.run_in_host('rm -rf  /plc/data')
208         ##### stop any running vservers
209         self.run_in_host('for vserver in $(ls /vservers/* | sed -e s,/vservers/,,) ; do vserver $vserver stop ; done')
210         return True
211
    def uninstall_vserver(self,options):
        # step: delete the vserver hosting this plc
        self.run_in_host("vserver --silent %s delete"%self.vservername)
        return True
215
216     def uninstall(self,options):
217         # if there's a chroot-based myplc running, and then a native-based myplc is being deployed
218         # it sounds safer to have the former uninstalled too
219         # now the vserver method cannot be invoked for chroot instances as vservername is required
220         if self.vserver:
221             self.uninstall_vserver(options)
222             self.uninstall_chroot(options)
223         else:
224             self.uninstall_chroot(options)
225         return True
226
    ### install
    def install_chroot(self,options):
        # nothing to do at this stage for the chroot flavour
        return True
231
232     # xxx this would not work with hostname != localhost as mylc-init-vserver was extracted locally
233     def install_vserver(self,options):
234         # we need build dir for vtest-init-vserver
235         if self.is_local():
236             # a full path for the local calls
237             build_dir=self.path+"/build"
238         else:
239             # use a standard name - will be relative to HOME 
240             build_dir="tests-system-build"
241         build_checkout = "svn checkout %s %s"%(options.build_url,build_dir)
242         if self.run_in_host(build_checkout) != 0:
243             raise Exception,"Cannot checkout build dir"
244         # the repo url is taken from myplc-url 
245         # with the last two steps (i386/myplc...) removed
246         repo_url = options.myplc_url
247         repo_url = os.path.dirname(repo_url)
248         repo_url = os.path.dirname(repo_url)
249         create_vserver="%s/vtest-init-vserver.sh %s %s -- --interface eth0:%s"%\
250             (build_dir,self.vservername,repo_url,self.vserverip)
251         if self.run_in_host(create_vserver) != 0:
252             raise Exception,"Could not create vserver for %s"%self.vservername
253         return True
254
255     def install(self,options):
256         if self.vserver:
257             return self.install_vserver(options)
258         else:
259             return self.install_chroot(options)
260     
261     ### install_rpm
262     def cache_rpm(self,url):
263         self.run_in_host('rm -rf *.rpm')
264         utils.header('Curling rpm from %s'%url)
265         id= self.run_in_host('curl -O '+url)
266         if (id != 0):
267                 raise Exception,"Could not get rpm from  %s"%url
268                 return False
269         return True
270
271     def install_rpm_chroot(self,options):
272         rpm = os.path.basename(options.myplc_url)
273         if (not os.path.isfile(rpm)):
274                 self.cache_rpm(options.myplc_url)
275         utils.header('Installing the :  %s'%rpm)
276         self.run_in_host('rpm -Uvh '+rpm)
277         self.run_in_host('service plc mount')
278         return True
279
280     def install_rpm_vserver(self,options):
281         self.run_in_guest("yum -y install myplc-native")
282         return True
283
284     def install_rpm(self,options):
285         if self.vserver:
286             return self.install_rpm_vserver(options)
287         else:
288             return self.install_rpm_chroot(options)
289
290     ### 
291     def configure(self,options):
292         tmpname='%s.plc-config-tty'%(self.name())
293         fileconf=open(tmpname,'w')
294         for var in [ 'PLC_NAME',
295                      'PLC_ROOT_PASSWORD',
296                      'PLC_ROOT_USER',
297                      'PLC_MAIL_ENABLED',
298                      'PLC_MAIL_SUPPORT_ADDRESS',
299                      'PLC_DB_HOST',
300                      'PLC_API_HOST',
301                      'PLC_WWW_HOST',
302                      'PLC_BOOT_HOST',
303                      'PLC_NET_DNS1',
304                      'PLC_NET_DNS2']:
305             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
306         fileconf.write('w\n')
307         fileconf.write('q\n')
308         fileconf.close()
309         utils.system('cat %s'%tmpname)
310         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
311         utils.system('rm %s'%tmpname)
312         return True
313
314     # the chroot install is slightly different to this respect
315     def start(self, options):
316         if self.vserver:
317             self.run_in_guest('service plc start')
318         else:
319             self.run_in_host('service plc start')
320         return True
321         
322     def stop(self, options):
323         if self.vserver:
324             self.run_in_guest('service plc stop')
325         else:
326             self.run_in_host('service plc stop')
327         return True
328         
329     # could use a TestKey class
330     def store_keys(self, options):
331         for key_spec in self.plc_spec['keys']:
332             TestKey(self,key_spec).store_key()
333         return True
334
335     def clean_keys(self, options):
336         utils.system("rm -rf %s/keys/"%self.path)
337
338     def sites (self,options):
339         return self.do_sites(options)
340     
341     def clean_sites (self,options):
342         return self.do_sites(options,action="delete")
343     
344     def do_sites (self,options,action="add"):
345         for site_spec in self.plc_spec['sites']:
346             test_site = TestSite (self,site_spec)
347             if (action != "add"):
348                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
349                 test_site.delete_site()
350                 # deleted with the site
351                 #test_site.delete_users()
352                 continue
353             else:
354                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
355                 test_site.create_site()
356                 test_site.create_users()
357         return True
358
359     def nodes (self, options):
360         return self.do_nodes(options)
361     def clean_nodes (self, options):
362         return self.do_nodes(options,action="delete")
363
364     def do_nodes (self, options,action="add"):
365         for site_spec in self.plc_spec['sites']:
366             test_site = TestSite (self,site_spec)
367             if action != "add":
368                 utils.header("Deleting nodes in site %s"%test_site.name())
369                 for node_spec in site_spec['nodes']:
370                     test_node=TestNode(self,test_site,node_spec)
371                     utils.header("Deleting %s"%test_node.name())
372                     test_node.delete_node()
373             else:
374                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
375                 for node_spec in site_spec['nodes']:
376                     utils.pprint('Creating node %s'%node_spec,node_spec)
377                     test_node = TestNode (self,test_site,node_spec)
378                     test_node.create_node ()
379         return True
380
381     # create nodegroups if needed, and populate
382     # no need for a clean_nodegroups if we are careful enough
383     def nodegroups (self, options):
384         # 1st pass to scan contents
385         groups_dict = {}
386         for site_spec in self.plc_spec['sites']:
387             test_site = TestSite (self,site_spec)
388             for node_spec in site_spec['nodes']:
389                 test_node=TestNode (self,test_site,node_spec)
390                 if node_spec.has_key('nodegroups'):
391                     nodegroupnames=node_spec['nodegroups']
392                     if isinstance(nodegroupnames,StringTypes):
393                         nodegroupnames = [ nodegroupnames ]
394                     for nodegroupname in nodegroupnames:
395                         if not groups_dict.has_key(nodegroupname):
396                             groups_dict[nodegroupname]=[]
397                         groups_dict[nodegroupname].append(test_node.name())
398         auth=self.auth_root()
399         for (nodegroupname,group_nodes) in groups_dict.iteritems():
400             try:
401                 self.server.GetNodeGroups(auth,{'name':nodegroupname})[0]
402             except:
403                 self.server.AddNodeGroup(auth,{'name':nodegroupname})
404             for node in group_nodes:
405                 self.server.AddNodeToNodeGroup(auth,node,nodegroupname)
406         return True
407
408     def all_hostnames (self) :
409         hostnames = []
410         for site_spec in self.plc_spec['sites']:
411             hostnames += [ node_spec['node_fields']['hostname'] \
412                            for node_spec in site_spec['nodes'] ]
413         return hostnames
414
    # gracetime : during the first <gracetime> minutes nothing gets printed
    def do_nodes_booted (self, minutes, gracetime=2):
        """Poll the API until every node reaches the 'boot' state.

        Returns True once all nodes are booted, False after <minutes>
        minutes (listing the laggards).  Real (non-qemu) nodes are not
        expected to boot here and are counted as booted.
        """
        # compute timeout
        timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
        graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
        # the nodes that haven't checked yet - start with a full list and shrink over time
        tocheck = self.all_hostnames()
        utils.header("checking nodes %r"%tocheck)
        # create a dict hostname -> status
        status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
        while tocheck:
            # get their status
            tocheck_status=self.server.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
            # update status
            for array in tocheck_status:
                hostname=array['hostname']
                boot_state=array['boot_state']
                if boot_state == 'boot':
                    utils.header ("%s has reached the 'boot' state"%hostname)
                else:
                    # if it's a real node, never mind
                    (site_spec,node_spec)=self.locate_node(hostname)
                    if TestNode.is_real_model(node_spec['node_fields']['model']):
                        utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
                        # let's cheat
                        boot_state = 'boot'
                    # past the grace period, report stragglers - then push
                    # graceout one day ahead so it is only reported once
                    if datetime.datetime.now() > graceout:
                        utils.header ("%s still in '%s' state"%(hostname,boot_state))
                        graceout=datetime.datetime.now()+datetime.timedelta(1)
                status[hostname] = boot_state
            # refresh tocheck
            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
            if not tocheck:
                return True
            if datetime.datetime.now() > timeout:
                for hostname in tocheck:
                    utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
                return False
            # otherwise, sleep for a while
            time.sleep(15)
        # only useful in empty plcs
        return True
457
458     def nodes_booted(self,options):
459         return self.do_nodes_booted(minutes=5)
460     
    #to scan and store the nodes's public keys and avoid to ask for confirmation when  ssh
    def scan_publicKeys(self,hostnames):
        # collect each node's rsa host key with ssh-keyscan into a temp file,
        # install it as /root/.ssh/known_hosts inside the guest, then clean up.
        # best-effort: any failure is printed and otherwise ignored.
        try:
            temp_knownhosts="/root/known_hosts"
            remote_knownhosts="/root/.ssh/known_hosts"
            self.run_in_host("touch %s"%temp_knownhosts )
            for hostname in hostnames:
                utils.header("Scan Public %s key and store it in the known_host file(under the root image) "%hostname)
                scan=self.run_in_host('ssh-keyscan -t rsa %s >> %s '%(hostname,temp_knownhosts))
            #Store the public keys in the right root image
            self.copy_in_guest(temp_knownhosts,remote_knownhosts,True)
            #clean the temp keys file used
            self.run_in_host('rm -f  %s '%temp_knownhosts )
        except Exception, err:
            print err
476             
477     def do_check_nodesSsh(self,minutes):
478         # compute timeout
479         timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
480         tocheck = self.all_hostnames()
481         self.scan_publicKeys(tocheck)
482         utils.header("checking Connectivity on nodes %r"%tocheck)
483         while tocheck:
484             for hostname in tocheck:
485                 # try to ssh in nodes
486                 access=self.run_in_guest('ssh -i /etc/planetlab/root_ssh_key.rsa root@%s date'%hostname )
487                 if (not access):
488                     utils.header('The node %s is sshable -->'%hostname)
489                     # refresh tocheck
490                     tocheck.remove(hostname)
491                 else:
492                     (site_spec,node_spec)=self.locate_node(hostname)
493                     if TestNode.is_real_model(node_spec['node_fields']['model']):
494                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
495                         tocheck.remove(hostname)
496             if  not tocheck:
497                 return True
498             if datetime.datetime.now() > timeout:
499                 for hostname in tocheck:
500                     utils.header("FAILURE to ssh into %s"%hostname)
501                 return False
502             # otherwise, sleep for a while
503             time.sleep(15)
504         # only useful in empty plcs
505         return True
506         
507     def nodes_ssh(self, options):
508         return  self.do_check_nodesSsh(minutes=2)
509     
510     def bootcd (self, options):
511         for site_spec in self.plc_spec['sites']:
512             test_site = TestSite (self,site_spec)
513             for node_spec in site_spec['nodes']:
514                 test_node=TestNode (self,test_site,node_spec)
515                 test_node.prepare_area()
516                 test_node.create_boot_cd()
517                 test_node.configure_qemu()
518         return True
519
    def do_check_intiscripts(self):
        # check each slice's initscript status on the node
        # NOTE(review): the name looks like a typo for do_check_initscripts;
        # kept as is because check_initscripts below calls it by this name
        for site_spec in self.plc_spec['sites']:
                test_site = TestSite (self,site_spec)
                # NOTE(review): passes the whole 'nodes' list where other call
                # sites hand TestNode a single node spec - confirm intent
                test_node = TestNode (self,test_site,site_spec['nodes'])
                for slice_spec in self.plc_spec['slices']:
                        test_slice=TestSlice (self,test_site,slice_spec)
                        test_sliver=TestSliver(self,test_node,test_slice)
                        init_status=test_sliver.get_initscript(slice_spec)
                        if (not init_status):
                                return False
                # NOTE(review): this return sits inside the site loop, so only
                # the first site is ever checked; init_status is also unbound
                # when 'slices' is empty - confirm before relying on this
                return init_status
531             
    def check_initscripts(self, options):
            # step: delegate to do_check_intiscripts (note the typo'd name)
            return self.do_check_intiscripts()
534                     
535     def initscripts (self, options):
536         for initscript in self.plc_spec['initscripts']:
537             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
538             self.server.AddInitScript(self.auth_root(),initscript['initscript_fields'])
539         return True
540
541     def slices (self, options):
542         return self.do_slices()
543
544     def clean_slices (self, options):
545         return self.do_slices("delete")
546
547     def do_slices (self,  action="add"):
548         for slice in self.plc_spec['slices']:
549             site_spec = self.locate_site (slice['sitename'])
550             test_site = TestSite(self,site_spec)
551             test_slice=TestSlice(self,test_site,slice)
552             if action != "add":
553                 utils.header("Deleting slices in site %s"%test_site.name())
554                 test_slice.delete_slice()
555             else:    
556                 utils.pprint("Creating slice",slice)
557                 test_slice.create_slice()
558                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
559         return True
560         
561     def check_slices(self, options):
562         for slice_spec in self.plc_spec['slices']:
563             site_spec = self.locate_site (slice_spec['sitename'])
564             test_site = TestSite(self,site_spec)
565             test_slice=TestSlice(self,test_site,slice_spec)
566             status=test_slice.do_check_slice(options)
567             if (not status):
568                 return False
569         return status
570     
571     def start_nodes (self, options):
572         utils.header("Starting  nodes")
573         for site_spec in self.plc_spec['sites']:
574             TestSite(self,site_spec).start_nodes (options)
575         return True
576
    def stop_nodes (self, options):
        # step: stopping the nodes boils down to killing their qemus
        self.kill_all_qemus(options)
        return True
580
    def check_tcp (self, options):
            # step: run the tcp connectivity check through a sliver built from
            # the first node of the first site
            # NOTE(review): TestSlice is handed the whole 'slices' list here,
            # while other call sites pass one slice spec - confirm intent
            #we just need to create a sliver object nothing else
            test_sliver=TestSliver(self,
                                   TestNode(self, TestSite(self,self.plc_spec['sites'][0]),
                                            self.plc_spec['sites'][0]['nodes'][0]),
                                   TestSlice(self,TestSite(self,self.plc_spec['sites'][0]),
                                             self.plc_spec['slices']))
            return test_sliver.do_check_tcp(self.plc_spec['tcp_param'],options)
589
590     # returns the filename to use for sql dump/restore, using options.dbname if set
591     def dbfile (self, database, options):
592         # uses options.dbname if it is found
593         try:
594             name=options.dbname
595             if not isinstance(name,StringTypes):
596                 raise Exception
597         except:
598             t=datetime.datetime.now()
599             d=t.date()
600             name=str(d)
601         return "/root/%s-%s.sql"%(database,name)
602
603     def db_dump(self, options):
604         
605         dump=self.dbfile("planetab4",options)
606         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
607         utils.header('Dumped planetlab4 database in %s'%dump)
608         return True
609
610     def db_restore(self, options):
611         dump=self.dbfile("planetab4",options)
612         ##stop httpd service
613         self.run_in_guest('service httpd stop')
614         # xxx - need another wrapper
615         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
616         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
617         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
618         ##starting httpd service
619         self.run_in_guest('service httpd start')
620
621         utils.header('Database restored from ' + dump)
622
    # canned wait steps: standby_<N> sleeps N minutes; the duration is
    # parsed out of the method name by the standby_generic decorator
    @standby_generic
    def standby_1(): pass
    @standby_generic
    def standby_2(): pass
    @standby_generic
    def standby_3(): pass
    @standby_generic
    def standby_4(): pass
    @standby_generic
    def standby_5(): pass
    @standby_generic
    def standby_6(): pass
    @standby_generic
    def standby_7(): pass
    @standby_generic
    def standby_8(): pass
    @standby_generic
    def standby_9(): pass
    @standby_generic
    def standby_10(): pass
    @standby_generic
    def standby_11(): pass
    @standby_generic
    def standby_12(): pass
    @standby_generic
    def standby_13(): pass
    @standby_generic
    def standby_14(): pass
    @standby_generic
    def standby_15(): pass
    @standby_generic
    def standby_16(): pass
    @standby_generic
    def standby_17(): pass
    @standby_generic
    def standby_18(): pass
    @standby_generic
    def standby_19(): pass
    @standby_generic
    def standby_20(): pass
663