fine-grain kill of qemus - new step list_all_qemus
[tests.git] / system / TestPlc.py
1 # $Id$
2 import os, os.path
3 import datetime
4 import time
5 import sys
6 import xmlrpclib
7 import datetime
8 import traceback
9 from types import StringTypes
10
11 import utils
12 from TestSite import TestSite
13 from TestNode import TestNode
14 from TestUser import TestUser
15 from TestKey import TestKey
16 from TestSlice import TestSlice
17 from TestBox import TestBox
18
19 # step methods must take (self, options) and return a boolean
20
def standby(minutes):
    """Announce a pause, block for *minutes* minutes, then report success.

    minutes: length of the pause, in minutes
    Always returns True so that it can serve as the result of a step.
    """
    banner = 'Entering StandBy for %d mn' % minutes
    utils.header(banner)
    seconds = 60 * minutes
    time.sleep(seconds)
    return True
25
def standby_generic (func):
    """Decorator turning a stub named ``standby_<N>`` into a step sleeping N minutes.

    func: the decorated stub; its body is never executed, only its name is
          used - the text between the first and second '_' is parsed as the
          number of minutes.
    Returns a function taking (self, options), as steps do, that returns the
    result of standby(N).  The wrapper keeps the name and docstring of the
    stub, so each generated step stays identifiable by introspection instead
    of being named 'actual'.
    """
    # function-scope import, so this block does not depend on the module header
    from functools import wraps

    @wraps(func)
    def actual(self,options):
        # the name is parsed at call time; a malformed name only fails when the step runs
        minutes=int(func.__name__.split("_")[1])
        return standby(minutes)
    return actual
31
32 class TestPlc:
33
34     def __init__ (self,plc_spec):
35         self.plc_spec=plc_spec
36         self.path=os.path.dirname(sys.argv[0])
37         try:
38             self.vserverip=plc_spec['vserverip']
39             self.vservername=plc_spec['vservername']
40             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
41             self.vserver=True
42         except:
43             self.vserver=False
44             self.url="https://%s:443/PLCAPI/"%plc_spec['hostname']
45         utils.header('Using API url %s'%self.url)
46         self.server=xmlrpclib.Server(self.url,allow_none=True)
47         
48     def name(self):
49         name=self.plc_spec['name']
50         if self.vserver:
51             return name+"[%s]"%self.vservername
52         else:
53             return name+"[chroot]"
54
55     def is_local (self):
56         return self.plc_spec['hostname'] == 'localhost'
57
58     # define the API methods on this object through xmlrpc
59     # would help, but not strictly necessary
60     def connect (self):
61         pass
62     
63     # command gets run in the chroot/vserver
64     def host_to_guest(self,command):
65         if self.vserver:
66             return "vserver %s exec %s"%(self.vservername,command)
67         else:
68             return "chroot /plc/root %s"%utils.backslash_shell_specials(command)
69
70     # command gets run on the right box
71     def to_host(self,command):
72         if self.is_local():
73             return command
74         else:
75             return "ssh %s %s"%(self.plc_spec['hostname'],utils.backslash_shell_specials(command))
76
77     def full_command(self,command):
78         return self.to_host(self.host_to_guest(command))
79
80     def run_in_guest (self,command):
81         return utils.system(self.full_command(command))
82     def run_in_host (self,command):
83         return utils.system(self.to_host(command))
84
85     # xxx quick n dirty
86     def run_in_guest_piped (self,local,remote):
87         return utils.system(local+" | "+self.full_command(remote))
88
89     # copy a file to the myplc root image - pass in_data=True if the file must go in /plc/data
90     def copy_in_guest (self, localfile, remotefile, in_data=False):
91         if in_data:
92             chroot_dest="/plc/data"
93         else:
94             chroot_dest="/plc/root"
95         if self.is_local():
96             if not self.vserver:
97                 utils.system("cp %s %s/%s"%(localfile,chroot_dest,remotefile))
98             else:
99                 utils.system("cp %s /vservers/%s/%s"%(localfile,self.vservername,remotefile))
100         else:
101             if not self.vserver:
102                 utils.system("scp %s %s:%s/%s"%(localfile,self.plc_spec['hostname'],chroot_dest,remotefile))
103             else:
104                 utils.system("scp %s %s@/vservers/%s/%s"%(localfile,self.plc_spec['hostname'],self.vservername,remotefile))
105
106     def auth_root (self):
107         return {'Username':self.plc_spec['PLC_ROOT_USER'],
108                 'AuthMethod':'password',
109                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
110                 'Role' : self.plc_spec['role']
111                 }
112     def locate_site (self,sitename):
113         for site in self.plc_spec['sites']:
114             if site['site_fields']['name'] == sitename:
115                 return site
116             if site['site_fields']['login_base'] == sitename:
117                 return site
118         raise Exception,"Cannot locate site %s"%sitename
119         
120     def locate_node (self,nodename):
121         for site in self.plc_spec['sites']:
122             for node in site['nodes']:
123                 if node['node_fields']['hostname'] == nodename:
124                     return (site,node)
125         raise Exception,"Cannot locate node %s"%nodename
126         
127     def locate_key (self,keyname):
128         for key in self.plc_spec['keys']:
129             if key['name'] == keyname:
130                 return key
131         raise Exception,"Cannot locate key %s"%keyname
132
133     # all different hostboxes used in this plc
134     def gather_hostBoxes(self):
135         # maps on sites and nodes, return [ (host_box,test_node) ]
136         tuples=[]
137         for site_spec in self.plc_spec['sites']:
138             test_site = TestSite (self,site_spec)
139             for node_spec in site_spec['nodes']:
140                 test_node = TestNode (self, test_site, node_spec)
141                 if not test_node.is_real():
142                     tuples.append( (test_node.host_box(),test_node) )
143         # transform into a dict { 'host_box' -> [ hostnames .. ] }
144         result = {}
145         for (box,node) in tuples:
146             if not result.has_key(box):
147                 result[box]=[node]
148             else:
149                 result[box].append(node)
150         return result
151                     
152     # a step for checking this stuff
153     def showboxes (self,options):
154         print 'showboxes'
155         for (box,nodes) in self.gather_hostBoxes().iteritems():
156             print box,":"," + ".join( [ node.name() for node in nodes ] )
157         return True
158
159     # make this a valid step
160     def kill_all_qemus(self,options):
161         for (box,nodes) in self.gather_hostBoxes().iteritems():
162             # this is the brute force version, kill all qemus on that host box
163             TestBox(box).kill_all_qemus()
164         return True
165
166     # make this a valid step
167     def list_all_qemus(self,options):
168         for (box,nodes) in self.gather_hostBoxes().iteritems():
169             # push the script
170             TestBox(box).copy("qemu_kill.sh")   
171             # this is the brute force version, kill all qemus on that host box
172             TestBox(box).run("./qemu_kill.sh -l")
173         return True
174
175     # kill only the right qemus
176     def kill_qemus(self,options):
177         for (box,nodes) in self.gather_hostBoxes().iteritems():
178             # push the script
179             TestBox(box).copy("qemu_kill.sh")   
180             # the fine-grain version
181             for node in nodes:
182                 node.kill_qemu()
183         return True
184
185     def clear_ssh_config (self,options):
186         # install local ssh_config file as root's .ssh/config - ssh should be quiet
187         # dir might need creation first
188         self.run_in_guest("mkdir /root/.ssh")
189         self.run_in_guest("chmod 700 /root/.ssh")
190         # this does not work - > redirection somehow makes it until an argument to cat
191         #self.run_in_guest_piped("cat ssh_config","cat > /root/.ssh/config")
192         self.copy_in_guest("ssh_config","/root/.ssh/config",True)
193         return True
194             
195     #################### step methods
196
197     ### uninstall
198     def uninstall_chroot(self,options):
199         self.run_in_host('service plc safestop')
200         #####detecting the last myplc version installed and remove it
201         self.run_in_host('rpm -e myplc')
202         ##### Clean up the /plc directory
203         self.run_in_host('rm -rf  /plc/data')
204         ##### stop any running vservers
205         self.run_in_host('for vserver in $(ls /vservers/* | sed -e s,/vservers/,,) ; do vserver $vserver stop ; done')
206         return True
207
208     def uninstall_vserver(self,options):
209         self.run_in_host("vserver --silent %s delete"%self.vservername)
210         return True
211
212     def uninstall(self,options):
213         # if there's a chroot-based myplc running, and then a native-based myplc is being deployed
214         # it sounds safer to have the former uninstalled too
215         # now the vserver method cannot be invoked for chroot instances as vservername is required
216         if self.vserver:
217             self.uninstall_vserver(options)
218             self.uninstall_chroot(options)
219         else:
220             self.uninstall_chroot(options)
221         return True
222
223     ### install
224     def install_chroot(self,options):
225         # nothing to do
226         return True
227
228     # xxx this would not work with hostname != localhost as mylc-init-vserver was extracted locally
229     def install_vserver(self,options):
230         # we need build dir for vtest-init-vserver
231         if self.is_local():
232             # a full path for the local calls
233             build_dir=self.path+"/build"
234         else:
235             # use a standard name - will be relative to HOME 
236             build_dir="tests-system-build"
237         build_checkout = "svn checkout %s %s"%(options.build_url,build_dir)
238         if self.run_in_host(build_checkout) != 0:
239             raise Exception,"Cannot checkout build dir"
240         # the repo url is taken from myplc-url 
241         # with the last two steps (i386/myplc...) removed
242         repo_url = options.myplc_url
243         repo_url = os.path.dirname(repo_url)
244         repo_url = os.path.dirname(repo_url)
245         create_vserver="%s/vtest-init-vserver.sh %s %s -- --interface eth0:%s"%\
246             (build_dir,self.vservername,repo_url,self.vserverip)
247         if self.run_in_host(create_vserver) != 0:
248             raise Exception,"Could not create vserver for %s"%self.vservername
249         return True
250
251     def install(self,options):
252         if self.vserver:
253             return self.install_vserver(options)
254         else:
255             return self.install_chroot(options)
256
257     ### install_rpm
258     def install_rpm_chroot(self,options):
259         utils.header('Installing from %s'%options.myplc_url)
260         url=options.myplc_url
261         self.run_in_host('rpm -Uvh '+url)
262         self.run_in_host('service plc mount')
263         return True
264
265     def install_rpm_vserver(self,options):
266         self.run_in_guest("yum -y install myplc-native")
267         return True
268
269     def install_rpm(self,options):
270         if self.vserver:
271             return self.install_rpm_vserver(options)
272         else:
273             return self.install_rpm_chroot(options)
274
275     ### 
276     def configure(self,options):
277         tmpname='%s/%s.plc-config-tty'%(options.path,self.name())
278         fileconf=open(tmpname,'w')
279         for var in [ 'PLC_NAME',
280                      'PLC_ROOT_PASSWORD',
281                      'PLC_ROOT_USER',
282                      'PLC_MAIL_ENABLED',
283                      'PLC_MAIL_SUPPORT_ADDRESS',
284                      'PLC_DB_HOST',
285                      'PLC_API_HOST',
286                      'PLC_WWW_HOST',
287                      'PLC_BOOT_HOST',
288                      'PLC_NET_DNS1',
289                      'PLC_NET_DNS2']:
290             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
291         fileconf.write('w\n')
292         fileconf.write('q\n')
293         fileconf.close()
294         utils.system('cat %s'%tmpname)
295         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
296         utils.system('rm %s'%tmpname)
297         return True
298
    # the chroot install is slightly different in this respect
300     def start(self, options):
301         if self.vserver:
302             self.run_in_guest('service plc start')
303         else:
304             self.run_in_host('service plc start')
305         return True
306         
307     def stop(self, options):
308         if self.vserver:
309             self.run_in_guest('service plc stop')
310         else:
311             self.run_in_host('service plc stop')
312         return True
313         
314     # could use a TestKey class
315     def store_keys(self, options):
316         for key_spec in self.plc_spec['keys']:
317             TestKey(self,key_spec).store_key()
318         return True
319
320     def clean_keys(self, options):
321         utils.system("rm -rf %s/keys/"%self.path)
322
323     def sites (self,options):
324         return self.do_sites(options)
325     
326     def clean_sites (self,options):
327         return self.do_sites(options,action="delete")
328     
329     def do_sites (self,options,action="add"):
330         for site_spec in self.plc_spec['sites']:
331             test_site = TestSite (self,site_spec)
332             if (action != "add"):
333                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
334                 test_site.delete_site()
335                 # deleted with the site
336                 #test_site.delete_users()
337                 continue
338             else:
339                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
340                 test_site.create_site()
341                 test_site.create_users()
342         return True
343
344     def nodes (self, options):
345         return self.do_nodes(options)
346     def clean_nodes (self, options):
347         return self.do_nodes(options,action="delete")
348
349     def do_nodes (self, options,action="add"):
350         for site_spec in self.plc_spec['sites']:
351             test_site = TestSite (self,site_spec)
352             if action != "add":
353                 utils.header("Deleting nodes in site %s"%test_site.name())
354                 for node_spec in site_spec['nodes']:
355                     test_node=TestNode(self,test_site,node_spec)
356                     utils.header("Deleting %s"%test_node.name())
357                     test_node.delete_node()
358             else:
359                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
360                 for node_spec in site_spec['nodes']:
361                     utils.show_spec('Creating node %s'%node_spec,node_spec)
362                     test_node = TestNode (self,test_site,node_spec)
363                     test_node.create_node ()
364         return True
365
366     # create nodegroups if needed, and populate
367     # no need for a clean_nodegroups if we are careful enough
368     def nodegroups (self, options):
369         # 1st pass to scan contents
370         groups_dict = {}
371         for site_spec in self.plc_spec['sites']:
372             test_site = TestSite (self,site_spec)
373             for node_spec in site_spec['nodes']:
374                 test_node=TestNode (self,test_site,node_spec)
375                 if node_spec.has_key('nodegroups'):
376                     nodegroupnames=node_spec['nodegroups']
377                     if isinstance(nodegroupnames,StringTypes):
378                         nodegroupnames = [ nodegroupnames ]
379                     for nodegroupname in nodegroupnames:
380                         if not groups_dict.has_key(nodegroupname):
381                             groups_dict[nodegroupname]=[]
382                         groups_dict[nodegroupname].append(test_node.name())
383         auth=self.auth_root()
384         for (nodegroupname,group_nodes) in groups_dict.iteritems():
385             try:
386                 self.server.GetNodeGroups(auth,{'name':nodegroupname})[0]
387             except:
388                 self.server.AddNodeGroup(auth,{'name':nodegroupname})
389             for node in group_nodes:
390                 self.server.AddNodeToNodeGroup(auth,node,nodegroupname)
391         return True
392
393     def all_hostnames (self) :
394         hostnames = []
395         for site_spec in self.plc_spec['sites']:
396             hostnames += [ node_spec['node_fields']['hostname'] \
397                            for node_spec in site_spec['nodes'] ]
398         return hostnames
399
400     # gracetime : during the first <gracetime> minutes nothing gets printed
401     def do_nodes_booted (self, minutes, gracetime=2):
402         # compute timeout
403         timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
404         graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
405         # the nodes that haven't checked yet - start with a full list and shrink over time
406         tocheck = self.all_hostnames()
407         utils.header("checking nodes %r"%tocheck)
408         # create a dict hostname -> status
409         status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
410         while tocheck:
411             # get their status
412             tocheck_status=self.server.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
413             # update status
414             for array in tocheck_status:
415                 hostname=array['hostname']
416                 boot_state=array['boot_state']
417                 if boot_state == 'boot':
418                     utils.header ("%s has reached the 'boot' state"%hostname)
419                 else:
420                     # if it's a real node, never mind
421                     (site_spec,node_spec)=self.locate_node(hostname)
422                     if TestNode.is_real_model(node_spec['node_fields']['model']):
423                         utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
424                         # let's cheat
425                         boot_state = 'boot'
426                     if datetime.datetime.now() > graceout:
427                         utils.header ("%s still in '%s' state"%(hostname,boot_state))
428                         graceout=datetime.datetime.now()+datetime.timedelta(1)
429                 status[hostname] = boot_state
430             # refresh tocheck
431             tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
432             if not tocheck:
433                 return True
434             if datetime.datetime.now() > timeout:
435                 for hostname in tocheck:
436                     utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
437                 return False
438             # otherwise, sleep for a while
439             time.sleep(15)
440         # only useful in empty plcs
441         return True
442
443     def nodes_booted(self,options):
444         return self.do_nodes_booted(minutes=5)
445     
    # scan and store the nodes' public keys, so that ssh does not ask for confirmation
447     def scan_publicKeys(self,hostnames):
448         try:
449             temp_knownhosts="/root/known_hosts"
450             remote_knownhosts="/root/.ssh/known_hosts"
451             self.run_in_host("touch %s"%temp_knownhosts )
452             for hostname in hostnames:
453                 utils.header("Scan Public %s key and store it in the known_host file(under the root image) "%hostname)
454                 scan=self.run_in_host('ssh-keyscan -t rsa %s >> %s '%(hostname,temp_knownhosts))
455             #Store the public keys in the right root image
456             self.copy_in_guest(temp_knownhosts,remote_knownhosts,True)
457             #clean the temp keys file used
458             self.run_in_host('rm -f  %s '%temp_knownhosts )
459         except Exception, err:
460             print err
461             
462     def do_check_nodesSsh(self,minutes):
463         # compute timeout
464         timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
465         #graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
466         tocheck = self.all_hostnames()
467         self.scan_publicKeys(tocheck)
468         utils.header("checking Connectivity on nodes %r"%tocheck)
469         while tocheck:
470             for hostname in tocheck:
471                 # try to ssh in nodes
472                 access=self.run_in_guest('ssh -i /etc/planetlab/root_ssh_key.rsa root@%s date'%hostname )
473                 if (not access):
474                     utils.header('The node %s is sshable -->'%hostname)
475                     # refresh tocheck
476                     tocheck.remove(hostname)
477                 else:
478                     (site_spec,node_spec)=self.locate_node(hostname)
479                     if TestNode.is_real_model(node_spec['node_fields']['model']):
480                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
481                     tocheck.remove(hostname)
482             if not tocheck:
483                 return True
484             if datetime.datetime.now() > timeout:
485                 for hostname in tocheck:
486                     utils.header("FAILURE to ssh into %s"%hostname)
487                 return False
488             # otherwise, sleep for a while
489             time.sleep(15)
490         # only useful in empty plcs
491         return True
492         
493     def nodes_ssh(self, options):
494         return  self.do_check_nodesSsh(minutes=2)
495             
496     def bootcd (self, options):
497         for site_spec in self.plc_spec['sites']:
498             test_site = TestSite (self,site_spec)
499             for node_spec in site_spec['nodes']:
500                 test_node=TestNode (self,test_site,node_spec)
501                 test_node.create_boot_cd(options.path)
502         return True
503                 
504     def initscripts (self, options):
505         for initscript in self.plc_spec['initscripts']:
506             utils.show_spec('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
507             self.server.AddInitScript(self.auth_root(),initscript['initscript_fields'])
508         return True
509
510     def slices (self, options):
511         return self.do_slices()
512
513     def clean_slices (self, options):
514         return self.do_slices("delete")
515
516     def do_slices (self,  action="add"):
517         for slice in self.plc_spec['slices']:
518             site_spec = self.locate_site (slice['sitename'])
519             test_site = TestSite(self,site_spec)
520             test_slice=TestSlice(self,test_site,slice)
521             if action != "add":
522                 utils.header("Deleting slices in site %s"%test_site.name())
523                 test_slice.delete_slice()
524             else:    
525                 utils.show_spec("Creating slice",slice)
526                 test_slice.create_slice()
527                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
528         return True
529         
530     def check_slices(self, options):
531         for slice_spec in self.plc_spec['slices']:
532             site_spec = self.locate_site (slice_spec['sitename'])
533             test_site = TestSite(self,site_spec)
534             test_slice=TestSlice(self,test_site,slice_spec)
535             status=test_slice.do_check_slice(options)
536             if (not status):
537                 return False
538         return status
539     
540     def start_nodes (self, options):
541         utils.header("Starting  nodes")
542         for site_spec in self.plc_spec['sites']:
543             TestSite(self,site_spec).start_nodes (options)
544         return True
545
546     def stop_nodes (self, options):
547         self.kill_all_qemus()
548         return True
549
550     def check_tcp (self, options):
551         print 'check_tcp not yet implemented'
552         return True
553
554     # returns the filename to use for sql dump/restore, using options.dbname if set
555     def dbfile (self, database, options):
556         # uses options.dbname if it is found
557         try:
558             name=options.dbname
559             if not isinstance(name,StringTypes):
560                 raise Exception
561         except:
562             t=datetime.datetime.now()
563             d=t.date()
564             name=str(d)
565         return "/root/%s-%s.sql"%(database,name)
566
567     def db_dump(self, options):
568         
569         dump=self.dbfile("planetab4",options)
570         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
571         utils.header('Dumped planetlab4 database in %s'%dump)
572         return True
573
574     def db_restore(self, options):
575         dump=self.dbfile("planetab4",options)
576         ##stop httpd service
577         self.run_in_guest('service httpd stop')
578         # xxx - need another wrapper
579         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
580         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
581         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
582         ##starting httpd service
583         self.run_in_guest('service httpd start')
584
585         utils.header('Database restored from ' + dump)
586
587     @standby_generic 
588     def standby_1(): pass
589     @standby_generic 
590     def standby_2(): pass
591     @standby_generic 
592     def standby_3(): pass
593     @standby_generic 
594     def standby_4(): pass
595     @standby_generic 
596     def standby_5(): pass
597     @standby_generic 
598     def standby_6(): pass
599     @standby_generic 
600     def standby_7(): pass
601     @standby_generic 
602     def standby_8(): pass
603     @standby_generic 
604     def standby_9(): pass
605     @standby_generic 
606     def standby_10(): pass
607     @standby_generic 
608     def standby_11(): pass
609     @standby_generic 
610     def standby_12(): pass
611     @standby_generic 
612     def standby_13(): pass
613     @standby_generic 
614     def standby_14(): pass
615     @standby_generic 
616     def standby_15(): pass
617     @standby_generic 
618     def standby_16(): pass
619     @standby_generic 
620     def standby_17(): pass
621     @standby_generic 
622     def standby_18(): pass
623     @standby_generic 
624     def standby_19(): pass
625     @standby_generic 
626     def standby_20(): pass
627