fine-grain & brute force qemu kill (kill_qemus and kill_all_qemus)
[tests.git] / system / TestPlc.py
1 # $Id$
2 import os, os.path
3 import datetime
4 import time
5 import sys
6 import xmlrpclib
7 import datetime
8 import traceback
9 from types import StringTypes
10
11 import utils
12 from TestSite import TestSite
13 from TestNode import TestNode
14 from TestUser import TestUser
15 from TestKey import TestKey
16 from TestSlice import TestSlice
17 from TestBox import TestBox
18
19 # step methods must take (self, options) and return a boolean
20
def standby(minutes):
    """Pause the test run for the given number of minutes.

    A header line announcing the pause is emitted through utils.header
    before sleeping.  Always returns True, so the result can be used
    directly as the outcome of a step.
    """
    utils.header('Entering StandBy for %d mn'%minutes)
    seconds = 60*minutes
    time.sleep(seconds)
    return True
25
def standby_generic (func):
    """Decorator that turns a placeholder named standby_<N> into a step.

    The decorated function is never called: only its name is used.  Each
    time the resulting step runs, the second "_"-separated field of that
    name is converted to an int and passed to standby() as the number of
    minutes to wait.
    """
    def actual(self,options):
        fields = func.__name__.split("_")
        return standby(int(fields[1]))
    return actual
31
32 class TestPlc:
33
34     def __init__ (self,plc_spec):
35         self.plc_spec=plc_spec
36         self.path=os.path.dirname(sys.argv[0])
37         try:
38             self.vserverip=plc_spec['vserverip']
39             self.vservername=plc_spec['vservername']
40             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
41             self.vserver=True
42         except:
43             self.vserver=False
44             self.url="https://%s:443/PLCAPI/"%plc_spec['hostname']
45         utils.header('Using API url %s'%self.url)
46         self.server=xmlrpclib.Server(self.url,allow_none=True)
47         
48     def name(self):
49         name=self.plc_spec['name']
50         if self.vserver:
51             return name+"[%s]"%self.vservername
52         else:
53             return name+"[chroot]"
54
55     def is_local (self):
56         return self.plc_spec['hostname'] == 'localhost'
57
58     # define the API methods on this object through xmlrpc
59     # would help, but not strictly necessary
60     def connect (self):
61         pass
62     
63     # command gets run in the chroot/vserver
64     def host_to_guest(self,command):
65         if self.vserver:
66             return "vserver %s exec %s"%(self.vservername,command)
67         else:
68             return "chroot /plc/root %s"%utils.backslash_shell_specials(command)
69
70     # command gets run on the right box
71     def to_host(self,command):
72         if self.is_local():
73             return command
74         else:
75             return "ssh %s %s"%(self.plc_spec['hostname'],utils.backslash_shell_specials(command))
76
77     def full_command(self,command):
78         return self.to_host(self.host_to_guest(command))
79
80     def run_in_guest (self,command):
81         return utils.system(self.full_command(command))
82     def run_in_host (self,command):
83         return utils.system(self.to_host(command))
84
85     # xxx quick n dirty
86     def run_in_guest_piped (self,local,remote):
87         return utils.system(local+" | "+self.full_command(remote))
88
89     # copy a file to the myplc root image - pass in_data=True if the file must go in /plc/data
90     def copy_in_guest (self, localfile, remotefile, in_data=False):
91         if in_data:
92             chroot_dest="/plc/data"
93         else:
94             chroot_dest="/plc/root"
95         if self.is_local():
96             if not self.vserver:
97                 utils.system("cp %s %s/%s"%(localfile,chroot_dest,remotefile))
98             else:
99                 utils.system("cp %s /vservers/%s/%s"%(localfile,self.vservername,remotefile))
100         else:
101             if not self.vserver:
102                 utils.system("scp %s %s:%s/%s"%(localfile,self.plc_spec['hostname'],chroot_dest,remotefile))
103             else:
104                 utils.system("scp %s %s@/vservers/%s/%s"%(localfile,self.plc_spec['hostname'],self.vservername,remotefile))
105
106     def auth_root (self):
107         return {'Username':self.plc_spec['PLC_ROOT_USER'],
108                 'AuthMethod':'password',
109                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
110                 'Role' : self.plc_spec['role']
111                 }
112     def locate_site (self,sitename):
113         for site in self.plc_spec['sites']:
114             if site['site_fields']['name'] == sitename:
115                 return site
116             if site['site_fields']['login_base'] == sitename:
117                 return site
118         raise Exception,"Cannot locate site %s"%sitename
119         
120     def locate_node (self,nodename):
121         for site in self.plc_spec['sites']:
122             for node in site['nodes']:
123                 if node['node_fields']['hostname'] == nodename:
124                     return (site,node)
125         raise Exception,"Cannot locate node %s"%nodename
126         
127     def locate_key (self,keyname):
128         for key in self.plc_spec['keys']:
129             if key['name'] == keyname:
130                 return key
131         raise Exception,"Cannot locate key %s"%keyname
132
133     # all different hostboxes used in this plc
134     def gather_hostBoxes(self):
135         # maps on sites and nodes, return [ (host_box,test_node) ]
136         tuples=[]
137         for site_spec in self.plc_spec['sites']:
138             test_site = TestSite (self,site_spec)
139             for node_spec in site_spec['nodes']:
140                 test_node = TestNode (self, test_site, node_spec)
141                 if not test_node.is_real():
142                     tuples.append( (test_node.host_box(),test_node) )
143         # transform into a dict { 'host_box' -> [ hostnames .. ] }
144         result = {}
145         for (box,node) in tuples:
146             if not result.has_key(box):
147                 result[box]=[node]
148             else:
149                 result[box].append(node)
150         return result
151                     
152     # a step for checking this stuff
153     def showboxes (self,options):
154         print 'showboxes'
155         for (box,nodes) in self.gather_hostBoxes().iteritems():
156             print box,":"," + ".join( [ node.name() for node in nodes ] )
157         return True
158
159     # make this a valid step
160     def kill_all_qemus(self,options):
161         for (box,nodes) in self.gather_hostBoxes().iteritems():
162             # this is the brute force version, kill all qemus on that host box
163             TestBox(box).kill_all_qemus()
164         return True
165
166     # kill only the right qemus
167     def kill_qemus(self,options):
168         for (box,nodes) in self.gather_hostBoxes().iteritems():
169             # the fine-grain version
170             for node in nodes:
171                 node.kill_qemu()
172         return True
173
174     def clear_ssh_config (self,options):
175         # install local ssh_config file as root's .ssh/config - ssh should be quiet
176         # dir might need creation first
177         self.run_in_guest("mkdir /root/.ssh")
178         self.run_in_guest("chmod 700 /root/.ssh")
179         # this does not work - > redirection somehow makes it until an argument to cat
180         #self.run_in_guest_piped("cat ssh_config","cat > /root/.ssh/config")
181         self.copy_in_guest("ssh_config","/root/.ssh/config",True)
182         return True
183             
184     #################### step methods
185
186     ### uninstall
187     def uninstall_chroot(self,options):
188         self.run_in_host('service plc safestop')
189         #####detecting the last myplc version installed and remove it
190         self.run_in_host('rpm -e myplc')
191         ##### Clean up the /plc directory
192         self.run_in_host('rm -rf  /plc/data')
193         ##### stop any running vservers
194         self.run_in_host('for vserver in $(ls /vservers/* | sed -e s,/vservers/,,) ; do vserver $vserver stop ; done')
195         return True
196
197     def uninstall_vserver(self,options):
198         self.run_in_host("vserver --silent %s delete"%self.vservername)
199         return True
200
201     def uninstall(self,options):
202         # if there's a chroot-based myplc running, and then a native-based myplc is being deployed
203         # it sounds safer to have the former uninstalled too
204         # now the vserver method cannot be invoked for chroot instances as vservername is required
205         if self.vserver:
206             self.uninstall_vserver(options)
207             self.uninstall_chroot(options)
208         else:
209             self.uninstall_chroot(options)
210         return True
211
212     ### install
213     def install_chroot(self,options):
214         # nothing to do
215         return True
216
217     # xxx this would not work with hostname != localhost as mylc-init-vserver was extracted locally
218     def install_vserver(self,options):
219         # we need build dir for vtest-init-vserver
220         if self.is_local():
221             # a full path for the local calls
222             build_dir=self.path+"/build"
223         else:
224             # use a standard name - will be relative to HOME 
225             build_dir="tests-system-build"
226         build_checkout = "svn checkout %s %s"%(options.build_url,build_dir)
227         if self.run_in_host(build_checkout) != 0:
228             raise Exception,"Cannot checkout build dir"
229         # the repo url is taken from myplc-url 
230         # with the last two steps (i386/myplc...) removed
231         repo_url = options.myplc_url
232         repo_url = os.path.dirname(repo_url)
233         repo_url = os.path.dirname(repo_url)
234         create_vserver="%s/vtest-init-vserver.sh %s %s -- --interface eth0:%s"%\
235             (build_dir,self.vservername,repo_url,self.vserverip)
236         if self.run_in_host(create_vserver) != 0:
237             raise Exception,"Could not create vserver for %s"%self.vservername
238         return True
239
240     def install(self,options):
241         if self.vserver:
242             return self.install_vserver(options)
243         else:
244             return self.install_chroot(options)
245
246     ### install_rpm
247     def install_rpm_chroot(self,options):
248         utils.header('Installing from %s'%options.myplc_url)
249         url=options.myplc_url
250         self.run_in_host('rpm -Uvh '+url)
251         self.run_in_host('service plc mount')
252         return True
253
254     def install_rpm_vserver(self,options):
255         self.run_in_guest("yum -y install myplc-native")
256         return True
257
258     def install_rpm(self,options):
259         if self.vserver:
260             return self.install_rpm_vserver(options)
261         else:
262             return self.install_rpm_chroot(options)
263
264     ### 
265     def configure(self,options):
266         tmpname='%s/%s.plc-config-tty'%(options.path,self.name())
267         fileconf=open(tmpname,'w')
268         for var in [ 'PLC_NAME',
269                      'PLC_ROOT_PASSWORD',
270                      'PLC_ROOT_USER',
271                      'PLC_MAIL_ENABLED',
272                      'PLC_MAIL_SUPPORT_ADDRESS',
273                      'PLC_DB_HOST',
274                      'PLC_API_HOST',
275                      'PLC_WWW_HOST',
276                      'PLC_BOOT_HOST',
277                      'PLC_NET_DNS1',
278                      'PLC_NET_DNS2']:
279             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
280         fileconf.write('w\n')
281         fileconf.write('q\n')
282         fileconf.close()
283         utils.system('cat %s'%tmpname)
284         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
285         utils.system('rm %s'%tmpname)
286         return True
287
288     # the chroot install is slightly different to this respect
289     def start(self, options):
290         if self.vserver:
291             self.run_in_guest('service plc start')
292         else:
293             self.run_in_host('service plc start')
294         return True
295         
296     def stop(self, options):
297         if self.vserver:
298             self.run_in_guest('service plc stop')
299         else:
300             self.run_in_host('service plc stop')
301         return True
302         
303     # could use a TestKey class
304     def store_keys(self, options):
305         for key_spec in self.plc_spec['keys']:
306             TestKey(self,key_spec).store_key()
307         return True
308
309     def clean_keys(self, options):
310         utils.system("rm -rf %s/keys/"%self.path)
311
312     def sites (self,options):
313         return self.do_sites(options)
314     
315     def clean_sites (self,options):
316         return self.do_sites(options,action="delete")
317     
318     def do_sites (self,options,action="add"):
319         for site_spec in self.plc_spec['sites']:
320             test_site = TestSite (self,site_spec)
321             if (action != "add"):
322                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
323                 test_site.delete_site()
324                 # deleted with the site
325                 #test_site.delete_users()
326                 continue
327             else:
328                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
329                 test_site.create_site()
330                 test_site.create_users()
331         return True
332
333     def nodes (self, options):
334         return self.do_nodes(options)
335     def clean_nodes (self, options):
336         return self.do_nodes(options,action="delete")
337
338     def do_nodes (self, options,action="add"):
339         for site_spec in self.plc_spec['sites']:
340             test_site = TestSite (self,site_spec)
341             if action != "add":
342                 utils.header("Deleting nodes in site %s"%test_site.name())
343                 for node_spec in site_spec['nodes']:
344                     test_node=TestNode(self,test_site,node_spec)
345                     utils.header("Deleting %s"%test_node.name())
346                     test_node.delete_node()
347             else:
348                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
349                 for node_spec in site_spec['nodes']:
350                     utils.show_spec('Creating node %s'%node_spec,node_spec)
351                     test_node = TestNode (self,test_site,node_spec)
352                     test_node.create_node ()
353         return True
354
355     # create nodegroups if needed, and populate
356     # no need for a clean_nodegroups if we are careful enough
357     def nodegroups (self, options):
358         # 1st pass to scan contents
359         groups_dict = {}
360         for site_spec in self.plc_spec['sites']:
361             test_site = TestSite (self,site_spec)
362             for node_spec in site_spec['nodes']:
363                 test_node=TestNode (self,test_site,node_spec)
364                 if node_spec.has_key('nodegroups'):
365                     nodegroupnames=node_spec['nodegroups']
366                     if isinstance(nodegroupnames,StringTypes):
367                         nodegroupnames = [ nodegroupnames ]
368                     for nodegroupname in nodegroupnames:
369                         if not groups_dict.has_key(nodegroupname):
370                             groups_dict[nodegroupname]=[]
371                         groups_dict[nodegroupname].append(test_node.name())
372         auth=self.auth_root()
373         for (nodegroupname,group_nodes) in groups_dict.iteritems():
374             try:
375                 self.server.GetNodeGroups(auth,{'name':nodegroupname})[0]
376             except:
377                 self.server.AddNodeGroup(auth,{'name':nodegroupname})
378             for node in group_nodes:
379                 self.server.AddNodeToNodeGroup(auth,node,nodegroupname)
380         return True
381
382     def all_hostnames (self) :
383         hostnames = []
384         for site_spec in self.plc_spec['sites']:
385             hostnames += [ node_spec['node_fields']['hostname'] \
386                            for node_spec in site_spec['nodes'] ]
387         return hostnames
388
389     # gracetime : during the first <gracetime> minutes nothing gets printed
    def do_nodes_booted (self, minutes, gracetime=2):
        """Poll the API until all nodes of the plc spec are in the 'boot' state.

        minutes   -- overall timeout, in minutes
        gracetime -- number of minutes before the first 'still in ... state'
                     message may be printed

        Returns True once no node remains to be checked (immediately so
        when the plc has no node), and False if the timeout expires with
        nodes still not in 'boot' state.  Nodes whose model is a real one,
        according to TestNode.is_real_model, are considered booted whatever
        their actual state.
        """
        # compute timeout
        timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
        graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
        # the nodes that haven't checked yet - start with a full list and shrink over time
        tocheck = self.all_hostnames()
        utils.header("checking nodes %r"%tocheck)
        # create a dict hostname -> status
        status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
        while tocheck:
            # get their status
            tocheck_status=self.server.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
            # update status
            # a hostname that is absent from the answer keeps its previous status
            for array in tocheck_status:
                hostname=array['hostname']
                boot_state=array['boot_state']
                if boot_state == 'boot':
                    utils.header ("%s has reached the 'boot' state"%hostname)
                else:
                    # if it's a real node, never mind
                    (site_spec,node_spec)=self.locate_node(hostname)
                    if TestNode.is_real_model(node_spec['node_fields']['model']):
                        utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
                        # let's cheat
                        boot_state = 'boot'
                    if datetime.datetime.now() > graceout:
                        utils.header ("%s still in '%s' state"%(hostname,boot_state))
                        # NOTE(review): the positional argument of timedelta is a number
                        # of days, so this silences further 'still in' messages for a
                        # whole day - confirm that this is the intent
                        graceout=datetime.datetime.now()+datetime.timedelta(1)
                status[hostname] = boot_state
            # refresh tocheck
            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
            if not tocheck:
                return True
            if datetime.datetime.now() > timeout:
                for hostname in tocheck:
                    utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
                return False
            # otherwise, sleep for a while
            time.sleep(15)
        # only useful in empty plcs
        return True
431
432     def nodes_booted(self,options):
433         return self.do_nodes_booted(minutes=5)
434     
    # scan and store the nodes' public keys, so that ssh does not ask for confirmation
436     def scan_publicKeys(self,hostnames):
437         try:
438             temp_knownhosts="/root/known_hosts"
439             remote_knownhosts="/root/.ssh/known_hosts"
440             self.run_in_host("touch %s"%temp_knownhosts )
441             for hostname in hostnames:
442                 utils.header("Scan Public %s key and store it in the known_host file(under the root image) "%hostname)
443                 scan=self.run_in_host('ssh-keyscan -t rsa %s >> %s '%(hostname,temp_knownhosts))
444             #Store the public keys in the right root image
445             self.copy_in_guest(temp_knownhosts,remote_knownhosts,True)
446             #clean the temp keys file used
447             self.run_in_host('rm -f  %s '%temp_knownhosts )
448         except Exception, err:
449             print err
450             
451     def do_check_nodesSsh(self,minutes):
452         # compute timeout
453         timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
454         #graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
455         tocheck = self.all_hostnames()
456         self.scan_publicKeys(tocheck)
457         utils.header("checking Connectivity on nodes %r"%tocheck)
458         while tocheck:
459             for hostname in tocheck:
460                 # try to ssh in nodes
461                 access=self.run_in_guest('ssh -i /etc/planetlab/root_ssh_key.rsa root@%s date'%hostname )
462                 if (not access):
463                     utils.header('The node %s is sshable -->'%hostname)
464                     # refresh tocheck
465                     tocheck.remove(hostname)
466                 else:
467                     (site_spec,node_spec)=self.locate_node(hostname)
468                     if TestNode.is_real_model(node_spec['node_fields']['model']):
469                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
470                     tocheck.remove(hostname)
471             if not tocheck:
472                 return True
473             if datetime.datetime.now() > timeout:
474                 for hostname in tocheck:
475                     utils.header("FAILURE to ssh into %s"%hostname)
476                 return False
477             # otherwise, sleep for a while
478             time.sleep(15)
479         # only useful in empty plcs
480         return True
481         
482     def nodes_ssh(self, options):
483         return  self.do_check_nodesSsh(minutes=2)
484             
485     def bootcd (self, options):
486         for site_spec in self.plc_spec['sites']:
487             test_site = TestSite (self,site_spec)
488             for node_spec in site_spec['nodes']:
489                 test_node=TestNode (self,test_site,node_spec)
490                 test_node.create_boot_cd(options.path)
491         return True
492                 
493     def initscripts (self, options):
494         for initscript in self.plc_spec['initscripts']:
495             utils.show_spec('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
496             self.server.AddInitScript(self.auth_root(),initscript['initscript_fields'])
497         return True
498
499     def slices (self, options):
500         return self.do_slices()
501
502     def clean_slices (self, options):
503         return self.do_slices("delete")
504
505     def do_slices (self,  action="add"):
506         for slice in self.plc_spec['slices']:
507             site_spec = self.locate_site (slice['sitename'])
508             test_site = TestSite(self,site_spec)
509             test_slice=TestSlice(self,test_site,slice)
510             if action != "add":
511                 utils.header("Deleting slices in site %s"%test_site.name())
512                 test_slice.delete_slice()
513             else:    
514                 utils.show_spec("Creating slice",slice)
515                 test_slice.create_slice()
516                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
517         return True
518         
519     def check_slices(self, options):
520         for slice_spec in self.plc_spec['slices']:
521             site_spec = self.locate_site (slice_spec['sitename'])
522             test_site = TestSite(self,site_spec)
523             test_slice=TestSlice(self,test_site,slice_spec)
524             status=test_slice.do_check_slice(options)
525             if (not status):
526                 return False
527         return status
528     
529     def start_nodes (self, options):
530         utils.header("Starting  nodes")
531         for site_spec in self.plc_spec['sites']:
532             TestSite(self,site_spec).start_nodes (options)
533         return True
534
535     def stop_nodes (self, options):
536         self.kill_all_qemus()
537         return True
538
539     def check_tcp (self, options):
540         print 'check_tcp not yet implemented'
541         return True
542
543     # returns the filename to use for sql dump/restore, using options.dbname if set
544     def dbfile (self, database, options):
545         # uses options.dbname if it is found
546         try:
547             name=options.dbname
548             if not isinstance(name,StringTypes):
549                 raise Exception
550         except:
551             t=datetime.datetime.now()
552             d=t.date()
553             name=str(d)
554         return "/root/%s-%s.sql"%(database,name)
555
556     def db_dump(self, options):
557         
558         dump=self.dbfile("planetab4",options)
559         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
560         utils.header('Dumped planetlab4 database in %s'%dump)
561         return True
562
563     def db_restore(self, options):
564         dump=self.dbfile("planetab4",options)
565         ##stop httpd service
566         self.run_in_guest('service httpd stop')
567         # xxx - need another wrapper
568         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
569         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
570         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
571         ##starting httpd service
572         self.run_in_guest('service httpd start')
573
574         utils.header('Database restored from ' + dump)
575
    # standby steps : standby_<N> pauses the run for N minutes
    # the bodies below are never executed; standby_generic only reads each
    # function's name to find out the number of minutes, and replaces it
    # with a step method taking (self,options)
    @standby_generic 
    def standby_1(): pass
    @standby_generic 
    def standby_2(): pass
    @standby_generic 
    def standby_3(): pass
    @standby_generic 
    def standby_4(): pass
    @standby_generic 
    def standby_5(): pass
    @standby_generic 
    def standby_6(): pass
    @standby_generic 
    def standby_7(): pass
    @standby_generic 
    def standby_8(): pass
    @standby_generic 
    def standby_9(): pass
    @standby_generic 
    def standby_10(): pass
    @standby_generic 
    def standby_11(): pass
    @standby_generic 
    def standby_12(): pass
    @standby_generic 
    def standby_13(): pass
    @standby_generic 
    def standby_14(): pass
    @standby_generic 
    def standby_15(): pass
    @standby_generic 
    def standby_16(): pass
    @standby_generic 
    def standby_17(): pass
    @standby_generic 
    def standby_18(): pass
    @standby_generic 
    def standby_19(): pass
    @standby_generic 
    def standby_20(): pass
616