system/TestPlc.py

   1 # $Id$
   2 import os, os.path
   3 import datetime
   4 import time
   5 import sys
   6 import traceback
   7 from types import StringTypes
   8 import socket
   9
  10 import utils
  11 from TestSite import TestSite
  12 from TestNode import TestNode
  13 from TestUser import TestUser
  14 from TestKey import TestKey
  15 from TestSlice import TestSlice
  16 from TestSliver import TestSliver
  17 from TestBox import TestBox
  18 from TestSsh import TestSsh
  19 from TestApiserver import TestApiserver
  20
  21 # step methods must take (self) and return a boolean (options is a member of the class)
  22
  def standby(minutes,dry_run):
      """Announce a pause of `minutes` minutes, then sleep through it.

      When dry_run is set the sleep is skipped and 'dry_run' is printed instead.
      Always returns True.
      """
      utils.header('Entering StandBy for %d mn' % minutes)
      if not dry_run:
          time.sleep(60 * minutes)
      else:
          print('dry_run')
      return True
  30
  def standby_generic (func):
      """Decorator: turn a stub named standby_<N> into a step that waits N minutes."""
      def actual(self):
          # the duration is the second '_'-separated field of the stub's name
          name_fields = func.__name__.split("_")
          return standby(int(name_fields[1]), self.options.dry_run)
      return actual
  36
  def node_mapper (method):
      """Decorator: build a step that runs the same-named TestNode method on every node.

      The step visits all nodes of all sites, even after a failure, and returns
      True only if the TestNode method returned a true value for each of them.
      """
      def actual(self):
          node_method = TestNode.__dict__[method.__name__]
          failures = 0
          for site_spec in self.plc_spec['sites']:
              test_site = TestSite(self, site_spec)
              for node_spec in site_spec['nodes']:
                  test_node = TestNode(self, test_site, node_spec)
                  if not node_method(test_node):
                      failures += 1
          return failures == 0
      return actual
  48
  def slice_mapper_options (method):
      """Decorator: build a step that runs the same-named TestSlice method on every slice.

      The TestSlice method receives the plc's options as its argument.  All
      slices are visited; the step returns True only if none of them failed.
      """
      def actual(self):
          slice_method = TestSlice.__dict__[method.__name__]
          failures = 0
          for slice_spec in self.plc_spec['slices']:
              # each slice is attached to the site named in its spec
              site_spec = self.locate_site(slice_spec['sitename'])
              test_slice = TestSlice(self, TestSite(self, site_spec), slice_spec)
              if not slice_method(test_slice, self.options):
                  failures += 1
          return failures == 0
      return actual
  60
  # marker inserted in step lists to separate groups of steps; not a step itself
  SEP = '<sep>'
  62
  63 class TestPlc:
  64
  # Ordered step names, one group per line; SEP entries are not steps, they
  # only mark where printable_steps() breaks the line.
  default_steps = [
      'uninstall', 'install', 'install_rpm', 'configure', 'start', 'fetch_keys', SEP,
      'store_keys', 'clear_known_hosts', 'initscripts', SEP,
      'sites', 'nodes', 'slices', 'nodegroups', SEP,
      'init_node', 'bootcd', 'configure_qemu', 'export_qemu',
      'kill_all_qemus', 'reinstall_node', 'start_node', SEP,
      'nodes_booted', 'nodes_ssh', 'check_slice', 'check_initscripts', SEP,
      'check_sanity', 'check_tcp', 'plcsh_stress_test', SEP,
      'force_gather_logs', 'force_kill_qemus', 'force_record_tracker', 'force_free_tracker',
  ]
  # Further step names, same layout (presumably the ones available on request).
  # The last entry is a description, not a single step name.
  other_steps = [
      'stop_all_vservers', 'fresh_install', 'cache_rpm', 'stop', 'vs_start', SEP,
      'clean_initscripts', 'clean_nodegroups', 'clean_all_sites', SEP,
      'clean_sites', 'clean_nodes',
      'clean_slices', 'clean_keys', SEP,
      'show_boxes', 'list_all_qemus', 'list_qemus', SEP,
      'db_dump', 'db_restore', 'cleanup_trackers', 'cleanup_all_trackers',
      'standby_1 through 20',
  ]
  82
  @staticmethod
  def printable_steps (list):
      """Render step names as text, breaking the line at each group boundary.

      Names are joined with single spaces; every SEP found between two names
      becomes a backslash followed by a newline.
      """
      one_line = " ".join(list)
      boundary = " %s " % SEP
      return one_line.replace(boundary, " \\\n")
  @staticmethod
  def valid_step (step):
      """Tell whether `step` is anything other than the SEP display marker."""
      return not step == SEP
  89
  def __init__ (self,plc_spec,options):
      """Set up the driver for one myplc instance.

      plc_spec -- dict describing the plc; 'hostname', 'vserverip' and
                  'vservername' are read here
      options  -- run options; buildname and dry_run are read here

      Raises Exception when the spec has no vserver settings, as chroot-based
      deployments are no longer supported.
      """
      self.plc_spec = plc_spec
      self.options = options
      self.test_ssh = TestSsh(self.plc_spec['hostname'], self.options.buildname)
      try:
          self.vserverip = plc_spec['vserverip']
          self.vservername = plc_spec['vservername']
      except KeyError:
          # only a missing vserver setting means 'chroot-based'; any other
          # error is left to propagate instead of being mislabelled
          raise Exception('chroot-based myplc testing is deprecated')
      self.url = "https://%s:443/PLCAPI/" % self.vserverip
      self.vserver = True
      self.apiserver = TestApiserver(self.url, options.dry_run)
 102
 def name(self):
     """Return '<plc name>.<vserver name>' for this plc."""
     return "%s.%s" % (self.plc_spec['name'], self.vservername)
 106
 def hostname(self):
     """Return the 'hostname' entry of the plc spec."""
     spec = self.plc_spec
     return spec['hostname']
 109
 def is_local (self):
     """Relay the is_local() answer of the ssh helper attached to this plc."""
     ssh = self.test_ssh
     return ssh.is_local()
 112
 113     # define the API methods on this object through xmlrpc
 114     # would help, but not strictly necessary
 def connect (self):
     """Placeholder: does nothing and returns None."""
     return None
 117
 def actual_command_in_guest (self,command):
     """Return the full command line that runs `command` inside the guest.

     The command is first wrapped for the vserver, then handed to the ssh helper.
     """
     guest_command = self.host_to_guest(command)
     return self.test_ssh.actual_command(guest_command)
 120
 def start_guest (self):
     """Run the vserver start command through the ssh helper; return utils.system's result."""
     start_command = self.start_guest_in_host()
     return utils.system(self.test_ssh.actual_command(start_command))
 123
 def run_in_guest (self,command):
     """Run `command` inside the guest and return what utils.system returns."""
     full_command = self.actual_command_in_guest(command)
     return utils.system(full_command)
 126
 def run_in_host (self,command):
     """Run `command` through the ssh helper's run_in_buildname and return its result."""
     ssh = self.test_ssh
     return ssh.run_in_buildname(command)
 129
 130     #command gets run in the vserver
 def host_to_guest(self,command):
     """Wrap `command` so that it gets executed inside this plc's vserver."""
     return "vserver %s exec %s" % (self.vservername, command)
 133
 # the command that starts the vserver
 def start_guest_in_host(self):
     """Return the command that starts this plc's vserver."""
     return "vserver %s start" % self.vservername
 137
 138     # xxx quick n dirty
 def run_in_guest_piped (self,local,remote):
     """Pipe the output of `local` into `remote`, the latter being run in the guest.

     Returns what utils.system returns for the whole pipeline.
     """
     guest_side = self.test_ssh.actual_command(self.host_to_guest(remote), keep_stdin=True)
     pipeline = local + " | " + guest_side
     return utils.system(pipeline)
 141
 def auth_root (self):
     """Build the password-based auth dict for the root user defined in the plc spec."""
     spec = self.plc_spec
     auth = {'AuthMethod': 'password'}
     auth['Username'] = spec['PLC_ROOT_USER']
     auth['AuthString'] = spec['PLC_ROOT_PASSWORD']
     auth['Role'] = spec['role']
     return auth
 def locate_site (self,sitename):
     """Return the first site spec whose name or login_base equals `sitename`.

     Raises Exception when no site matches.
     """
     for site_spec in self.plc_spec['sites']:
         fields = site_spec['site_fields']
         if fields['name'] == sitename or fields['login_base'] == sitename:
             return site_spec
     raise Exception("Cannot locate site %s" % sitename)
 155
 def locate_node (self,nodename):
     """Return (site_spec, node_spec) for the first node spec named `nodename`.

     Raises Exception when no node matches.
     """
     for site_spec in self.plc_spec['sites']:
         for node_spec in site_spec['nodes']:
             if node_spec['name'] != nodename:
                 continue
             return (site_spec, node_spec)
     raise Exception("Cannot locate node %s" % nodename)
 162
 def locate_hostname (self,hostname):
     """Return (site_spec, node_spec) for the first node whose hostname field is `hostname`.

     Raises Exception when no node matches.
     """
     for site_spec in self.plc_spec['sites']:
         for node_spec in site_spec['nodes']:
             if node_spec['node_fields']['hostname'] != hostname:
                 continue
             return (site_spec, node_spec)
     raise Exception("Cannot locate hostname %s" % hostname)
 169
 def locate_key (self,keyname):
     """Return the first key spec named `keyname`; raise Exception if there is none."""
     for key_spec in self.plc_spec['keys']:
         if key_spec['name'] != keyname:
             continue
         return key_spec
     raise Exception("Cannot locate key %s" % keyname)
 175
 def locate_slice (self, slicename):
     """Return the first slice spec whose name field is `slicename`; raise Exception if none."""
     for slice_spec in self.plc_spec['slices']:
         if slice_spec['slice_fields']['name'] != slicename:
             continue
         return slice_spec
     raise Exception("Cannot locate slice %s" % slicename)
 181
 def all_sliver_objs (self):
     """Return one sliver object per (slice, node) pair listed in the plc spec.

     Slivers come in spec order: slice by slice, then node by node.
     """
     slivers = []
     for slice_spec in self.plc_spec['slices']:
         slicename = slice_spec['slice_fields']['name']
         slivers.extend([self.locate_sliver_obj(nodename, slicename)
                         for nodename in slice_spec['nodenames']])
     return slivers
 189
 def locate_sliver_obj (self,nodename,slicename):
     """Build the TestSliver for slice `slicename` on node `nodename`.

     Both names are looked up in the plc spec; a failed lookup raises Exception.
     """
     site_spec, node_spec = self.locate_node(nodename)
     slice_spec = self.locate_slice(slicename)
     test_site = TestSite(self, site_spec)
     test_node = TestNode(self, test_site, node_spec)
     # the slice is attached to the node's site, not looked up by its own site
     test_slice = TestSlice(self, test_site, slice_spec)
     return TestSliver(self, test_node, test_slice)
 199
 def locate_first_node(self):
     """Return a TestNode for the first node of the first slice in the plc spec."""
     first_slice = self.plc_spec['slices'][0]
     site_spec, node_spec = self.locate_node(first_slice['nodenames'][0])
     return TestNode(self, TestSite(self, site_spec), node_spec)
 206
 def locate_first_sliver (self):
     """Return the sliver object of the first slice on that slice's first node."""
     first_slice = self.plc_spec['slices'][0]
     slicename = first_slice['slice_fields']['name']
     return self.locate_sliver_obj(first_slice['nodenames'][0], slicename)
 212
 213     # all different hostboxes used in this plc
 def gather_hostBoxes(self):
     """Return a dict { host_box -> [ test_node .. ] } over all sites and nodes.

     Nodes for which is_real() is true are left out.
     """
     boxes = {}
     for site_spec in self.plc_spec['sites']:
         test_site = TestSite(self, site_spec)
         for node_spec in site_spec['nodes']:
             test_node = TestNode(self, test_site, node_spec)
             if test_node.is_real():
                 continue
             # group the nodes by the box that hosts them
             boxes.setdefault(test_node.host_box(), []).append(test_node)
     return boxes
 231
 232     # a step for checking this stuff
 def show_boxes (self):
     """Print each host box followed by the names of its nodes; always returns True."""
     for (box, nodes) in self.gather_hostBoxes().items():
         node_names = [node.name() for node in nodes]
         print("%s : %s" % (box, " + ".join(node_names)))
     return True
 237
 238     # make this a valid step
 def kill_all_qemus(self):
     """Brute-force step: ask each host box to kill all its qemus; always returns True."""
     for (box, nodes) in self.gather_hostBoxes().items():
         # pass the first nodename, as we don't push template-qemu on testboxes
         nodedir = nodes[0].nodedir()
         test_box = TestBox(box, self.options.buildname)
         test_box.kill_all_qemus(nodedir)
     return True
 246
 247     # make this a valid step
 def list_all_qemus(self):
     """Brute-force step: ask each host box to list all its qemus; always returns True."""
     for box in self.gather_hostBoxes():
         test_box = TestBox(box, self.options.buildname)
         test_box.list_all_qemus()
     return True
 253
 # fine-grain version : list the qemu of each node, one by one
 def list_qemus(self):
     """Fine-grain step: call list_qemu() on every hosted node; always returns True."""
     for nodes in self.gather_hostBoxes().values():
         for node in nodes:
             node.list_qemu()
     return True
 261
 262     # kill only the right qemus
 def kill_qemus(self):
     """Fine-grain step: call kill_qemu() on every hosted node; always returns True."""
     for nodes in self.gather_hostBoxes().values():
         for node in nodes:
             node.kill_qemu()
     return True
 269
 270
 271     ### utility methods for handling the pool of IP addresses allocated to plcs
 272     # Logic
 273     # (*) running plcs are recorded in the file named ~/running-test-plcs
 274     # (*) this file contains a line for each running plc, older first
 275     # (*) each line contains the vserver name + the hostname of the (vserver) testbox where it sits
 276     # (*) the free_tracker method performs a vserver stop on the oldest entry
 277     # (*) the record_tracker method adds an entry at the bottom of the file
 278     # (*) the cleanup_tracker method stops all known vservers and removes the tracker file
 279
 # the tracker file sits in the home directory given by $HOME
 TRACKER_FILE = "%s/running-test-plcs" % os.environ['HOME']
 281
 282     def record_tracker (self):
 283         try:
 284             lines=file(TestPlc.TRACKER_FILE).readlines()
 285         except:
 286             lines=[]
 287
 288         this_line="%s %s\n"%(self.vservername,self.test_ssh.hostname)
 289         for line in lines:
 290             if line==this_line:
 291                 print 'this vserver is already included in %s'%TestPlc.TRACKER_FILE
 292                 return True
 293         if self.options.dry_run:
 294             print 'dry_run: record_tracker - skipping tracker update'
 295             return True
 296         tracker=file(TestPlc.TRACKER_FILE,"w")
 297         for line in lines+[this_line]:
 298             tracker.write(line)
 299         tracker.close()
 300         print "Recorded %s in running plcs on host %s"%(self.vservername,self.test_ssh.hostname)
 301         return True
 302
 303     def free_tracker (self, keep_vservers=3):
 304         try:
 305             lines=file(TestPlc.TRACKER_FILE).readlines()
 306         except:
 307             print 'dry_run: free_tracker - skipping tracker update'
 308             return True
 309         how_many = len(lines) - keep_vservers
 310         # nothing todo until we have more than keep_vservers in the tracker
 311         if how_many <= 0:
 312             print 'free_tracker : limit %d not reached'%keep_vservers
 313             return True
 314         to_stop = lines[:how_many]
 315         to_keep = lines[how_many:]
 316         for line in to_stop:
 317             print '>%s<'%line
 318             [vname,hostname]=line.split()
 319             command=TestSsh(hostname).actual_command("vserver --silent %s stop"%vname)
 320             utils.system(command)
 321         if self.options.dry_run:
 322             print 'dry_run: free_tracker would stop %d vservers'%len(to_stop)
 323             for line in to_stop: print line,
 324             print 'dry_run: free_tracker would keep %d vservers'%len(to_keep)
 325             for line in to_keep: print line,
 326             return True
 327         print "Storing %d remaining vservers in %s"%(len(to_keep),TestPlc.TRACKER_FILE)
 328         tracker=open(TestPlc.TRACKER_FILE,"w")
 329         for line in to_keep:
 330             tracker.write(line)
 331         tracker.close()
 332         return True
 333
 # meant to stop only the vservers listed in TRACKER_FILE, then remove that file
 335     def cleanup_trackers (self):
 336         try:
 337             for line in TestPlc.TRACKER_FILE.readlines():
 338                 [vname,hostname]=line.split()
 339                 stop="vserver --silent %s stop"%vname
 340                 command=TestSsh(hostname).actual_command(stop)
 341                 utils.system(command)
 342             clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 343             utils.system(self.test_ssh.actual_command(clean_tracker))
 344         except:
 345             return True
 346
 347     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 def cleanup_all_trackers (self):
     """Stop every vserver found under /vservers, then remove the tracker file.

     Both commands go through the ssh helper of this plc.  Always returns True.
     """
     commands = ["cd /vservers ; for i in * ; do vserver --silent $i stop ; done",
                 "rm -f %s" % TestPlc.TRACKER_FILE]
     for command in commands:
         utils.system(self.test_ssh.actual_command(command))
     return True
 354
 def uninstall(self):
     """Delete the plc's vserver; the outcome is ignored and True is returned."""
     delete_command = "vserver --silent %s delete" % self.vservername
     self.run_in_host(delete_command)
     return True
 358
 359     ### install
 def install(self):
     """Create the plc's vserver by running vtest-init-vserver.sh.

     The build module is first checked out with svn: next to this script when
     running locally, in 'build' (relative to the remote buildname) otherwise.
     Returns False if the checkout fails, else True when the creation script
     exits with 0.
     """
     if self.is_local():
         # a full path for the local calls; an empty dirname stands for "."
         build_dir = (os.path.dirname(sys.argv[0]) or ".") + "/build"
     else:
         # use a standard name - will be relative to remote buildname
         build_dir = "build"
     # run checkout in any case - would do an update if already exists
     build_checkout = "svn checkout %s %s" % (self.options.build_url, build_dir)
     if self.run_in_host(build_checkout) != 0:
         return False
     # the repo url is arch-rpms-url with its last component removed
     repo_url = os.path.dirname(self.options.arch_rpms_url)
     # pass the vbuild-nightly options to vtest-init-vserver
     test_env_options = " -p %s -d %s -f %s" % (self.options.personality,
                                                  self.options.pldistro,
                                                  self.options.fcdistro)
     vserver_options = "--netdev eth0 --interface %s" % self.vserverip
     try:
         vserver_hostname = socket.gethostbyaddr(self.vserverip)[0]
         vserver_options += " --hostname %s" % vserver_hostname
     except socket.error:
         # no reverse lookup for this address : go without --hostname
         pass
     create_vserver = "%s/vtest-init-vserver.sh %s %s %s -- %s" % (
         build_dir, test_env_options, self.vservername, repo_url, vserver_options)
     return self.run_in_host(create_vserver) == 0
 394
 395     ### install_rpm
 def install_rpm(self):
     """Install myplc-native in the guest with yum; True when the command returns 0."""
     status = self.run_in_guest("yum -y install myplc-native")
     return status == 0
 398
 399     ###
 def configure(self):
     """Feed the plc settings found in the spec to plc-config-tty in the guest.

     A command script is written to a local temporary file, displayed, piped
     into plc-config-tty and finally removed.  Always returns True.
     """
     variables = ('PLC_NAME',
                  'PLC_ROOT_PASSWORD',
                  'PLC_ROOT_USER',
                  'PLC_MAIL_ENABLED',
                  'PLC_MAIL_SUPPORT_ADDRESS',
                  'PLC_DB_HOST',
                  'PLC_API_HOST',
                  'PLC_WWW_HOST',
                  'PLC_BOOT_HOST',
                  'PLC_NET_DNS1',
                  'PLC_NET_DNS2')
     tmpname = '%s.plc-config-tty' % (self.name())
     script = open(tmpname, 'w')
     # one 'e <variable>' command followed by its value per setting
     for variable in variables:
         script.write('e %s\n%s\n' % (variable, self.plc_spec[variable]))
     # then write and quit
     script.write('w\n')
     script.write('q\n')
     script.close()
     show_script = 'cat %s' % tmpname
     utils.system(show_script)
     self.run_in_guest_piped(show_script, 'plc-config-tty')
     utils.system('rm %s' % tmpname)
     return True
 422
 def start(self):
     """Start the plc service in the guest; the outcome is ignored, True is returned."""
     command = 'service plc start'
     self.run_in_guest(command)
     return True
 426
 def stop(self):
     """Stop the plc service in the guest; the outcome is ignored, True is returned."""
     command = 'service plc stop'
     self.run_in_guest(command)
     return True
 430
 def vs_start (self):
     """Step wrapper around start_guest(); its result is ignored, True is returned."""
     self.start_guest()
     return True
 434
 435     # stores the keys from the config for further use
 def store_keys(self):
     """Call store_key() on a TestKey built for each key of the plc spec; returns True."""
     for key_spec in self.plc_spec['keys']:
         test_key = TestKey(self, key_spec)
         test_key.store_key()
     return True
 440
 def clean_keys(self):
     """Remove the keys/ directory located next to the running script.

     Returns True, as every step must return a boolean.
     """
     # os.path is a module and cannot be called; the directory of the script
     # is what is wanted here.  dirname is empty when the script is run from
     # its own directory: fall back to "." so we never target /keys/
     script_dir = os.path.dirname(sys.argv[0]) or "."
     utils.system("rm -rf %s/keys/" % script_dir)
     return True
 443
 444     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 445     # for later direct access to the nodes
 def fetch_keys(self):
     """Fetch the plc's root ssh key pair into ./keys (created if missing).

     Both root_ssh_key.pub and root_ssh_key.rsa are fetched from the vserver's
     /etc/planetlab; returns True only if both fetches return 0.
     """
     keys_dir = "./keys"
     if not os.path.isdir(keys_dir):
         os.mkdir(keys_dir)
     failures = 0
     for ext in ('pub', 'rsa'):
         src = "/vservers/%s/etc/planetlab/root_ssh_key.%s" % (self.vservername, ext)
         dst = "keys/%s.%s" % (self.vservername, ext)
         if self.test_ssh.fetch(src, dst) != 0:
             failures += 1
     return failures == 0
 458
 def sites (self):
     """Step: create the sites (and their users) through do_sites()."""
     outcome = self.do_sites()
     return outcome
 461
 def clean_sites (self):
     """Step: delete the sites through do_sites(action="delete")."""
     outcome = self.do_sites(action="delete")
     return outcome
 464
 def do_sites (self,action="add"):
     """Create (action "add") or delete (any other action) every site of the spec.

     Creation also creates the site's users.  Always returns True.
     """
     for site_spec in self.plc_spec['sites']:
         test_site = TestSite(self, site_spec)
         if action == "add":
             utils.header("Creating site %s & users in %s" % (test_site.name(), self.name()))
             test_site.create_site()
             test_site.create_users()
         else:
             utils.header("Deleting site %s in %s" % (test_site.name(), self.name()))
             # no explicit user deletion : users are deleted with the site
             test_site.delete_site()
     return True
 479
 def clean_all_sites (self):
     """Delete every site known to the API server, whether or not it is in the spec.

     Returns True, as every step must return a boolean.
     """
     auth = self.auth_root()
     print('auth_root %s' % (auth,))
     site_ids = [site['site_id']
                 for site in self.apiserver.GetSites(auth, {}, ['site_id'])]
     for site_id in site_ids:
         print('Deleting site_id %s' % (site_id,))
         self.apiserver.DeleteSite(auth, site_id)
     return True
 486
 def nodes (self):
     """Step: create the nodes through do_nodes()."""
     outcome = self.do_nodes()
     return outcome
 def clean_nodes (self):
     """Step: delete the nodes through do_nodes(action="delete")."""
     outcome = self.do_nodes(action="delete")
     return outcome
 491
 def do_nodes (self,action="add"):
     """Create (action "add") or delete (any other action) the nodes of every site.

     Always returns True.
     """
     for site_spec in self.plc_spec['sites']:
         test_site = TestSite(self, site_spec)
         if action == "add":
             utils.header("Creating nodes for site %s in %s" % (test_site.name(), self.name()))
             for node_spec in site_spec['nodes']:
                 utils.pprint('Creating node %s' % node_spec, node_spec)
                 TestNode(self, test_site, node_spec).create_node()
         else:
             utils.header("Deleting nodes in site %s" % test_site.name())
             for node_spec in site_spec['nodes']:
                 test_node = TestNode(self, test_site, node_spec)
                 utils.header("Deleting %s" % test_node.name())
                 test_node.delete_node()
     return True
 508
 def nodegroups (self):
     """Step: create and populate the nodegroups through do_nodegroups("add")."""
     outcome = self.do_nodegroups("add")
     return outcome
 def clean_nodegroups (self):
     """Step: delete the nodegroups through do_nodegroups("delete")."""
     outcome = self.do_nodegroups("delete")
     return outcome
 513
 514     # create nodegroups if needed, and populate
 def do_nodegroups (self, action="add"):
     """Create and populate (action "add") or delete (any other action) the nodegroups.

     The nodegroups are those named in the optional 'nodegroups' entry of the
     node specs; that entry may be a single name or a list of names.
     Returns True unless a tag check (add) or a deletion (delete) failed.
     """
     # 1st pass to scan contents : nodegroup name -> names of its nodes
     groups_dict = {}
     for site_spec in self.plc_spec['sites']:
         test_site = TestSite (self,site_spec)
         for node_spec in site_spec['nodes']:
             test_node=TestNode (self,test_site,node_spec)
             if node_spec.has_key('nodegroups'):
                 nodegroupnames=node_spec['nodegroups']
                 # a single name is accepted in place of a list
                 if isinstance(nodegroupnames,StringTypes):
                     nodegroupnames = [ nodegroupnames ]
                 for nodegroupname in nodegroupnames:
                     if not groups_dict.has_key(nodegroupname):
                         groups_dict[nodegroupname]=[]
                     groups_dict[nodegroupname].append(test_node.name())
     auth=self.auth_root()
     overall = True
     # 2nd pass : deal with each nodegroup in turn
     for (nodegroupname,group_nodes) in groups_dict.iteritems():
         if action == "add":
             print 'nodegroups:','dealing with nodegroup',nodegroupname,'on nodes',group_nodes
             # first, check if the nodetagtype is here - create it otherwise
             tag_types = self.apiserver.GetTagTypes(auth,{'tagname':nodegroupname})
             if tag_types:
                 tag_type_id = tag_types[0]['tag_type_id']
             else:
                 tag_type_id = self.apiserver.AddTagType(auth,
                                                         {'tagname':nodegroupname,
                                                          'description': 'for nodegroup %s'%nodegroupname,
                                                          'category':'test',
                                                          'min_role_id':10})
             print 'located tag (type)',nodegroupname,'as',tag_type_id
             # create nodegroup, unless one with that name already exists
             nodegroups = self.apiserver.GetNodeGroups (auth, {'groupname':nodegroupname})
             if not nodegroups:
                 self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
                 print 'created nodegroup',nodegroupname,'from tagname',nodegroupname,'and value','yes'
             # set node tag on all nodes, value='yes'
             for nodename in group_nodes:
                 # a failure here is reported but not counted as an error
                 try:
                     self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
                 except:
                     traceback.print_exc()
                     print 'node',nodename,'seems to already have tag',nodegroupname
                 # check anyway : the tag must now read 'yes' on that node
                 try:
                     expect_yes = self.apiserver.GetNodeTags(auth,
                                                             {'hostname':nodename,
                                                              'tagname':nodegroupname},
                                                             ['tagvalue'])[0]['tagvalue']
                     if expect_yes != "yes":
                         print 'Mismatch node tag on node',nodename,'got',expect_yes
                         overall=False
                 except:
                     # a failed lookup is not held against a dry run
                     if not self.options.dry_run:
                         print 'Cannot find tag',nodegroupname,'on node',nodename
                         overall = False
         else:
             # any other action : delete the nodegroup, a failure fails the step
             try:
                 print 'cleaning nodegroup',nodegroupname
                 self.apiserver.DeleteNodeGroup(auth,nodegroupname)
             except:
                 traceback.print_exc()
                 overall=False
     return overall
 579
 def all_hostnames (self) :
     """Return the hostname field of every node in the plc spec, in spec order."""
     return [node_spec['node_fields']['hostname']
             for site_spec in self.plc_spec['sites']
             for node_spec in site_spec['nodes']]
 586
 587     # gracetime : during the first <gracetime> minutes nothing gets printed
 def do_nodes_booted (self, minutes, gracetime,period=15):
     """Poll the API until every node of the spec reports the 'boot' state.

     minutes   -- overall timeout, in minutes
     gracetime -- during the first <gracetime> minutes nothing gets printed
                  about nodes that are not in 'boot' state yet
     period    -- seconds slept between two polls

     Nodes whose model TestNode.is_real_model() accepts are counted as booted
     whatever their state.  Returns True once no node is left to check (right
     away in dry-run mode), False if some are still not in 'boot' state when
     the timeout expires.
     """
     if self.options.dry_run:
         print 'dry_run'
         return True
     # compute timeout
     timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
     graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
     # the nodes that haven't checked yet - start with a full list and shrink over time
     tocheck = self.all_hostnames()
     utils.header("checking nodes %r"%tocheck)
     # create a dict hostname -> status
     status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
     while tocheck:
         # get their status
         tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
         # update status
         for array in tocheck_status:
             hostname=array['hostname']
             boot_state=array['boot_state']
             if boot_state == 'boot':
                 utils.header ("%s has reached the 'boot' state"%hostname)
             else:
                 # if it's a real node, never mind
                 (site_spec,node_spec)=self.locate_hostname(hostname)
                 if TestNode.is_real_model(node_spec['node_fields']['model']):
                     utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
                     # let's cheat
                     boot_state = 'boot'
                 elif datetime.datetime.now() > graceout:
                     utils.header ("%s still in '%s' state"%(hostname,boot_state))
                     # NOTE(review): timedelta(1) is one day, so this message
                     # shows at most once per run - confirm this is intended
                     graceout=datetime.datetime.now()+datetime.timedelta(1)
             status[hostname] = boot_state
         # refresh tocheck : keep the hostnames not recorded as 'boot'
         tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
         if not tocheck:
             return True
         if datetime.datetime.now() > timeout:
             for hostname in tocheck:
                 utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
             return False
         # otherwise, sleep for a while
         time.sleep(period)
     # only useful in empty plcs
     return True
 632
 def nodes_booted(self):
     """Step: wait up to 20 minutes for the nodes to boot, staying quiet for 15."""
     outcome = self.do_nodes_booted(minutes=20, gracetime=15)
     return outcome
 635
 def do_nodes_ssh(self,minutes,gracetime,period=15):
     """Try to ssh into every node from the guest until all succeed or time runs out.

     minutes   -- overall timeout, in minutes
     gracetime -- minutes during which failed attempts are not reported
     period    -- seconds slept between two rounds

     Nodes whose model TestNode.is_real_model() accepts are tried once and
     dropped if that fails.  Returns True once no hostname is left to check,
     False if some still fail when the timeout expires.
     """
     # compute timeout
     timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
     graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
     tocheck = self.all_hostnames()
     utils.header("checking Connectivity on nodes %r"%tocheck)
     while tocheck:
         # walk a snapshot : hostnames get removed from tocheck along the way,
         # and removing from the list being iterated skips the next hostname
         for hostname in list(tocheck):
             # try to ssh in nodes
             node_test_ssh = TestSsh (hostname,key="/etc/planetlab/root_ssh_key.rsa")
             success=self.run_in_guest(node_test_ssh.actual_command("hostname"))==0
             if success:
                 utils.header('The node %s is sshable -->'%hostname)
                 # refresh tocheck
                 tocheck.remove(hostname)
             else:
                 # we will have tried real nodes once, in case they're up - but if not, just skip
                 (site_spec,node_spec)=self.locate_hostname(hostname)
                 if TestNode.is_real_model(node_spec['node_fields']['model']):
                     utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
                     tocheck.remove(hostname)
                 elif datetime.datetime.now() > graceout:
                     utils.header("Could not ssh-enter root context on %s"%hostname)
         if not tocheck:
             return True
         if datetime.datetime.now() > timeout:
             for hostname in tocheck:
                 utils.header("FAILURE to ssh into %s"%hostname)
             return False
         # otherwise, sleep for a while
         time.sleep(period)
     # only useful in empty plcs
     return True
 670
 def nodes_ssh(self):
     """Step: wait up to 10 minutes for ssh access to the nodes, staying quiet for 5."""
     outcome = self.do_nodes_ssh(minutes=10, gracetime=5)
     return outcome
 673
 @node_mapper
 def init_node (self):
     """Step stub: node_mapper replaces it with a run of TestNode.init_node on every node."""
 @node_mapper
 def bootcd (self):
     """Step stub: node_mapper replaces it with a run of TestNode.bootcd on every node."""
 @node_mapper
 def configure_qemu (self):
     """Step stub: node_mapper replaces it with a run of TestNode.configure_qemu on every node."""
 @node_mapper
 def reinstall_node (self):
     """Step stub: node_mapper replaces it with a run of TestNode.reinstall_node on every node."""
 @node_mapper
 def export_qemu (self):
     """Step stub: node_mapper replaces it with a run of TestNode.export_qemu on every node."""
 684
 685     ### check sanity : invoke scripts from qaapi/qa/tests/{node,slice}
 def check_sanity_node (self):
     """Run check_sanity() on the first node (see locate_first_node) and return its result."""
     first_node = self.locate_first_node()
     return first_node.check_sanity()
 def check_sanity_sliver (self) :
     """Run check_sanity() on the first sliver (see locate_first_sliver) and return its result."""
     first_sliver = self.locate_first_sliver()
     return first_sliver.check_sanity()
 690
 def check_sanity (self):
     """Run the node sanity check, then the sliver one if the node check passed.

     Returns the node result when it is false, the sliver result otherwise.
     """
     node_result = self.check_sanity_node()
     if not node_result:
         return node_result
     return self.check_sanity_sliver()
 693
 694     ### initscripts
 695     def do_check_initscripts(self):
 696         overall = True
 697         for slice_spec in self.plc_spec['slices']:
 698             if not slice_spec.has_key('initscriptname'):
 699                 continue
 700             initscript=slice_spec['initscriptname']
 701             for nodename in slice_spec['nodenames']:
 702                 (site,node) = self.locate_node (nodename)
 703                 # xxx - passing the wrong site - probably harmless
 704                 test_site = TestSite (self,site)
 705                 test_slice = TestSlice (self,test_site,slice_spec)
 706                 test_node = TestNode (self,test_site,node)
 707                 test_sliver = TestSliver (self, test_node, test_slice)
 708                 if not test_sliver.check_initscript(initscript):
 709                     overall = False
 710         return overall
 711
 712     def check_initscripts(self):
 713             return self.do_check_initscripts()
 714
 715     def initscripts (self):
 716         for initscript in self.plc_spec['initscripts']:
 717             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
 718             self.apiserver.AddInitScript(self.auth_root(),initscript['initscript_fields'])
 719         return True
 720
 721     def clean_initscripts (self):
 722         for initscript in self.plc_spec['initscripts']:
 723             initscript_name = initscript['initscript_fields']['name']
 724             print('Attempting to delete %s in plc %s'%(initscript_name,self.plc_spec['name']))
 725             try:
 726                 self.apiserver.DeleteInitScript(self.auth_root(),initscript_name)
 727                 print initscript_name,'deleted'
 728             except:
 729                 print 'deletion went wrong - probably did not exist'
 730         return True
 731
 732     ### manage slices
 733     def slices (self):
 734         return self.do_slices()
 735
 736     def clean_slices (self):
 737         return self.do_slices("delete")
 738
 739     def do_slices (self,  action="add"):
 740         for slice in self.plc_spec['slices']:
 741             site_spec = self.locate_site (slice['sitename'])
 742             test_site = TestSite(self,site_spec)
 743             test_slice=TestSlice(self,test_site,slice)
 744             if action != "add":
 745                 utils.header("Deleting slices in site %s"%test_site.name())
 746                 test_slice.delete_slice()
 747             else:
 748                 utils.pprint("Creating slice",slice)
 749                 test_slice.create_slice()
 750                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
 751         return True
 752
 753     @slice_mapper_options
 754     def check_slice(self): pass
 755
 756     @node_mapper
 757     def clear_known_hosts (self): pass
 758
 759     @node_mapper
 760     def start_node (self) : pass
 761
 762     def check_tcp (self):
 763         specs = self.plc_spec['tcp_test']
 764         overall=True
 765         for spec in specs:
 766             port = spec['port']
 767             # server side
 768             s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
 769             if not s_test_sliver.run_tcp_server(port,timeout=10):
 770                 overall=False
 771                 break
 772
 773             # idem for the client side
 774             c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
 775             if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
 776                 overall=False
 777         return overall
 778
 779     def plcsh_stress_test (self):
 780         # install the stress-test in the plc image
 781         location = "/usr/share/plc_api/plcsh-stress-test.py"
 782         remote="/vservers/%s/%s"%(self.vservername,location)
 783         self.test_ssh.copy_abs("plcsh-stress-test.py",remote)
 784         command = location
 785         command += " -- --check"
 786         if self.options.small_test:
 787             command +=  " --tiny"
 788         return ( self.run_in_guest(command) == 0)
 789
 790     def gather_logs (self):
 791         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
 792         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
 793         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
 794         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
 795         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
 796         # (1.a)
 797         print "-------------------- TestPlc.gather_logs : PLC's /var/log"
 798         self.gather_var_logs ()
 799         # (1.b)
 800         print "-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/"
 801         self.gather_pgsql_logs ()
 802         # (2)
 803         print "-------------------- TestPlc.gather_logs : nodes's QEMU logs"
 804         for site_spec in self.plc_spec['sites']:
 805             test_site = TestSite (self,site_spec)
 806             for node_spec in site_spec['nodes']:
 807                 test_node=TestNode(self,test_site,node_spec)
 808                 test_node.gather_qemu_logs()
 809         # (3)
 810         print "-------------------- TestPlc.gather_logs : nodes's /var/log"
 811         self.gather_nodes_var_logs()
 812         # (4)
 813         print "-------------------- TestPlc.gather_logs : sample sliver's /var/log"
 814         self.gather_slivers_var_logs()
 815         return True
 816
 817     def gather_slivers_var_logs(self):
 818         for test_sliver in self.all_sliver_objs():
 819             remote = test_sliver.tar_var_logs()
 820             utils.system("mkdir -p logs/sliver.var-log.%s"%test_sliver.name())
 821             command = remote + " | tar -C logs/sliver.var-log.%s -xf -"%test_sliver.name()
 822             utils.system(command)
 823         return True
 824
 825     def gather_var_logs (self):
 826         utils.system("mkdir -p logs/myplc.var-log.%s"%self.name())
 827         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
 828         command = to_plc + "| tar -C logs/myplc.var-log.%s -xf -"%self.name()
 829         utils.system(command)
 830         command = "chmod a+r,a+x logs/myplc.var-log.%s/httpd"%self.name()
 831         utils.system(command)
 832
 833     def gather_pgsql_logs (self):
 834         utils.system("mkdir -p logs/myplc.pgsql-log.%s"%self.name())
 835         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
 836         command = to_plc + "| tar -C logs/myplc.pgsql-log.%s -xf -"%self.name()
 837         utils.system(command)
 838
 839     def gather_nodes_var_logs (self):
 840         for site_spec in self.plc_spec['sites']:
 841             test_site = TestSite (self,site_spec)
 842             for node_spec in site_spec['nodes']:
 843                 test_node=TestNode(self,test_site,node_spec)
 844                 test_ssh = TestSsh (test_node.name(),key="/etc/planetlab/root_ssh_key.rsa")
 845                 to_plc = self.actual_command_in_guest ( test_ssh.actual_command("tar -C /var/log -cf - ."))
 846                 command = to_plc + "| tar -C logs/node.var-log.%s -xf -"%test_node.name()
 847                 utils.system("mkdir -p logs/node.var-log.%s"%test_node.name())
 848                 utils.system(command)
 849
 850
 851     # returns the filename to use for sql dump/restore, using options.dbname if set
 852     def dbfile (self, database):
 853         # uses options.dbname if it is found
 854         try:
 855             name=self.options.dbname
 856             if not isinstance(name,StringTypes):
 857                 raise Exception
 858         except:
 859             t=datetime.datetime.now()
 860             d=t.date()
 861             name=str(d)
 862         return "/root/%s-%s.sql"%(database,name)
 863
 864     def db_dump(self):
 865         dump=self.dbfile("planetab4")
 866         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
 867         utils.header('Dumped planetlab4 database in %s'%dump)
 868         return True
 869
 870     def db_restore(self):
 871         dump=self.dbfile("planetab4")
 872         ##stop httpd service
 873         self.run_in_guest('service httpd stop')
 874         # xxx - need another wrapper
 875         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
 876         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
 877         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
 878         ##starting httpd service
 879         self.run_in_guest('service httpd start')
 880
 881         utils.header('Database restored from ' + dump)
 882
 883     @standby_generic
 884     def standby_1(): pass
 885     @standby_generic
 886     def standby_2(): pass
 887     @standby_generic
 888     def standby_3(): pass
 889     @standby_generic
 890     def standby_4(): pass
 891     @standby_generic
 892     def standby_5(): pass
 893     @standby_generic
 894     def standby_6(): pass
 895     @standby_generic
 896     def standby_7(): pass
 897     @standby_generic
 898     def standby_8(): pass
 899     @standby_generic
 900     def standby_9(): pass
 901     @standby_generic
 902     def standby_10(): pass
 903     @standby_generic
 904     def standby_11(): pass
 905     @standby_generic
 906     def standby_12(): pass
 907     @standby_generic
 908     def standby_13(): pass
 909     @standby_generic
 910     def standby_14(): pass
 911     @standby_generic
 912     def standby_15(): pass
 913     @standby_generic
 914     def standby_16(): pass
 915     @standby_generic
 916     def standby_17(): pass
 917     @standby_generic
 918     def standby_18(): pass
 919     @standby_generic
 920     def standby_19(): pass
 921     @standby_generic
 922     def standby_20(): pass
 923