system/TestPlc.py

   1 # $Id$
   2 import os, os.path
   3 import datetime
   4 import time
   5 import sys
   6 import traceback
   7 from types import StringTypes
   8 import socket
   9
  10 import utils
  11 from TestSite import TestSite
  12 from TestNode import TestNode
  13 from TestUser import TestUser
  14 from TestKey import TestKey
  15 from TestSlice import TestSlice
  16 from TestSliver import TestSliver
  17 from TestBox import TestBox
  18 from TestSsh import TestSsh
  19 from TestApiserver import TestApiserver
  20
  21 # step methods must take (self) and return a boolean (options is a member of the class)
  22
  23 def standby(minutes,dry_run):
  24     utils.header('Entering StandBy for %d mn'%minutes)
  25     if dry_run:
  26         print 'dry_run'
  27     else:
  28         time.sleep(60*minutes)
  29     return True
  30
  31 def standby_generic (func):
  32     def actual(self):
  33         minutes=int(func.__name__.split("_")[1])
  34         return standby(minutes,self.options.dry_run)
  35     return actual
  36
  37 def node_mapper (method):
  38     def actual(self):
  39         overall=True
  40         node_method = TestNode.__dict__[method.__name__]
  41         for site_spec in self.plc_spec['sites']:
  42             test_site = TestSite (self,site_spec)
  43             for node_spec in site_spec['nodes']:
  44                 test_node = TestNode (self,test_site,node_spec)
  45                 if not node_method(test_node): overall=False
  46         return overall
  47     return actual
  48
  49 def slice_mapper_options (method):
  50     def actual(self):
  51         overall=True
  52         slice_method = TestSlice.__dict__[method.__name__]
  53         for slice_spec in self.plc_spec['slices']:
  54             site_spec = self.locate_site (slice_spec['sitename'])
  55             test_site = TestSite(self,site_spec)
  56             test_slice=TestSlice(self,test_site,slice_spec)
  57             if not slice_method(test_slice,self.options): overall=False
  58         return overall
  59     return actual
  60
# marker inserted in the step lists to separate groups of steps:
# TestPlc.printable_steps turns it into a line break, TestPlc.valid_step rejects it
SEP='<sep>'
  62
  63 class TestPlc:
  64
    # step names, grouped with SEP markers (SEP is not a step, see valid_step)
    # NOTE(review): presumably the steps run when the caller requests none - confirm against the caller
    default_steps = ['uninstall','install','install_rpm',
                     'configure', 'start', 'fetch_keys', SEP,
                     'store_keys', 'clear_known_hosts', 'initscripts', SEP,
                     'sites', 'nodes', 'slices', 'nodegroups', SEP,
                     'init_node','bootcd', 'configure_qemu', 'export_qemu',
                     'kill_all_qemus', 'reinstall_node','start_node', SEP,
                     'nodes_booted', 'nodes_ssh', 'check_slice', 'check_initscripts', SEP,
                     'check_sanity', 'check_tcp', 'plcsh_stress_test', SEP,
                     'force_gather_logs', 'force_kill_qemus', 'force_record_tracker','force_free_tracker' ]
    # NOTE(review): presumably extra steps available on explicit request - confirm against the caller
    # the last entry is a description of a family of steps, not a method name by itself
    other_steps = [ 'stop_all_vservers','fresh_install', 'cache_rpm', 'stop', 'vs_start', SEP,
                    'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
                    'clean_sites', 'clean_nodes',
                    'clean_slices', 'clean_keys', SEP,
                    'show_boxes', 'list_all_qemus', 'list_qemus', SEP,
                    'db_dump' , 'db_restore', 'cleanup_trackers', 'cleanup_all_trackers',
                    'standby_1 through 20'
                    ]
  82
  83     @staticmethod
  84     def printable_steps (list):
  85         return " ".join(list).replace(" "+SEP+" "," \\\n")
    @staticmethod
    def valid_step (step):
        """Tell whether step is an actual step name, i.e. anything but the SEP marker."""
        return step != SEP
  89
  90     def __init__ (self,plc_spec,options):
  91         self.plc_spec=plc_spec
  92         self.options=options
  93         self.test_ssh=TestSsh(self.plc_spec['hostname'],self.options.buildname)
  94         try:
  95             self.vserverip=plc_spec['vserverip']
  96             self.vservername=plc_spec['vservername']
  97             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
  98             self.vserver=True
  99         except:
 100             raise Exception,'chroot-based myplc testing is deprecated'
 101         self.apiserver=TestApiserver(self.url,options.dry_run)
 102
 103     def name(self):
 104         name=self.plc_spec['name']
 105         return "%s.%s"%(name,self.vservername)
 106
    def hostname(self):
        """Return the 'hostname' entry of the plc spec (the name also handed to TestSsh in __init__)."""
        return self.plc_spec['hostname']
 109
    def is_local (self):
        """Return whatever the TestSsh object for the plc host reports as is_local()."""
        return self.test_ssh.is_local()
 112
 113     # define the API methods on this object through xmlrpc
 114     # would help, but not strictly necessary
    def connect (self):
        """Placeholder - currently does nothing."""
        pass
 117
    def actual_command_in_guest (self,command):
        """Return the command line that runs command inside the vserver, through the plc host's TestSsh.

        The command is first wrapped with host_to_guest, then handed to
        test_ssh.actual_command; nothing is executed here.
        """
        return self.test_ssh.actual_command(self.host_to_guest(command))
 120
 121     def start_guest (self):
 122       return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))
 123
    def run_in_guest (self,command):
        """Run command inside the vserver; returns the value of utils.system (compared with 0 by callers)."""
        return utils.system(self.actual_command_in_guest(command))
 126
    def run_in_host (self,command):
        """Run command through test_ssh.run_in_buildname and return its result (compared with 0 by callers)."""
        return self.test_ssh.run_in_buildname(command)
 129
 130     #command gets run in the vserver
    def host_to_guest(self,command):
        """Wrap command into a 'vserver <name> exec' line, to be issued on the host."""
        return "vserver %s exec %s"%(self.vservername,command)
 133
    # command gets run on the host, and starts the vserver
    def start_guest_in_host(self):
        """Return the 'vserver <name> start' command line; nothing is executed here."""
        return "vserver %s start"%(self.vservername)
 137
 138     # xxx quick n dirty
 139     def run_in_guest_piped (self,local,remote):
 140         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),keep_stdin=True))
 141
 142     def auth_root (self):
 143         return {'Username':self.plc_spec['PLC_ROOT_USER'],
 144                 'AuthMethod':'password',
 145                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
 146                 'Role' : self.plc_spec['role']
 147                 }
 148     def locate_site (self,sitename):
 149         for site in self.plc_spec['sites']:
 150             if site['site_fields']['name'] == sitename:
 151                 return site
 152             if site['site_fields']['login_base'] == sitename:
 153                 return site
 154         raise Exception,"Cannot locate site %s"%sitename
 155
 156     def locate_node (self,nodename):
 157         for site in self.plc_spec['sites']:
 158             for node in site['nodes']:
 159                 if node['name'] == nodename:
 160                     return (site,node)
 161         raise Exception,"Cannot locate node %s"%nodename
 162
 163     def locate_hostname (self,hostname):
 164         for site in self.plc_spec['sites']:
 165             for node in site['nodes']:
 166                 if node['node_fields']['hostname'] == hostname:
 167                     return (site,node)
 168         raise Exception,"Cannot locate hostname %s"%hostname
 169
 170     def locate_key (self,keyname):
 171         for key in self.plc_spec['keys']:
 172             if key['name'] == keyname:
 173                 return key
 174         raise Exception,"Cannot locate key %s"%keyname
 175
 176     def locate_slice (self, slicename):
 177         for slice in self.plc_spec['slices']:
 178             if slice['slice_fields']['name'] == slicename:
 179                 return slice
 180         raise Exception,"Cannot locate slice %s"%slicename
 181
 182     def all_sliver_objs (self):
 183         result=[]
 184         for slice_spec in self.plc_spec['slices']:
 185             slicename = slice_spec['slice_fields']['name']
 186             for nodename in slice_spec['nodenames']:
 187                 result.append(self.locate_sliver_obj (nodename,slicename))
 188         return result
 189
 190     def locate_sliver_obj (self,nodename,slicename):
 191         (site,node) = self.locate_node(nodename)
 192         slice = self.locate_slice (slicename)
 193         # build objects
 194         test_site = TestSite (self, site)
 195         test_node = TestNode (self, test_site,node)
 196         # xxx the slice site is assumed to be the node site - mhh - probably harmless
 197         test_slice = TestSlice (self, test_site, slice)
 198         return TestSliver (self, test_node, test_slice)
 199
 200     def locate_first_node(self):
 201         nodename=self.plc_spec['slices'][0]['nodenames'][0]
 202         (site,node) = self.locate_node(nodename)
 203         test_site = TestSite (self, site)
 204         test_node = TestNode (self, test_site,node)
 205         return test_node
 206
 207     def locate_first_sliver (self):
 208         slice_spec=self.plc_spec['slices'][0]
 209         slicename=slice_spec['slice_fields']['name']
 210         nodename=slice_spec['nodenames'][0]
 211         return self.locate_sliver_obj(nodename,slicename)
 212
 213     # all different hostboxes used in this plc
 214     def gather_hostBoxes(self):
 215         # maps on sites and nodes, return [ (host_box,test_node) ]
 216         tuples=[]
 217         for site_spec in self.plc_spec['sites']:
 218             test_site = TestSite (self,site_spec)
 219             for node_spec in site_spec['nodes']:
 220                 test_node = TestNode (self, test_site, node_spec)
 221                 if not test_node.is_real():
 222                     tuples.append( (test_node.host_box(),test_node) )
 223         # transform into a dict { 'host_box' -> [ test_node .. ] }
 224         result = {}
 225         for (box,node) in tuples:
 226             if not result.has_key(box):
 227                 result[box]=[node]
 228             else:
 229                 result[box].append(node)
 230         return result
 231
 232     # a step for checking this stuff
 233     def show_boxes (self):
 234         for (box,nodes) in self.gather_hostBoxes().iteritems():
 235             print box,":"," + ".join( [ node.name() for node in nodes ] )
 236         return True
 237
 238     # make this a valid step
 239     def kill_all_qemus(self):
 240         # this is the brute force version, kill all qemus on that host box
 241         for (box,nodes) in self.gather_hostBoxes().iteritems():
 242             # pass the first nodename, as we don't push template-qemu on testboxes
 243             nodedir=nodes[0].nodedir()
 244             TestBox(box,self.options.buildname).kill_all_qemus(nodedir)
 245         return True
 246
 247     # make this a valid step
 248     def list_all_qemus(self):
 249         for (box,nodes) in self.gather_hostBoxes().iteritems():
 250             # this is the brute force version, kill all qemus on that host box
 251             TestBox(box,self.options.buildname).list_all_qemus()
 252         return True
 253
    # list only the right qemus
 255     def list_qemus(self):
 256         for (box,nodes) in self.gather_hostBoxes().iteritems():
 257             # the fine-grain version
 258             for node in nodes:
 259                 node.list_qemu()
 260         return True
 261
 262     # kill only the right qemus
 263     def kill_qemus(self):
 264         for (box,nodes) in self.gather_hostBoxes().iteritems():
 265             # the fine-grain version
 266             for node in nodes:
 267                 node.kill_qemu()
 268         return True
 269
 270
 271     ### utility methods for handling the pool of IP addresses allocated to plcs
 272     # Logic
 273     # (*) running plcs are recorded in the file named ~/running-test-plcs
 274     # (*) this file contains a line for each running plc, older first
 275     # (*) each line contains the vserver name + the hostname of the (vserver) testbox where it sits
 276     # (*) the free_tracker method performs a vserver stop on the oldest entry
 277     # (*) the record_tracker method adds an entry at the bottom of the file
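    # (*) the cleanup_trackers method is meant to stop the vservers listed in the tracker file and to remove that file
    # (*) the cleanup_all_trackers method stops every vserver found under /vservers and removes the tracker file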
 279
    # path of the tracker file, in the home directory of the user running the tests
    # evaluated when the class body runs: raises KeyError if HOME is not set
    TRACKER_FILE=os.environ['HOME']+"/running-test-plcs"
 281
 282     def record_tracker (self):
 283         try:
 284             lines=file(TestPlc.TRACKER_FILE).readlines()
 285         except:
 286             lines=[]
 287
 288         this_line="%s %s\n"%(self.vservername,self.test_ssh.hostname)
 289         for line in lines:
 290             if line==this_line:
 291                 print 'this vserver is already included in %s'%TestPlc.TRACKER_FILE
 292                 return True
 293         if self.options.dry_run:
 294             print 'dry_run: record_tracker - skipping tracker update'
 295             return True
 296         tracker=file(TestPlc.TRACKER_FILE,"w")
 297         for line in lines+[this_line]:
 298             tracker.write(line)
 299         tracker.close()
 300         print "Recorded %s in running plcs on host %s"%(self.vservername,self.test_ssh.hostname)
 301         return True
 302
 303     def free_tracker (self, keep_vservers=3):
 304         try:
 305             lines=file(TestPlc.TRACKER_FILE).readlines()
 306         except:
 307             print 'dry_run: free_tracker - skipping tracker update'
 308             return True
 309         how_many = len(lines) - keep_vservers
 310         # nothing todo until we have more than keep_vservers in the tracker
 311         if how_many <= 0:
 312             print 'free_tracker : limit %d not reached'%keep_vservers
 313             return True
 314         to_stop = lines[:how_many]
 315         to_keep = lines[how_many:]
 316         for line in to_stop:
 317             print '>%s<'%line
 318             [vname,hostname]=line.split()
 319             command=TestSsh(hostname).actual_command("vserver --silent %s stop"%vname)
 320             utils.system(command)
 321         if self.options.dry_run:
 322             print 'dry_run: free_tracker would stop %d vservers'%len(to_stop)
 323             for line in to_stop: print line,
 324             print 'dry_run: free_tracker would keep %d vservers'%len(to_keep)
 325             for line in to_keep: print line,
 326             return True
 327         print "Storing %d remaining vservers in %s"%(len(to_keep),TestPlc.TRACKER_FILE)
 328         tracker=open(TestPlc.TRACKER_FILE,"w")
 329         for line in to_keep:
 330             tracker.write(line)
 331         tracker.close()
 332         return True
 333
    # meant to stop only the vservers recorded in TRACKER_FILE, then to remove that file
 335     def cleanup_trackers (self):
 336         try:
 337             for line in TestPlc.TRACKER_FILE.readlines():
 338                 [vname,hostname]=line.split()
 339                 stop="vserver --silent %s stop"%vname
 340                 command=TestSsh(hostname).actual_command(stop)
 341                 utils.system(command)
 342             clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 343             utils.system(self.test_ssh.actual_command(clean_tracker))
 344         except:
 345             return True
 346
 347     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 348     def cleanup_all_trackers (self):
 349         stop_all = "cd /vservers ; for i in * ; do vserver --silent $i stop ; done"
 350         utils.system(self.test_ssh.actual_command(stop_all))
 351         clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 352         utils.system(self.test_ssh.actual_command(clean_tracker))
 353         return True
 354
    def uninstall(self):
        """Step: delete the vserver; the command's result is ignored and True is always returned."""
        self.run_in_host("vserver --silent %s delete"%self.vservername)
        return True
 358
 359     ### install
 360     def install(self):
 361         if self.is_local():
 362             # a full path for the local calls
 363             build_dir=os.path.dirname(sys.argv[0])
 364             # sometimes this is empty - set to "." in such a case
 365             if not build_dir: build_dir="."
 366             build_dir += "/build"
 367         else:
 368             # use a standard name - will be relative to remote buildname
 369             build_dir="build"
 370         # run checkout in any case - would do an update if already exists
 371         build_checkout = "svn checkout %s %s"%(self.options.build_url,build_dir)
 372         if self.run_in_host(build_checkout) != 0:
 373             return False
 374         # the repo url is taken from arch-rpms-url
 375         # with the last step (i386.) removed
 376         repo_url = self.options.arch_rpms_url
 377         for level in [ 'arch' ]:
 378             repo_url = os.path.dirname(repo_url)
 379         # pass the vbuild-nightly options to vtest-init-vserver
 380         test_env_options=""
 381         test_env_options += " -p %s"%self.options.personality
 382         test_env_options += " -d %s"%self.options.pldistro
 383         test_env_options += " -f %s"%self.options.fcdistro
 384         script="vtest-init-vserver.sh"
 385         vserver_name = self.vservername
 386         vserver_options="--netdev eth0 --interface %s"%self.vserverip
 387         try:
 388             vserver_hostname=socket.gethostbyaddr(self.vserverip)[0]
 389             vserver_options += " --hostname %s"%vserver_hostname
 390         except:
 391             pass
 392         create_vserver="%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s"%locals()
 393         return self.run_in_host(create_vserver) == 0
 394
 395     ### install_rpm
 396     def install_rpm(self):
 397         return self.run_in_guest("yum -y install myplc-native")==0 \
 398             and self.run_in_guest("yum -y install noderepo-%s-%s"%(self.options.pldistro,self.options.arch))
 399
 400     ###
 401     def configure(self):
 402         tmpname='%s.plc-config-tty'%(self.name())
 403         fileconf=open(tmpname,'w')
 404         for var in [ 'PLC_NAME',
 405                      'PLC_ROOT_PASSWORD',
 406                      'PLC_ROOT_USER',
 407                      'PLC_MAIL_ENABLED',
 408                      'PLC_MAIL_SUPPORT_ADDRESS',
 409                      'PLC_DB_HOST',
 410                      'PLC_API_HOST',
 411                      'PLC_WWW_HOST',
 412                      'PLC_BOOT_HOST',
 413                      'PLC_NET_DNS1',
 414                      'PLC_NET_DNS2']:
 415             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
 416         fileconf.write('w\n')
 417         fileconf.write('q\n')
 418         fileconf.close()
 419         utils.system('cat %s'%tmpname)
 420         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
 421         utils.system('rm %s'%tmpname)
 422         return True
 423
    def start(self):
        """Step: run 'service plc start' in the vserver; the result is ignored, always returns True."""
        self.run_in_guest('service plc start')
        return True
 427
    def stop(self):
        """Step: run 'service plc stop' in the vserver; the result is ignored, always returns True."""
        self.run_in_guest('service plc stop')
        return True
 431
    def vs_start (self):
        """Step: start the vserver itself; the result is ignored, always returns True."""
        self.start_guest()
        return True
 435
 436     # stores the keys from the config for further use
 437     def store_keys(self):
 438         for key_spec in self.plc_spec['keys']:
 439                 TestKey(self,key_spec).store_key()
 440         return True
 441
 442     def clean_keys(self):
 443         utils.system("rm -rf %s/keys/"%os.path(sys.argv[0]))
 444
 445     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 446     # for later direct access to the nodes
 447     def fetch_keys(self):
 448         dir="./keys"
 449         if not os.path.isdir(dir):
 450             os.mkdir(dir)
 451         prefix = 'root_ssh_key'
 452         vservername=self.vservername
 453         overall=True
 454         for ext in [ 'pub', 'rsa' ] :
 455             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 456             dst="keys/%(vservername)s.%(ext)s"%locals()
 457             if self.test_ssh.fetch(src,dst) != 0: overall=False
 458         return overall
 459
    def sites (self):
        """Step: create the sites of the spec and their users (do_sites with its default action)."""
        return self.do_sites()
 462
    def clean_sites (self):
        """Step: delete the sites of the spec (do_sites with action 'delete')."""
        return self.do_sites(action="delete")
 465
 466     def do_sites (self,action="add"):
 467         for site_spec in self.plc_spec['sites']:
 468             test_site = TestSite (self,site_spec)
 469             if (action != "add"):
 470                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
 471                 test_site.delete_site()
 472                 # deleted with the site
 473                 #test_site.delete_users()
 474                 continue
 475             else:
 476                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
 477                 test_site.create_site()
 478                 test_site.create_users()
 479         return True
 480
 481     def clean_all_sites (self):
 482         print 'auth_root',self.auth_root()
 483         site_ids = [s['site_id'] for s in self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])]
 484         for site_id in site_ids:
 485             print 'Deleting site_id',site_id
 486             self.apiserver.DeleteSite(self.auth_root(),site_id)
 487
    def nodes (self):
        """Step: create the nodes of the spec (do_nodes with its default action)."""
        return self.do_nodes()
    def clean_nodes (self):
        """Step: delete the nodes of the spec (do_nodes with action 'delete')."""
        return self.do_nodes(action="delete")
 492
 493     def do_nodes (self,action="add"):
 494         for site_spec in self.plc_spec['sites']:
 495             test_site = TestSite (self,site_spec)
 496             if action != "add":
 497                 utils.header("Deleting nodes in site %s"%test_site.name())
 498                 for node_spec in site_spec['nodes']:
 499                     test_node=TestNode(self,test_site,node_spec)
 500                     utils.header("Deleting %s"%test_node.name())
 501                     test_node.delete_node()
 502             else:
 503                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
 504                 for node_spec in site_spec['nodes']:
 505                     utils.pprint('Creating node %s'%node_spec,node_spec)
 506                     test_node = TestNode (self,test_site,node_spec)
 507                     test_node.create_node ()
 508         return True
 509
    def nodegroups (self):
        """Step: create and populate the nodegroups mentioned in the spec (do_nodegroups 'add')."""
        return self.do_nodegroups("add")
    def clean_nodegroups (self):
        """Step: delete the nodegroups mentioned in the spec (do_nodegroups 'delete')."""
        return self.do_nodegroups("delete")
 514
 515     # create nodegroups if needed, and populate
    def do_nodegroups (self, action="add"):
        """Create and populate ('add'), or delete (any other action), the nodegroups of the spec.

        A node spec may carry a 'nodegroups' entry, either a single name or a
        list of names.  For 'add', each group gets a tag type named after it
        (created if missing), a nodegroup defined as tag == 'yes' (created if
        missing), and every member node is tagged 'yes'; the tag is then read
        back to check it.  Returns False if a read-back fails or mismatches
        (read-back failures are ignored in dry-run mode), or if a deletion
        raises; True otherwise.
        """
        # 1st pass to scan contents
        # groups_dict maps nodegroupname -> list of node names
        groups_dict = {}
        for site_spec in self.plc_spec['sites']:
            test_site = TestSite (self,site_spec)
            for node_spec in site_spec['nodes']:
                test_node=TestNode (self,test_site,node_spec)
                if node_spec.has_key('nodegroups'):
                    nodegroupnames=node_spec['nodegroups']
                    # a single name is accepted in place of a list
                    if isinstance(nodegroupnames,StringTypes):
                        nodegroupnames = [ nodegroupnames ]
                    for nodegroupname in nodegroupnames:
                        if not groups_dict.has_key(nodegroupname):
                            groups_dict[nodegroupname]=[]
                        groups_dict[nodegroupname].append(test_node.name())
        auth=self.auth_root()
        overall = True
        for (nodegroupname,group_nodes) in groups_dict.iteritems():
            if action == "add":
                print 'nodegroups:','dealing with nodegroup',nodegroupname,'on nodes',group_nodes
                # first, check if the nodetagtype is here
                tag_types = self.apiserver.GetTagTypes(auth,{'tagname':nodegroupname})
                if tag_types:
                    tag_type_id = tag_types[0]['tag_type_id']
                else:
                    tag_type_id = self.apiserver.AddTagType(auth,
                                                            {'tagname':nodegroupname,
                                                             'description': 'for nodegroup %s'%nodegroupname,
                                                             'category':'test',
                                                             'min_role_id':10})
                print 'located tag (type)',nodegroupname,'as',tag_type_id
                # create nodegroup
                nodegroups = self.apiserver.GetNodeGroups (auth, {'groupname':nodegroupname})
                if not nodegroups:
                    self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
                    print 'created nodegroup',nodegroupname,'from tagname',nodegroupname,'and value','yes'
                # set node tag on all nodes, value='yes'
                for nodename in group_nodes:
                    # a failure to add the tag is only reported, the read-back below decides
                    try:
                        self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
                    except:
                        traceback.print_exc()
                        print 'node',nodename,'seems to already have tag',nodegroupname
                    # check anyway
                    try:
                        expect_yes = self.apiserver.GetNodeTags(auth,
                                                                {'hostname':nodename,
                                                                 'tagname':nodegroupname},
                                                                ['tagvalue'])[0]['tagvalue']
                        if expect_yes != "yes":
                            print 'Mismatch node tag on node',nodename,'got',expect_yes
                            overall=False
                    except:
                        # in dry-run mode a failed read-back is not counted as a failure
                        if not self.options.dry_run:
                            print 'Cannot find tag',nodegroupname,'on node',nodename
                            overall = False
            else:
                try:
                    print 'cleaning nodegroup',nodegroupname
                    self.apiserver.DeleteNodeGroup(auth,nodegroupname)
                except:
                    traceback.print_exc()
                    overall=False
        return overall
 580
 581     def all_hostnames (self) :
 582         hostnames = []
 583         for site_spec in self.plc_spec['sites']:
 584             hostnames += [ node_spec['node_fields']['hostname'] \
 585                            for node_spec in site_spec['nodes'] ]
 586         return hostnames
 587
 588     # gracetime : during the first <gracetime> minutes nothing gets printed
    def do_nodes_booted (self, minutes, gracetime,period=15):
        """Poll the API until every node reports the 'boot' state.

        minutes   -- overall timeout, in minutes
        gracetime -- initial number of minutes during which nodes that are
                     not yet booted are not reported
        period    -- pause between two polls, in seconds

        Real nodes (per TestNode.is_real_model) are considered booted whatever
        their state.  Returns True once all nodes are in 'boot' state (at once
        in dry-run mode or when there is no node), False when the timeout expires.
        """
        if self.options.dry_run:
            print 'dry_run'
            return True
        # compute timeout
        timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
        graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
        # the nodes that haven't checked yet - start with a full list and shrink over time
        tocheck = self.all_hostnames()
        utils.header("checking nodes %r"%tocheck)
        # create a dict hostname -> status
        status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
        while tocheck:
            # get their status
            tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
            # update status
            for array in tocheck_status:
                hostname=array['hostname']
                boot_state=array['boot_state']
                if boot_state == 'boot':
                    utils.header ("%s has reached the 'boot' state"%hostname)
                else:
                    # if it's a real node, never mind
                    (site_spec,node_spec)=self.locate_hostname(hostname)
                    if TestNode.is_real_model(node_spec['node_fields']['model']):
                        utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
                        # let's cheat
                        boot_state = 'boot'
                    elif datetime.datetime.now() > graceout:
                        utils.header ("%s still in '%s' state"%(hostname,boot_state))
                        # timedelta(1) is one day : this postpones further such messages by a day
                        # NOTE(review): possibly meant as a shorter delay - confirm
                        graceout=datetime.datetime.now()+datetime.timedelta(1)
                status[hostname] = boot_state
            # refresh tocheck
            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
            if not tocheck:
                return True
            if datetime.datetime.now() > timeout:
                for hostname in tocheck:
                    utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
                return False
            # otherwise, sleep for a while
            time.sleep(period)
        # only useful in empty plcs
        return True
 633
    def nodes_booted(self):
        """Step: wait up to 20 minutes for all nodes to boot, staying quiet for the first 15."""
        return self.do_nodes_booted(minutes=20,gracetime=15)
 636
 637     def do_nodes_ssh(self,minutes,gracetime,period=20):
 638         # compute timeout
 639         timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
 640         graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
 641         tocheck = self.all_hostnames()
 642 #        self.scan_publicKeys(tocheck)
 643         utils.header("checking ssh access to root context on nodes %r"%tocheck)
 644         while tocheck:
 645             for hostname in tocheck:
 646                 # try to ssh in nodes
 647                 # ssh hostname to the node from the plc
 648                 cmd1 = TestSsh (hostname,key="/etc/planetlab/root_ssh_key.rsa").actual_command("hostname")
 649                 # run this in the guest
 650                 cmd2 = self.test_ssh.actual_command(cmd1)
 651                 # don't spam logs - show the command only after the grace period
 652                 if datetime.datetime.now() > graceout:
 653                     success=utils.system(cmd2)
 654                 else:
 655                     success=os.system(cmd2)
 656                 if success:
 657                     utils.header('Successfully entered root@%s'%hostname)
 658                     # refresh tocheck
 659                     tocheck.remove(hostname)
 660                 else:
 661                     # we will have tried real nodes once, in case they're up - but if not, just skip
 662                     (site_spec,node_spec)=self.locate_hostname(hostname)
 663                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 664                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
 665                         tocheck.remove(hostname)
 666             if  not tocheck:
 667                 return True
 668             if datetime.datetime.now() > timeout:
 669                 for hostname in tocheck:
 670                     utils.header("FAILURE to ssh into %s"%hostname)
 671                 return False
 672             # otherwise, sleep for a while
 673             time.sleep(period)
 674         # only useful in empty plcs
 675         return True
 676
    def nodes_ssh(self):
        """Step: wait up to 30 minutes for ssh access to all nodes, with a 10-minute grace time."""
        return self.do_nodes_ssh(minutes=30,gracetime=10)
 679
    # the bodies below are placeholders: node_mapper replaces each stub with a step that calls
    # the TestNode method of the same name on every node of every site, and that returns
    # True only if all these calls returned a true value
    @node_mapper
    def init_node (self): pass
    @node_mapper
    def bootcd (self): pass
    @node_mapper
    def configure_qemu (self): pass
    @node_mapper
    def reinstall_node (self): pass
    @node_mapper
    def export_qemu (self): pass
 690
 691     ### check sanity : invoke scripts from qaapi/qa/tests/{node,slice}
 692     def check_sanity_node (self):
 693         return self.locate_first_node().check_sanity()
 694     def check_sanity_sliver (self) :
 695         return self.locate_first_sliver().check_sanity()
 696
 697     def check_sanity (self):
 698         return self.check_sanity_node() and self.check_sanity_sliver()
 699
 700     ### initscripts
 701     def do_check_initscripts(self):
 702         overall = True
 703         for slice_spec in self.plc_spec['slices']:
 704             if not slice_spec.has_key('initscriptname'):
 705                 continue
 706             initscript=slice_spec['initscriptname']
 707             for nodename in slice_spec['nodenames']:
 708                 (site,node) = self.locate_node (nodename)
 709                 # xxx - passing the wrong site - probably harmless
 710                 test_site = TestSite (self,site)
 711                 test_slice = TestSlice (self,test_site,slice_spec)
 712                 test_node = TestNode (self,test_site,node)
 713                 test_sliver = TestSliver (self, test_node, test_slice)
 714                 if not test_sliver.check_initscript(initscript):
 715                     overall = False
 716         return overall
 717
 718     def check_initscripts(self):
 719             return self.do_check_initscripts()
 720
 721     def initscripts (self):
 722         for initscript in self.plc_spec['initscripts']:
 723             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
 724             self.apiserver.AddInitScript(self.auth_root(),initscript['initscript_fields'])
 725         return True
 726
 727     def clean_initscripts (self):
 728         for initscript in self.plc_spec['initscripts']:
 729             initscript_name = initscript['initscript_fields']['name']
 730             print('Attempting to delete %s in plc %s'%(initscript_name,self.plc_spec['name']))
 731             try:
 732                 self.apiserver.DeleteInitScript(self.auth_root(),initscript_name)
 733                 print initscript_name,'deleted'
 734             except:
 735                 print 'deletion went wrong - probably did not exist'
 736         return True
 737
 738     ### manage slices
 739     def slices (self):
 740         return self.do_slices()
 741
 742     def clean_slices (self):
 743         return self.do_slices("delete")
 744
 745     def do_slices (self,  action="add"):
 746         for slice in self.plc_spec['slices']:
 747             site_spec = self.locate_site (slice['sitename'])
 748             test_site = TestSite(self,site_spec)
 749             test_slice=TestSlice(self,test_site,slice)
 750             if action != "add":
 751                 utils.header("Deleting slices in site %s"%test_site.name())
 752                 test_slice.delete_slice()
 753             else:
 754                 utils.pprint("Creating slice",slice)
 755                 test_slice.create_slice()
 756                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
 757         return True
 758
 759     @slice_mapper_options
 760     def check_slice(self): pass
 761
 762     @node_mapper
 763     def clear_known_hosts (self): pass
 764
 765     @node_mapper
 766     def start_node (self) : pass
 767
 768     def check_tcp (self):
 769         specs = self.plc_spec['tcp_test']
 770         overall=True
 771         for spec in specs:
 772             port = spec['port']
 773             # server side
 774             s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
 775             if not s_test_sliver.run_tcp_server(port,timeout=10):
 776                 overall=False
 777                 break
 778
 779             # idem for the client side
 780             c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
 781             if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
 782                 overall=False
 783         return overall
 784
 785     def plcsh_stress_test (self):
 786         # install the stress-test in the plc image
 787         location = "/usr/share/plc_api/plcsh-stress-test.py"
 788         remote="/vservers/%s/%s"%(self.vservername,location)
 789         self.test_ssh.copy_abs("plcsh-stress-test.py",remote)
 790         command = location
 791         command += " -- --check"
 792         if self.options.small_test:
 793             command +=  " --tiny"
 794         return ( self.run_in_guest(command) == 0)
 795
 796     def gather_logs (self):
 797         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
 798         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
 799         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
 800         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
 801         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
 802         # (1.a)
 803         print "-------------------- TestPlc.gather_logs : PLC's /var/log"
 804         self.gather_var_logs ()
 805         # (1.b)
 806         print "-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/"
 807         self.gather_pgsql_logs ()
 808         # (2)
 809         print "-------------------- TestPlc.gather_logs : nodes's QEMU logs"
 810         for site_spec in self.plc_spec['sites']:
 811             test_site = TestSite (self,site_spec)
 812             for node_spec in site_spec['nodes']:
 813                 test_node=TestNode(self,test_site,node_spec)
 814                 test_node.gather_qemu_logs()
 815         # (3)
 816         print "-------------------- TestPlc.gather_logs : nodes's /var/log"
 817         self.gather_nodes_var_logs()
 818         # (4)
 819         print "-------------------- TestPlc.gather_logs : sample sliver's /var/log"
 820         self.gather_slivers_var_logs()
 821         return True
 822
 823     def gather_slivers_var_logs(self):
 824         for test_sliver in self.all_sliver_objs():
 825             remote = test_sliver.tar_var_logs()
 826             utils.system("mkdir -p logs/sliver.var-log.%s"%test_sliver.name())
 827             command = remote + " | tar -C logs/sliver.var-log.%s -xf -"%test_sliver.name()
 828             utils.system(command)
 829         return True
 830
 831     def gather_var_logs (self):
 832         utils.system("mkdir -p logs/myplc.var-log.%s"%self.name())
 833         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
 834         command = to_plc + "| tar -C logs/myplc.var-log.%s -xf -"%self.name()
 835         utils.system(command)
 836         command = "chmod a+r,a+x logs/myplc.var-log.%s/httpd"%self.name()
 837         utils.system(command)
 838
 839     def gather_pgsql_logs (self):
 840         utils.system("mkdir -p logs/myplc.pgsql-log.%s"%self.name())
 841         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
 842         command = to_plc + "| tar -C logs/myplc.pgsql-log.%s -xf -"%self.name()
 843         utils.system(command)
 844
 845     def gather_nodes_var_logs (self):
 846         for site_spec in self.plc_spec['sites']:
 847             test_site = TestSite (self,site_spec)
 848             for node_spec in site_spec['nodes']:
 849                 test_node=TestNode(self,test_site,node_spec)
 850                 test_ssh = TestSsh (test_node.name(),key="/etc/planetlab/root_ssh_key.rsa")
 851                 to_plc = self.actual_command_in_guest ( test_ssh.actual_command("tar -C /var/log -cf - ."))
 852                 command = to_plc + "| tar -C logs/node.var-log.%s -xf -"%test_node.name()
 853                 utils.system("mkdir -p logs/node.var-log.%s"%test_node.name())
 854                 utils.system(command)
 855
 856
 857     # returns the filename to use for sql dump/restore, using options.dbname if set
 858     def dbfile (self, database):
 859         # uses options.dbname if it is found
 860         try:
 861             name=self.options.dbname
 862             if not isinstance(name,StringTypes):
 863                 raise Exception
 864         except:
 865             t=datetime.datetime.now()
 866             d=t.date()
 867             name=str(d)
 868         return "/root/%s-%s.sql"%(database,name)
 869
 870     def db_dump(self):
 871         dump=self.dbfile("planetab4")
 872         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
 873         utils.header('Dumped planetlab4 database in %s'%dump)
 874         return True
 875
 876     def db_restore(self):
 877         dump=self.dbfile("planetab4")
 878         ##stop httpd service
 879         self.run_in_guest('service httpd stop')
 880         # xxx - need another wrapper
 881         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
 882         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
 883         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
 884         ##starting httpd service
 885         self.run_in_guest('service httpd start')
 886
 887         utils.header('Database restored from ' + dump)
 888
 889     @standby_generic
 890     def standby_1(): pass
 891     @standby_generic
 892     def standby_2(): pass
 893     @standby_generic
 894     def standby_3(): pass
 895     @standby_generic
 896     def standby_4(): pass
 897     @standby_generic
 898     def standby_5(): pass
 899     @standby_generic
 900     def standby_6(): pass
 901     @standby_generic
 902     def standby_7(): pass
 903     @standby_generic
 904     def standby_8(): pass
 905     @standby_generic
 906     def standby_9(): pass
 907     @standby_generic
 908     def standby_10(): pass
 909     @standby_generic
 910     def standby_11(): pass
 911     @standby_generic
 912     def standby_12(): pass
 913     @standby_generic
 914     def standby_13(): pass
 915     @standby_generic
 916     def standby_14(): pass
 917     @standby_generic
 918     def standby_15(): pass
 919     @standby_generic
 920     def standby_16(): pass
 921     @standby_generic
 922     def standby_17(): pass
 923     @standby_generic
 924     def standby_18(): pass
 925     @standby_generic
 926     def standby_19(): pass
 927     @standby_generic
 928     def standby_20(): pass
 929