system/TestPlc.py

   1 # $Id$
   2 import os, os.path
   3 import datetime
   4 import time
   5 import sys
   6 import traceback
   7 from types import StringTypes
   8 import socket
   9
  10 import utils
  11 from TestSite import TestSite
  12 from TestNode import TestNode
  13 from TestUser import TestUser
  14 from TestKey import TestKey
  15 from TestSlice import TestSlice
  16 from TestSliver import TestSliver
  17 from TestBox import TestBox
  18 from TestSsh import TestSsh
  19 from TestApiserver import TestApiserver
  20
  21 # step methods must take (self) and return a boolean (options is a member of the class)
  22
  23 def standby(minutes,dry_run):
  24     utils.header('Entering StandBy for %d mn'%minutes)
  25     if dry_run:
  26         print 'dry_run'
  27     else:
  28         time.sleep(60*minutes)
  29     return True
  30
  31 def standby_generic (func):
  32     def actual(self):
  33         minutes=int(func.__name__.split("_")[1])
  34         return standby(minutes,self.options.dry_run)
  35     return actual
  36
  37 def node_mapper (method):
  38     def actual(self):
  39         overall=True
  40         node_method = TestNode.__dict__[method.__name__]
  41         for site_spec in self.plc_spec['sites']:
  42             test_site = TestSite (self,site_spec)
  43             for node_spec in site_spec['nodes']:
  44                 test_node = TestNode (self,test_site,node_spec)
  45                 if not node_method(test_node): overall=False
  46         return overall
  47     return actual
  48
  49 def slice_mapper_options (method):
  50     def actual(self):
  51         overall=True
  52         slice_method = TestSlice.__dict__[method.__name__]
  53         for slice_spec in self.plc_spec['slices']:
  54             site_spec = self.locate_site (slice_spec['sitename'])
  55             test_site = TestSite(self,site_spec)
  56             test_slice=TestSlice(self,test_site,slice_spec)
  57             if not slice_method(test_slice,self.options): overall=False
  58         return overall
  59     return actual
  60
# marker inserted in the step lists below; it is not a step itself
# (see TestPlc.valid_step) and is rendered as a line break by TestPlc.printable_steps
SEP='<sep>'
  62
  63 class TestPlc:
  64
    # step names, grouped for display with SEP markers
    # presumably the steps that the test driver runs when none is specified - confirm in caller
    default_steps = ['uninstall','install','install_rpm',
                     'configure', 'start', 'fetch_keys', SEP,
                     'store_keys', 'clear_known_hosts', 'initscripts', SEP,
                     'sites', 'nodes', 'slices', 'nodegroups', SEP,
                     'init_node','bootcd', 'configure_qemu', 'export_qemu',
                     'kill_all_qemus', 'reinstall_node','start_node', SEP,
                     'nodes_booted', 'nodes_ssh', 'check_slice', 'check_initscripts', SEP,
                     'check_sanity', 'check_tcp', 'plcsh_stress_test', SEP,
                     'force_gather_logs', 'force_kill_qemus', 'force_record_tracker','force_free_tracker' ]
    # presumably the additional steps that are available on demand - confirm in caller
    # NOTE(review): the last entry is a description rather than a step name
    other_steps = [ 'stop_all_vservers','fresh_install', 'cache_rpm', 'stop', 'vs_start', SEP,
                    'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
                    'clean_sites', 'clean_nodes',
                    'clean_slices', 'clean_keys', SEP,
                    'show_boxes', 'list_all_qemus', 'list_qemus', SEP,
                    'db_dump' , 'db_restore', 'cleanup_trackers', 'cleanup_all_trackers',
                    'standby_1 through 20'
                    ]
  82
  83     @staticmethod
  84     def printable_steps (list):
  85         return " ".join(list).replace(" "+SEP+" "," \\\n")
  86     @staticmethod
  87     def valid_step (step):
  88         return step != SEP
  89
  90     def __init__ (self,plc_spec,options):
  91         self.plc_spec=plc_spec
  92         self.options=options
  93         self.test_ssh=TestSsh(self.plc_spec['hostname'],self.options.buildname)
  94         try:
  95             self.vserverip=plc_spec['vserverip']
  96             self.vservername=plc_spec['vservername']
  97             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
  98             self.vserver=True
  99         except:
 100             raise Exception,'chroot-based myplc testing is deprecated'
 101         self.apiserver=TestApiserver(self.url,options.dry_run)
 102
 103     def name(self):
 104         name=self.plc_spec['name']
 105         return "%s.%s"%(name,self.vservername)
 106
 107     def hostname(self):
 108         return self.plc_spec['hostname']
 109
 110     def is_local (self):
 111         return self.test_ssh.is_local()
 112
 113     # define the API methods on this object through xmlrpc
 114     # would help, but not strictly necessary
 115     def connect (self):
 116         pass
 117
 118     def actual_command_in_guest (self,command):
 119         return self.test_ssh.actual_command(self.host_to_guest(command))
 120
 121     def start_guest (self):
 122       return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))
 123
 124     def run_in_guest (self,command):
 125         return utils.system(self.actual_command_in_guest(command))
 126
 127     def run_in_host (self,command):
 128         return self.test_ssh.run_in_buildname(command)
 129
 130     #command gets run in the vserver
 131     def host_to_guest(self,command):
 132         return "vserver %s exec %s"%(self.vservername,command)
 133
    # command that starts the vserver - gets run in the host
 135     def start_guest_in_host(self):
 136         return "vserver %s start"%(self.vservername)
 137
 138     # xxx quick n dirty
 139     def run_in_guest_piped (self,local,remote):
 140         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),keep_stdin=True))
 141
 142     def auth_root (self):
 143         return {'Username':self.plc_spec['PLC_ROOT_USER'],
 144                 'AuthMethod':'password',
 145                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
 146                 'Role' : self.plc_spec['role']
 147                 }
 148     def locate_site (self,sitename):
 149         for site in self.plc_spec['sites']:
 150             if site['site_fields']['name'] == sitename:
 151                 return site
 152             if site['site_fields']['login_base'] == sitename:
 153                 return site
 154         raise Exception,"Cannot locate site %s"%sitename
 155
 156     def locate_node (self,nodename):
 157         for site in self.plc_spec['sites']:
 158             for node in site['nodes']:
 159                 if node['name'] == nodename:
 160                     return (site,node)
 161         raise Exception,"Cannot locate node %s"%nodename
 162
 163     def locate_hostname (self,hostname):
 164         for site in self.plc_spec['sites']:
 165             for node in site['nodes']:
 166                 if node['node_fields']['hostname'] == hostname:
 167                     return (site,node)
 168         raise Exception,"Cannot locate hostname %s"%hostname
 169
 170     def locate_key (self,keyname):
 171         for key in self.plc_spec['keys']:
 172             if key['name'] == keyname:
 173                 return key
 174         raise Exception,"Cannot locate key %s"%keyname
 175
 176     def locate_slice (self, slicename):
 177         for slice in self.plc_spec['slices']:
 178             if slice['slice_fields']['name'] == slicename:
 179                 return slice
 180         raise Exception,"Cannot locate slice %s"%slicename
 181
 182     def all_sliver_objs (self):
 183         result=[]
 184         for slice_spec in self.plc_spec['slices']:
 185             slicename = slice_spec['slice_fields']['name']
 186             for nodename in slice_spec['nodenames']:
 187                 result.append(self.locate_sliver_obj (nodename,slicename))
 188         return result
 189
 190     def locate_sliver_obj (self,nodename,slicename):
 191         (site,node) = self.locate_node(nodename)
 192         slice = self.locate_slice (slicename)
 193         # build objects
 194         test_site = TestSite (self, site)
 195         test_node = TestNode (self, test_site,node)
 196         # xxx the slice site is assumed to be the node site - mhh - probably harmless
 197         test_slice = TestSlice (self, test_site, slice)
 198         return TestSliver (self, test_node, test_slice)
 199
 200     def locate_first_node(self):
 201         nodename=self.plc_spec['slices'][0]['nodenames'][0]
 202         (site,node) = self.locate_node(nodename)
 203         test_site = TestSite (self, site)
 204         test_node = TestNode (self, test_site,node)
 205         return test_node
 206
 207     def locate_first_sliver (self):
 208         slice_spec=self.plc_spec['slices'][0]
 209         slicename=slice_spec['slice_fields']['name']
 210         nodename=slice_spec['nodenames'][0]
 211         return self.locate_sliver_obj(nodename,slicename)
 212
 213     # all different hostboxes used in this plc
 214     def gather_hostBoxes(self):
 215         # maps on sites and nodes, return [ (host_box,test_node) ]
 216         tuples=[]
 217         for site_spec in self.plc_spec['sites']:
 218             test_site = TestSite (self,site_spec)
 219             for node_spec in site_spec['nodes']:
 220                 test_node = TestNode (self, test_site, node_spec)
 221                 if not test_node.is_real():
 222                     tuples.append( (test_node.host_box(),test_node) )
 223         # transform into a dict { 'host_box' -> [ test_node .. ] }
 224         result = {}
 225         for (box,node) in tuples:
 226             if not result.has_key(box):
 227                 result[box]=[node]
 228             else:
 229                 result[box].append(node)
 230         return result
 231
 232     # a step for checking this stuff
 233     def show_boxes (self):
 234         for (box,nodes) in self.gather_hostBoxes().iteritems():
 235             print box,":"," + ".join( [ node.name() for node in nodes ] )
 236         return True
 237
 238     # make this a valid step
 239     def kill_all_qemus(self):
 240         # this is the brute force version, kill all qemus on that host box
 241         for (box,nodes) in self.gather_hostBoxes().iteritems():
 242             # pass the first nodename, as we don't push template-qemu on testboxes
 243             nodedir=nodes[0].nodedir()
 244             TestBox(box,self.options.buildname).kill_all_qemus(nodedir)
 245         return True
 246
 247     # make this a valid step
 248     def list_all_qemus(self):
 249         for (box,nodes) in self.gather_hostBoxes().iteritems():
 250             # this is the brute force version, kill all qemus on that host box
 251             TestBox(box,self.options.buildname).list_all_qemus()
 252         return True
 253
    # list only the right qemus
 255     def list_qemus(self):
 256         for (box,nodes) in self.gather_hostBoxes().iteritems():
 257             # the fine-grain version
 258             for node in nodes:
 259                 node.list_qemu()
 260         return True
 261
 262     # kill only the right qemus
 263     def kill_qemus(self):
 264         for (box,nodes) in self.gather_hostBoxes().iteritems():
 265             # the fine-grain version
 266             for node in nodes:
 267                 node.kill_qemu()
 268         return True
 269
 270
 271     ### utility methods for handling the pool of IP addresses allocated to plcs
 272     # Logic
 273     # (*) running plcs are recorded in the file named ~/running-test-plcs
 274     # (*) this file contains a line for each running plc, older first
 275     # (*) each line contains the vserver name + the hostname of the (vserver) testbox where it sits
 276     # (*) the free_tracker method performs a vserver stop on the oldest entry
 277     # (*) the record_tracker method adds an entry at the bottom of the file
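    # (*) the cleanup_trackers method is meant to stop the vservers listed in the tracker file and to remove that file
    # (*) the cleanup_all_trackers method stops all the vservers found under /vservers and removes the tracker file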
 279
    # path of the tracker file, located in the home directory of the user running the tests
    # NOTE(review): evaluated at class-definition time - raises KeyError when HOME is not set
    TRACKER_FILE=os.environ['HOME']+"/running-test-plcs"
    # how many concurrent plcs are we keeping alive - adjust with the IP pool size
    # used as the default for free_tracker's keep_vservers
    TRACKER_KEEP_VSERVERS = 12
 283
 284     def record_tracker (self):
 285         try:
 286             lines=file(TestPlc.TRACKER_FILE).readlines()
 287         except:
 288             lines=[]
 289
 290         this_line="%s %s\n"%(self.vservername,self.test_ssh.hostname)
 291         for line in lines:
 292             if line==this_line:
 293                 print 'this vserver is already included in %s'%TestPlc.TRACKER_FILE
 294                 return True
 295         if self.options.dry_run:
 296             print 'dry_run: record_tracker - skipping tracker update'
 297             return True
 298         tracker=file(TestPlc.TRACKER_FILE,"w")
 299         for line in lines+[this_line]:
 300             tracker.write(line)
 301         tracker.close()
 302         print "Recorded %s in running plcs on host %s"%(self.vservername,self.test_ssh.hostname)
 303         return True
 304
 305     def free_tracker (self, keep_vservers=None):
 306         if not keep_vservers: keep_vservers=TestPlc.TRACKER_KEEP_VSERVERS
 307         try:
 308             lines=file(TestPlc.TRACKER_FILE).readlines()
 309         except:
 310             print 'dry_run: free_tracker - skipping tracker update'
 311             return True
 312         how_many = len(lines) - keep_vservers
 313         # nothing todo until we have more than keep_vservers in the tracker
 314         if how_many <= 0:
 315             print 'free_tracker : limit %d not reached'%keep_vservers
 316             return True
 317         to_stop = lines[:how_many]
 318         to_keep = lines[how_many:]
 319         for line in to_stop:
 320             print '>%s<'%line
 321             [vname,hostname]=line.split()
 322             command=TestSsh(hostname).actual_command("vserver --silent %s stop"%vname)
 323             utils.system(command)
 324         if self.options.dry_run:
 325             print 'dry_run: free_tracker would stop %d vservers'%len(to_stop)
 326             for line in to_stop: print line,
 327             print 'dry_run: free_tracker would keep %d vservers'%len(to_keep)
 328             for line in to_keep: print line,
 329             return True
 330         print "Storing %d remaining vservers in %s"%(len(to_keep),TestPlc.TRACKER_FILE)
 331         tracker=open(TestPlc.TRACKER_FILE,"w")
 332         for line in to_keep:
 333             tracker.write(line)
 334         tracker.close()
 335         return True
 336
 337     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 338     def cleanup_trackers (self):
 339         try:
 340             for line in TestPlc.TRACKER_FILE.readlines():
 341                 [vname,hostname]=line.split()
 342                 stop="vserver --silent %s stop"%vname
 343                 command=TestSsh(hostname).actual_command(stop)
 344                 utils.system(command)
 345             clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 346             utils.system(self.test_ssh.actual_command(clean_tracker))
 347         except:
 348             return True
 349
 350     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 351     def cleanup_all_trackers (self):
 352         stop_all = "cd /vservers ; for i in * ; do vserver --silent $i stop ; done"
 353         utils.system(self.test_ssh.actual_command(stop_all))
 354         clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 355         utils.system(self.test_ssh.actual_command(clean_tracker))
 356         return True
 357
 358     def uninstall(self):
 359         self.run_in_host("vserver --silent %s delete"%self.vservername)
 360         return True
 361
 362     ### install
 363     def install(self):
 364         if self.is_local():
 365             # a full path for the local calls
 366             build_dir=os.path.dirname(sys.argv[0])
 367             # sometimes this is empty - set to "." in such a case
 368             if not build_dir: build_dir="."
 369             build_dir += "/build"
 370         else:
 371             # use a standard name - will be relative to remote buildname
 372             build_dir="build"
 373         # run checkout in any case - would do an update if already exists
 374         build_checkout = "svn checkout %s %s"%(self.options.build_url,build_dir)
 375         if self.run_in_host(build_checkout) != 0:
 376             return False
 377         # the repo url is taken from arch-rpms-url
 378         # with the last step (i386.) removed
 379         repo_url = self.options.arch_rpms_url
 380         for level in [ 'arch' ]:
 381             repo_url = os.path.dirname(repo_url)
 382         # pass the vbuild-nightly options to vtest-init-vserver
 383         test_env_options=""
 384         test_env_options += " -p %s"%self.options.personality
 385         test_env_options += " -d %s"%self.options.pldistro
 386         test_env_options += " -f %s"%self.options.fcdistro
 387         script="vtest-init-vserver.sh"
 388         vserver_name = self.vservername
 389         vserver_options="--netdev eth0 --interface %s"%self.vserverip
 390         try:
 391             vserver_hostname=socket.gethostbyaddr(self.vserverip)[0]
 392             vserver_options += " --hostname %s"%vserver_hostname
 393         except:
 394             pass
 395         create_vserver="%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s"%locals()
 396         return self.run_in_host(create_vserver) == 0
 397
 398     ### install_rpm
 399     def install_rpm(self):
 400         return self.run_in_guest("yum -y install myplc-native")==0 \
 401             and self.run_in_guest("yum -y install noderepo-%s-%s"%(self.options.pldistro,self.options.arch))==0
 402
 403     ###
 404     def configure(self):
 405         tmpname='%s.plc-config-tty'%(self.name())
 406         fileconf=open(tmpname,'w')
 407         for var in [ 'PLC_NAME',
 408                      'PLC_ROOT_PASSWORD',
 409                      'PLC_ROOT_USER',
 410                      'PLC_MAIL_ENABLED',
 411                      'PLC_MAIL_SUPPORT_ADDRESS',
 412                      'PLC_DB_HOST',
 413                      'PLC_API_HOST',
 414                      'PLC_WWW_HOST',
 415                      'PLC_BOOT_HOST',
 416                      'PLC_NET_DNS1',
 417                      'PLC_NET_DNS2']:
 418             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
 419         fileconf.write('w\n')
 420         fileconf.write('q\n')
 421         fileconf.close()
 422         utils.system('cat %s'%tmpname)
 423         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
 424         utils.system('rm %s'%tmpname)
 425         return True
 426
 427     def start(self):
 428         self.run_in_guest('service plc start')
 429         return True
 430
 431     def stop(self):
 432         self.run_in_guest('service plc stop')
 433         return True
 434
 435     def vs_start (self):
 436         self.start_guest()
 437         return True
 438
 439     # stores the keys from the config for further use
 440     def store_keys(self):
 441         for key_spec in self.plc_spec['keys']:
 442                 TestKey(self,key_spec).store_key()
 443         return True
 444
 445     def clean_keys(self):
 446         utils.system("rm -rf %s/keys/"%os.path(sys.argv[0]))
 447
 448     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 449     # for later direct access to the nodes
 450     def fetch_keys(self):
 451         dir="./keys"
 452         if not os.path.isdir(dir):
 453             os.mkdir(dir)
 454         prefix = 'root_ssh_key'
 455         vservername=self.vservername
 456         overall=True
 457         for ext in [ 'pub', 'rsa' ] :
 458             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 459             dst="keys/%(vservername)s.%(ext)s"%locals()
 460             if self.test_ssh.fetch(src,dst) != 0: overall=False
 461         return overall
 462
 463     def sites (self):
 464         return self.do_sites()
 465
 466     def clean_sites (self):
 467         return self.do_sites(action="delete")
 468
 469     def do_sites (self,action="add"):
 470         for site_spec in self.plc_spec['sites']:
 471             test_site = TestSite (self,site_spec)
 472             if (action != "add"):
 473                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
 474                 test_site.delete_site()
 475                 # deleted with the site
 476                 #test_site.delete_users()
 477                 continue
 478             else:
 479                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
 480                 test_site.create_site()
 481                 test_site.create_users()
 482         return True
 483
 484     def clean_all_sites (self):
 485         print 'auth_root',self.auth_root()
 486         site_ids = [s['site_id'] for s in self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])]
 487         for site_id in site_ids:
 488             print 'Deleting site_id',site_id
 489             self.apiserver.DeleteSite(self.auth_root(),site_id)
 490
 491     def nodes (self):
 492         return self.do_nodes()
 493     def clean_nodes (self):
 494         return self.do_nodes(action="delete")
 495
 496     def do_nodes (self,action="add"):
 497         for site_spec in self.plc_spec['sites']:
 498             test_site = TestSite (self,site_spec)
 499             if action != "add":
 500                 utils.header("Deleting nodes in site %s"%test_site.name())
 501                 for node_spec in site_spec['nodes']:
 502                     test_node=TestNode(self,test_site,node_spec)
 503                     utils.header("Deleting %s"%test_node.name())
 504                     test_node.delete_node()
 505             else:
 506                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
 507                 for node_spec in site_spec['nodes']:
 508                     utils.pprint('Creating node %s'%node_spec,node_spec)
 509                     test_node = TestNode (self,test_site,node_spec)
 510                     test_node.create_node ()
 511         return True
 512
 513     def nodegroups (self):
 514         return self.do_nodegroups("add")
 515     def clean_nodegroups (self):
 516         return self.do_nodegroups("delete")
 517
 518     # create nodegroups if needed, and populate
    def do_nodegroups (self, action="add"):
        """Create and populate (action "add"), or delete (any other action), the nodegroups.

        The nodegroups are those mentioned in the optional 'nodegroups' entry
        of the node specs, that can hold either one name or a list of names.
        Returns False when a node tag reads back with a value other than "yes",
        when a node tag cannot be read back (except in dry-run mode), or when
        deleting a nodegroup raises; returns True otherwise.
        """
        # 1st pass to scan contents
        # groups_dict maps each nodegroup name to the names of the nodes that belong in it
        groups_dict = {}
        for site_spec in self.plc_spec['sites']:
            test_site = TestSite (self,site_spec)
            for node_spec in site_spec['nodes']:
                test_node=TestNode (self,test_site,node_spec)
                if node_spec.has_key('nodegroups'):
                    nodegroupnames=node_spec['nodegroups']
                    # a single name is accepted in place of a list
                    if isinstance(nodegroupnames,StringTypes):
                        nodegroupnames = [ nodegroupnames ]
                    for nodegroupname in nodegroupnames:
                        if not groups_dict.has_key(nodegroupname):
                            groups_dict[nodegroupname]=[]
                        groups_dict[nodegroupname].append(test_node.name())
        auth=self.auth_root()
        overall = True
        # 2nd pass: deal with each nodegroup in turn
        for (nodegroupname,group_nodes) in groups_dict.iteritems():
            if action == "add":
                print 'nodegroups:','dealing with nodegroup',nodegroupname,'on nodes',group_nodes
                # first, check if the nodetagtype is here
                tag_types = self.apiserver.GetTagTypes(auth,{'tagname':nodegroupname})
                if tag_types:
                    tag_type_id = tag_types[0]['tag_type_id']
                else:
                    # the tag type is named after the nodegroup
                    tag_type_id = self.apiserver.AddTagType(auth,
                                                            {'tagname':nodegroupname,
                                                             'description': 'for nodegroup %s'%nodegroupname,
                                                             'category':'test',
                                                             'min_role_id':10})
                print 'located tag (type)',nodegroupname,'as',tag_type_id
                # create nodegroup
                nodegroups = self.apiserver.GetNodeGroups (auth, {'groupname':nodegroupname})
                if not nodegroups:
                    self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
                    print 'created nodegroup',nodegroupname,'from tagname',nodegroupname,'and value','yes'
                # set node tag on all nodes, value='yes'
                for nodename in group_nodes:
                    # a failure to add the tag is only reported here - the check below decides
                    try:
                        self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
                    except:
                        traceback.print_exc()
                        print 'node',nodename,'seems to already have tag',nodegroupname
                    # check anyway
                    try:
                        expect_yes = self.apiserver.GetNodeTags(auth,
                                                                {'hostname':nodename,
                                                                 'tagname':nodegroupname},
                                                                ['tagvalue'])[0]['tagvalue']
                        if expect_yes != "yes":
                            print 'Mismatch node tag on node',nodename,'got',expect_yes
                            overall=False
                    except:
                        # in dry-run mode a failure to read the tag back is not held against the step
                        if not self.options.dry_run:
                            print 'Cannot find tag',nodegroupname,'on node',nodename
                            overall = False
            else:
                try:
                    print 'cleaning nodegroup',nodegroupname
                    self.apiserver.DeleteNodeGroup(auth,nodegroupname)
                except:
                    traceback.print_exc()
                    overall=False
        return overall
 583
 584     def all_hostnames (self) :
 585         hostnames = []
 586         for site_spec in self.plc_spec['sites']:
 587             hostnames += [ node_spec['node_fields']['hostname'] \
 588                            for node_spec in site_spec['nodes'] ]
 589         return hostnames
 590
 591     # gracetime : during the first <gracetime> minutes nothing gets printed
    def do_nodes_booted (self, minutes, gracetime,period=15):
        """Poll the API until all nodes have reached the 'boot' state.

        minutes   : overall timeout, in minutes
        gracetime : number of minutes during which nodes that are not booted yet are not reported
        period    : delay between two polls, in seconds
        Nodes whose model is a real one (TestNode.is_real_model) are considered
        booted whatever their actual state.
        Returns True when all nodes made it, False once the timeout has expired;
        in dry-run mode returns True right away.
        """
        if self.options.dry_run:
            print 'dry_run'
            return True
        # compute timeout
        timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
        graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
        # the nodes that haven't checked yet - start with a full list and shrink over time
        tocheck = self.all_hostnames()
        utils.header("checking nodes %r"%tocheck)
        # create a dict hostname -> status
        status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
        while tocheck:
            # get their status
            tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
            # update status
            for array in tocheck_status:
                hostname=array['hostname']
                boot_state=array['boot_state']
                if boot_state == 'boot':
                    utils.header ("%s has reached the 'boot' state"%hostname)
                else:
                    # if it's a real node, never mind
                    (site_spec,node_spec)=self.locate_hostname(hostname)
                    if TestNode.is_real_model(node_spec['node_fields']['model']):
                        utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
                        # let's cheat
                        boot_state = 'boot'
                    elif datetime.datetime.now() > graceout:
                        utils.header ("%s still in '%s' state"%(hostname,boot_state))
                        # NOTE(review): timedelta(1) is one day, so this mutes the 'still in'
                        # messages for a whole day after the first one - confirm this is intended
                        graceout=datetime.datetime.now()+datetime.timedelta(1)
                status[hostname] = boot_state
            # refresh tocheck
            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
            if not tocheck:
                return True
            if datetime.datetime.now() > timeout:
                for hostname in tocheck:
                    utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
                return False
            # otherwise, sleep for a while
            time.sleep(period)
        # only useful in empty plcs
        return True
 636
 637     def nodes_booted(self):
 638         return self.do_nodes_booted(minutes=20,gracetime=15)
 639
 640     def do_nodes_ssh(self,minutes,gracetime,period=20):
 641         # compute timeout
 642         timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
 643         graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
 644         tocheck = self.all_hostnames()
 645 #        self.scan_publicKeys(tocheck)
 646         utils.header("checking ssh access to root context on nodes %r"%tocheck)
 647         vservername=self.vservername
 648         while tocheck:
 649             for hostname in tocheck:
 650                 # try to run 'hostname' in the node
 651                 # using locally cached keys - assuming we've run testplc.fetch_keys()
 652                 local_key = "keys/%(vservername)s.rsa"%locals()
 653                 command = TestSsh (hostname,key=local_key).actual_command("hostname")
 654                 # don't spam logs - show the command only after the grace period
 655                 if datetime.datetime.now() > graceout:
 656                     success=utils.system(command)
 657                 else:
 658                     success=os.system(command)
 659                 if success==0:
 660                     utils.header('Successfully entered root@%s'%hostname)
 661                     # refresh tocheck
 662                     tocheck.remove(hostname)
 663                 else:
 664                     # we will have tried real nodes once, in case they're up - but if not, just skip
 665                     (site_spec,node_spec)=self.locate_hostname(hostname)
 666                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 667                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
 668                         tocheck.remove(hostname)
 669             if  not tocheck:
 670                 return True
 671             if datetime.datetime.now() > timeout:
 672                 for hostname in tocheck:
 673                     utils.header("FAILURE to ssh into %s"%hostname)
 674                 return False
 675             # otherwise, sleep for a while
 676             time.sleep(period)
 677         # only useful in empty plcs
 678         return True
 679
 680     def nodes_ssh(self):
 681         return self.do_nodes_ssh(minutes=30,gracetime=10)
 682
    # the steps below are placeholders: node_mapper ignores their body, and replaces
    # each of them with a step that calls the TestNode method of the same name
    # on every node, and returns True only if that succeeds on all nodes
    @node_mapper
    def init_node (self): pass
    @node_mapper
    def bootcd (self): pass
    @node_mapper
    def configure_qemu (self): pass
    @node_mapper
    def reinstall_node (self): pass
    @node_mapper
    def export_qemu (self): pass
 693
 694     ### check sanity : invoke scripts from qaapi/qa/tests/{node,slice}
 695     def check_sanity_node (self):
 696         return self.locate_first_node().check_sanity()
 697     def check_sanity_sliver (self) :
 698         return self.locate_first_sliver().check_sanity()
 699
 700     def check_sanity (self):
 701         return self.check_sanity_node() and self.check_sanity_sliver()
 702
 703     ### initscripts
 704     def do_check_initscripts(self):
 705         overall = True
 706         for slice_spec in self.plc_spec['slices']:
 707             if not slice_spec.has_key('initscriptname'):
 708                 continue
 709             initscript=slice_spec['initscriptname']
 710             for nodename in slice_spec['nodenames']:
 711                 (site,node) = self.locate_node (nodename)
 712                 # xxx - passing the wrong site - probably harmless
 713                 test_site = TestSite (self,site)
 714                 test_slice = TestSlice (self,test_site,slice_spec)
 715                 test_node = TestNode (self,test_site,node)
 716                 test_sliver = TestSliver (self, test_node, test_slice)
 717                 if not test_sliver.check_initscript(initscript):
 718                     overall = False
 719         return overall
 720
 721     def check_initscripts(self):
 722             return self.do_check_initscripts()
 723
 724     def initscripts (self):
 725         for initscript in self.plc_spec['initscripts']:
 726             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
 727             self.apiserver.AddInitScript(self.auth_root(),initscript['initscript_fields'])
 728         return True
 729
 730     def clean_initscripts (self):
 731         for initscript in self.plc_spec['initscripts']:
 732             initscript_name = initscript['initscript_fields']['name']
 733             print('Attempting to delete %s in plc %s'%(initscript_name,self.plc_spec['name']))
 734             try:
 735                 self.apiserver.DeleteInitScript(self.auth_root(),initscript_name)
 736                 print initscript_name,'deleted'
 737             except:
 738                 print 'deletion went wrong - probably did not exist'
 739         return True
 740
 741     ### manage slices
 742     def slices (self):
 743         return self.do_slices()
 744
 745     def clean_slices (self):
 746         return self.do_slices("delete")
 747
 748     def do_slices (self,  action="add"):
 749         for slice in self.plc_spec['slices']:
 750             site_spec = self.locate_site (slice['sitename'])
 751             test_site = TestSite(self,site_spec)
 752             test_slice=TestSlice(self,test_site,slice)
 753             if action != "add":
 754                 utils.header("Deleting slices in site %s"%test_site.name())
 755                 test_slice.delete_slice()
 756             else:
 757                 utils.pprint("Creating slice",slice)
 758                 test_slice.create_slice()
 759                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
 760         return True
 761
 762     @slice_mapper_options
 763     def check_slice(self): pass
 764
 765     @node_mapper
 766     def clear_known_hosts (self): pass
 767
 768     @node_mapper
 769     def start_node (self) : pass
 770
 771     def check_tcp (self):
 772         specs = self.plc_spec['tcp_test']
 773         overall=True
 774         for spec in specs:
 775             port = spec['port']
 776             # server side
 777             s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
 778             if not s_test_sliver.run_tcp_server(port,timeout=10):
 779                 overall=False
 780                 break
 781
 782             # idem for the client side
 783             c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
 784             if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
 785                 overall=False
 786         return overall
 787
 788     def plcsh_stress_test (self):
 789         # install the stress-test in the plc image
 790         location = "/usr/share/plc_api/plcsh-stress-test.py"
 791         remote="/vservers/%s/%s"%(self.vservername,location)
 792         self.test_ssh.copy_abs("plcsh-stress-test.py",remote)
 793         command = location
 794         command += " -- --check"
 795         if self.options.small_test:
 796             command +=  " --tiny"
 797         return ( self.run_in_guest(command) == 0)
 798
 799     def gather_logs (self):
 800         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
 801         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
 802         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
 803         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
 804         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
 805         # (1.a)
 806         print "-------------------- TestPlc.gather_logs : PLC's /var/log"
 807         self.gather_var_logs ()
 808         # (1.b)
 809         print "-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/"
 810         self.gather_pgsql_logs ()
 811         # (2)
 812         print "-------------------- TestPlc.gather_logs : nodes's QEMU logs"
 813         for site_spec in self.plc_spec['sites']:
 814             test_site = TestSite (self,site_spec)
 815             for node_spec in site_spec['nodes']:
 816                 test_node=TestNode(self,test_site,node_spec)
 817                 test_node.gather_qemu_logs()
 818         # (3)
 819         print "-------------------- TestPlc.gather_logs : nodes's /var/log"
 820         self.gather_nodes_var_logs()
 821         # (4)
 822         print "-------------------- TestPlc.gather_logs : sample sliver's /var/log"
 823         self.gather_slivers_var_logs()
 824         return True
 825
 826     def gather_slivers_var_logs(self):
 827         for test_sliver in self.all_sliver_objs():
 828             remote = test_sliver.tar_var_logs()
 829             utils.system("mkdir -p logs/sliver.var-log.%s"%test_sliver.name())
 830             command = remote + " | tar -C logs/sliver.var-log.%s -xf -"%test_sliver.name()
 831             utils.system(command)
 832         return True
 833
 834     def gather_var_logs (self):
 835         utils.system("mkdir -p logs/myplc.var-log.%s"%self.name())
 836         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
 837         command = to_plc + "| tar -C logs/myplc.var-log.%s -xf -"%self.name()
 838         utils.system(command)
 839         command = "chmod a+r,a+x logs/myplc.var-log.%s/httpd"%self.name()
 840         utils.system(command)
 841
 842     def gather_pgsql_logs (self):
 843         utils.system("mkdir -p logs/myplc.pgsql-log.%s"%self.name())
 844         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
 845         command = to_plc + "| tar -C logs/myplc.pgsql-log.%s -xf -"%self.name()
 846         utils.system(command)
 847
 848     def gather_nodes_var_logs (self):
 849         for site_spec in self.plc_spec['sites']:
 850             test_site = TestSite (self,site_spec)
 851             for node_spec in site_spec['nodes']:
 852                 test_node=TestNode(self,test_site,node_spec)
 853                 test_ssh = TestSsh (test_node.name(),key="/etc/planetlab/root_ssh_key.rsa")
 854                 to_plc = self.actual_command_in_guest ( test_ssh.actual_command("tar -C /var/log -cf - ."))
 855                 command = to_plc + "| tar -C logs/node.var-log.%s -xf -"%test_node.name()
 856                 utils.system("mkdir -p logs/node.var-log.%s"%test_node.name())
 857                 utils.system(command)
 858
 859
 860     # returns the filename to use for sql dump/restore, using options.dbname if set
 861     def dbfile (self, database):
 862         # uses options.dbname if it is found
 863         try:
 864             name=self.options.dbname
 865             if not isinstance(name,StringTypes):
 866                 raise Exception
 867         except:
 868             t=datetime.datetime.now()
 869             d=t.date()
 870             name=str(d)
 871         return "/root/%s-%s.sql"%(database,name)
 872
 873     def db_dump(self):
 874         dump=self.dbfile("planetab4")
 875         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
 876         utils.header('Dumped planetlab4 database in %s'%dump)
 877         return True
 878
 879     def db_restore(self):
 880         dump=self.dbfile("planetab4")
 881         ##stop httpd service
 882         self.run_in_guest('service httpd stop')
 883         # xxx - need another wrapper
 884         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
 885         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
 886         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
 887         ##starting httpd service
 888         self.run_in_guest('service httpd start')
 889
 890         utils.header('Database restored from ' + dump)
 891
 892     @standby_generic
 893     def standby_1(): pass
 894     @standby_generic
 895     def standby_2(): pass
 896     @standby_generic
 897     def standby_3(): pass
 898     @standby_generic
 899     def standby_4(): pass
 900     @standby_generic
 901     def standby_5(): pass
 902     @standby_generic
 903     def standby_6(): pass
 904     @standby_generic
 905     def standby_7(): pass
 906     @standby_generic
 907     def standby_8(): pass
 908     @standby_generic
 909     def standby_9(): pass
 910     @standby_generic
 911     def standby_10(): pass
 912     @standby_generic
 913     def standby_11(): pass
 914     @standby_generic
 915     def standby_12(): pass
 916     @standby_generic
 917     def standby_13(): pass
 918     @standby_generic
 919     def standby_14(): pass
 920     @standby_generic
 921     def standby_15(): pass
 922     @standby_generic
 923     def standby_16(): pass
 924     @standby_generic
 925     def standby_17(): pass
 926     @standby_generic
 927     def standby_18(): pass
 928     @standby_generic
 929     def standby_19(): pass
 930     @standby_generic
 931     def standby_20(): pass
 932