system/TestPlc.py

   1 # $Id$
   2 import os, os.path
   3 import datetime
   4 import time
   5 import sys
   6 import traceback
   7 from types import StringTypes
   8 import socket
   9
  10 import utils
  11 from TestSite import TestSite
  12 from TestNode import TestNode
  13 from TestUser import TestUser
  14 from TestKey import TestKey
  15 from TestSlice import TestSlice
  16 from TestSliver import TestSliver
  17 from TestBox import TestBox
  18 from TestSsh import TestSsh
  19 from TestApiserver import TestApiserver
  20
  21 # step methods must take (self) and return a boolean (options is a member of the class)
  22
  23 def standby(minutes,dry_run):
  24     utils.header('Entering StandBy for %d mn'%minutes)
  25     if dry_run:
  26         print 'dry_run'
  27     else:
  28         time.sleep(60*minutes)
  29     return True
  30
  31 def standby_generic (func):
  32     def actual(self):
  33         minutes=int(func.__name__.split("_")[1])
  34         return standby(minutes,self.options.dry_run)
  35     return actual
  36
  37 def node_mapper (method):
  38     def actual(self):
  39         overall=True
  40         node_method = TestNode.__dict__[method.__name__]
  41         for site_spec in self.plc_spec['sites']:
  42             test_site = TestSite (self,site_spec)
  43             for node_spec in site_spec['nodes']:
  44                 test_node = TestNode (self,test_site,node_spec)
  45                 if not node_method(test_node): overall=False
  46         return overall
  47     return actual
  48
  49 def slice_mapper_options (method):
  50     def actual(self):
  51         overall=True
  52         slice_method = TestSlice.__dict__[method.__name__]
  53         for slice_spec in self.plc_spec['slices']:
  54             site_spec = self.locate_site (slice_spec['sitename'])
  55             test_site = TestSite(self,site_spec)
  56             test_slice=TestSlice(self,test_site,slice_spec)
  57             if not slice_method(test_slice,self.options): overall=False
  58         return overall
  59     return actual
  60
# marker inserted in the step lists: printable_steps turns it into a
# backslash-newline, and valid_step rejects it as a step name
SEP='<sep>'
  62
  63 class TestPlc:
  64
    # names of steps, split into groups by SEP markers (see printable_steps)
    # NOTE(review): presumably the steps run when none is selected explicitly - confirm against the caller
    default_steps = ['display','uninstall','install','install_rpm',
                     'configure', 'start', 'fetch_keys', SEP,
                     'store_keys', 'clear_known_hosts', 'initscripts', SEP,
                     'sites', 'nodes', 'slices', 'nodegroups', SEP,
                     'init_node','bootcd', 'configure_qemu', 'export_qemu',
                     'kill_all_qemus', 'reinstall_node','start_node', SEP,
                     'nodes_debug_ssh', 'nodes_boot_ssh', 'check_slice', 'check_initscripts', SEP,
                     'check_tcp', 'plcsh_stress_test', SEP,
                     'force_gather_logs', 'force_kill_qemus', 'force_record_tracker','force_free_tracker' ]
    # NOTE(review): presumably extra steps that only run on demand - confirm against the caller
    # the last entry is not a method name; it looks like a usage hint for the standby_<n> steps
    other_steps = [ 'stop_all_vservers','fresh_install', 'cache_rpm', 'stop', 'vs_start', SEP,
                    'check_sanity', SEP,
                    'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
                    'clean_sites', 'clean_nodes',
                    'clean_slices', 'clean_keys', SEP,
                    'show_boxes', 'list_all_qemus', 'list_qemus', SEP,
                    'db_dump' , 'db_restore', 'cleanup_trackers', 'cleanup_all_trackers',
                    'standby_1 through 20'
                    ]
  83
  84     @staticmethod
  85     def printable_steps (list):
  86         return " ".join(list).replace(" "+SEP+" "," \\\n")
  87     @staticmethod
  88     def valid_step (step):
  89         return step != SEP
  90
  91     def __init__ (self,plc_spec,options):
  92         self.plc_spec=plc_spec
  93         self.options=options
  94         self.test_ssh=TestSsh(self.plc_spec['hostname'],self.options.buildname)
  95         try:
  96             self.vserverip=plc_spec['vserverip']
  97             self.vservername=plc_spec['vservername']
  98             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
  99             self.vserver=True
 100         except:
 101             raise Exception,'chroot-based myplc testing is deprecated'
 102         self.apiserver=TestApiserver(self.url,options.dry_run)
 103
 104     def name(self):
 105         name=self.plc_spec['name']
 106         return "%s.%s"%(name,self.vservername)
 107
 108     def hostname(self):
 109         return self.plc_spec['hostname']
 110
 111     def is_local (self):
 112         return self.test_ssh.is_local()
 113
 114     # define the API methods on this object through xmlrpc
 115     # would help, but not strictly necessary
 116     def connect (self):
 117         pass
 118
 119     def actual_command_in_guest (self,command):
 120         return self.test_ssh.actual_command(self.host_to_guest(command))
 121
 122     def start_guest (self):
 123       return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))
 124
 125     def run_in_guest (self,command):
 126         return utils.system(self.actual_command_in_guest(command))
 127
 128     def run_in_host (self,command):
 129         return self.test_ssh.run_in_buildname(command)
 130
 131     #command gets run in the vserver
 132     def host_to_guest(self,command):
 133         return "vserver %s exec %s"%(self.vservername,command)
 134
    # builds the command that, run in the host, starts the vserver
 136     def start_guest_in_host(self):
 137         return "vserver %s start"%(self.vservername)
 138
 139     # xxx quick n dirty
 140     def run_in_guest_piped (self,local,remote):
 141         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),keep_stdin=True))
 142
 143     def auth_root (self):
 144         return {'Username':self.plc_spec['PLC_ROOT_USER'],
 145                 'AuthMethod':'password',
 146                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
 147                 'Role' : self.plc_spec['role']
 148                 }
 149     def locate_site (self,sitename):
 150         for site in self.plc_spec['sites']:
 151             if site['site_fields']['name'] == sitename:
 152                 return site
 153             if site['site_fields']['login_base'] == sitename:
 154                 return site
 155         raise Exception,"Cannot locate site %s"%sitename
 156
 157     def locate_node (self,nodename):
 158         for site in self.plc_spec['sites']:
 159             for node in site['nodes']:
 160                 if node['name'] == nodename:
 161                     return (site,node)
 162         raise Exception,"Cannot locate node %s"%nodename
 163
 164     def locate_hostname (self,hostname):
 165         for site in self.plc_spec['sites']:
 166             for node in site['nodes']:
 167                 if node['node_fields']['hostname'] == hostname:
 168                     return (site,node)
 169         raise Exception,"Cannot locate hostname %s"%hostname
 170
 171     def locate_key (self,keyname):
 172         for key in self.plc_spec['keys']:
 173             if key['name'] == keyname:
 174                 return key
 175         raise Exception,"Cannot locate key %s"%keyname
 176
 177     def locate_slice (self, slicename):
 178         for slice in self.plc_spec['slices']:
 179             if slice['slice_fields']['name'] == slicename:
 180                 return slice
 181         raise Exception,"Cannot locate slice %s"%slicename
 182
 183     def all_sliver_objs (self):
 184         result=[]
 185         for slice_spec in self.plc_spec['slices']:
 186             slicename = slice_spec['slice_fields']['name']
 187             for nodename in slice_spec['nodenames']:
 188                 result.append(self.locate_sliver_obj (nodename,slicename))
 189         return result
 190
 191     def locate_sliver_obj (self,nodename,slicename):
 192         (site,node) = self.locate_node(nodename)
 193         slice = self.locate_slice (slicename)
 194         # build objects
 195         test_site = TestSite (self, site)
 196         test_node = TestNode (self, test_site,node)
 197         # xxx the slice site is assumed to be the node site - mhh - probably harmless
 198         test_slice = TestSlice (self, test_site, slice)
 199         return TestSliver (self, test_node, test_slice)
 200
 201     def locate_first_node(self):
 202         nodename=self.plc_spec['slices'][0]['nodenames'][0]
 203         (site,node) = self.locate_node(nodename)
 204         test_site = TestSite (self, site)
 205         test_node = TestNode (self, test_site,node)
 206         return test_node
 207
 208     def locate_first_sliver (self):
 209         slice_spec=self.plc_spec['slices'][0]
 210         slicename=slice_spec['slice_fields']['name']
 211         nodename=slice_spec['nodenames'][0]
 212         return self.locate_sliver_obj(nodename,slicename)
 213
 214     # all different hostboxes used in this plc
 215     def gather_hostBoxes(self):
 216         # maps on sites and nodes, return [ (host_box,test_node) ]
 217         tuples=[]
 218         for site_spec in self.plc_spec['sites']:
 219             test_site = TestSite (self,site_spec)
 220             for node_spec in site_spec['nodes']:
 221                 test_node = TestNode (self, test_site, node_spec)
 222                 if not test_node.is_real():
 223                     tuples.append( (test_node.host_box(),test_node) )
 224         # transform into a dict { 'host_box' -> [ test_node .. ] }
 225         result = {}
 226         for (box,node) in tuples:
 227             if not result.has_key(box):
 228                 result[box]=[node]
 229             else:
 230                 result[box].append(node)
 231         return result
 232
 233     # a step for checking this stuff
 234     def show_boxes (self):
 235         for (box,nodes) in self.gather_hostBoxes().iteritems():
 236             print box,":"," + ".join( [ node.name() for node in nodes ] )
 237         return True
 238
 239     # make this a valid step
 240     def kill_all_qemus(self):
 241         # this is the brute force version, kill all qemus on that host box
 242         for (box,nodes) in self.gather_hostBoxes().iteritems():
 243             # pass the first nodename, as we don't push template-qemu on testboxes
 244             nodedir=nodes[0].nodedir()
 245             TestBox(box,self.options.buildname).kill_all_qemus(nodedir)
 246         return True
 247
 248     # make this a valid step
 249     def list_all_qemus(self):
 250         for (box,nodes) in self.gather_hostBoxes().iteritems():
 251             # this is the brute force version, kill all qemus on that host box
 252             TestBox(box,self.options.buildname).list_all_qemus()
 253         return True
 254
    # list only the right qemus
 256     def list_qemus(self):
 257         for (box,nodes) in self.gather_hostBoxes().iteritems():
 258             # the fine-grain version
 259             for node in nodes:
 260                 node.list_qemu()
 261         return True
 262
 263     # kill only the right qemus
 264     def kill_qemus(self):
 265         for (box,nodes) in self.gather_hostBoxes().iteritems():
 266             # the fine-grain version
 267             for node in nodes:
 268                 node.kill_qemu()
 269         return True
 270
 271     def display (self):
 272         utils.show_plc_spec (self.plc_spec)
 273         return True
 274
 275     ### utility methods for handling the pool of IP addresses allocated to plcs
 276     # Logic
 277     # (*) running plcs are recorded in the file named ~/running-test-plcs
 278     # (*) this file contains a line for each running plc, older first
 279     # (*) each line contains the vserver name + the hostname of the (vserver) testbox where it sits
 280     # (*) the free_tracker method performs a vserver stop on the oldest entry
 281     # (*) the record_tracker method adds an entry at the bottom of the file
    # (*) the cleanup_trackers method stops all known vservers and removes the tracker file
 283
    # path of the tracker file, under $HOME; computed when the class is defined,
    # so a missing HOME environment variable raises KeyError at that point
    TRACKER_FILE=os.environ['HOME']+"/running-test-plcs"
    # how many concurrent plcs are we keeping alive - adjust with the IP pool size
    # (used by free_tracker when it is not given an explicit limit)
    TRACKER_KEEP_VSERVERS = 12
 287
 288     def record_tracker (self):
 289         try:
 290             lines=file(TestPlc.TRACKER_FILE).readlines()
 291         except:
 292             lines=[]
 293
 294         this_line="%s %s\n"%(self.vservername,self.test_ssh.hostname)
 295         for line in lines:
 296             if line==this_line:
 297                 print 'this vserver is already included in %s'%TestPlc.TRACKER_FILE
 298                 return True
 299         if self.options.dry_run:
 300             print 'dry_run: record_tracker - skipping tracker update'
 301             return True
 302         tracker=file(TestPlc.TRACKER_FILE,"w")
 303         for line in lines+[this_line]:
 304             tracker.write(line)
 305         tracker.close()
 306         print "Recorded %s in running plcs on host %s"%(self.vservername,self.test_ssh.hostname)
 307         return True
 308
 309     def free_tracker (self, keep_vservers=None):
 310         if not keep_vservers: keep_vservers=TestPlc.TRACKER_KEEP_VSERVERS
 311         try:
 312             lines=file(TestPlc.TRACKER_FILE).readlines()
 313         except:
 314             print 'dry_run: free_tracker - skipping tracker update'
 315             return True
 316         how_many = len(lines) - keep_vservers
 317         # nothing todo until we have more than keep_vservers in the tracker
 318         if how_many <= 0:
 319             print 'free_tracker : limit %d not reached'%keep_vservers
 320             return True
 321         to_stop = lines[:how_many]
 322         to_keep = lines[how_many:]
 323         for line in to_stop:
 324             print '>%s<'%line
 325             [vname,hostname]=line.split()
 326             command=TestSsh(hostname).actual_command("vserver --silent %s stop"%vname)
 327             utils.system(command)
 328         if self.options.dry_run:
 329             print 'dry_run: free_tracker would stop %d vservers'%len(to_stop)
 330             for line in to_stop: print line,
 331             print 'dry_run: free_tracker would keep %d vservers'%len(to_keep)
 332             for line in to_keep: print line,
 333             return True
 334         print "Storing %d remaining vservers in %s"%(len(to_keep),TestPlc.TRACKER_FILE)
 335         tracker=open(TestPlc.TRACKER_FILE,"w")
 336         for line in to_keep:
 337             tracker.write(line)
 338         tracker.close()
 339         return True
 340
 341     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 342     def cleanup_trackers (self):
 343         try:
 344             for line in TestPlc.TRACKER_FILE.readlines():
 345                 [vname,hostname]=line.split()
 346                 stop="vserver --silent %s stop"%vname
 347                 command=TestSsh(hostname).actual_command(stop)
 348                 utils.system(command)
 349             clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 350             utils.system(self.test_ssh.actual_command(clean_tracker))
 351         except:
 352             return True
 353
 354     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 355     def cleanup_all_trackers (self):
 356         stop_all = "cd /vservers ; for i in * ; do vserver --silent $i stop ; done"
 357         utils.system(self.test_ssh.actual_command(stop_all))
 358         clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 359         utils.system(self.test_ssh.actual_command(clean_tracker))
 360         return True
 361
 362     def uninstall(self):
 363         self.run_in_host("vserver --silent %s delete"%self.vservername)
 364         return True
 365
 366     ### install
 367     def install(self):
 368         if self.is_local():
 369             # a full path for the local calls
 370             build_dir=os.path.dirname(sys.argv[0])
 371             # sometimes this is empty - set to "." in such a case
 372             if not build_dir: build_dir="."
 373             build_dir += "/build"
 374         else:
 375             # use a standard name - will be relative to remote buildname
 376             build_dir="build"
 377         # run checkout in any case - would do an update if already exists
 378         build_checkout = "svn checkout %s %s"%(self.options.build_url,build_dir)
 379         if self.run_in_host(build_checkout) != 0:
 380             return False
 381         # the repo url is taken from arch-rpms-url
 382         # with the last step (i386.) removed
 383         repo_url = self.options.arch_rpms_url
 384         for level in [ 'arch' ]:
 385             repo_url = os.path.dirname(repo_url)
 386         # pass the vbuild-nightly options to vtest-init-vserver
 387         test_env_options=""
 388         test_env_options += " -p %s"%self.options.personality
 389         test_env_options += " -d %s"%self.options.pldistro
 390         test_env_options += " -f %s"%self.options.fcdistro
 391         script="vtest-init-vserver.sh"
 392         vserver_name = self.vservername
 393         vserver_options="--netdev eth0 --interface %s"%self.vserverip
 394         try:
 395             vserver_hostname=socket.gethostbyaddr(self.vserverip)[0]
 396             vserver_options += " --hostname %s"%vserver_hostname
 397         except:
 398             pass
 399         create_vserver="%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s"%locals()
 400         return self.run_in_host(create_vserver) == 0
 401
 402     ### install_rpm
 403     def install_rpm(self):
 404         return self.run_in_guest("yum -y install myplc-native")==0 \
 405             and self.run_in_guest("yum -y install noderepo-%s-%s"%(self.options.pldistro,self.options.arch))==0
 406
 407     ###
 408     def configure(self):
 409         tmpname='%s.plc-config-tty'%(self.name())
 410         fileconf=open(tmpname,'w')
 411         for var in [ 'PLC_NAME',
 412                      'PLC_ROOT_PASSWORD',
 413                      'PLC_ROOT_USER',
 414                      'PLC_MAIL_ENABLED',
 415                      'PLC_MAIL_SUPPORT_ADDRESS',
 416                      'PLC_DB_HOST',
 417                      'PLC_API_HOST',
 418                      'PLC_WWW_HOST',
 419                      'PLC_BOOT_HOST',
 420                      'PLC_NET_DNS1',
 421                      'PLC_NET_DNS2']:
 422             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
 423         fileconf.write('w\n')
 424         fileconf.write('q\n')
 425         fileconf.close()
 426         utils.system('cat %s'%tmpname)
 427         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
 428         utils.system('rm %s'%tmpname)
 429         return True
 430
 431     def start(self):
 432         self.run_in_guest('service plc start')
 433         return True
 434
 435     def stop(self):
 436         self.run_in_guest('service plc stop')
 437         return True
 438
 439     def vs_start (self):
 440         self.start_guest()
 441         return True
 442
 443     # stores the keys from the config for further use
 444     def store_keys(self):
 445         for key_spec in self.plc_spec['keys']:
 446                 TestKey(self,key_spec).store_key()
 447         return True
 448
 449     def clean_keys(self):
 450         utils.system("rm -rf %s/keys/"%os.path(sys.argv[0]))
 451
 452     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 453     # for later direct access to the nodes
 454     def fetch_keys(self):
 455         dir="./keys"
 456         if not os.path.isdir(dir):
 457             os.mkdir(dir)
 458         vservername=self.vservername
 459         overall=True
 460         prefix = 'root_ssh_key'
 461         for ext in [ 'pub', 'rsa' ] :
 462             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 463             dst="keys/%(vservername)s.%(ext)s"%locals()
 464             if self.test_ssh.fetch(src,dst) != 0: overall=False
 465         prefix = 'debug_ssh_key'
 466         for ext in [ 'pub', 'rsa' ] :
 467             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 468             dst="keys/%(vservername)s-debug.%(ext)s"%locals()
 469             if self.test_ssh.fetch(src,dst) != 0: overall=False
 470         return overall
 471
 472     def sites (self):
 473         return self.do_sites()
 474
 475     def clean_sites (self):
 476         return self.do_sites(action="delete")
 477
 478     def do_sites (self,action="add"):
 479         for site_spec in self.plc_spec['sites']:
 480             test_site = TestSite (self,site_spec)
 481             if (action != "add"):
 482                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
 483                 test_site.delete_site()
 484                 # deleted with the site
 485                 #test_site.delete_users()
 486                 continue
 487             else:
 488                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
 489                 test_site.create_site()
 490                 test_site.create_users()
 491         return True
 492
 493     def clean_all_sites (self):
 494         print 'auth_root',self.auth_root()
 495         site_ids = [s['site_id'] for s in self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])]
 496         for site_id in site_ids:
 497             print 'Deleting site_id',site_id
 498             self.apiserver.DeleteSite(self.auth_root(),site_id)
 499
 500     def nodes (self):
 501         return self.do_nodes()
 502     def clean_nodes (self):
 503         return self.do_nodes(action="delete")
 504
 505     def do_nodes (self,action="add"):
 506         for site_spec in self.plc_spec['sites']:
 507             test_site = TestSite (self,site_spec)
 508             if action != "add":
 509                 utils.header("Deleting nodes in site %s"%test_site.name())
 510                 for node_spec in site_spec['nodes']:
 511                     test_node=TestNode(self,test_site,node_spec)
 512                     utils.header("Deleting %s"%test_node.name())
 513                     test_node.delete_node()
 514             else:
 515                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
 516                 for node_spec in site_spec['nodes']:
 517                     utils.pprint('Creating node %s'%node_spec,node_spec)
 518                     test_node = TestNode (self,test_site,node_spec)
 519                     test_node.create_node ()
 520         return True
 521
 522     def nodegroups (self):
 523         return self.do_nodegroups("add")
 524     def clean_nodegroups (self):
 525         return self.do_nodegroups("delete")
 526
 527     # create nodegroups if needed, and populate
    def do_nodegroups (self, action="add"):
        """Create and populate (action "add") or delete (any other action)
        the nodegroups named in the 'nodegroups' entry of the node specs.

        Returns True unless a tag check or a nodegroup deletion failed.
        """
        # 1st pass to scan contents
        # groups_dict maps each nodegroup name to the names of its nodes
        groups_dict = {}
        for site_spec in self.plc_spec['sites']:
            test_site = TestSite (self,site_spec)
            for node_spec in site_spec['nodes']:
                test_node=TestNode (self,test_site,node_spec)
                if node_spec.has_key('nodegroups'):
                    nodegroupnames=node_spec['nodegroups']
                    # a single name is accepted in place of a list of names
                    if isinstance(nodegroupnames,StringTypes):
                        nodegroupnames = [ nodegroupnames ]
                    for nodegroupname in nodegroupnames:
                        if not groups_dict.has_key(nodegroupname):
                            groups_dict[nodegroupname]=[]
                        groups_dict[nodegroupname].append(test_node.name())
        auth=self.auth_root()
        overall = True
        for (nodegroupname,group_nodes) in groups_dict.iteritems():
            if action == "add":
                print 'nodegroups:','dealing with nodegroup',nodegroupname,'on nodes',group_nodes
                # first, check if the nodetagtype is here
                tag_types = self.apiserver.GetTagTypes(auth,{'tagname':nodegroupname})
                if tag_types:
                    tag_type_id = tag_types[0]['tag_type_id']
                else:
                    tag_type_id = self.apiserver.AddTagType(auth,
                                                            {'tagname':nodegroupname,
                                                             'description': 'for nodegroup %s'%nodegroupname,
                                                             'category':'test',
                                                             'min_role_id':10})
                print 'located tag (type)',nodegroupname,'as',tag_type_id
                # create nodegroup
                nodegroups = self.apiserver.GetNodeGroups (auth, {'groupname':nodegroupname})
                if not nodegroups:
                    self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
                    print 'created nodegroup',nodegroupname,'from tagname',nodegroupname,'and value','yes'
                # set node tag on all nodes, value='yes'
                for nodename in group_nodes:
                    # a failure to add the tag is only reported; the check below decides
                    try:
                        self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
                    except:
                        traceback.print_exc()
                        print 'node',nodename,'seems to already have tag',nodegroupname
                    # check anyway
                    try:
                        expect_yes = self.apiserver.GetNodeTags(auth,
                                                                {'hostname':nodename,
                                                                 'tagname':nodegroupname},
                                                                ['tagvalue'])[0]['tagvalue']
                        if expect_yes != "yes":
                            print 'Mismatch node tag on node',nodename,'got',expect_yes
                            overall=False
                    except:
                        # in dry_run mode a failed lookup is not counted as an error
                        if not self.options.dry_run:
                            print 'Cannot find tag',nodegroupname,'on node',nodename
                            overall = False
            else:
                try:
                    print 'cleaning nodegroup',nodegroupname
                    self.apiserver.DeleteNodeGroup(auth,nodegroupname)
                except:
                    traceback.print_exc()
                    overall=False
        return overall
 592
 593     def all_hostnames (self) :
 594         hostnames = []
 595         for site_spec in self.plc_spec['sites']:
 596             hostnames += [ node_spec['node_fields']['hostname'] \
 597                            for node_spec in site_spec['nodes'] ]
 598         return hostnames
 599
 600     # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
    def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15):
        """Poll GetNodes until every node reports target_boot_state.

        target_boot_state -- the boot_state value to wait for
        timeout_minutes   -- return False once this delay has expired
        silent_minutes    -- no 'still in ...' message before that delay
        period            -- seconds slept between two polls

        Returns True at once in dry_run mode, True when no node is left
        to check, False on timeout.
        """
        if self.options.dry_run:
            print 'dry_run'
            return True
        # compute timeout
        timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
        graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
        # the nodes that haven't checked yet - start with a full list and shrink over time
        tocheck = self.all_hostnames()
        utils.header("checking nodes %r"%tocheck)
        # create a dict hostname -> status
        status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
        while tocheck:
            # get their status
            tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
            # update status
            for array in tocheck_status:
                hostname=array['hostname']
                boot_state=array['boot_state']
                if boot_state == target_boot_state:
                    utils.header ("%s has reached the %s state"%(hostname,target_boot_state))
                else:
                    # if it's a real node, never mind
                    (site_spec,node_spec)=self.locate_hostname(hostname)
                    if TestNode.is_real_model(node_spec['node_fields']['model']):
                        utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
                        # let's cheat
                        boot_state = target_boot_state
                    elif datetime.datetime.now() > graceout:
                        utils.header ("%s still in '%s' state"%(hostname,boot_state))
                        # timedelta(1) is one day: the next such message is pushed a day ahead
                        graceout=datetime.datetime.now()+datetime.timedelta(1)
                status[hostname] = boot_state
            # refresh tocheck
            # hostnames missing from the answer keep their 'undef' status and stay in the list
            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ]
            if not tocheck:
                return True
            if datetime.datetime.now() > timeout:
                for hostname in tocheck:
                    utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
                return False
            # otherwise, sleep for a while
            time.sleep(period)
        # only useful in empty plcs
        return True
 645
 646     def nodes_booted(self):
 647         return self.nodes_check_boot_state('boot',timeout_minutes=20,silent_minutes=15)
 648
 649     def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=20):
 650         # compute timeout
 651         timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
 652         graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
 653         vservername=self.vservername
 654         if debug:
 655             message="debug"
 656             local_key = "keys/%(vservername)s-debug.rsa"%locals()
 657         else:
 658             message="boot"
 659             local_key = "keys/%(vservername)s.rsa"%locals()
 660         tocheck = self.all_hostnames()
 661         utils.header("checking ssh access (expected in %s mode) to nodes %r"%(message,tocheck))
 662         utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
 663                          (timeout_minutes,silent_minutes,period))
 664         while tocheck:
 665             for hostname in tocheck:
 666                 # try to run 'hostname' in the node
 667                 command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a")
 668                 # don't spam logs - show the command only after the grace period
 669                 if datetime.datetime.now() > graceout:
 670                     success=utils.system(command)
 671                 else:
 672                     # truly silent, just print out a dot to show we're alive
 673                     print '.',
 674                     sys.stdout.flush()
 675                     command += " 2>/dev/null"
 676                     if self.options.dry_run:
 677                         print 'dry_run',command
 678                         success=0
 679                     else:
 680                         success=os.system(command)
 681                 if success==0:
 682                     utils.header('Successfully entered root@%s (%s)'%(hostname,message))
 683                     # refresh tocheck
 684                     tocheck.remove(hostname)
 685                 else:
 686                     # we will have tried real nodes once, in case they're up - but if not, just skip
 687                     (site_spec,node_spec)=self.locate_hostname(hostname)
 688                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 689                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
 690                         tocheck.remove(hostname)
 691             if  not tocheck:
 692                 return True
 693             if datetime.datetime.now() > timeout:
 694                 for hostname in tocheck:
 695                     utils.header("FAILURE to ssh into %s"%hostname)
 696                 return False
 697             # otherwise, sleep for a while
 698             time.sleep(period)
 699         # only useful in empty plcs
 700         return True
 701
 702     def nodes_debug_ssh(self):
 703         return self.check_nodes_ssh(debug=True,timeout_minutes=30,silent_minutes=10)
 704
 705     def nodes_boot_ssh(self):
 706         return self.check_nodes_ssh(debug=False,timeout_minutes=30,silent_minutes=10)
 707
 708     @node_mapper
 709     def init_node (self): pass
 710     @node_mapper
 711     def bootcd (self): pass
 712     @node_mapper
 713     def configure_qemu (self): pass
 714     @node_mapper
 715     def reinstall_node (self): pass
 716     @node_mapper
 717     def export_qemu (self): pass
 718
 719     ### check sanity : invoke scripts from qaapi/qa/tests/{node,slice}
 720     def check_sanity_node (self):
 721         return self.locate_first_node().check_sanity()
 722     def check_sanity_sliver (self) :
 723         return self.locate_first_sliver().check_sanity()
 724
 725     def check_sanity (self):
 726         return self.check_sanity_node() and self.check_sanity_sliver()
 727
 728     ### initscripts
 729     def do_check_initscripts(self):
 730         overall = True
 731         for slice_spec in self.plc_spec['slices']:
 732             if not slice_spec.has_key('initscriptname'):
 733                 continue
 734             initscript=slice_spec['initscriptname']
 735             for nodename in slice_spec['nodenames']:
 736                 (site,node) = self.locate_node (nodename)
 737                 # xxx - passing the wrong site - probably harmless
 738                 test_site = TestSite (self,site)
 739                 test_slice = TestSlice (self,test_site,slice_spec)
 740                 test_node = TestNode (self,test_site,node)
 741                 test_sliver = TestSliver (self, test_node, test_slice)
 742                 if not test_sliver.check_initscript(initscript):
 743                     overall = False
 744         return overall
 745
 746     def check_initscripts(self):
 747             return self.do_check_initscripts()
 748
 749     def initscripts (self):
 750         for initscript in self.plc_spec['initscripts']:
 751             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
 752             self.apiserver.AddInitScript(self.auth_root(),initscript['initscript_fields'])
 753         return True
 754
 755     def clean_initscripts (self):
 756         for initscript in self.plc_spec['initscripts']:
 757             initscript_name = initscript['initscript_fields']['name']
 758             print('Attempting to delete %s in plc %s'%(initscript_name,self.plc_spec['name']))
 759             try:
 760                 self.apiserver.DeleteInitScript(self.auth_root(),initscript_name)
 761                 print initscript_name,'deleted'
 762             except:
 763                 print 'deletion went wrong - probably did not exist'
 764         return True
 765
 766     ### manage slices
 767     def slices (self):
 768         return self.do_slices()
 769
 770     def clean_slices (self):
 771         return self.do_slices("delete")
 772
 773     def do_slices (self,  action="add"):
 774         for slice in self.plc_spec['slices']:
 775             site_spec = self.locate_site (slice['sitename'])
 776             test_site = TestSite(self,site_spec)
 777             test_slice=TestSlice(self,test_site,slice)
 778             if action != "add":
 779                 utils.header("Deleting slices in site %s"%test_site.name())
 780                 test_slice.delete_slice()
 781             else:
 782                 utils.pprint("Creating slice",slice)
 783                 test_slice.create_slice()
 784                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
 785         return True
 786
 787     @slice_mapper_options
 788     def check_slice(self): pass
 789
 790     @node_mapper
 791     def clear_known_hosts (self): pass
 792
 793     @node_mapper
 794     def start_node (self) : pass
 795
 796     def check_tcp (self):
 797         specs = self.plc_spec['tcp_test']
 798         overall=True
 799         for spec in specs:
 800             port = spec['port']
 801             # server side
 802             s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
 803             if not s_test_sliver.run_tcp_server(port,timeout=10):
 804                 overall=False
 805                 break
 806
 807             # idem for the client side
 808             c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
 809             if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
 810                 overall=False
 811         return overall
 812
 813     def plcsh_stress_test (self):
 814         # install the stress-test in the plc image
 815         location = "/usr/share/plc_api/plcsh-stress-test.py"
 816         remote="/vservers/%s/%s"%(self.vservername,location)
 817         self.test_ssh.copy_abs("plcsh-stress-test.py",remote)
 818         command = location
 819         command += " -- --check"
 820         if self.options.small_test:
 821             command +=  " --tiny"
 822         return ( self.run_in_guest(command) == 0)
 823
 824     def gather_logs (self):
 825         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
 826         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
 827         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
 828         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
 829         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
 830         # (1.a)
 831         print "-------------------- TestPlc.gather_logs : PLC's /var/log"
 832         self.gather_var_logs ()
 833         # (1.b)
 834         print "-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/"
 835         self.gather_pgsql_logs ()
 836         # (2)
 837         print "-------------------- TestPlc.gather_logs : nodes's QEMU logs"
 838         for site_spec in self.plc_spec['sites']:
 839             test_site = TestSite (self,site_spec)
 840             for node_spec in site_spec['nodes']:
 841                 test_node=TestNode(self,test_site,node_spec)
 842                 test_node.gather_qemu_logs()
 843         # (3)
 844         print "-------------------- TestPlc.gather_logs : nodes's /var/log"
 845         self.gather_nodes_var_logs()
 846         # (4)
 847         print "-------------------- TestPlc.gather_logs : sample sliver's /var/log"
 848         self.gather_slivers_var_logs()
 849         return True
 850
 851     def gather_slivers_var_logs(self):
 852         for test_sliver in self.all_sliver_objs():
 853             remote = test_sliver.tar_var_logs()
 854             utils.system("mkdir -p logs/sliver.var-log.%s"%test_sliver.name())
 855             command = remote + " | tar -C logs/sliver.var-log.%s -xf -"%test_sliver.name()
 856             utils.system(command)
 857         return True
 858
 859     def gather_var_logs (self):
 860         utils.system("mkdir -p logs/myplc.var-log.%s"%self.name())
 861         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
 862         command = to_plc + "| tar -C logs/myplc.var-log.%s -xf -"%self.name()
 863         utils.system(command)
 864         command = "chmod a+r,a+x logs/myplc.var-log.%s/httpd"%self.name()
 865         utils.system(command)
 866
 867     def gather_pgsql_logs (self):
 868         utils.system("mkdir -p logs/myplc.pgsql-log.%s"%self.name())
 869         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
 870         command = to_plc + "| tar -C logs/myplc.pgsql-log.%s -xf -"%self.name()
 871         utils.system(command)
 872
 873     def gather_nodes_var_logs (self):
 874         for site_spec in self.plc_spec['sites']:
 875             test_site = TestSite (self,site_spec)
 876             for node_spec in site_spec['nodes']:
 877                 test_node=TestNode(self,test_site,node_spec)
 878                 test_ssh = TestSsh (test_node.name(),key="/etc/planetlab/root_ssh_key.rsa")
 879                 to_plc = self.actual_command_in_guest ( test_ssh.actual_command("tar -C /var/log -cf - ."))
 880                 command = to_plc + "| tar -C logs/node.var-log.%s -xf -"%test_node.name()
 881                 utils.system("mkdir -p logs/node.var-log.%s"%test_node.name())
 882                 utils.system(command)
 883
 884
 885     # returns the filename to use for sql dump/restore, using options.dbname if set
 886     def dbfile (self, database):
 887         # uses options.dbname if it is found
 888         try:
 889             name=self.options.dbname
 890             if not isinstance(name,StringTypes):
 891                 raise Exception
 892         except:
 893             t=datetime.datetime.now()
 894             d=t.date()
 895             name=str(d)
 896         return "/root/%s-%s.sql"%(database,name)
 897
 898     def db_dump(self):
 899         dump=self.dbfile("planetab4")
 900         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
 901         utils.header('Dumped planetlab4 database in %s'%dump)
 902         return True
 903
 904     def db_restore(self):
 905         dump=self.dbfile("planetab4")
 906         ##stop httpd service
 907         self.run_in_guest('service httpd stop')
 908         # xxx - need another wrapper
 909         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
 910         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
 911         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
 912         ##starting httpd service
 913         self.run_in_guest('service httpd start')
 914
 915         utils.header('Database restored from ' + dump)
 916
 917     @standby_generic
 918     def standby_1(): pass
 919     @standby_generic
 920     def standby_2(): pass
 921     @standby_generic
 922     def standby_3(): pass
 923     @standby_generic
 924     def standby_4(): pass
 925     @standby_generic
 926     def standby_5(): pass
 927     @standby_generic
 928     def standby_6(): pass
 929     @standby_generic
 930     def standby_7(): pass
 931     @standby_generic
 932     def standby_8(): pass
 933     @standby_generic
 934     def standby_9(): pass
 935     @standby_generic
 936     def standby_10(): pass
 937     @standby_generic
 938     def standby_11(): pass
 939     @standby_generic
 940     def standby_12(): pass
 941     @standby_generic
 942     def standby_13(): pass
 943     @standby_generic
 944     def standby_14(): pass
 945     @standby_generic
 946     def standby_15(): pass
 947     @standby_generic
 948     def standby_16(): pass
 949     @standby_generic
 950     def standby_17(): pass
 951     @standby_generic
 952     def standby_18(): pass
 953     @standby_generic
 954     def standby_19(): pass
 955     @standby_generic
 956     def standby_20(): pass
 957