system/TestPlc.py

   1 # $Id$
   2 import os, os.path
   3 import datetime
   4 import time
   5 import sys
   6 import traceback
   7 from types import StringTypes
   8 import socket
   9
  10 import utils
  11 from TestSite import TestSite
  12 from TestNode import TestNode
  13 from TestUser import TestUser
  14 from TestKey import TestKey
  15 from TestSlice import TestSlice
  16 from TestSliver import TestSliver
  17 from TestBox import TestBox
  18 from TestSsh import TestSsh
  19 from TestApiserver import TestApiserver
  20
  21 # step methods must take (self) and return a boolean (options is a member of the class)
  22
  23 def standby(minutes,dry_run):
  24     utils.header('Entering StandBy for %d mn'%minutes)
  25     if dry_run:
  26         print 'dry_run'
  27     else:
  28         time.sleep(60*minutes)
  29     return True
  30
  31 def standby_generic (func):
  32     def actual(self):
  33         minutes=int(func.__name__.split("_")[1])
  34         return standby(minutes,self.options.dry_run)
  35     return actual
  36
  37 def node_mapper (method):
  38     def actual(self):
  39         overall=True
  40         node_method = TestNode.__dict__[method.__name__]
  41         for site_spec in self.plc_spec['sites']:
  42             test_site = TestSite (self,site_spec)
  43             for node_spec in site_spec['nodes']:
  44                 test_node = TestNode (self,test_site,node_spec)
  45                 if not node_method(test_node): overall=False
  46         return overall
  47     return actual
  48
  49 def slice_mapper_options (method):
  50     def actual(self):
  51         overall=True
  52         slice_method = TestSlice.__dict__[method.__name__]
  53         for slice_spec in self.plc_spec['slices']:
  54             site_spec = self.locate_site (slice_spec['sitename'])
  55             test_site = TestSite(self,site_spec)
  56             test_slice=TestSlice(self,test_site,slice_spec)
  57             if not slice_method(test_slice,self.options): overall=False
  58         return overall
  59     return actual
  60
# marker inserted in the step lists below: printable_steps turns it into
# a line break and valid_step rejects it as a step name
SEP='<sep>'
  62
  63 class TestPlc:
  64
    # step names, grouped for display by SEP markers (see printable_steps)
    # NOTE(review): presumably the sequence run when no step is requested - confirm against the driver
    default_steps = ['display','uninstall','install','install_rpm',
                     'configure', 'start', 'fetch_keys', SEP,
                     'store_keys', 'clear_known_hosts', 'initscripts', SEP,
                     'sites', 'nodes', 'slices', 'nodegroups', SEP,
                     'init_node','bootcd', 'configure_qemu', 'export_qemu',
                     'kill_all_qemus', 'reinstall_node','start_node', SEP,
                     'nodes_booted', 'nodes_ssh', 'check_slice', 'check_initscripts', SEP,
                     'check_sanity', 'check_tcp', 'plcsh_stress_test', SEP,
                     'force_gather_logs', 'force_kill_qemus', 'force_record_tracker','force_free_tracker' ]
    # further step names that are not part of default_steps ;
    # the last entry is a description of a family of steps rather than a step name
    other_steps = [ 'stop_all_vservers','fresh_install', 'cache_rpm', 'stop', 'vs_start', SEP,
                    'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
                    'clean_sites', 'clean_nodes',
                    'clean_slices', 'clean_keys', SEP,
                    'show_boxes', 'list_all_qemus', 'list_qemus', SEP,
                    'db_dump' , 'db_restore', 'cleanup_trackers', 'cleanup_all_trackers',
                    'standby_1 through 20'
                    ]
  82
  83     @staticmethod
  84     def printable_steps (list):
  85         return " ".join(list).replace(" "+SEP+" "," \\\n")
  86     @staticmethod
  87     def valid_step (step):
  88         return step != SEP
  89
  90     def __init__ (self,plc_spec,options):
  91         self.plc_spec=plc_spec
  92         self.options=options
  93         self.test_ssh=TestSsh(self.plc_spec['hostname'],self.options.buildname)
  94         try:
  95             self.vserverip=plc_spec['vserverip']
  96             self.vservername=plc_spec['vservername']
  97             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
  98             self.vserver=True
  99         except:
 100             raise Exception,'chroot-based myplc testing is deprecated'
 101         self.apiserver=TestApiserver(self.url,options.dry_run)
 102
 103     def name(self):
 104         name=self.plc_spec['name']
 105         return "%s.%s"%(name,self.vservername)
 106
 107     def hostname(self):
 108         return self.plc_spec['hostname']
 109
 110     def is_local (self):
 111         return self.test_ssh.is_local()
 112
 113     # define the API methods on this object through xmlrpc
 114     # would help, but not strictly necessary
 115     def connect (self):
 116         pass
 117
 118     def actual_command_in_guest (self,command):
 119         return self.test_ssh.actual_command(self.host_to_guest(command))
 120
 121     def start_guest (self):
 122       return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))
 123
 124     def run_in_guest (self,command):
 125         return utils.system(self.actual_command_in_guest(command))
 126
 127     def run_in_host (self,command):
 128         return self.test_ssh.run_in_buildname(command)
 129
 130     #command gets run in the vserver
 131     def host_to_guest(self,command):
 132         return "vserver %s exec %s"%(self.vservername,command)
 133
    # host-side command that starts the vserver
 135     def start_guest_in_host(self):
 136         return "vserver %s start"%(self.vservername)
 137
 138     # xxx quick n dirty
 139     def run_in_guest_piped (self,local,remote):
 140         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),keep_stdin=True))
 141
 142     def auth_root (self):
 143         return {'Username':self.plc_spec['PLC_ROOT_USER'],
 144                 'AuthMethod':'password',
 145                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
 146                 'Role' : self.plc_spec['role']
 147                 }
 148     def locate_site (self,sitename):
 149         for site in self.plc_spec['sites']:
 150             if site['site_fields']['name'] == sitename:
 151                 return site
 152             if site['site_fields']['login_base'] == sitename:
 153                 return site
 154         raise Exception,"Cannot locate site %s"%sitename
 155
 156     def locate_node (self,nodename):
 157         for site in self.plc_spec['sites']:
 158             for node in site['nodes']:
 159                 if node['name'] == nodename:
 160                     return (site,node)
 161         raise Exception,"Cannot locate node %s"%nodename
 162
 163     def locate_hostname (self,hostname):
 164         for site in self.plc_spec['sites']:
 165             for node in site['nodes']:
 166                 if node['node_fields']['hostname'] == hostname:
 167                     return (site,node)
 168         raise Exception,"Cannot locate hostname %s"%hostname
 169
 170     def locate_key (self,keyname):
 171         for key in self.plc_spec['keys']:
 172             if key['name'] == keyname:
 173                 return key
 174         raise Exception,"Cannot locate key %s"%keyname
 175
 176     def locate_slice (self, slicename):
 177         for slice in self.plc_spec['slices']:
 178             if slice['slice_fields']['name'] == slicename:
 179                 return slice
 180         raise Exception,"Cannot locate slice %s"%slicename
 181
 182     def all_sliver_objs (self):
 183         result=[]
 184         for slice_spec in self.plc_spec['slices']:
 185             slicename = slice_spec['slice_fields']['name']
 186             for nodename in slice_spec['nodenames']:
 187                 result.append(self.locate_sliver_obj (nodename,slicename))
 188         return result
 189
 190     def locate_sliver_obj (self,nodename,slicename):
 191         (site,node) = self.locate_node(nodename)
 192         slice = self.locate_slice (slicename)
 193         # build objects
 194         test_site = TestSite (self, site)
 195         test_node = TestNode (self, test_site,node)
 196         # xxx the slice site is assumed to be the node site - mhh - probably harmless
 197         test_slice = TestSlice (self, test_site, slice)
 198         return TestSliver (self, test_node, test_slice)
 199
 200     def locate_first_node(self):
 201         nodename=self.plc_spec['slices'][0]['nodenames'][0]
 202         (site,node) = self.locate_node(nodename)
 203         test_site = TestSite (self, site)
 204         test_node = TestNode (self, test_site,node)
 205         return test_node
 206
 207     def locate_first_sliver (self):
 208         slice_spec=self.plc_spec['slices'][0]
 209         slicename=slice_spec['slice_fields']['name']
 210         nodename=slice_spec['nodenames'][0]
 211         return self.locate_sliver_obj(nodename,slicename)
 212
 213     # all different hostboxes used in this plc
 214     def gather_hostBoxes(self):
 215         # maps on sites and nodes, return [ (host_box,test_node) ]
 216         tuples=[]
 217         for site_spec in self.plc_spec['sites']:
 218             test_site = TestSite (self,site_spec)
 219             for node_spec in site_spec['nodes']:
 220                 test_node = TestNode (self, test_site, node_spec)
 221                 if not test_node.is_real():
 222                     tuples.append( (test_node.host_box(),test_node) )
 223         # transform into a dict { 'host_box' -> [ test_node .. ] }
 224         result = {}
 225         for (box,node) in tuples:
 226             if not result.has_key(box):
 227                 result[box]=[node]
 228             else:
 229                 result[box].append(node)
 230         return result
 231
 232     # a step for checking this stuff
 233     def show_boxes (self):
 234         for (box,nodes) in self.gather_hostBoxes().iteritems():
 235             print box,":"," + ".join( [ node.name() for node in nodes ] )
 236         return True
 237
 238     # make this a valid step
 239     def kill_all_qemus(self):
 240         # this is the brute force version, kill all qemus on that host box
 241         for (box,nodes) in self.gather_hostBoxes().iteritems():
 242             # pass the first nodename, as we don't push template-qemu on testboxes
 243             nodedir=nodes[0].nodedir()
 244             TestBox(box,self.options.buildname).kill_all_qemus(nodedir)
 245         return True
 246
 247     # make this a valid step
 248     def list_all_qemus(self):
 249         for (box,nodes) in self.gather_hostBoxes().iteritems():
 250             # this is the brute force version, kill all qemus on that host box
 251             TestBox(box,self.options.buildname).list_all_qemus()
 252         return True
 253
    # list only the qemus attached to this plc's nodes
 255     def list_qemus(self):
 256         for (box,nodes) in self.gather_hostBoxes().iteritems():
 257             # the fine-grain version
 258             for node in nodes:
 259                 node.list_qemu()
 260         return True
 261
 262     # kill only the right qemus
 263     def kill_qemus(self):
 264         for (box,nodes) in self.gather_hostBoxes().iteritems():
 265             # the fine-grain version
 266             for node in nodes:
 267                 node.kill_qemu()
 268         return True
 269
 270     def display (self):
 271         utils.show_plc_spec (self.plc_spec)
 272         return True
 273
 274     ### utility methods for handling the pool of IP addresses allocated to plcs
 275     # Logic
 276     # (*) running plcs are recorded in the file named ~/running-test-plcs
 277     # (*) this file contains a line for each running plc, older first
 278     # (*) each line contains the vserver name + the hostname of the (vserver) testbox where it sits
    # (*) the free_tracker method performs a vserver stop on the oldest entries, once more than TRACKER_KEEP_VSERVERS are recorded
    # (*) the record_tracker method adds an entry at the bottom of the file
    # (*) the cleanup_trackers method stops all known vservers and removes the tracker file
 282
    # path of the tracker file, in the directory named by the HOME environment variable ;
    # evaluated when the class is defined, so HOME must be set at that time
    TRACKER_FILE=os.environ['HOME']+"/running-test-plcs"
    # how many concurrent plcs are we keeping alive - adjust with the IP pool size
    TRACKER_KEEP_VSERVERS = 12
 286
 287     def record_tracker (self):
 288         try:
 289             lines=file(TestPlc.TRACKER_FILE).readlines()
 290         except:
 291             lines=[]
 292
 293         this_line="%s %s\n"%(self.vservername,self.test_ssh.hostname)
 294         for line in lines:
 295             if line==this_line:
 296                 print 'this vserver is already included in %s'%TestPlc.TRACKER_FILE
 297                 return True
 298         if self.options.dry_run:
 299             print 'dry_run: record_tracker - skipping tracker update'
 300             return True
 301         tracker=file(TestPlc.TRACKER_FILE,"w")
 302         for line in lines+[this_line]:
 303             tracker.write(line)
 304         tracker.close()
 305         print "Recorded %s in running plcs on host %s"%(self.vservername,self.test_ssh.hostname)
 306         return True
 307
 308     def free_tracker (self, keep_vservers=None):
 309         if not keep_vservers: keep_vservers=TestPlc.TRACKER_KEEP_VSERVERS
 310         try:
 311             lines=file(TestPlc.TRACKER_FILE).readlines()
 312         except:
 313             print 'dry_run: free_tracker - skipping tracker update'
 314             return True
 315         how_many = len(lines) - keep_vservers
 316         # nothing todo until we have more than keep_vservers in the tracker
 317         if how_many <= 0:
 318             print 'free_tracker : limit %d not reached'%keep_vservers
 319             return True
 320         to_stop = lines[:how_many]
 321         to_keep = lines[how_many:]
 322         for line in to_stop:
 323             print '>%s<'%line
 324             [vname,hostname]=line.split()
 325             command=TestSsh(hostname).actual_command("vserver --silent %s stop"%vname)
 326             utils.system(command)
 327         if self.options.dry_run:
 328             print 'dry_run: free_tracker would stop %d vservers'%len(to_stop)
 329             for line in to_stop: print line,
 330             print 'dry_run: free_tracker would keep %d vservers'%len(to_keep)
 331             for line in to_keep: print line,
 332             return True
 333         print "Storing %d remaining vservers in %s"%(len(to_keep),TestPlc.TRACKER_FILE)
 334         tracker=open(TestPlc.TRACKER_FILE,"w")
 335         for line in to_keep:
 336             tracker.write(line)
 337         tracker.close()
 338         return True
 339
 340     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 341     def cleanup_trackers (self):
 342         try:
 343             for line in TestPlc.TRACKER_FILE.readlines():
 344                 [vname,hostname]=line.split()
 345                 stop="vserver --silent %s stop"%vname
 346                 command=TestSsh(hostname).actual_command(stop)
 347                 utils.system(command)
 348             clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 349             utils.system(self.test_ssh.actual_command(clean_tracker))
 350         except:
 351             return True
 352
 353     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 354     def cleanup_all_trackers (self):
 355         stop_all = "cd /vservers ; for i in * ; do vserver --silent $i stop ; done"
 356         utils.system(self.test_ssh.actual_command(stop_all))
 357         clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 358         utils.system(self.test_ssh.actual_command(clean_tracker))
 359         return True
 360
 361     def uninstall(self):
 362         self.run_in_host("vserver --silent %s delete"%self.vservername)
 363         return True
 364
 365     ### install
 366     def install(self):
 367         if self.is_local():
 368             # a full path for the local calls
 369             build_dir=os.path.dirname(sys.argv[0])
 370             # sometimes this is empty - set to "." in such a case
 371             if not build_dir: build_dir="."
 372             build_dir += "/build"
 373         else:
 374             # use a standard name - will be relative to remote buildname
 375             build_dir="build"
 376         # run checkout in any case - would do an update if already exists
 377         build_checkout = "svn checkout %s %s"%(self.options.build_url,build_dir)
 378         if self.run_in_host(build_checkout) != 0:
 379             return False
 380         # the repo url is taken from arch-rpms-url
 381         # with the last step (i386.) removed
 382         repo_url = self.options.arch_rpms_url
 383         for level in [ 'arch' ]:
 384             repo_url = os.path.dirname(repo_url)
 385         # pass the vbuild-nightly options to vtest-init-vserver
 386         test_env_options=""
 387         test_env_options += " -p %s"%self.options.personality
 388         test_env_options += " -d %s"%self.options.pldistro
 389         test_env_options += " -f %s"%self.options.fcdistro
 390         script="vtest-init-vserver.sh"
 391         vserver_name = self.vservername
 392         vserver_options="--netdev eth0 --interface %s"%self.vserverip
 393         try:
 394             vserver_hostname=socket.gethostbyaddr(self.vserverip)[0]
 395             vserver_options += " --hostname %s"%vserver_hostname
 396         except:
 397             pass
 398         create_vserver="%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s"%locals()
 399         return self.run_in_host(create_vserver) == 0
 400
 401     ### install_rpm
 402     def install_rpm(self):
 403         return self.run_in_guest("yum -y install myplc-native")==0 \
 404             and self.run_in_guest("yum -y install noderepo-%s-%s"%(self.options.pldistro,self.options.arch))==0
 405
 406     ###
 407     def configure(self):
 408         tmpname='%s.plc-config-tty'%(self.name())
 409         fileconf=open(tmpname,'w')
 410         for var in [ 'PLC_NAME',
 411                      'PLC_ROOT_PASSWORD',
 412                      'PLC_ROOT_USER',
 413                      'PLC_MAIL_ENABLED',
 414                      'PLC_MAIL_SUPPORT_ADDRESS',
 415                      'PLC_DB_HOST',
 416                      'PLC_API_HOST',
 417                      'PLC_WWW_HOST',
 418                      'PLC_BOOT_HOST',
 419                      'PLC_NET_DNS1',
 420                      'PLC_NET_DNS2']:
 421             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
 422         fileconf.write('w\n')
 423         fileconf.write('q\n')
 424         fileconf.close()
 425         utils.system('cat %s'%tmpname)
 426         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
 427         utils.system('rm %s'%tmpname)
 428         return True
 429
 430     def start(self):
 431         self.run_in_guest('service plc start')
 432         return True
 433
 434     def stop(self):
 435         self.run_in_guest('service plc stop')
 436         return True
 437
 438     def vs_start (self):
 439         self.start_guest()
 440         return True
 441
 442     # stores the keys from the config for further use
 443     def store_keys(self):
 444         for key_spec in self.plc_spec['keys']:
 445                 TestKey(self,key_spec).store_key()
 446         return True
 447
 448     def clean_keys(self):
 449         utils.system("rm -rf %s/keys/"%os.path(sys.argv[0]))
 450
 451     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 452     # for later direct access to the nodes
 453     def fetch_keys(self):
 454         dir="./keys"
 455         if not os.path.isdir(dir):
 456             os.mkdir(dir)
 457         prefix = 'root_ssh_key'
 458         vservername=self.vservername
 459         overall=True
 460         for ext in [ 'pub', 'rsa' ] :
 461             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 462             dst="keys/%(vservername)s.%(ext)s"%locals()
 463             if self.test_ssh.fetch(src,dst) != 0: overall=False
 464         return overall
 465
 466     def sites (self):
 467         return self.do_sites()
 468
 469     def clean_sites (self):
 470         return self.do_sites(action="delete")
 471
 472     def do_sites (self,action="add"):
 473         for site_spec in self.plc_spec['sites']:
 474             test_site = TestSite (self,site_spec)
 475             if (action != "add"):
 476                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
 477                 test_site.delete_site()
 478                 # deleted with the site
 479                 #test_site.delete_users()
 480                 continue
 481             else:
 482                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
 483                 test_site.create_site()
 484                 test_site.create_users()
 485         return True
 486
 487     def clean_all_sites (self):
 488         print 'auth_root',self.auth_root()
 489         site_ids = [s['site_id'] for s in self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])]
 490         for site_id in site_ids:
 491             print 'Deleting site_id',site_id
 492             self.apiserver.DeleteSite(self.auth_root(),site_id)
 493
 494     def nodes (self):
 495         return self.do_nodes()
 496     def clean_nodes (self):
 497         return self.do_nodes(action="delete")
 498
 499     def do_nodes (self,action="add"):
 500         for site_spec in self.plc_spec['sites']:
 501             test_site = TestSite (self,site_spec)
 502             if action != "add":
 503                 utils.header("Deleting nodes in site %s"%test_site.name())
 504                 for node_spec in site_spec['nodes']:
 505                     test_node=TestNode(self,test_site,node_spec)
 506                     utils.header("Deleting %s"%test_node.name())
 507                     test_node.delete_node()
 508             else:
 509                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
 510                 for node_spec in site_spec['nodes']:
 511                     utils.pprint('Creating node %s'%node_spec,node_spec)
 512                     test_node = TestNode (self,test_site,node_spec)
 513                     test_node.create_node ()
 514         return True
 515
 516     def nodegroups (self):
 517         return self.do_nodegroups("add")
 518     def clean_nodegroups (self):
 519         return self.do_nodegroups("delete")
 520
 521     # create nodegroups if needed, and populate
 522     def do_nodegroups (self, action="add"):
 523         # 1st pass to scan contents
 524         groups_dict = {}
 525         for site_spec in self.plc_spec['sites']:
 526             test_site = TestSite (self,site_spec)
 527             for node_spec in site_spec['nodes']:
 528                 test_node=TestNode (self,test_site,node_spec)
 529                 if node_spec.has_key('nodegroups'):
 530                     nodegroupnames=node_spec['nodegroups']
 531                     if isinstance(nodegroupnames,StringTypes):
 532                         nodegroupnames = [ nodegroupnames ]
 533                     for nodegroupname in nodegroupnames:
 534                         if not groups_dict.has_key(nodegroupname):
 535                             groups_dict[nodegroupname]=[]
 536                         groups_dict[nodegroupname].append(test_node.name())
 537         auth=self.auth_root()
 538         overall = True
 539         for (nodegroupname,group_nodes) in groups_dict.iteritems():
 540             if action == "add":
 541                 print 'nodegroups:','dealing with nodegroup',nodegroupname,'on nodes',group_nodes
 542                 # first, check if the nodetagtype is here
 543                 tag_types = self.apiserver.GetTagTypes(auth,{'tagname':nodegroupname})
 544                 if tag_types:
 545                     tag_type_id = tag_types[0]['tag_type_id']
 546                 else:
 547                     tag_type_id = self.apiserver.AddTagType(auth,
 548                                                             {'tagname':nodegroupname,
 549                                                              'description': 'for nodegroup %s'%nodegroupname,
 550                                                              'category':'test',
 551                                                              'min_role_id':10})
 552                 print 'located tag (type)',nodegroupname,'as',tag_type_id
 553                 # create nodegroup
 554                 nodegroups = self.apiserver.GetNodeGroups (auth, {'groupname':nodegroupname})
 555                 if not nodegroups:
 556                     self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
 557                     print 'created nodegroup',nodegroupname,'from tagname',nodegroupname,'and value','yes'
 558                 # set node tag on all nodes, value='yes'
 559                 for nodename in group_nodes:
 560                     try:
 561                         self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
 562                     except:
 563                         traceback.print_exc()
 564                         print 'node',nodename,'seems to already have tag',nodegroupname
 565                     # check anyway
 566                     try:
 567                         expect_yes = self.apiserver.GetNodeTags(auth,
 568                                                                 {'hostname':nodename,
 569                                                                  'tagname':nodegroupname},
 570                                                                 ['tagvalue'])[0]['tagvalue']
 571                         if expect_yes != "yes":
 572                             print 'Mismatch node tag on node',nodename,'got',expect_yes
 573                             overall=False
 574                     except:
 575                         if not self.options.dry_run:
 576                             print 'Cannot find tag',nodegroupname,'on node',nodename
 577                             overall = False
 578             else:
 579                 try:
 580                     print 'cleaning nodegroup',nodegroupname
 581                     self.apiserver.DeleteNodeGroup(auth,nodegroupname)
 582                 except:
 583                     traceback.print_exc()
 584                     overall=False
 585         return overall
 586
 587     def all_hostnames (self) :
 588         hostnames = []
 589         for site_spec in self.plc_spec['sites']:
 590             hostnames += [ node_spec['node_fields']['hostname'] \
 591                            for node_spec in site_spec['nodes'] ]
 592         return hostnames
 593
 594     # gracetime : during the first <gracetime> minutes nothing gets printed
 595     def do_nodes_booted (self, minutes, gracetime,period=15):
 596         if self.options.dry_run:
 597             print 'dry_run'
 598             return True
 599         # compute timeout
 600         timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
 601         graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
 602         # the nodes that haven't checked yet - start with a full list and shrink over time
 603         tocheck = self.all_hostnames()
 604         utils.header("checking nodes %r"%tocheck)
 605         # create a dict hostname -> status
 606         status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
 607         while tocheck:
 608             # get their status
 609             tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
 610             # update status
 611             for array in tocheck_status:
 612                 hostname=array['hostname']
 613                 boot_state=array['boot_state']
 614                 if boot_state == 'boot':
 615                     utils.header ("%s has reached the 'boot' state"%hostname)
 616                 else:
 617                     # if it's a real node, never mind
 618                     (site_spec,node_spec)=self.locate_hostname(hostname)
 619                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 620                         utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
 621                         # let's cheat
 622                         boot_state = 'boot'
 623                     elif datetime.datetime.now() > graceout:
 624                         utils.header ("%s still in '%s' state"%(hostname,boot_state))
 625                         graceout=datetime.datetime.now()+datetime.timedelta(1)
 626                 status[hostname] = boot_state
 627             # refresh tocheck
 628             tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != 'boot' ]
 629             if not tocheck:
 630                 return True
 631             if datetime.datetime.now() > timeout:
 632                 for hostname in tocheck:
 633                     utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
 634                 return False
 635             # otherwise, sleep for a while
 636             time.sleep(period)
 637         # only useful in empty plcs
 638         return True
 639
 640     def nodes_booted(self):
 641         return self.do_nodes_booted(minutes=20,gracetime=15)
 642
 643     def do_nodes_ssh(self,minutes,gracetime,period=20):
 644         # compute timeout
 645         timeout = datetime.datetime.now()+datetime.timedelta(minutes=minutes)
 646         graceout = datetime.datetime.now()+datetime.timedelta(minutes=gracetime)
 647         tocheck = self.all_hostnames()
 648 #        self.scan_publicKeys(tocheck)
 649         utils.header("checking ssh access to root context on nodes %r"%tocheck)
 650         vservername=self.vservername
 651         while tocheck:
 652             for hostname in tocheck:
 653                 # try to run 'hostname' in the node
 654                 # using locally cached keys - assuming we've run testplc.fetch_keys()
 655                 local_key = "keys/%(vservername)s.rsa"%locals()
 656                 command = TestSsh (hostname,key=local_key).actual_command("hostname")
 657                 # don't spam logs - show the command only after the grace period
 658                 if datetime.datetime.now() > graceout:
 659                     success=utils.system(command)
 660                 else:
 661                     success=os.system(command)
 662                 if success==0:
 663                     utils.header('Successfully entered root@%s'%hostname)
 664                     # refresh tocheck
 665                     tocheck.remove(hostname)
 666                 else:
 667                     # we will have tried real nodes once, in case they're up - but if not, just skip
 668                     (site_spec,node_spec)=self.locate_hostname(hostname)
 669                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 670                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
 671                         tocheck.remove(hostname)
 672             if  not tocheck:
 673                 return True
 674             if datetime.datetime.now() > timeout:
 675                 for hostname in tocheck:
 676                     utils.header("FAILURE to ssh into %s"%hostname)
 677                 return False
 678             # otherwise, sleep for a while
 679             time.sleep(period)
 680         # only useful in empty plcs
 681         return True
 682
 683     def nodes_ssh(self):
 684         return self.do_nodes_ssh(minutes=30,gracetime=10)
 685
 686     @node_mapper
 687     def init_node (self): pass
 688     @node_mapper
 689     def bootcd (self): pass
 690     @node_mapper
 691     def configure_qemu (self): pass
 692     @node_mapper
 693     def reinstall_node (self): pass
 694     @node_mapper
 695     def export_qemu (self): pass
 696
 697     ### check sanity : invoke scripts from qaapi/qa/tests/{node,slice}
 698     def check_sanity_node (self):
 699         return self.locate_first_node().check_sanity()
 700     def check_sanity_sliver (self) :
 701         return self.locate_first_sliver().check_sanity()
 702
 703     def check_sanity (self):
 704         return self.check_sanity_node() and self.check_sanity_sliver()
 705
 706     ### initscripts
 707     def do_check_initscripts(self):
 708         overall = True
 709         for slice_spec in self.plc_spec['slices']:
 710             if not slice_spec.has_key('initscriptname'):
 711                 continue
 712             initscript=slice_spec['initscriptname']
 713             for nodename in slice_spec['nodenames']:
 714                 (site,node) = self.locate_node (nodename)
 715                 # xxx - passing the wrong site - probably harmless
 716                 test_site = TestSite (self,site)
 717                 test_slice = TestSlice (self,test_site,slice_spec)
 718                 test_node = TestNode (self,test_site,node)
 719                 test_sliver = TestSliver (self, test_node, test_slice)
 720                 if not test_sliver.check_initscript(initscript):
 721                     overall = False
 722         return overall
 723
 724     def check_initscripts(self):
 725             return self.do_check_initscripts()
 726
 727     def initscripts (self):
 728         for initscript in self.plc_spec['initscripts']:
 729             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
 730             self.apiserver.AddInitScript(self.auth_root(),initscript['initscript_fields'])
 731         return True
 732
 733     def clean_initscripts (self):
 734         for initscript in self.plc_spec['initscripts']:
 735             initscript_name = initscript['initscript_fields']['name']
 736             print('Attempting to delete %s in plc %s'%(initscript_name,self.plc_spec['name']))
 737             try:
 738                 self.apiserver.DeleteInitScript(self.auth_root(),initscript_name)
 739                 print initscript_name,'deleted'
 740             except:
 741                 print 'deletion went wrong - probably did not exist'
 742         return True
 743
 744     ### manage slices
 745     def slices (self):
 746         return self.do_slices()
 747
 748     def clean_slices (self):
 749         return self.do_slices("delete")
 750
 751     def do_slices (self,  action="add"):
 752         for slice in self.plc_spec['slices']:
 753             site_spec = self.locate_site (slice['sitename'])
 754             test_site = TestSite(self,site_spec)
 755             test_slice=TestSlice(self,test_site,slice)
 756             if action != "add":
 757                 utils.header("Deleting slices in site %s"%test_site.name())
 758                 test_slice.delete_slice()
 759             else:
 760                 utils.pprint("Creating slice",slice)
 761                 test_slice.create_slice()
 762                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
 763         return True
 764
 765     @slice_mapper_options
 766     def check_slice(self): pass
 767
 768     @node_mapper
 769     def clear_known_hosts (self): pass
 770
 771     @node_mapper
 772     def start_node (self) : pass
 773
 774     def check_tcp (self):
 775         specs = self.plc_spec['tcp_test']
 776         overall=True
 777         for spec in specs:
 778             port = spec['port']
 779             # server side
 780             s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
 781             if not s_test_sliver.run_tcp_server(port,timeout=10):
 782                 overall=False
 783                 break
 784
 785             # idem for the client side
 786             c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
 787             if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
 788                 overall=False
 789         return overall
 790
 791     def plcsh_stress_test (self):
 792         # install the stress-test in the plc image
 793         location = "/usr/share/plc_api/plcsh-stress-test.py"
 794         remote="/vservers/%s/%s"%(self.vservername,location)
 795         self.test_ssh.copy_abs("plcsh-stress-test.py",remote)
 796         command = location
 797         command += " -- --check"
 798         if self.options.small_test:
 799             command +=  " --tiny"
 800         return ( self.run_in_guest(command) == 0)
 801
 802     def gather_logs (self):
 803         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
 804         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
 805         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
 806         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
 807         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
 808         # (1.a)
 809         print "-------------------- TestPlc.gather_logs : PLC's /var/log"
 810         self.gather_var_logs ()
 811         # (1.b)
 812         print "-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/"
 813         self.gather_pgsql_logs ()
 814         # (2)
 815         print "-------------------- TestPlc.gather_logs : nodes's QEMU logs"
 816         for site_spec in self.plc_spec['sites']:
 817             test_site = TestSite (self,site_spec)
 818             for node_spec in site_spec['nodes']:
 819                 test_node=TestNode(self,test_site,node_spec)
 820                 test_node.gather_qemu_logs()
 821         # (3)
 822         print "-------------------- TestPlc.gather_logs : nodes's /var/log"
 823         self.gather_nodes_var_logs()
 824         # (4)
 825         print "-------------------- TestPlc.gather_logs : sample sliver's /var/log"
 826         self.gather_slivers_var_logs()
 827         return True
 828
 829     def gather_slivers_var_logs(self):
 830         for test_sliver in self.all_sliver_objs():
 831             remote = test_sliver.tar_var_logs()
 832             utils.system("mkdir -p logs/sliver.var-log.%s"%test_sliver.name())
 833             command = remote + " | tar -C logs/sliver.var-log.%s -xf -"%test_sliver.name()
 834             utils.system(command)
 835         return True
 836
 837     def gather_var_logs (self):
 838         utils.system("mkdir -p logs/myplc.var-log.%s"%self.name())
 839         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
 840         command = to_plc + "| tar -C logs/myplc.var-log.%s -xf -"%self.name()
 841         utils.system(command)
 842         command = "chmod a+r,a+x logs/myplc.var-log.%s/httpd"%self.name()
 843         utils.system(command)
 844
 845     def gather_pgsql_logs (self):
 846         utils.system("mkdir -p logs/myplc.pgsql-log.%s"%self.name())
 847         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
 848         command = to_plc + "| tar -C logs/myplc.pgsql-log.%s -xf -"%self.name()
 849         utils.system(command)
 850
 851     def gather_nodes_var_logs (self):
 852         for site_spec in self.plc_spec['sites']:
 853             test_site = TestSite (self,site_spec)
 854             for node_spec in site_spec['nodes']:
 855                 test_node=TestNode(self,test_site,node_spec)
 856                 test_ssh = TestSsh (test_node.name(),key="/etc/planetlab/root_ssh_key.rsa")
 857                 to_plc = self.actual_command_in_guest ( test_ssh.actual_command("tar -C /var/log -cf - ."))
 858                 command = to_plc + "| tar -C logs/node.var-log.%s -xf -"%test_node.name()
 859                 utils.system("mkdir -p logs/node.var-log.%s"%test_node.name())
 860                 utils.system(command)
 861
 862
 863     # returns the filename to use for sql dump/restore, using options.dbname if set
 864     def dbfile (self, database):
 865         # uses options.dbname if it is found
 866         try:
 867             name=self.options.dbname
 868             if not isinstance(name,StringTypes):
 869                 raise Exception
 870         except:
 871             t=datetime.datetime.now()
 872             d=t.date()
 873             name=str(d)
 874         return "/root/%s-%s.sql"%(database,name)
 875
 876     def db_dump(self):
 877         dump=self.dbfile("planetab4")
 878         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
 879         utils.header('Dumped planetlab4 database in %s'%dump)
 880         return True
 881
 882     def db_restore(self):
 883         dump=self.dbfile("planetab4")
 884         ##stop httpd service
 885         self.run_in_guest('service httpd stop')
 886         # xxx - need another wrapper
 887         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
 888         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
 889         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
 890         ##starting httpd service
 891         self.run_in_guest('service httpd start')
 892
 893         utils.header('Database restored from ' + dump)
 894
 895     @standby_generic
 896     def standby_1(): pass
 897     @standby_generic
 898     def standby_2(): pass
 899     @standby_generic
 900     def standby_3(): pass
 901     @standby_generic
 902     def standby_4(): pass
 903     @standby_generic
 904     def standby_5(): pass
 905     @standby_generic
 906     def standby_6(): pass
 907     @standby_generic
 908     def standby_7(): pass
 909     @standby_generic
 910     def standby_8(): pass
 911     @standby_generic
 912     def standby_9(): pass
 913     @standby_generic
 914     def standby_10(): pass
 915     @standby_generic
 916     def standby_11(): pass
 917     @standby_generic
 918     def standby_12(): pass
 919     @standby_generic
 920     def standby_13(): pass
 921     @standby_generic
 922     def standby_14(): pass
 923     @standby_generic
 924     def standby_15(): pass
 925     @standby_generic
 926     def standby_16(): pass
 927     @standby_generic
 928     def standby_17(): pass
 929     @standby_generic
 930     def standby_18(): pass
 931     @standby_generic
 932     def standby_19(): pass
 933     @standby_generic
 934     def standby_20(): pass
 935