system/TestPlc.py

   1 # $Id$
   2 import os, os.path
   3 import datetime
   4 import time
   5 import sys
   6 import traceback
   7 from types import StringTypes
   8 import socket
   9
  10 import utils
  11 from TestSite import TestSite
  12 from TestNode import TestNode
  13 from TestUser import TestUser
  14 from TestKey import TestKey
  15 from TestSlice import TestSlice
  16 from TestSliver import TestSliver
  17 from TestBox import TestBox
  18 from TestSsh import TestSsh
  19 from TestApiserver import TestApiserver
  20
  21 # step methods must take (self) and return a boolean (options is a member of the class)
  22
  23 def standby(minutes,dry_run):
  24     utils.header('Entering StandBy for %d mn'%minutes)
  25     if dry_run:
  26         print 'dry_run'
  27     else:
  28         time.sleep(60*minutes)
  29     return True
  30
  31 def standby_generic (func):
  32     def actual(self):
  33         minutes=int(func.__name__.split("_")[1])
  34         return standby(minutes,self.options.dry_run)
  35     return actual
  36
  37 def node_mapper (method):
  38     def actual(self):
  39         overall=True
  40         node_method = TestNode.__dict__[method.__name__]
  41         for site_spec in self.plc_spec['sites']:
  42             test_site = TestSite (self,site_spec)
  43             for node_spec in site_spec['nodes']:
  44                 test_node = TestNode (self,test_site,node_spec)
  45                 if not node_method(test_node): overall=False
  46         return overall
  47     return actual
  48
  49 def slice_mapper_options (method):
  50     def actual(self):
  51         overall=True
  52         slice_method = TestSlice.__dict__[method.__name__]
  53         for slice_spec in self.plc_spec['slices']:
  54             site_spec = self.locate_site (slice_spec['sitename'])
  55             test_site = TestSite(self,site_spec)
  56             test_slice=TestSlice(self,test_site,slice_spec)
  57             if not slice_method(test_slice,self.options): overall=False
  58         return overall
  59     return actual
  60
# pseudo step name used inside the step lists: it is not a real step
# (see valid_step) and is rendered as a line break by printable_steps
SEP='<sep>'
  62
  63 class TestPlc:
  64
    # names of step methods, in running order; SEP entries are layout markers
    # only (see printable_steps / valid_step)
    # NOTE(review): presumably the sequence used when no step is requested
    # explicitly - confirm against the driver script
    default_steps = [
        'display','uninstall','install','install_rpm',
        'configure', 'start', 'fetch_keys', SEP,
        'store_keys', 'clear_known_hosts', 'initscripts', SEP,
        'sites', 'nodes', 'slices', 'nodegroups', SEP,
        'init_node','bootcd', 'configure_qemu', 'export_qemu',
        'kill_all_qemus', 'reinstall_node','start_node', SEP,
        # better use of time: do this now that the nodes are taking off
        'plcsh_stress_test', SEP,
        'nodes_ssh_debug', 'nodes_ssh_boot', 'check_slice', 'check_initscripts', SEP,
        'check_tcp',  SEP,
        'force_gather_logs', 'force_kill_qemus', 'force_record_tracker','force_free_tracker',
        ]
    # additional step names that are not part of default_steps
    # the last entry is a human-readable hint, not an actual method name
    other_steps = [
        'stop_all_vservers','fresh_install', 'cache_rpm', 'stop', 'vs_start', SEP,
        'check_sanity',  SEP,
        'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
        'clean_sites', 'clean_nodes',
        'clean_slices', 'clean_keys', SEP,
        'show_boxes', 'list_all_qemus', 'list_qemus', SEP,
        'db_dump' , 'db_restore', 'cleanup_trackers', 'cleanup_all_trackers',
        'standby_1 through 20',
        ]
  88
  89     @staticmethod
  90     def printable_steps (list):
  91         return " ".join(list).replace(" "+SEP+" "," \\\n")
  92     @staticmethod
  93     def valid_step (step):
  94         return step != SEP
  95
  96     def __init__ (self,plc_spec,options):
  97         self.plc_spec=plc_spec
  98         self.options=options
  99         self.test_ssh=TestSsh(self.plc_spec['hostname'],self.options.buildname)
 100         try:
 101             self.vserverip=plc_spec['vserverip']
 102             self.vservername=plc_spec['vservername']
 103             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
 104             self.vserver=True
 105         except:
 106             raise Exception,'chroot-based myplc testing is deprecated'
 107         self.apiserver=TestApiserver(self.url,options.dry_run)
 108
 109     def name(self):
 110         name=self.plc_spec['name']
 111         return "%s.%s"%(name,self.vservername)
 112
 113     def hostname(self):
 114         return self.plc_spec['hostname']
 115
 116     def is_local (self):
 117         return self.test_ssh.is_local()
 118
 119     # define the API methods on this object through xmlrpc
 120     # would help, but not strictly necessary
 121     def connect (self):
 122         pass
 123
 124     def actual_command_in_guest (self,command):
 125         return self.test_ssh.actual_command(self.host_to_guest(command))
 126
 127     def start_guest (self):
 128       return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))
 129
 130     def run_in_guest (self,command):
 131         return utils.system(self.actual_command_in_guest(command))
 132
 133     def run_in_host (self,command):
 134         return self.test_ssh.run_in_buildname(command)
 135
 136     #command gets run in the vserver
 137     def host_to_guest(self,command):
 138         return "vserver %s exec %s"%(self.vservername,command)
 139
 140     #command gets run in the vserver
 141     def start_guest_in_host(self):
 142         return "vserver %s start"%(self.vservername)
 143
 144     # xxx quick n dirty
 145     def run_in_guest_piped (self,local,remote):
 146         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),keep_stdin=True))
 147
 148     def auth_root (self):
 149         return {'Username':self.plc_spec['PLC_ROOT_USER'],
 150                 'AuthMethod':'password',
 151                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
 152                 'Role' : self.plc_spec['role']
 153                 }
 154     def locate_site (self,sitename):
 155         for site in self.plc_spec['sites']:
 156             if site['site_fields']['name'] == sitename:
 157                 return site
 158             if site['site_fields']['login_base'] == sitename:
 159                 return site
 160         raise Exception,"Cannot locate site %s"%sitename
 161
 162     def locate_node (self,nodename):
 163         for site in self.plc_spec['sites']:
 164             for node in site['nodes']:
 165                 if node['name'] == nodename:
 166                     return (site,node)
 167         raise Exception,"Cannot locate node %s"%nodename
 168
 169     def locate_hostname (self,hostname):
 170         for site in self.plc_spec['sites']:
 171             for node in site['nodes']:
 172                 if node['node_fields']['hostname'] == hostname:
 173                     return (site,node)
 174         raise Exception,"Cannot locate hostname %s"%hostname
 175
 176     def locate_key (self,keyname):
 177         for key in self.plc_spec['keys']:
 178             if key['name'] == keyname:
 179                 return key
 180         raise Exception,"Cannot locate key %s"%keyname
 181
 182     def locate_slice (self, slicename):
 183         for slice in self.plc_spec['slices']:
 184             if slice['slice_fields']['name'] == slicename:
 185                 return slice
 186         raise Exception,"Cannot locate slice %s"%slicename
 187
 188     def all_sliver_objs (self):
 189         result=[]
 190         for slice_spec in self.plc_spec['slices']:
 191             slicename = slice_spec['slice_fields']['name']
 192             for nodename in slice_spec['nodenames']:
 193                 result.append(self.locate_sliver_obj (nodename,slicename))
 194         return result
 195
 196     def locate_sliver_obj (self,nodename,slicename):
 197         (site,node) = self.locate_node(nodename)
 198         slice = self.locate_slice (slicename)
 199         # build objects
 200         test_site = TestSite (self, site)
 201         test_node = TestNode (self, test_site,node)
 202         # xxx the slice site is assumed to be the node site - mhh - probably harmless
 203         test_slice = TestSlice (self, test_site, slice)
 204         return TestSliver (self, test_node, test_slice)
 205
 206     def locate_first_node(self):
 207         nodename=self.plc_spec['slices'][0]['nodenames'][0]
 208         (site,node) = self.locate_node(nodename)
 209         test_site = TestSite (self, site)
 210         test_node = TestNode (self, test_site,node)
 211         return test_node
 212
 213     def locate_first_sliver (self):
 214         slice_spec=self.plc_spec['slices'][0]
 215         slicename=slice_spec['slice_fields']['name']
 216         nodename=slice_spec['nodenames'][0]
 217         return self.locate_sliver_obj(nodename,slicename)
 218
 219     # all different hostboxes used in this plc
 220     def gather_hostBoxes(self):
 221         # maps on sites and nodes, return [ (host_box,test_node) ]
 222         tuples=[]
 223         for site_spec in self.plc_spec['sites']:
 224             test_site = TestSite (self,site_spec)
 225             for node_spec in site_spec['nodes']:
 226                 test_node = TestNode (self, test_site, node_spec)
 227                 if not test_node.is_real():
 228                     tuples.append( (test_node.host_box(),test_node) )
 229         # transform into a dict { 'host_box' -> [ test_node .. ] }
 230         result = {}
 231         for (box,node) in tuples:
 232             if not result.has_key(box):
 233                 result[box]=[node]
 234             else:
 235                 result[box].append(node)
 236         return result
 237
 238     # a step for checking this stuff
 239     def show_boxes (self):
 240         for (box,nodes) in self.gather_hostBoxes().iteritems():
 241             print box,":"," + ".join( [ node.name() for node in nodes ] )
 242         return True
 243
 244     # make this a valid step
 245     def kill_all_qemus(self):
 246         # this is the brute force version, kill all qemus on that host box
 247         for (box,nodes) in self.gather_hostBoxes().iteritems():
 248             # pass the first nodename, as we don't push template-qemu on testboxes
 249             nodedir=nodes[0].nodedir()
 250             TestBox(box,self.options.buildname).kill_all_qemus(nodedir)
 251         return True
 252
 253     # make this a valid step
 254     def list_all_qemus(self):
 255         for (box,nodes) in self.gather_hostBoxes().iteritems():
 256             # this is the brute force version, kill all qemus on that host box
 257             TestBox(box,self.options.buildname).list_all_qemus()
 258         return True
 259
 260     # kill only the right qemus
 261     def list_qemus(self):
 262         for (box,nodes) in self.gather_hostBoxes().iteritems():
 263             # the fine-grain version
 264             for node in nodes:
 265                 node.list_qemu()
 266         return True
 267
 268     # kill only the right qemus
 269     def kill_qemus(self):
 270         for (box,nodes) in self.gather_hostBoxes().iteritems():
 271             # the fine-grain version
 272             for node in nodes:
 273                 node.kill_qemu()
 274         return True
 275
 276     #################### display config
 277     def display (self):
 278         self.display_pass (1)
 279         self.display_pass (2)
 280         return True
 281
 282     # entry point
 283     def display_pass (self,passno):
 284         for (key,val) in self.plc_spec.iteritems():
 285             if passno == 2:
 286                 if key == 'sites':
 287                     for site in val:
 288                         self.display_site_spec(site)
 289                         for node in site['nodes']:
 290                             self.display_node_spec(node)
 291                 elif key=='initscripts':
 292                     for initscript in val:
 293                         self.display_initscript_spec (initscript)
 294                 elif key=='slices':
 295                     for slice in val:
 296                         self.display_slice_spec (slice)
 297                 elif key=='keys':
 298                     for key in val:
 299                         self.display_key_spec (key)
 300             elif passno == 1:
 301                 if key not in ['sites','initscripts','slices','keys']:
 302                     print '*   ',key,':',val
 303
 304     def display_site_spec (self,site):
 305         print '* ======== site',site['site_fields']['name']
 306         for (k,v) in site.iteritems():
 307             if k=='nodes':
 308                 if v:
 309                     print '*       ','nodes : ',
 310                     for node in v:
 311                         print node['node_fields']['hostname'],'',
 312                     print ''
 313             elif k=='users':
 314                 if v:
 315                     print '*       users : ',
 316                     for user in v:
 317                         print user['name'],'',
 318                     print ''
 319             elif k == 'site_fields':
 320                 print '*       login_base',':',v['login_base']
 321             elif k == 'address_fields':
 322                 pass
 323             else:
 324                 print '*       ',k,
 325                 PrettyPrinter(indent=8,depth=2).pprint(v)
 326
 327     def display_initscript_spec (self,initscript):
 328         print '* ======== initscript',initscript['initscript_fields']['name']
 329
 330     def display_key_spec (self,key):
 331         print '* ======== key',key['name']
 332
 333     def display_slice_spec (self,slice):
 334         print '* ======== slice',slice['slice_fields']['name']
 335         for (k,v) in slice.iteritems():
 336             if k=='nodenames':
 337                 if v:
 338                     print '*       nodes : ',
 339                     for nodename in v:
 340                         print nodename,'',
 341                     print ''
 342             elif k=='usernames':
 343                 if v:
 344                     print '*       users : ',
 345                     for username in v:
 346                         print username,'',
 347                     print ''
 348             elif k=='slice_fields':
 349                 print '*       fields',':',
 350                 print 'max_nodes=',v['max_nodes'],
 351                 print ''
 352             else:
 353                 print '*       ',k,v
 354
 355     def display_node_spec (self,node):
 356         print "*           node",node['name'],"host_box=",node['host_box'],
 357         print "hostname=",node['node_fields']['hostname'],
 358         print "ip=",node['interface_fields']['ip']
 359
 360
 361     # another entry point for just showing the boxes involved
 362     def display_mapping (self):
 363         TestPlc.display_mapping_plc(self.plc_spec)
 364         return True
 365
 366     @staticmethod
 367     def display_mapping_plc (plc_spec):
 368         print '* MyPLC',plc_spec['name']
 369         print '*\tvserver address = root@%s:/vservers/%s'%(plc_spec['hostname'],plc_spec['vservername'])
 370         print '*\tIP = %s/%s'%(plc_spec['PLC_API_HOST'],plc_spec['vserverip'])
 371         for site_spec in plc_spec['sites']:
 372             for node_spec in site_spec['nodes']:
 373                 TestPlc.display_mapping_node(node_spec)
 374
 375     @staticmethod
 376     def display_mapping_node (node_spec):
 377         print '*   NODE %s'%(node_spec['name'])
 378         print '*\tqemu box %s'%node_spec['host_box']
 379         print '*\thostname=%s'%node_spec['node_fields']['hostname']
 380
    ### utility methods for handling the pool of IP addresses allocated to plcs
    # Logic
    # (*) running plcs are recorded in the file named ~/running-test-plcs
    # (*) this file contains a line for each running plc, oldest first
    # (*) each line contains the vserver name + the hostname of the (vserver) testbox where it sits
    # (*) the free_tracker method performs a vserver stop on the oldest entries,
    #     once more than TRACKER_KEEP_VSERVERS entries are recorded
    # (*) the record_tracker method adds an entry at the bottom of the file
    # (*) the cleanup_trackers method is meant to stop all known vservers and remove the tracker file

    # evaluated when the class is defined: raises KeyError if HOME is not set
    TRACKER_FILE=os.environ['HOME']+"/running-test-plcs"
    # how many concurrent plcs are we keeping alive - adjust with the IP pool size
    TRACKER_KEEP_VSERVERS = 12
 393
 394     def record_tracker (self):
 395         try:
 396             lines=file(TestPlc.TRACKER_FILE).readlines()
 397         except:
 398             lines=[]
 399
 400         this_line="%s %s\n"%(self.vservername,self.test_ssh.hostname)
 401         for line in lines:
 402             if line==this_line:
 403                 print 'this vserver is already included in %s'%TestPlc.TRACKER_FILE
 404                 return True
 405         if self.options.dry_run:
 406             print 'dry_run: record_tracker - skipping tracker update'
 407             return True
 408         tracker=file(TestPlc.TRACKER_FILE,"w")
 409         for line in lines+[this_line]:
 410             tracker.write(line)
 411         tracker.close()
 412         print "Recorded %s in running plcs on host %s"%(self.vservername,self.test_ssh.hostname)
 413         return True
 414
 415     def free_tracker (self, keep_vservers=None):
 416         if not keep_vservers: keep_vservers=TestPlc.TRACKER_KEEP_VSERVERS
 417         try:
 418             lines=file(TestPlc.TRACKER_FILE).readlines()
 419         except:
 420             print 'dry_run: free_tracker - skipping tracker update'
 421             return True
 422         how_many = len(lines) - keep_vservers
 423         # nothing todo until we have more than keep_vservers in the tracker
 424         if how_many <= 0:
 425             print 'free_tracker : limit %d not reached'%keep_vservers
 426             return True
 427         to_stop = lines[:how_many]
 428         to_keep = lines[how_many:]
 429         for line in to_stop:
 430             print '>%s<'%line
 431             [vname,hostname]=line.split()
 432             command=TestSsh(hostname).actual_command("vserver --silent %s stop"%vname)
 433             utils.system(command)
 434         if self.options.dry_run:
 435             print 'dry_run: free_tracker would stop %d vservers'%len(to_stop)
 436             for line in to_stop: print line,
 437             print 'dry_run: free_tracker would keep %d vservers'%len(to_keep)
 438             for line in to_keep: print line,
 439             return True
 440         print "Storing %d remaining vservers in %s"%(len(to_keep),TestPlc.TRACKER_FILE)
 441         tracker=open(TestPlc.TRACKER_FILE,"w")
 442         for line in to_keep:
 443             tracker.write(line)
 444         tracker.close()
 445         return True
 446
 447     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 448     def cleanup_trackers (self):
 449         try:
 450             for line in TestPlc.TRACKER_FILE.readlines():
 451                 [vname,hostname]=line.split()
 452                 stop="vserver --silent %s stop"%vname
 453                 command=TestSsh(hostname).actual_command(stop)
 454                 utils.system(command)
 455             clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 456             utils.system(self.test_ssh.actual_command(clean_tracker))
 457         except:
 458             return True
 459
 460     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 461     def cleanup_all_trackers (self):
 462         stop_all = "cd /vservers ; for i in * ; do vserver --silent $i stop ; done"
 463         utils.system(self.test_ssh.actual_command(stop_all))
 464         clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 465         utils.system(self.test_ssh.actual_command(clean_tracker))
 466         return True
 467
 468     def uninstall(self):
 469         self.run_in_host("vserver --silent %s delete"%self.vservername)
 470         return True
 471
 472     ### install
 473     def install(self):
 474         if self.is_local():
 475             # a full path for the local calls
 476             build_dir=os.path.dirname(sys.argv[0])
 477             # sometimes this is empty - set to "." in such a case
 478             if not build_dir: build_dir="."
 479             build_dir += "/build"
 480         else:
 481             # use a standard name - will be relative to remote buildname
 482             build_dir="build"
 483         # run checkout in any case - would do an update if already exists
 484         build_checkout = "svn checkout %s %s"%(self.options.build_url,build_dir)
 485         if self.run_in_host(build_checkout) != 0:
 486             return False
 487         # the repo url is taken from arch-rpms-url
 488         # with the last step (i386) removed
 489         repo_url = self.options.arch_rpms_url
 490         for level in [ 'arch' ]:
 491             repo_url = os.path.dirname(repo_url)
 492         # pass the vbuild-nightly options to vtest-init-vserver
 493         test_env_options=""
 494         test_env_options += " -p %s"%self.options.personality
 495         test_env_options += " -d %s"%self.options.pldistro
 496         test_env_options += " -f %s"%self.options.fcdistro
 497         script="vtest-init-vserver.sh"
 498         vserver_name = self.vservername
 499         vserver_options="--netdev eth0 --interface %s"%self.vserverip
 500         try:
 501             vserver_hostname=socket.gethostbyaddr(self.vserverip)[0]
 502             vserver_options += " --hostname %s"%vserver_hostname
 503         except:
 504             pass
 505         create_vserver="%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s"%locals()
 506         return self.run_in_host(create_vserver) == 0
 507
 508     ### install_rpm
 509     def install_rpm(self):
 510         if self.options.personality == "linux32":
 511             arch = "i386"
 512         elif self.options.personality == "linux64":
 513             arch = "x86_64"
 514         else:
 515             raise Exception, "Unsupported personality %r"%self.options.personality
 516         return \
 517             self.run_in_guest("yum -y install myplc-native")==0 and \
 518             self.run_in_guest("yum -y install noderepo-%s-%s"%(self.options.pldistro,arch))==0
 519
 520     ###
 521     def configure(self):
 522         tmpname='%s.plc-config-tty'%(self.name())
 523         fileconf=open(tmpname,'w')
 524         for var in [ 'PLC_NAME',
 525                      'PLC_ROOT_PASSWORD',
 526                      'PLC_ROOT_USER',
 527                      'PLC_MAIL_ENABLED',
 528                      'PLC_MAIL_SUPPORT_ADDRESS',
 529                      'PLC_DB_HOST',
 530                      'PLC_API_HOST',
 531                      'PLC_WWW_HOST',
 532                      'PLC_BOOT_HOST',
 533                      'PLC_NET_DNS1',
 534                      'PLC_NET_DNS2']:
 535             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
 536         fileconf.write('w\n')
 537         fileconf.write('q\n')
 538         fileconf.close()
 539         utils.system('cat %s'%tmpname)
 540         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
 541         utils.system('rm %s'%tmpname)
 542         return True
 543
 544     def start(self):
 545         self.run_in_guest('service plc start')
 546         return True
 547
 548     def stop(self):
 549         self.run_in_guest('service plc stop')
 550         return True
 551
 552     def vs_start (self):
 553         self.start_guest()
 554         return True
 555
 556     # stores the keys from the config for further use
 557     def store_keys(self):
 558         for key_spec in self.plc_spec['keys']:
 559                 TestKey(self,key_spec).store_key()
 560         return True
 561
 562     def clean_keys(self):
 563         utils.system("rm -rf %s/keys/"%os.path(sys.argv[0]))
 564
 565     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 566     # for later direct access to the nodes
 567     def fetch_keys(self):
 568         dir="./keys"
 569         if not os.path.isdir(dir):
 570             os.mkdir(dir)
 571         vservername=self.vservername
 572         overall=True
 573         prefix = 'root_ssh_key'
 574         for ext in [ 'pub', 'rsa' ] :
 575             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 576             dst="keys/%(vservername)s.%(ext)s"%locals()
 577             if self.test_ssh.fetch(src,dst) != 0: overall=False
 578         prefix = 'debug_ssh_key'
 579         for ext in [ 'pub', 'rsa' ] :
 580             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 581             dst="keys/%(vservername)s-debug.%(ext)s"%locals()
 582             if self.test_ssh.fetch(src,dst) != 0: overall=False
 583         return overall
 584
 585     def sites (self):
 586         return self.do_sites()
 587
 588     def clean_sites (self):
 589         return self.do_sites(action="delete")
 590
 591     def do_sites (self,action="add"):
 592         for site_spec in self.plc_spec['sites']:
 593             test_site = TestSite (self,site_spec)
 594             if (action != "add"):
 595                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
 596                 test_site.delete_site()
 597                 # deleted with the site
 598                 #test_site.delete_users()
 599                 continue
 600             else:
 601                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
 602                 test_site.create_site()
 603                 test_site.create_users()
 604         return True
 605
 606     def clean_all_sites (self):
 607         print 'auth_root',self.auth_root()
 608         site_ids = [s['site_id'] for s in self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])]
 609         for site_id in site_ids:
 610             print 'Deleting site_id',site_id
 611             self.apiserver.DeleteSite(self.auth_root(),site_id)
 612
 613     def nodes (self):
 614         return self.do_nodes()
 615     def clean_nodes (self):
 616         return self.do_nodes(action="delete")
 617
 618     def do_nodes (self,action="add"):
 619         for site_spec in self.plc_spec['sites']:
 620             test_site = TestSite (self,site_spec)
 621             if action != "add":
 622                 utils.header("Deleting nodes in site %s"%test_site.name())
 623                 for node_spec in site_spec['nodes']:
 624                     test_node=TestNode(self,test_site,node_spec)
 625                     utils.header("Deleting %s"%test_node.name())
 626                     test_node.delete_node()
 627             else:
 628                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
 629                 for node_spec in site_spec['nodes']:
 630                     utils.pprint('Creating node %s'%node_spec,node_spec)
 631                     test_node = TestNode (self,test_site,node_spec)
 632                     test_node.create_node ()
 633         return True
 634
 635     def nodegroups (self):
 636         return self.do_nodegroups("add")
 637     def clean_nodegroups (self):
 638         return self.do_nodegroups("delete")
 639
 640     # create nodegroups if needed, and populate
 641     def do_nodegroups (self, action="add"):
 642         # 1st pass to scan contents
 643         groups_dict = {}
 644         for site_spec in self.plc_spec['sites']:
 645             test_site = TestSite (self,site_spec)
 646             for node_spec in site_spec['nodes']:
 647                 test_node=TestNode (self,test_site,node_spec)
 648                 if node_spec.has_key('nodegroups'):
 649                     nodegroupnames=node_spec['nodegroups']
 650                     if isinstance(nodegroupnames,StringTypes):
 651                         nodegroupnames = [ nodegroupnames ]
 652                     for nodegroupname in nodegroupnames:
 653                         if not groups_dict.has_key(nodegroupname):
 654                             groups_dict[nodegroupname]=[]
 655                         groups_dict[nodegroupname].append(test_node.name())
 656         auth=self.auth_root()
 657         overall = True
 658         for (nodegroupname,group_nodes) in groups_dict.iteritems():
 659             if action == "add":
 660                 print 'nodegroups:','dealing with nodegroup',nodegroupname,'on nodes',group_nodes
 661                 # first, check if the nodetagtype is here
 662                 tag_types = self.apiserver.GetTagTypes(auth,{'tagname':nodegroupname})
 663                 if tag_types:
 664                     tag_type_id = tag_types[0]['tag_type_id']
 665                 else:
 666                     tag_type_id = self.apiserver.AddTagType(auth,
 667                                                             {'tagname':nodegroupname,
 668                                                              'description': 'for nodegroup %s'%nodegroupname,
 669                                                              'category':'test',
 670                                                              'min_role_id':10})
 671                 print 'located tag (type)',nodegroupname,'as',tag_type_id
 672                 # create nodegroup
 673                 nodegroups = self.apiserver.GetNodeGroups (auth, {'groupname':nodegroupname})
 674                 if not nodegroups:
 675                     self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
 676                     print 'created nodegroup',nodegroupname,'from tagname',nodegroupname,'and value','yes'
 677                 # set node tag on all nodes, value='yes'
 678                 for nodename in group_nodes:
 679                     try:
 680                         self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
 681                     except:
 682                         traceback.print_exc()
 683                         print 'node',nodename,'seems to already have tag',nodegroupname
 684                     # check anyway
 685                     try:
 686                         expect_yes = self.apiserver.GetNodeTags(auth,
 687                                                                 {'hostname':nodename,
 688                                                                  'tagname':nodegroupname},
 689                                                                 ['tagvalue'])[0]['tagvalue']
 690                         if expect_yes != "yes":
 691                             print 'Mismatch node tag on node',nodename,'got',expect_yes
 692                             overall=False
 693                     except:
 694                         if not self.options.dry_run:
 695                             print 'Cannot find tag',nodegroupname,'on node',nodename
 696                             overall = False
 697             else:
 698                 try:
 699                     print 'cleaning nodegroup',nodegroupname
 700                     self.apiserver.DeleteNodeGroup(auth,nodegroupname)
 701                 except:
 702                     traceback.print_exc()
 703                     overall=False
 704         return overall
 705
 706     def all_hostnames (self) :
 707         hostnames = []
 708         for site_spec in self.plc_spec['sites']:
 709             hostnames += [ node_spec['node_fields']['hostname'] \
 710                            for node_spec in site_spec['nodes'] ]
 711         return hostnames
 712
 713     # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
 714     def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15):
 715         if self.options.dry_run:
 716             print 'dry_run'
 717             return True
 718         # compute timeout
 719         timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
 720         graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
 721         # the nodes that haven't checked yet - start with a full list and shrink over time
 722         tocheck = self.all_hostnames()
 723         utils.header("checking nodes %r"%tocheck)
 724         # create a dict hostname -> status
 725         status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
 726         while tocheck:
 727             # get their status
 728             tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
 729             # update status
 730             for array in tocheck_status:
 731                 hostname=array['hostname']
 732                 boot_state=array['boot_state']
 733                 if boot_state == target_boot_state:
 734                     utils.header ("%s has reached the %s state"%(hostname,target_boot_state))
 735                 else:
 736                     # if it's a real node, never mind
 737                     (site_spec,node_spec)=self.locate_hostname(hostname)
 738                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 739                         utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
 740                         # let's cheat
 741                         boot_state = target_boot_state
 742                     elif datetime.datetime.now() > graceout:
 743                         utils.header ("%s still in '%s' state"%(hostname,boot_state))
 744                         graceout=datetime.datetime.now()+datetime.timedelta(1)
 745                 status[hostname] = boot_state
 746             # refresh tocheck
 747             tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ]
 748             if not tocheck:
 749                 return True
 750             if datetime.datetime.now() > timeout:
 751                 for hostname in tocheck:
 752                     utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
 753                 return False
 754             # otherwise, sleep for a while
 755             time.sleep(period)
 756         # only useful in empty plcs
 757         return True
 758
 759     def nodes_booted(self):
 760         return self.nodes_check_boot_state('boot',timeout_minutes=20,silent_minutes=15)
 761
 762     def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=20):
 763         # compute timeout
 764         timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
 765         graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
 766         vservername=self.vservername
 767         if debug:
 768             message="debug"
 769             local_key = "keys/%(vservername)s-debug.rsa"%locals()
 770         else:
 771             message="boot"
 772             local_key = "keys/%(vservername)s.rsa"%locals()
 773         tocheck = self.all_hostnames()
 774         utils.header("checking ssh access (expected in %s mode) to nodes %r"%(message,tocheck))
 775         utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
 776                          (timeout_minutes,silent_minutes,period))
 777         while tocheck:
 778             for hostname in tocheck:
 779                 # try to run 'hostname' in the node
 780                 command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a")
 781                 # don't spam logs - show the command only after the grace period
 782                 if datetime.datetime.now() > graceout:
 783                     success=utils.system(command)
 784                 else:
 785                     # truly silent, just print out a dot to show we're alive
 786                     print '.',
 787                     sys.stdout.flush()
 788                     command += " 2>/dev/null"
 789                     if self.options.dry_run:
 790                         print 'dry_run',command
 791                         success=0
 792                     else:
 793                         success=os.system(command)
 794                 if success==0:
 795                     utils.header('Successfully entered root@%s (%s)'%(hostname,message))
 796                     # refresh tocheck
 797                     tocheck.remove(hostname)
 798                 else:
 799                     # we will have tried real nodes once, in case they're up - but if not, just skip
 800                     (site_spec,node_spec)=self.locate_hostname(hostname)
 801                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 802                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
 803                         tocheck.remove(hostname)
 804             if  not tocheck:
 805                 return True
 806             if datetime.datetime.now() > timeout:
 807                 for hostname in tocheck:
 808                     utils.header("FAILURE to ssh into %s"%hostname)
 809                 return False
 810             # otherwise, sleep for a while
 811             time.sleep(period)
 812         # only useful in empty plcs
 813         return True
 814
 815     def nodes_ssh_debug(self):
 816         return self.check_nodes_ssh(debug=True,timeout_minutes=30,silent_minutes=10)
 817
 818     def nodes_ssh_boot(self):
 819         return self.check_nodes_ssh(debug=False,timeout_minutes=30,silent_minutes=10)
 820
 821     @node_mapper
 822     def init_node (self): pass
 823     @node_mapper
 824     def bootcd (self): pass
 825     @node_mapper
 826     def configure_qemu (self): pass
 827     @node_mapper
 828     def reinstall_node (self): pass
 829     @node_mapper
 830     def export_qemu (self): pass
 831
 832     ### check sanity : invoke scripts from qaapi/qa/tests/{node,slice}
 833     def check_sanity_node (self):
 834         return self.locate_first_node().check_sanity()
 835     def check_sanity_sliver (self) :
 836         return self.locate_first_sliver().check_sanity()
 837
 838     def check_sanity (self):
 839         return self.check_sanity_node() and self.check_sanity_sliver()
 840
 841     ### initscripts
 842     def do_check_initscripts(self):
 843         overall = True
 844         for slice_spec in self.plc_spec['slices']:
 845             if not slice_spec.has_key('initscriptname'):
 846                 continue
 847             initscript=slice_spec['initscriptname']
 848             for nodename in slice_spec['nodenames']:
 849                 (site,node) = self.locate_node (nodename)
 850                 # xxx - passing the wrong site - probably harmless
 851                 test_site = TestSite (self,site)
 852                 test_slice = TestSlice (self,test_site,slice_spec)
 853                 test_node = TestNode (self,test_site,node)
 854                 test_sliver = TestSliver (self, test_node, test_slice)
 855                 if not test_sliver.check_initscript(initscript):
 856                     overall = False
 857         return overall
 858
 859     def check_initscripts(self):
 860             return self.do_check_initscripts()
 861
 862     def initscripts (self):
 863         for initscript in self.plc_spec['initscripts']:
 864             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
 865             self.apiserver.AddInitScript(self.auth_root(),initscript['initscript_fields'])
 866         return True
 867
 868     def clean_initscripts (self):
 869         for initscript in self.plc_spec['initscripts']:
 870             initscript_name = initscript['initscript_fields']['name']
 871             print('Attempting to delete %s in plc %s'%(initscript_name,self.plc_spec['name']))
 872             try:
 873                 self.apiserver.DeleteInitScript(self.auth_root(),initscript_name)
 874                 print initscript_name,'deleted'
 875             except:
 876                 print 'deletion went wrong - probably did not exist'
 877         return True
 878
 879     ### manage slices
 880     def slices (self):
 881         return self.do_slices()
 882
 883     def clean_slices (self):
 884         return self.do_slices("delete")
 885
 886     def do_slices (self,  action="add"):
 887         for slice in self.plc_spec['slices']:
 888             site_spec = self.locate_site (slice['sitename'])
 889             test_site = TestSite(self,site_spec)
 890             test_slice=TestSlice(self,test_site,slice)
 891             if action != "add":
 892                 utils.header("Deleting slices in site %s"%test_site.name())
 893                 test_slice.delete_slice()
 894             else:
 895                 utils.pprint("Creating slice",slice)
 896                 test_slice.create_slice()
 897                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
 898         return True
 899
 900     @slice_mapper_options
 901     def check_slice(self): pass
 902
 903     @node_mapper
 904     def clear_known_hosts (self): pass
 905
 906     @node_mapper
 907     def start_node (self) : pass
 908
 909     def check_tcp (self):
 910         specs = self.plc_spec['tcp_test']
 911         overall=True
 912         for spec in specs:
 913             port = spec['port']
 914             # server side
 915             s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
 916             if not s_test_sliver.run_tcp_server(port,timeout=10):
 917                 overall=False
 918                 break
 919
 920             # idem for the client side
 921             c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
 922             if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
 923                 overall=False
 924         return overall
 925
 926     def plcsh_stress_test (self):
 927         # install the stress-test in the plc image
 928         location = "/usr/share/plc_api/plcsh-stress-test.py"
 929         remote="/vservers/%s/%s"%(self.vservername,location)
 930         self.test_ssh.copy_abs("plcsh-stress-test.py",remote)
 931         command = location
 932         command += " -- --check"
 933         if self.options.size == 1:
 934             command +=  " --tiny"
 935         return ( self.run_in_guest(command) == 0)
 936
 937     def gather_logs (self):
 938         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
 939         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
 940         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
 941         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
 942         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
 943         # (1.a)
 944         print "-------------------- TestPlc.gather_logs : PLC's /var/log"
 945         self.gather_var_logs ()
 946         # (1.b)
 947         print "-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/"
 948         self.gather_pgsql_logs ()
 949         # (2)
 950         print "-------------------- TestPlc.gather_logs : nodes's QEMU logs"
 951         for site_spec in self.plc_spec['sites']:
 952             test_site = TestSite (self,site_spec)
 953             for node_spec in site_spec['nodes']:
 954                 test_node=TestNode(self,test_site,node_spec)
 955                 test_node.gather_qemu_logs()
 956         # (3)
 957         print "-------------------- TestPlc.gather_logs : nodes's /var/log"
 958         self.gather_nodes_var_logs()
 959         # (4)
 960         print "-------------------- TestPlc.gather_logs : sample sliver's /var/log"
 961         self.gather_slivers_var_logs()
 962         return True
 963
 964     def gather_slivers_var_logs(self):
 965         for test_sliver in self.all_sliver_objs():
 966             remote = test_sliver.tar_var_logs()
 967             utils.system("mkdir -p logs/sliver.var-log.%s"%test_sliver.name())
 968             command = remote + " | tar -C logs/sliver.var-log.%s -xf -"%test_sliver.name()
 969             utils.system(command)
 970         return True
 971
 972     def gather_var_logs (self):
 973         utils.system("mkdir -p logs/myplc.var-log.%s"%self.name())
 974         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
 975         command = to_plc + "| tar -C logs/myplc.var-log.%s -xf -"%self.name()
 976         utils.system(command)
 977         command = "chmod a+r,a+x logs/myplc.var-log.%s/httpd"%self.name()
 978         utils.system(command)
 979
 980     def gather_pgsql_logs (self):
 981         utils.system("mkdir -p logs/myplc.pgsql-log.%s"%self.name())
 982         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
 983         command = to_plc + "| tar -C logs/myplc.pgsql-log.%s -xf -"%self.name()
 984         utils.system(command)
 985
 986     def gather_nodes_var_logs (self):
 987         for site_spec in self.plc_spec['sites']:
 988             test_site = TestSite (self,site_spec)
 989             for node_spec in site_spec['nodes']:
 990                 test_node=TestNode(self,test_site,node_spec)
 991                 test_ssh = TestSsh (test_node.name(),key="/etc/planetlab/root_ssh_key.rsa")
 992                 to_plc = self.actual_command_in_guest ( test_ssh.actual_command("tar -C /var/log -cf - ."))
 993                 command = to_plc + "| tar -C logs/node.var-log.%s -xf -"%test_node.name()
 994                 utils.system("mkdir -p logs/node.var-log.%s"%test_node.name())
 995                 utils.system(command)
 996
 997
 998     # returns the filename to use for sql dump/restore, using options.dbname if set
 999     def dbfile (self, database):
1000         # uses options.dbname if it is found
1001         try:
1002             name=self.options.dbname
1003             if not isinstance(name,StringTypes):
1004                 raise Exception
1005         except:
1006             t=datetime.datetime.now()
1007             d=t.date()
1008             name=str(d)
1009         return "/root/%s-%s.sql"%(database,name)
1010
1011     def db_dump(self):
1012         dump=self.dbfile("planetab4")
1013         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
1014         utils.header('Dumped planetlab4 database in %s'%dump)
1015         return True
1016
1017     def db_restore(self):
1018         dump=self.dbfile("planetab4")
1019         ##stop httpd service
1020         self.run_in_guest('service httpd stop')
1021         # xxx - need another wrapper
1022         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
1023         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
1024         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
1025         ##starting httpd service
1026         self.run_in_guest('service httpd start')
1027
1028         utils.header('Database restored from ' + dump)
1029
1030     @standby_generic
1031     def standby_1(): pass
1032     @standby_generic
1033     def standby_2(): pass
1034     @standby_generic
1035     def standby_3(): pass
1036     @standby_generic
1037     def standby_4(): pass
1038     @standby_generic
1039     def standby_5(): pass
1040     @standby_generic
1041     def standby_6(): pass
1042     @standby_generic
1043     def standby_7(): pass
1044     @standby_generic
1045     def standby_8(): pass
1046     @standby_generic
1047     def standby_9(): pass
1048     @standby_generic
1049     def standby_10(): pass
1050     @standby_generic
1051     def standby_11(): pass
1052     @standby_generic
1053     def standby_12(): pass
1054     @standby_generic
1055     def standby_13(): pass
1056     @standby_generic
1057     def standby_14(): pass
1058     @standby_generic
1059     def standby_15(): pass
1060     @standby_generic
1061     def standby_16(): pass
1062     @standby_generic
1063     def standby_17(): pass
1064     @standby_generic
1065     def standby_18(): pass
1066     @standby_generic
1067     def standby_19(): pass
1068     @standby_generic
1069     def standby_20(): pass
1070