system/TestPlc.py

   1 # $Id$
   2 import os, os.path
   3 import datetime
   4 import time
   5 import sys
   6 import traceback
   7 from types import StringTypes
   8 import socket
   9
  10 import utils
  11 from TestSite import TestSite
  12 from TestNode import TestNode
  13 from TestUser import TestUser
  14 from TestKey import TestKey
  15 from TestSlice import TestSlice
  16 from TestSliver import TestSliver
  17 from TestBox import TestBox
  18 from TestSsh import TestSsh
  19 from TestApiserver import TestApiserver
  20
  21 # step methods must take (self) and return a boolean (options is a member of the class)
  22
  23 def standby(minutes,dry_run):
  24     utils.header('Entering StandBy for %d mn'%minutes)
  25     if dry_run:
  26         print 'dry_run'
  27     else:
  28         time.sleep(60*minutes)
  29     return True
  30
  31 def standby_generic (func):
  32     def actual(self):
  33         minutes=int(func.__name__.split("_")[1])
  34         return standby(minutes,self.options.dry_run)
  35     return actual
  36
  37 def node_mapper (method):
  38     def actual(self):
  39         overall=True
  40         node_method = TestNode.__dict__[method.__name__]
  41         for site_spec in self.plc_spec['sites']:
  42             test_site = TestSite (self,site_spec)
  43             for node_spec in site_spec['nodes']:
  44                 test_node = TestNode (self,test_site,node_spec)
  45                 if not node_method(test_node): overall=False
  46         return overall
  47     return actual
  48
  49 def slice_mapper_options (method):
  50     def actual(self):
  51         overall=True
  52         slice_method = TestSlice.__dict__[method.__name__]
  53         for slice_spec in self.plc_spec['slices']:
  54             site_spec = self.locate_site (slice_spec['sitename'])
  55             test_site = TestSite(self,site_spec)
  56             test_slice=TestSlice(self,test_site,slice_spec)
  57             if not slice_method(test_slice,self.options): overall=False
  58         return overall
  59     return actual
  60
  61 SEP='<sep>'
  62
  63 class TestPlc:
  64
  65     default_steps = [
  66         'display','uninstall','install','install_rpm',
  67         'configure', 'start', 'fetch_keys', SEP,
  68         'store_keys', 'clear_known_hosts', 'initscripts', SEP,
  69         'sites', 'nodes', 'slices', 'nodegroups', SEP,
  70         'init_node','bootcd', 'configure_qemu', 'export_qemu',
  71         'kill_all_qemus', 'reinstall_node','start_node', SEP,
  72         # better use of time: do this now that the nodes are taking off
  73         'plcsh_stress_test', SEP,
  74         'nodes_ssh_debug', 'nodes_ssh_boot', 'check_slice', 'check_initscripts', SEP,
  75         'check_tcp',  SEP,
  76         'force_gather_logs', 'force_kill_qemus', 'force_record_tracker','force_free_tracker',
  77         ]
  78     other_steps = [
  79         'stop_all_vservers','fresh_install', 'cache_rpm', 'stop', 'vs_start', SEP,
  80         'check_sanity',  SEP,
  81         'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
  82         'clean_sites', 'clean_nodes',
  83         'clean_slices', 'clean_keys', SEP,
  84         'show_boxes', 'list_all_qemus', 'list_qemus', SEP,
  85         'db_dump' , 'db_restore', 'cleanup_trackers', 'cleanup_all_trackers',
  86         'standby_1 through 20',
  87         ]
  88
  89     @staticmethod
  90     def printable_steps (list):
  91         return " ".join(list).replace(" "+SEP+" "," \\\n")
  92     @staticmethod
  93     def valid_step (step):
  94         return step != SEP
  95
  96     def __init__ (self,plc_spec,options):
  97         self.plc_spec=plc_spec
  98         self.options=options
  99         self.test_ssh=TestSsh(self.plc_spec['hostname'],self.options.buildname)
 100         try:
 101             self.vserverip=plc_spec['vserverip']
 102             self.vservername=plc_spec['vservername']
 103             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
 104             self.vserver=True
 105         except:
 106             raise Exception,'chroot-based myplc testing is deprecated'
 107         self.apiserver=TestApiserver(self.url,options.dry_run)
 108
 109     def name(self):
 110         name=self.plc_spec['name']
 111         return "%s.%s"%(name,self.vservername)
 112
 113     def hostname(self):
 114         return self.plc_spec['hostname']
 115
 116     def is_local (self):
 117         return self.test_ssh.is_local()
 118
 119     # define the API methods on this object through xmlrpc
 120     # would help, but not strictly necessary
 121     def connect (self):
 122         pass
 123
 124     def actual_command_in_guest (self,command):
 125         return self.test_ssh.actual_command(self.host_to_guest(command))
 126
 127     def start_guest (self):
 128       return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))
 129
 130     def run_in_guest (self,command):
 131         return utils.system(self.actual_command_in_guest(command))
 132
 133     def run_in_host (self,command):
 134         return self.test_ssh.run_in_buildname(command)
 135
 136     #command gets run in the vserver
 137     def host_to_guest(self,command):
 138         return "vserver %s exec %s"%(self.vservername,command)
 139
    # builds the host-side command that starts the vserver
 141     def start_guest_in_host(self):
 142         return "vserver %s start"%(self.vservername)
 143
 144     # xxx quick n dirty
 145     def run_in_guest_piped (self,local,remote):
 146         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),keep_stdin=True))
 147
 148     def auth_root (self):
 149         return {'Username':self.plc_spec['PLC_ROOT_USER'],
 150                 'AuthMethod':'password',
 151                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
 152                 'Role' : self.plc_spec['role']
 153                 }
 154     def locate_site (self,sitename):
 155         for site in self.plc_spec['sites']:
 156             if site['site_fields']['name'] == sitename:
 157                 return site
 158             if site['site_fields']['login_base'] == sitename:
 159                 return site
 160         raise Exception,"Cannot locate site %s"%sitename
 161
 162     def locate_node (self,nodename):
 163         for site in self.plc_spec['sites']:
 164             for node in site['nodes']:
 165                 if node['name'] == nodename:
 166                     return (site,node)
 167         raise Exception,"Cannot locate node %s"%nodename
 168
 169     def locate_hostname (self,hostname):
 170         for site in self.plc_spec['sites']:
 171             for node in site['nodes']:
 172                 if node['node_fields']['hostname'] == hostname:
 173                     return (site,node)
 174         raise Exception,"Cannot locate hostname %s"%hostname
 175
 176     def locate_key (self,keyname):
 177         for key in self.plc_spec['keys']:
 178             if key['name'] == keyname:
 179                 return key
 180         raise Exception,"Cannot locate key %s"%keyname
 181
 182     def locate_slice (self, slicename):
 183         for slice in self.plc_spec['slices']:
 184             if slice['slice_fields']['name'] == slicename:
 185                 return slice
 186         raise Exception,"Cannot locate slice %s"%slicename
 187
 188     def all_sliver_objs (self):
 189         result=[]
 190         for slice_spec in self.plc_spec['slices']:
 191             slicename = slice_spec['slice_fields']['name']
 192             for nodename in slice_spec['nodenames']:
 193                 result.append(self.locate_sliver_obj (nodename,slicename))
 194         return result
 195
 196     def locate_sliver_obj (self,nodename,slicename):
 197         (site,node) = self.locate_node(nodename)
 198         slice = self.locate_slice (slicename)
 199         # build objects
 200         test_site = TestSite (self, site)
 201         test_node = TestNode (self, test_site,node)
 202         # xxx the slice site is assumed to be the node site - mhh - probably harmless
 203         test_slice = TestSlice (self, test_site, slice)
 204         return TestSliver (self, test_node, test_slice)
 205
 206     def locate_first_node(self):
 207         nodename=self.plc_spec['slices'][0]['nodenames'][0]
 208         (site,node) = self.locate_node(nodename)
 209         test_site = TestSite (self, site)
 210         test_node = TestNode (self, test_site,node)
 211         return test_node
 212
 213     def locate_first_sliver (self):
 214         slice_spec=self.plc_spec['slices'][0]
 215         slicename=slice_spec['slice_fields']['name']
 216         nodename=slice_spec['nodenames'][0]
 217         return self.locate_sliver_obj(nodename,slicename)
 218
 219     # all different hostboxes used in this plc
 220     def gather_hostBoxes(self):
 221         # maps on sites and nodes, return [ (host_box,test_node) ]
 222         tuples=[]
 223         for site_spec in self.plc_spec['sites']:
 224             test_site = TestSite (self,site_spec)
 225             for node_spec in site_spec['nodes']:
 226                 test_node = TestNode (self, test_site, node_spec)
 227                 if not test_node.is_real():
 228                     tuples.append( (test_node.host_box(),test_node) )
 229         # transform into a dict { 'host_box' -> [ test_node .. ] }
 230         result = {}
 231         for (box,node) in tuples:
 232             if not result.has_key(box):
 233                 result[box]=[node]
 234             else:
 235                 result[box].append(node)
 236         return result
 237
 238     # a step for checking this stuff
 239     def show_boxes (self):
 240         for (box,nodes) in self.gather_hostBoxes().iteritems():
 241             print box,":"," + ".join( [ node.name() for node in nodes ] )
 242         return True
 243
 244     # make this a valid step
 245     def kill_all_qemus(self):
 246         # this is the brute force version, kill all qemus on that host box
 247         for (box,nodes) in self.gather_hostBoxes().iteritems():
 248             # pass the first nodename, as we don't push template-qemu on testboxes
 249             nodedir=nodes[0].nodedir()
 250             TestBox(box,self.options.buildname).kill_all_qemus(nodedir)
 251         return True
 252
 253     # make this a valid step
 254     def list_all_qemus(self):
 255         for (box,nodes) in self.gather_hostBoxes().iteritems():
 256             # this is the brute force version, kill all qemus on that host box
 257             TestBox(box,self.options.buildname).list_all_qemus()
 258         return True
 259
    # list only the right qemus
 261     def list_qemus(self):
 262         for (box,nodes) in self.gather_hostBoxes().iteritems():
 263             # the fine-grain version
 264             for node in nodes:
 265                 node.list_qemu()
 266         return True
 267
 268     # kill only the right qemus
 269     def kill_qemus(self):
 270         for (box,nodes) in self.gather_hostBoxes().iteritems():
 271             # the fine-grain version
 272             for node in nodes:
 273                 node.kill_qemu()
 274         return True
 275
 276     #################### display config
 277     def display (self):
 278         self.display_pass (1)
 279         self.display_pass (2)
 280         return True
 281
 282     # entry point
 283     def display_pass (self,passno):
 284         for (key,val) in self.plc_spec.iteritems():
 285             if passno == 2:
 286                 if key == 'sites':
 287                     for site in val:
 288                         self.display_site_spec(site)
 289                         for node in site['nodes']:
 290                             self.display_node_spec(node)
 291                 elif key=='initscripts':
 292                     for initscript in val:
 293                         self.display_initscript_spec (initscript)
 294                 elif key=='slices':
 295                     for slice in val:
 296                         self.display_slice_spec (slice)
 297                 elif key=='keys':
 298                     for key in val:
 299                         self.display_key_spec (key)
 300             elif passno == 1:
 301                 if key not in ['sites','initscripts','slices','keys']:
 302                     print '*   ',key,':',val
 303
 304     def display_site_spec (self,site):
 305         print '* ======== site',site['site_fields']['name']
 306         for (k,v) in site.iteritems():
 307             if k=='nodes':
 308                 if v:
 309                     print '*       ','nodes : ',
 310                     for node in v:
 311                         print node['node_fields']['hostname'],'',
 312                     print ''
 313             elif k=='users':
 314                 if v:
 315                     print '*       users : ',
 316                     for user in v:
 317                         print user['name'],'',
 318                     print ''
 319             elif k == 'site_fields':
 320                 print '*       login_base',':',v['login_base']
 321             elif k == 'address_fields':
 322                 pass
 323             else:
 324                 print '*       ',k,
 325                 PrettyPrinter(indent=8,depth=2).pprint(v)
 326
 327     def display_initscript_spec (self,initscript):
 328         print '* ======== initscript',initscript['initscript_fields']['name']
 329
 330     def display_key_spec (self,key):
 331         print '* ======== key',key['name']
 332
 333     def display_slice_spec (self,slice):
 334         print '* ======== slice',slice['slice_fields']['name']
 335         for (k,v) in slice.iteritems():
 336             if k=='nodenames':
 337                 if v:
 338                     print '*       nodes : ',
 339                     for nodename in v:
 340                         print nodename,'',
 341                     print ''
 342             elif k=='usernames':
 343                 if v:
 344                     print '*       users : ',
 345                     for username in v:
 346                         print username,'',
 347                     print ''
 348             elif k=='slice_fields':
 349                 print '*       fields',':',
 350                 print 'max_nodes=',v['max_nodes'],
 351                 print ''
 352             else:
 353                 print '*       ',k,v
 354
 355     def display_node_spec (self,node):
 356         print "*           node",node['name'],"host_box=",node['host_box'],
 357         print "hostname=",node['node_fields']['hostname'],
 358         print "ip=",node['interface_fields']['ip']
 359
 360
 361     # another entry point for just showing the boxes involved
 362     def display_mapping (self):
 363         TestPlc.display_mapping_plc(self.plc_spec)
 364         return True
 365
 366     @staticmethod
 367     def display_mapping_plc (plc_spec):
 368         print '* MyPLC',plc_spec['name']
 369         print '*\tvserver address = root@%s:/vservers/%s'%(plc_spec['hostname'],plc_spec['vservername'])
 370         print '*\tIP = %s/%s'%(plc_spec['PLC_API_HOST'],plc_spec['vserverip'])
 371         for site_spec in plc_spec['sites']:
 372             for node_spec in site_spec['nodes']:
 373                 TestPlc.display_mapping_node(node_spec)
 374
 375     @staticmethod
 376     def display_mapping_node (node_spec):
 377         print '*   NODE %s'%(node_spec['name'])
 378         print '*\tqemu box %s'%node_spec['host_box']
 379         print '*\thostname=%s'%node_spec['node_fields']['hostname']
 380
 381     ### utility methods for handling the pool of IP addresses allocated to plcs
 382     # Logic
 383     # (*) running plcs are recorded in the file named ~/running-test-plcs
 384     # (*) this file contains a line for each running plc, older first
 385     # (*) each line contains the vserver name + the hostname of the (vserver) testbox where it sits
    # (*) the free_tracker method stops the oldest vservers once more than TRACKER_KEEP_VSERVERS are recorded
    # (*) the record_tracker method adds an entry at the bottom of the file
    # (*) the cleanup_trackers method is meant to stop all recorded vservers and remove the tracker file
 389
 390     TRACKER_FILE=os.environ['HOME']+"/running-test-plcs"
 391     # how many concurrent plcs are we keeping alive - adjust with the IP pool size
 392     TRACKER_KEEP_VSERVERS = 12
 393
 394     def record_tracker (self):
 395         try:
 396             lines=file(TestPlc.TRACKER_FILE).readlines()
 397         except:
 398             lines=[]
 399
 400         this_line="%s %s\n"%(self.vservername,self.test_ssh.hostname)
 401         for line in lines:
 402             if line==this_line:
 403                 print 'this vserver is already included in %s'%TestPlc.TRACKER_FILE
 404                 return True
 405         if self.options.dry_run:
 406             print 'dry_run: record_tracker - skipping tracker update'
 407             return True
 408         tracker=file(TestPlc.TRACKER_FILE,"w")
 409         for line in lines+[this_line]:
 410             tracker.write(line)
 411         tracker.close()
 412         print "Recorded %s in running plcs on host %s"%(self.vservername,self.test_ssh.hostname)
 413         return True
 414
 415     def free_tracker (self, keep_vservers=None):
 416         if not keep_vservers: keep_vservers=TestPlc.TRACKER_KEEP_VSERVERS
 417         try:
 418             lines=file(TestPlc.TRACKER_FILE).readlines()
 419         except:
 420             print 'dry_run: free_tracker - skipping tracker update'
 421             return True
 422         how_many = len(lines) - keep_vservers
 423         # nothing todo until we have more than keep_vservers in the tracker
 424         if how_many <= 0:
 425             print 'free_tracker : limit %d not reached'%keep_vservers
 426             return True
 427         to_stop = lines[:how_many]
 428         to_keep = lines[how_many:]
 429         for line in to_stop:
 430             print '>%s<'%line
 431             [vname,hostname]=line.split()
 432             command=TestSsh(hostname).actual_command("vserver --silent %s stop"%vname)
 433             utils.system(command)
 434         if self.options.dry_run:
 435             print 'dry_run: free_tracker would stop %d vservers'%len(to_stop)
 436             for line in to_stop: print line,
 437             print 'dry_run: free_tracker would keep %d vservers'%len(to_keep)
 438             for line in to_keep: print line,
 439             return True
 440         print "Storing %d remaining vservers in %s"%(len(to_keep),TestPlc.TRACKER_FILE)
 441         tracker=open(TestPlc.TRACKER_FILE,"w")
 442         for line in to_keep:
 443             tracker.write(line)
 444         tracker.close()
 445         return True
 446
 447     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 448     def cleanup_trackers (self):
 449         try:
 450             for line in TestPlc.TRACKER_FILE.readlines():
 451                 [vname,hostname]=line.split()
 452                 stop="vserver --silent %s stop"%vname
 453                 command=TestSsh(hostname).actual_command(stop)
 454                 utils.system(command)
 455             clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 456             utils.system(self.test_ssh.actual_command(clean_tracker))
 457         except:
 458             return True
 459
 460     # this should/could stop only the ones in TRACKER_FILE if that turns out to be reliable
 461     def cleanup_all_trackers (self):
 462         stop_all = "cd /vservers ; for i in * ; do vserver --silent $i stop ; done"
 463         utils.system(self.test_ssh.actual_command(stop_all))
 464         clean_tracker = "rm -f %s"%TestPlc.TRACKER_FILE
 465         utils.system(self.test_ssh.actual_command(clean_tracker))
 466         return True
 467
 468     def uninstall(self):
 469         self.run_in_host("vserver --silent %s delete"%self.vservername)
 470         return True
 471
 472     ### install
 473     def install(self):
 474         if self.is_local():
 475             # a full path for the local calls
 476             build_dir=os.path.dirname(sys.argv[0])
 477             # sometimes this is empty - set to "." in such a case
 478             if not build_dir: build_dir="."
 479             build_dir += "/build"
 480         else:
 481             # use a standard name - will be relative to remote buildname
 482             build_dir="build"
 483         # run checkout in any case - would do an update if already exists
 484         build_checkout = "svn checkout %s %s"%(self.options.build_url,build_dir)
 485         if self.run_in_host(build_checkout) != 0:
 486             return False
 487         # the repo url is taken from arch-rpms-url
 488         # with the last step (i386.) removed
 489         repo_url = self.options.arch_rpms_url
 490         for level in [ 'arch' ]:
 491             repo_url = os.path.dirname(repo_url)
 492         # pass the vbuild-nightly options to vtest-init-vserver
 493         test_env_options=""
 494         test_env_options += " -p %s"%self.options.personality
 495         test_env_options += " -d %s"%self.options.pldistro
 496         test_env_options += " -f %s"%self.options.fcdistro
 497         script="vtest-init-vserver.sh"
 498         vserver_name = self.vservername
 499         vserver_options="--netdev eth0 --interface %s"%self.vserverip
 500         try:
 501             vserver_hostname=socket.gethostbyaddr(self.vserverip)[0]
 502             vserver_options += " --hostname %s"%vserver_hostname
 503         except:
 504             pass
 505         create_vserver="%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s"%locals()
 506         return self.run_in_host(create_vserver) == 0
 507
 508     ### install_rpm
 509     def install_rpm(self):
 510         return self.run_in_guest("yum -y install myplc-native")==0 \
 511             and self.run_in_guest("yum -y install noderepo-%s-%s"%(self.options.pldistro,self.options.arch))==0
 512
 513     ###
 514     def configure(self):
 515         tmpname='%s.plc-config-tty'%(self.name())
 516         fileconf=open(tmpname,'w')
 517         for var in [ 'PLC_NAME',
 518                      'PLC_ROOT_PASSWORD',
 519                      'PLC_ROOT_USER',
 520                      'PLC_MAIL_ENABLED',
 521                      'PLC_MAIL_SUPPORT_ADDRESS',
 522                      'PLC_DB_HOST',
 523                      'PLC_API_HOST',
 524                      'PLC_WWW_HOST',
 525                      'PLC_BOOT_HOST',
 526                      'PLC_NET_DNS1',
 527                      'PLC_NET_DNS2']:
 528             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
 529         fileconf.write('w\n')
 530         fileconf.write('q\n')
 531         fileconf.close()
 532         utils.system('cat %s'%tmpname)
 533         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
 534         utils.system('rm %s'%tmpname)
 535         return True
 536
 537     def start(self):
 538         self.run_in_guest('service plc start')
 539         return True
 540
 541     def stop(self):
 542         self.run_in_guest('service plc stop')
 543         return True
 544
 545     def vs_start (self):
 546         self.start_guest()
 547         return True
 548
 549     # stores the keys from the config for further use
 550     def store_keys(self):
 551         for key_spec in self.plc_spec['keys']:
 552                 TestKey(self,key_spec).store_key()
 553         return True
 554
 555     def clean_keys(self):
 556         utils.system("rm -rf %s/keys/"%os.path(sys.argv[0]))
 557
 558     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 559     # for later direct access to the nodes
 560     def fetch_keys(self):
 561         dir="./keys"
 562         if not os.path.isdir(dir):
 563             os.mkdir(dir)
 564         vservername=self.vservername
 565         overall=True
 566         prefix = 'root_ssh_key'
 567         for ext in [ 'pub', 'rsa' ] :
 568             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 569             dst="keys/%(vservername)s.%(ext)s"%locals()
 570             if self.test_ssh.fetch(src,dst) != 0: overall=False
 571         prefix = 'debug_ssh_key'
 572         for ext in [ 'pub', 'rsa' ] :
 573             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 574             dst="keys/%(vservername)s-debug.%(ext)s"%locals()
 575             if self.test_ssh.fetch(src,dst) != 0: overall=False
 576         return overall
 577
 578     def sites (self):
 579         return self.do_sites()
 580
 581     def clean_sites (self):
 582         return self.do_sites(action="delete")
 583
 584     def do_sites (self,action="add"):
 585         for site_spec in self.plc_spec['sites']:
 586             test_site = TestSite (self,site_spec)
 587             if (action != "add"):
 588                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
 589                 test_site.delete_site()
 590                 # deleted with the site
 591                 #test_site.delete_users()
 592                 continue
 593             else:
 594                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
 595                 test_site.create_site()
 596                 test_site.create_users()
 597         return True
 598
 599     def clean_all_sites (self):
 600         print 'auth_root',self.auth_root()
 601         site_ids = [s['site_id'] for s in self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])]
 602         for site_id in site_ids:
 603             print 'Deleting site_id',site_id
 604             self.apiserver.DeleteSite(self.auth_root(),site_id)
 605
 606     def nodes (self):
 607         return self.do_nodes()
 608     def clean_nodes (self):
 609         return self.do_nodes(action="delete")
 610
 611     def do_nodes (self,action="add"):
 612         for site_spec in self.plc_spec['sites']:
 613             test_site = TestSite (self,site_spec)
 614             if action != "add":
 615                 utils.header("Deleting nodes in site %s"%test_site.name())
 616                 for node_spec in site_spec['nodes']:
 617                     test_node=TestNode(self,test_site,node_spec)
 618                     utils.header("Deleting %s"%test_node.name())
 619                     test_node.delete_node()
 620             else:
 621                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
 622                 for node_spec in site_spec['nodes']:
 623                     utils.pprint('Creating node %s'%node_spec,node_spec)
 624                     test_node = TestNode (self,test_site,node_spec)
 625                     test_node.create_node ()
 626         return True
 627
 628     def nodegroups (self):
 629         return self.do_nodegroups("add")
 630     def clean_nodegroups (self):
 631         return self.do_nodegroups("delete")
 632
 633     # create nodegroups if needed, and populate
 634     def do_nodegroups (self, action="add"):
 635         # 1st pass to scan contents
 636         groups_dict = {}
 637         for site_spec in self.plc_spec['sites']:
 638             test_site = TestSite (self,site_spec)
 639             for node_spec in site_spec['nodes']:
 640                 test_node=TestNode (self,test_site,node_spec)
 641                 if node_spec.has_key('nodegroups'):
 642                     nodegroupnames=node_spec['nodegroups']
 643                     if isinstance(nodegroupnames,StringTypes):
 644                         nodegroupnames = [ nodegroupnames ]
 645                     for nodegroupname in nodegroupnames:
 646                         if not groups_dict.has_key(nodegroupname):
 647                             groups_dict[nodegroupname]=[]
 648                         groups_dict[nodegroupname].append(test_node.name())
 649         auth=self.auth_root()
 650         overall = True
 651         for (nodegroupname,group_nodes) in groups_dict.iteritems():
 652             if action == "add":
 653                 print 'nodegroups:','dealing with nodegroup',nodegroupname,'on nodes',group_nodes
 654                 # first, check if the nodetagtype is here
 655                 tag_types = self.apiserver.GetTagTypes(auth,{'tagname':nodegroupname})
 656                 if tag_types:
 657                     tag_type_id = tag_types[0]['tag_type_id']
 658                 else:
 659                     tag_type_id = self.apiserver.AddTagType(auth,
 660                                                             {'tagname':nodegroupname,
 661                                                              'description': 'for nodegroup %s'%nodegroupname,
 662                                                              'category':'test',
 663                                                              'min_role_id':10})
 664                 print 'located tag (type)',nodegroupname,'as',tag_type_id
 665                 # create nodegroup
 666                 nodegroups = self.apiserver.GetNodeGroups (auth, {'groupname':nodegroupname})
 667                 if not nodegroups:
 668                     self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
 669                     print 'created nodegroup',nodegroupname,'from tagname',nodegroupname,'and value','yes'
 670                 # set node tag on all nodes, value='yes'
 671                 for nodename in group_nodes:
 672                     try:
 673                         self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
 674                     except:
 675                         traceback.print_exc()
 676                         print 'node',nodename,'seems to already have tag',nodegroupname
 677                     # check anyway
 678                     try:
 679                         expect_yes = self.apiserver.GetNodeTags(auth,
 680                                                                 {'hostname':nodename,
 681                                                                  'tagname':nodegroupname},
 682                                                                 ['tagvalue'])[0]['tagvalue']
 683                         if expect_yes != "yes":
 684                             print 'Mismatch node tag on node',nodename,'got',expect_yes
 685                             overall=False
 686                     except:
 687                         if not self.options.dry_run:
 688                             print 'Cannot find tag',nodegroupname,'on node',nodename
 689                             overall = False
 690             else:
 691                 try:
 692                     print 'cleaning nodegroup',nodegroupname
 693                     self.apiserver.DeleteNodeGroup(auth,nodegroupname)
 694                 except:
 695                     traceback.print_exc()
 696                     overall=False
 697         return overall
 698
 699     def all_hostnames (self) :
 700         hostnames = []
 701         for site_spec in self.plc_spec['sites']:
 702             hostnames += [ node_spec['node_fields']['hostname'] \
 703                            for node_spec in site_spec['nodes'] ]
 704         return hostnames
 705
 706     # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
 707     def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15):
 708         if self.options.dry_run:
 709             print 'dry_run'
 710             return True
 711         # compute timeout
 712         timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
 713         graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
 714         # the nodes that haven't checked yet - start with a full list and shrink over time
 715         tocheck = self.all_hostnames()
 716         utils.header("checking nodes %r"%tocheck)
 717         # create a dict hostname -> status
 718         status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
 719         while tocheck:
 720             # get their status
 721             tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
 722             # update status
 723             for array in tocheck_status:
 724                 hostname=array['hostname']
 725                 boot_state=array['boot_state']
 726                 if boot_state == target_boot_state:
 727                     utils.header ("%s has reached the %s state"%(hostname,target_boot_state))
 728                 else:
 729                     # if it's a real node, never mind
 730                     (site_spec,node_spec)=self.locate_hostname(hostname)
 731                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 732                         utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
 733                         # let's cheat
 734                         boot_state = target_boot_state
 735                     elif datetime.datetime.now() > graceout:
 736                         utils.header ("%s still in '%s' state"%(hostname,boot_state))
 737                         graceout=datetime.datetime.now()+datetime.timedelta(1)
 738                 status[hostname] = boot_state
 739             # refresh tocheck
 740             tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ]
 741             if not tocheck:
 742                 return True
 743             if datetime.datetime.now() > timeout:
 744                 for hostname in tocheck:
 745                     utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
 746                 return False
 747             # otherwise, sleep for a while
 748             time.sleep(period)
 749         # only useful in empty plcs
 750         return True
 751
 752     def nodes_booted(self):
 753         return self.nodes_check_boot_state('boot',timeout_minutes=20,silent_minutes=15)
 754
 755     def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=20):
 756         # compute timeout
 757         timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
 758         graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
 759         vservername=self.vservername
 760         if debug:
 761             message="debug"
 762             local_key = "keys/%(vservername)s-debug.rsa"%locals()
 763         else:
 764             message="boot"
 765             local_key = "keys/%(vservername)s.rsa"%locals()
 766         tocheck = self.all_hostnames()
 767         utils.header("checking ssh access (expected in %s mode) to nodes %r"%(message,tocheck))
 768         utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
 769                          (timeout_minutes,silent_minutes,period))
 770         while tocheck:
 771             for hostname in tocheck:
 772                 # try to run 'hostname' in the node
 773                 command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a")
 774                 # don't spam logs - show the command only after the grace period
 775                 if datetime.datetime.now() > graceout:
 776                     success=utils.system(command)
 777                 else:
 778                     # truly silent, just print out a dot to show we're alive
 779                     print '.',
 780                     sys.stdout.flush()
 781                     command += " 2>/dev/null"
 782                     if self.options.dry_run:
 783                         print 'dry_run',command
 784                         success=0
 785                     else:
 786                         success=os.system(command)
 787                 if success==0:
 788                     utils.header('Successfully entered root@%s (%s)'%(hostname,message))
 789                     # refresh tocheck
 790                     tocheck.remove(hostname)
 791                 else:
 792                     # we will have tried real nodes once, in case they're up - but if not, just skip
 793                     (site_spec,node_spec)=self.locate_hostname(hostname)
 794                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 795                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
 796                         tocheck.remove(hostname)
 797             if  not tocheck:
 798                 return True
 799             if datetime.datetime.now() > timeout:
 800                 for hostname in tocheck:
 801                     utils.header("FAILURE to ssh into %s"%hostname)
 802                 return False
 803             # otherwise, sleep for a while
 804             time.sleep(period)
 805         # only useful in empty plcs
 806         return True
 807
 808     def nodes_ssh_debug(self):
 809         return self.check_nodes_ssh(debug=True,timeout_minutes=30,silent_minutes=10)
 810
 811     def nodes_ssh_boot(self):
 812         return self.check_nodes_ssh(debug=False,timeout_minutes=30,silent_minutes=10)
 813
 814     @node_mapper
 815     def init_node (self): pass
 816     @node_mapper
 817     def bootcd (self): pass
 818     @node_mapper
 819     def configure_qemu (self): pass
 820     @node_mapper
 821     def reinstall_node (self): pass
 822     @node_mapper
 823     def export_qemu (self): pass
 824
 825     ### check sanity : invoke scripts from qaapi/qa/tests/{node,slice}
 826     def check_sanity_node (self):
 827         return self.locate_first_node().check_sanity()
 828     def check_sanity_sliver (self) :
 829         return self.locate_first_sliver().check_sanity()
 830
 831     def check_sanity (self):
 832         return self.check_sanity_node() and self.check_sanity_sliver()
 833
 834     ### initscripts
 835     def do_check_initscripts(self):
 836         overall = True
 837         for slice_spec in self.plc_spec['slices']:
 838             if not slice_spec.has_key('initscriptname'):
 839                 continue
 840             initscript=slice_spec['initscriptname']
 841             for nodename in slice_spec['nodenames']:
 842                 (site,node) = self.locate_node (nodename)
 843                 # xxx - passing the wrong site - probably harmless
 844                 test_site = TestSite (self,site)
 845                 test_slice = TestSlice (self,test_site,slice_spec)
 846                 test_node = TestNode (self,test_site,node)
 847                 test_sliver = TestSliver (self, test_node, test_slice)
 848                 if not test_sliver.check_initscript(initscript):
 849                     overall = False
 850         return overall
 851
 852     def check_initscripts(self):
 853             return self.do_check_initscripts()
 854
 855     def initscripts (self):
 856         for initscript in self.plc_spec['initscripts']:
 857             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
 858             self.apiserver.AddInitScript(self.auth_root(),initscript['initscript_fields'])
 859         return True
 860
 861     def clean_initscripts (self):
 862         for initscript in self.plc_spec['initscripts']:
 863             initscript_name = initscript['initscript_fields']['name']
 864             print('Attempting to delete %s in plc %s'%(initscript_name,self.plc_spec['name']))
 865             try:
 866                 self.apiserver.DeleteInitScript(self.auth_root(),initscript_name)
 867                 print initscript_name,'deleted'
 868             except:
 869                 print 'deletion went wrong - probably did not exist'
 870         return True
 871
 872     ### manage slices
 873     def slices (self):
 874         return self.do_slices()
 875
 876     def clean_slices (self):
 877         return self.do_slices("delete")
 878
 879     def do_slices (self,  action="add"):
 880         for slice in self.plc_spec['slices']:
 881             site_spec = self.locate_site (slice['sitename'])
 882             test_site = TestSite(self,site_spec)
 883             test_slice=TestSlice(self,test_site,slice)
 884             if action != "add":
 885                 utils.header("Deleting slices in site %s"%test_site.name())
 886                 test_slice.delete_slice()
 887             else:
 888                 utils.pprint("Creating slice",slice)
 889                 test_slice.create_slice()
 890                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
 891         return True
 892
 893     @slice_mapper_options
 894     def check_slice(self): pass
 895
 896     @node_mapper
 897     def clear_known_hosts (self): pass
 898
 899     @node_mapper
 900     def start_node (self) : pass
 901
 902     def check_tcp (self):
 903         specs = self.plc_spec['tcp_test']
 904         overall=True
 905         for spec in specs:
 906             port = spec['port']
 907             # server side
 908             s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
 909             if not s_test_sliver.run_tcp_server(port,timeout=10):
 910                 overall=False
 911                 break
 912
 913             # idem for the client side
 914             c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
 915             if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
 916                 overall=False
 917         return overall
 918
 919     def plcsh_stress_test (self):
 920         # install the stress-test in the plc image
 921         location = "/usr/share/plc_api/plcsh-stress-test.py"
 922         remote="/vservers/%s/%s"%(self.vservername,location)
 923         self.test_ssh.copy_abs("plcsh-stress-test.py",remote)
 924         command = location
 925         command += " -- --check"
 926         if self.options.size == 1:
 927             command +=  " --tiny"
 928         return ( self.run_in_guest(command) == 0)
 929
 930     def gather_logs (self):
 931         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
 932         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
 933         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
 934         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
 935         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
 936         # (1.a)
 937         print "-------------------- TestPlc.gather_logs : PLC's /var/log"
 938         self.gather_var_logs ()
 939         # (1.b)
 940         print "-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/"
 941         self.gather_pgsql_logs ()
 942         # (2)
 943         print "-------------------- TestPlc.gather_logs : nodes's QEMU logs"
 944         for site_spec in self.plc_spec['sites']:
 945             test_site = TestSite (self,site_spec)
 946             for node_spec in site_spec['nodes']:
 947                 test_node=TestNode(self,test_site,node_spec)
 948                 test_node.gather_qemu_logs()
 949         # (3)
 950         print "-------------------- TestPlc.gather_logs : nodes's /var/log"
 951         self.gather_nodes_var_logs()
 952         # (4)
 953         print "-------------------- TestPlc.gather_logs : sample sliver's /var/log"
 954         self.gather_slivers_var_logs()
 955         return True
 956
 957     def gather_slivers_var_logs(self):
 958         for test_sliver in self.all_sliver_objs():
 959             remote = test_sliver.tar_var_logs()
 960             utils.system("mkdir -p logs/sliver.var-log.%s"%test_sliver.name())
 961             command = remote + " | tar -C logs/sliver.var-log.%s -xf -"%test_sliver.name()
 962             utils.system(command)
 963         return True
 964
 965     def gather_var_logs (self):
 966         utils.system("mkdir -p logs/myplc.var-log.%s"%self.name())
 967         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
 968         command = to_plc + "| tar -C logs/myplc.var-log.%s -xf -"%self.name()
 969         utils.system(command)
 970         command = "chmod a+r,a+x logs/myplc.var-log.%s/httpd"%self.name()
 971         utils.system(command)
 972
 973     def gather_pgsql_logs (self):
 974         utils.system("mkdir -p logs/myplc.pgsql-log.%s"%self.name())
 975         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
 976         command = to_plc + "| tar -C logs/myplc.pgsql-log.%s -xf -"%self.name()
 977         utils.system(command)
 978
 979     def gather_nodes_var_logs (self):
 980         for site_spec in self.plc_spec['sites']:
 981             test_site = TestSite (self,site_spec)
 982             for node_spec in site_spec['nodes']:
 983                 test_node=TestNode(self,test_site,node_spec)
 984                 test_ssh = TestSsh (test_node.name(),key="/etc/planetlab/root_ssh_key.rsa")
 985                 to_plc = self.actual_command_in_guest ( test_ssh.actual_command("tar -C /var/log -cf - ."))
 986                 command = to_plc + "| tar -C logs/node.var-log.%s -xf -"%test_node.name()
 987                 utils.system("mkdir -p logs/node.var-log.%s"%test_node.name())
 988                 utils.system(command)
 989
 990
 991     # returns the filename to use for sql dump/restore, using options.dbname if set
 992     def dbfile (self, database):
 993         # uses options.dbname if it is found
 994         try:
 995             name=self.options.dbname
 996             if not isinstance(name,StringTypes):
 997                 raise Exception
 998         except:
 999             t=datetime.datetime.now()
1000             d=t.date()
1001             name=str(d)
1002         return "/root/%s-%s.sql"%(database,name)
1003
1004     def db_dump(self):
1005         dump=self.dbfile("planetab4")
1006         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
1007         utils.header('Dumped planetlab4 database in %s'%dump)
1008         return True
1009
1010     def db_restore(self):
1011         dump=self.dbfile("planetab4")
1012         ##stop httpd service
1013         self.run_in_guest('service httpd stop')
1014         # xxx - need another wrapper
1015         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
1016         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
1017         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
1018         ##starting httpd service
1019         self.run_in_guest('service httpd start')
1020
1021         utils.header('Database restored from ' + dump)
1022
1023     @standby_generic
1024     def standby_1(): pass
1025     @standby_generic
1026     def standby_2(): pass
1027     @standby_generic
1028     def standby_3(): pass
1029     @standby_generic
1030     def standby_4(): pass
1031     @standby_generic
1032     def standby_5(): pass
1033     @standby_generic
1034     def standby_6(): pass
1035     @standby_generic
1036     def standby_7(): pass
1037     @standby_generic
1038     def standby_8(): pass
1039     @standby_generic
1040     def standby_9(): pass
1041     @standby_generic
1042     def standby_10(): pass
1043     @standby_generic
1044     def standby_11(): pass
1045     @standby_generic
1046     def standby_12(): pass
1047     @standby_generic
1048     def standby_13(): pass
1049     @standby_generic
1050     def standby_14(): pass
1051     @standby_generic
1052     def standby_15(): pass
1053     @standby_generic
1054     def standby_16(): pass
1055     @standby_generic
1056     def standby_17(): pass
1057     @standby_generic
1058     def standby_18(): pass
1059     @standby_generic
1060     def standby_19(): pass
1061     @standby_generic
1062     def standby_20(): pass
1063