system/TestPlc.py

   1 # $Id$
   2 import os, os.path
   3 import datetime
   4 import time
   5 import sys
   6 import traceback
   7 from types import StringTypes
   8 import socket
   9
  10 import utils
  11 from TestSite import TestSite
  12 from TestNode import TestNode
  13 from TestUser import TestUser
  14 from TestKey import TestKey
  15 from TestSlice import TestSlice
  16 from TestSliver import TestSliver
  17 from TestBox import TestBox
  18 from TestSsh import TestSsh
  19 from TestApiserver import TestApiserver
  20
  21 # step methods must take (self) and return a boolean (options is a member of the class)
  22
  23 def standby(minutes,dry_run):
  24     utils.header('Entering StandBy for %d mn'%minutes)
  25     if dry_run:
  26         print 'dry_run'
  27     else:
  28         time.sleep(60*minutes)
  29     return True
  30
  31 def standby_generic (func):
  32     def actual(self):
  33         minutes=int(func.__name__.split("_")[1])
  34         return standby(minutes,self.options.dry_run)
  35     return actual
  36
  37 def node_mapper (method):
  38     def actual(self):
  39         overall=True
  40         node_method = TestNode.__dict__[method.__name__]
  41         for site_spec in self.plc_spec['sites']:
  42             test_site = TestSite (self,site_spec)
  43             for node_spec in site_spec['nodes']:
  44                 test_node = TestNode (self,test_site,node_spec)
  45                 if not node_method(test_node): overall=False
  46         return overall
  47     # restore the doc text
  48     actual.__doc__=method.__doc__
  49     return actual
  50
  51 def slice_mapper_options (method):
  52     def actual(self):
  53         overall=True
  54         slice_method = TestSlice.__dict__[method.__name__]
  55         for slice_spec in self.plc_spec['slices']:
  56             site_spec = self.locate_site (slice_spec['sitename'])
  57             test_site = TestSite(self,site_spec)
  58             test_slice=TestSlice(self,test_site,slice_spec)
  59             if not slice_method(test_slice,self.options): overall=False
  60         return overall
  61     # restore the doc text
  62     actual.__doc__=method.__doc__
  63     return actual
  64
# marker inserted in the step lists to delimit groups of steps;
# it is not a step itself (valid_step rejects it) and printable_steps
# renders it as a line break
SEP='<sep>'
  66
  67 class TestPlc:
  68
    # step names in execution order; SEP entries only delimit groups
    # presumably the sequence run when no step is explicitly requested - TODO confirm against the caller
    default_steps = [
        'display', 'local_pre', SEP,
        'delete','create','install', 'configure', 'start', SEP,
        'fetch_keys', 'store_keys', 'clear_known_hosts', SEP,
        'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', SEP,
        'reinstall_node', 'init_node','bootcd', 'configure_qemu', 'export_qemu',
        'kill_all_qemus', 'start_node', SEP,
        # better use of time: do this now that the nodes are taking off
        'plcsh_stress_test', SEP,
        'nodes_ssh_debug', 'nodes_ssh_boot', 'check_slice', 'check_initscripts', SEP,
        'check_tcp',  'check_hooks',  SEP,
        'force_gather_logs', 'force_local_post',
        ]
    # further step names that are not part of default_steps
    # the last entry is descriptive text rather than a single step name;
    # presumably it stands for standby_1 .. standby_20 - TODO confirm
    other_steps = [
        'fresh_install', 'stop', 'vs_start', SEP,
        'clean_initscripts', 'clean_nodegroups','clean_all_sites', SEP,
        'clean_sites', 'clean_nodes', 'clean_slices', 'clean_keys', SEP,
        'populate' , SEP,
        'show_boxes', 'list_all_qemus', 'list_qemus', 'kill_qemus', SEP,
        'db_dump' , 'db_restore', SEP,
        'local_list','local_cleanup',SEP,
        'standby_1 through 20',
        ]
  92
  93     @staticmethod
  94     def printable_steps (list):
  95         return " ".join(list).replace(" "+SEP+" "," \\\n")
  96     @staticmethod
  97     def valid_step (step):
  98         return step != SEP
  99
 100     def __init__ (self,plc_spec,options):
 101         self.plc_spec=plc_spec
 102         self.options=options
 103         self.test_ssh=TestSsh(self.plc_spec['hostname'],self.options.buildname)
 104         try:
 105             self.vserverip=plc_spec['vserverip']
 106             self.vservername=plc_spec['vservername']
 107             self.url="https://%s:443/PLCAPI/"%plc_spec['vserverip']
 108             self.vserver=True
 109         except:
 110             raise Exception,'chroot-based myplc testing is deprecated'
 111         self.apiserver=TestApiserver(self.url,options.dry_run)
 112
 113     def name(self):
 114         name=self.plc_spec['name']
 115         return "%s.%s"%(name,self.vservername)
 116
 117     def hostname(self):
 118         return self.plc_spec['hostname']
 119
 120     def is_local (self):
 121         return self.test_ssh.is_local()
 122
 123     # define the API methods on this object through xmlrpc
 124     # would help, but not strictly necessary
 125     def connect (self):
 126         pass
 127
 128     def actual_command_in_guest (self,command):
 129         return self.test_ssh.actual_command(self.host_to_guest(command))
 130
 131     def start_guest (self):
 132       return utils.system(self.test_ssh.actual_command(self.start_guest_in_host()))
 133
 134     def run_in_guest (self,command):
 135         return utils.system(self.actual_command_in_guest(command))
 136
 137     def run_in_host (self,command):
 138         return self.test_ssh.run_in_buildname(command)
 139
 140     #command gets run in the vserver
 141     def host_to_guest(self,command):
 142         return "vserver %s exec %s"%(self.vservername,command)
 143
    # the host-side command that starts the vserver
 145     def start_guest_in_host(self):
 146         return "vserver %s start"%(self.vservername)
 147
 148     # xxx quick n dirty
 149     def run_in_guest_piped (self,local,remote):
 150         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),keep_stdin=True))
 151
 152     def auth_root (self):
 153         return {'Username':self.plc_spec['PLC_ROOT_USER'],
 154                 'AuthMethod':'password',
 155                 'AuthString':self.plc_spec['PLC_ROOT_PASSWORD'],
 156                 'Role' : self.plc_spec['role']
 157                 }
 158     def locate_site (self,sitename):
 159         for site in self.plc_spec['sites']:
 160             if site['site_fields']['name'] == sitename:
 161                 return site
 162             if site['site_fields']['login_base'] == sitename:
 163                 return site
 164         raise Exception,"Cannot locate site %s"%sitename
 165
 166     def locate_node (self,nodename):
 167         for site in self.plc_spec['sites']:
 168             for node in site['nodes']:
 169                 if node['name'] == nodename:
 170                     return (site,node)
 171         raise Exception,"Cannot locate node %s"%nodename
 172
 173     def locate_hostname (self,hostname):
 174         for site in self.plc_spec['sites']:
 175             for node in site['nodes']:
 176                 if node['node_fields']['hostname'] == hostname:
 177                     return (site,node)
 178         raise Exception,"Cannot locate hostname %s"%hostname
 179
 180     def locate_key (self,keyname):
 181         for key in self.plc_spec['keys']:
 182             if key['name'] == keyname:
 183                 return key
 184         raise Exception,"Cannot locate key %s"%keyname
 185
 186     def locate_slice (self, slicename):
 187         for slice in self.plc_spec['slices']:
 188             if slice['slice_fields']['name'] == slicename:
 189                 return slice
 190         raise Exception,"Cannot locate slice %s"%slicename
 191
 192     def all_sliver_objs (self):
 193         result=[]
 194         for slice_spec in self.plc_spec['slices']:
 195             slicename = slice_spec['slice_fields']['name']
 196             for nodename in slice_spec['nodenames']:
 197                 result.append(self.locate_sliver_obj (nodename,slicename))
 198         return result
 199
 200     def locate_sliver_obj (self,nodename,slicename):
 201         (site,node) = self.locate_node(nodename)
 202         slice = self.locate_slice (slicename)
 203         # build objects
 204         test_site = TestSite (self, site)
 205         test_node = TestNode (self, test_site,node)
 206         # xxx the slice site is assumed to be the node site - mhh - probably harmless
 207         test_slice = TestSlice (self, test_site, slice)
 208         return TestSliver (self, test_node, test_slice)
 209
 210     def locate_first_node(self):
 211         nodename=self.plc_spec['slices'][0]['nodenames'][0]
 212         (site,node) = self.locate_node(nodename)
 213         test_site = TestSite (self, site)
 214         test_node = TestNode (self, test_site,node)
 215         return test_node
 216
 217     def locate_first_sliver (self):
 218         slice_spec=self.plc_spec['slices'][0]
 219         slicename=slice_spec['slice_fields']['name']
 220         nodename=slice_spec['nodenames'][0]
 221         return self.locate_sliver_obj(nodename,slicename)
 222
 223     # all different hostboxes used in this plc
 224     def gather_hostBoxes(self):
 225         # maps on sites and nodes, return [ (host_box,test_node) ]
 226         tuples=[]
 227         for site_spec in self.plc_spec['sites']:
 228             test_site = TestSite (self,site_spec)
 229             for node_spec in site_spec['nodes']:
 230                 test_node = TestNode (self, test_site, node_spec)
 231                 if not test_node.is_real():
 232                     tuples.append( (test_node.host_box(),test_node) )
 233         # transform into a dict { 'host_box' -> [ test_node .. ] }
 234         result = {}
 235         for (box,node) in tuples:
 236             if not result.has_key(box):
 237                 result[box]=[node]
 238             else:
 239                 result[box].append(node)
 240         return result
 241
 242     # a step for checking this stuff
 243     def show_boxes (self):
 244         for (box,nodes) in self.gather_hostBoxes().iteritems():
 245             print box,":"," + ".join( [ node.name() for node in nodes ] )
 246         return True
 247
 248     # make this a valid step
 249     def kill_all_qemus(self):
 250         "all qemu boxes: kill all running qemus (even of former runs)"
 251         # this is the brute force version, kill all qemus on that host box
 252         for (box,nodes) in self.gather_hostBoxes().iteritems():
 253             # pass the first nodename, as we don't push template-qemu on testboxes
 254             nodedir=nodes[0].nodedir()
 255             TestBox(box,self.options.buildname).kill_all_qemus(nodedir)
 256         return True
 257
 258     # make this a valid step
 259     def list_all_qemus(self):
 260         for (box,nodes) in self.gather_hostBoxes().iteritems():
 261             # this is the brute force version, kill all qemus on that host box
 262             TestBox(box,self.options.buildname).list_all_qemus()
 263         return True
 264
    # list only the right qemus
 266     def list_qemus(self):
 267         for (box,nodes) in self.gather_hostBoxes().iteritems():
 268             # the fine-grain version
 269             for node in nodes:
 270                 node.list_qemu()
 271         return True
 272
 273     # kill only the right qemus
 274     def kill_qemus(self):
 275         for (box,nodes) in self.gather_hostBoxes().iteritems():
 276             # the fine-grain version
 277             for node in nodes:
 278                 node.kill_qemu()
 279         return True
 280
 281     #################### display config
 282     def display (self):
 283         "show test configuration after localization"
 284         self.display_pass (1)
 285         self.display_pass (2)
 286         return True
 287
 288     # entry point
 289     def display_pass (self,passno):
 290         for (key,val) in self.plc_spec.iteritems():
 291             if passno == 2:
 292                 if key == 'sites':
 293                     for site in val:
 294                         self.display_site_spec(site)
 295                         for node in site['nodes']:
 296                             self.display_node_spec(node)
 297                 elif key=='initscripts':
 298                     for initscript in val:
 299                         self.display_initscript_spec (initscript)
 300                 elif key=='slices':
 301                     for slice in val:
 302                         self.display_slice_spec (slice)
 303                 elif key=='keys':
 304                     for key in val:
 305                         self.display_key_spec (key)
 306             elif passno == 1:
 307                 if key not in ['sites','initscripts','slices','keys']:
 308                     print '*   ',key,':',val
 309
 310     def display_site_spec (self,site):
 311         print '* ======== site',site['site_fields']['name']
 312         for (k,v) in site.iteritems():
 313             if k=='nodes':
 314                 if v:
 315                     print '*       ','nodes : ',
 316                     for node in v:
 317                         print node['node_fields']['hostname'],'',
 318                     print ''
 319             elif k=='users':
 320                 if v:
 321                     print '*       users : ',
 322                     for user in v:
 323                         print user['name'],'',
 324                     print ''
 325             elif k == 'site_fields':
 326                 print '*       login_base',':',v['login_base']
 327             elif k == 'address_fields':
 328                 pass
 329             else:
 330                 print '*       ',k,
 331                 PrettyPrinter(indent=8,depth=2).pprint(v)
 332
 333     def display_initscript_spec (self,initscript):
 334         print '* ======== initscript',initscript['initscript_fields']['name']
 335
 336     def display_key_spec (self,key):
 337         print '* ======== key',key['name']
 338
 339     def display_slice_spec (self,slice):
 340         print '* ======== slice',slice['slice_fields']['name']
 341         for (k,v) in slice.iteritems():
 342             if k=='nodenames':
 343                 if v:
 344                     print '*       nodes : ',
 345                     for nodename in v:
 346                         print nodename,'',
 347                     print ''
 348             elif k=='usernames':
 349                 if v:
 350                     print '*       users : ',
 351                     for username in v:
 352                         print username,'',
 353                     print ''
 354             elif k=='slice_fields':
 355                 print '*       fields',':',
 356                 print 'max_nodes=',v['max_nodes'],
 357                 print ''
 358             else:
 359                 print '*       ',k,v
 360
 361     def display_node_spec (self,node):
 362         print "*           node",node['name'],"host_box=",node['host_box'],
 363         print "hostname=",node['node_fields']['hostname'],
 364         print "ip=",node['interface_fields']['ip']
 365
 366
 367     # another entry point for just showing the boxes involved
 368     def display_mapping (self):
 369         TestPlc.display_mapping_plc(self.plc_spec)
 370         return True
 371
 372     @staticmethod
 373     def display_mapping_plc (plc_spec):
 374         print '* MyPLC',plc_spec['name']
 375         print '*\tvserver address = root@%s:/vservers/%s'%(plc_spec['hostname'],plc_spec['vservername'])
 376         print '*\tIP = %s/%s'%(plc_spec['PLC_API_HOST'],plc_spec['vserverip'])
 377         for site_spec in plc_spec['sites']:
 378             for node_spec in site_spec['nodes']:
 379                 TestPlc.display_mapping_node(node_spec)
 380
 381     @staticmethod
 382     def display_mapping_node (node_spec):
 383         print '*   NODE %s'%(node_spec['name'])
 384         print '*\tqemu box %s'%node_spec['host_box']
 385         print '*\thostname=%s'%node_spec['node_fields']['hostname']
 386
 387     def local_pre (self):
 388         "run site-dependant pre-test script as defined in LocalTestResources"
 389         from LocalTestResources import local_resources
 390         return local_resources.step_pre(self)
 391
 392     def local_post (self):
 393         "run site-dependant post-test script as defined in LocalTestResources"
 394         from LocalTestResources import local_resources
 395         return local_resources.step_post(self)
 396
 397     def local_list (self):
 398         "run site-dependant list script as defined in LocalTestResources"
 399         from LocalTestResources import local_resources
 400         return local_resources.step_list(self)
 401
 402     def local_cleanup (self):
 403         "run site-dependant cleanup script as defined in LocalTestResources"
 404         from LocalTestResources import local_resources
 405         return local_resources.step_cleanup(self)
 406
 407     def delete(self):
 408         "vserver delete the test myplc"
 409         self.run_in_host("vserver --silent %s delete"%self.vservername)
 410         return True
 411
 412     ### install
 413     def create (self):
 414         "vserver creation (no install done)"
 415         if self.is_local():
 416             # a full path for the local calls
 417             build_dir=os.path.dirname(sys.argv[0])
 418             # sometimes this is empty - set to "." in such a case
 419             if not build_dir: build_dir="."
 420             build_dir += "/build"
 421         else:
 422             # use a standard name - will be relative to remote buildname
 423             build_dir="build"
 424         # run checkout in any case - would do an update if already exists
 425         build_checkout = "svn checkout %s %s"%(self.options.build_url,build_dir)
 426         if self.run_in_host(build_checkout) != 0:
 427             return False
 428         # the repo url is taken from arch-rpms-url
 429         # with the last step (i386) removed
 430         repo_url = self.options.arch_rpms_url
 431         for level in [ 'arch' ]:
 432             repo_url = os.path.dirname(repo_url)
 433         # pass the vbuild-nightly options to vtest-init-vserver
 434         test_env_options=""
 435         test_env_options += " -p %s"%self.options.personality
 436         test_env_options += " -d %s"%self.options.pldistro
 437         test_env_options += " -f %s"%self.options.fcdistro
 438         script="vtest-init-vserver.sh"
 439         vserver_name = self.vservername
 440         vserver_options="--netdev eth0 --interface %s"%self.vserverip
 441         try:
 442             vserver_hostname=socket.gethostbyaddr(self.vserverip)[0]
 443             vserver_options += " --hostname %s"%vserver_hostname
 444         except:
 445             pass
 446         create_vserver="%(build_dir)s/%(script)s %(test_env_options)s %(vserver_name)s %(repo_url)s -- %(vserver_options)s"%locals()
 447         return self.run_in_host(create_vserver) == 0
 448
 449     ### install_rpm
 450     def install(self):
 451         "yum install myplc, noderepo, and the plain bootstrapfs"
 452         if self.options.personality == "linux32":
 453             arch = "i386"
 454         elif self.options.personality == "linux64":
 455             arch = "x86_64"
 456         else:
 457             raise Exception, "Unsupported personality %r"%self.options.personality
 458         return \
 459             self.run_in_guest("yum -y install myplc")==0 and \
 460             self.run_in_guest("yum -y install noderepo-%s-%s"%(self.options.pldistro,arch))==0 and \
 461             self.run_in_guest("yum -y install bootstrapfs-%s-%s-plain"%(self.options.pldistro,arch))==0
 462
 463     ###
 464     def configure(self):
 465         "run plc-config-tty"
 466         tmpname='%s.plc-config-tty'%(self.name())
 467         fileconf=open(tmpname,'w')
 468         for var in [ 'PLC_NAME',
 469                      'PLC_ROOT_PASSWORD',
 470                      'PLC_ROOT_USER',
 471                      'PLC_MAIL_ENABLED',
 472                      'PLC_MAIL_SUPPORT_ADDRESS',
 473                      'PLC_DB_HOST',
 474                      'PLC_API_HOST',
 475                      'PLC_WWW_HOST',
 476                      'PLC_BOOT_HOST',
 477                      'PLC_NET_DNS1',
 478                      'PLC_NET_DNS2']:
 479             fileconf.write ('e %s\n%s\n'%(var,self.plc_spec[var]))
 480         fileconf.write('w\n')
 481         fileconf.write('q\n')
 482         fileconf.close()
 483         utils.system('cat %s'%tmpname)
 484         self.run_in_guest_piped('cat %s'%tmpname,'plc-config-tty')
 485         utils.system('rm %s'%tmpname)
 486         return True
 487
 488     def start(self):
 489         "service plc start"
 490         self.run_in_guest('service plc start')
 491         return True
 492
 493     def stop(self):
 494         "service plc stop"
 495         self.run_in_guest('service plc stop')
 496         return True
 497
 498     def vs_start (self):
 499         self.start_guest()
 500         return True
 501
 502     # stores the keys from the config for further use
 503     def store_keys(self):
 504         "stores test users ssh keys in keys/"
 505         for key_spec in self.plc_spec['keys']:
 506                 TestKey(self,key_spec).store_key()
 507         return True
 508
 509     def clean_keys(self):
 510         utils.system("rm -rf %s/keys/"%os.path(sys.argv[0]))
 511
 512     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 513     # for later direct access to the nodes
 514     def fetch_keys(self):
 515         "gets ssh keys in /etc/planetlab/ and stores them locally in keys/"
 516         dir="./keys"
 517         if not os.path.isdir(dir):
 518             os.mkdir(dir)
 519         vservername=self.vservername
 520         overall=True
 521         prefix = 'root_ssh_key'
 522         for ext in [ 'pub', 'rsa' ] :
 523             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 524             dst="keys/%(vservername)s.%(ext)s"%locals()
 525             if self.test_ssh.fetch(src,dst) != 0: overall=False
 526         prefix = 'debug_ssh_key'
 527         for ext in [ 'pub', 'rsa' ] :
 528             src="/vservers/%(vservername)s/etc/planetlab/%(prefix)s.%(ext)s"%locals()
 529             dst="keys/%(vservername)s-debug.%(ext)s"%locals()
 530             if self.test_ssh.fetch(src,dst) != 0: overall=False
 531         return overall
 532
 533     def sites (self):
 534         "create sites with PLCAPI"
 535         return self.do_sites()
 536
 537     def clean_sites (self):
 538         "delete sites with PLCAPI"
 539         return self.do_sites(action="delete")
 540
 541     def do_sites (self,action="add"):
 542         for site_spec in self.plc_spec['sites']:
 543             test_site = TestSite (self,site_spec)
 544             if (action != "add"):
 545                 utils.header("Deleting site %s in %s"%(test_site.name(),self.name()))
 546                 test_site.delete_site()
 547                 # deleted with the site
 548                 #test_site.delete_users()
 549                 continue
 550             else:
 551                 utils.header("Creating site %s & users in %s"%(test_site.name(),self.name()))
 552                 test_site.create_site()
 553                 test_site.create_users()
 554         return True
 555
 556     def clean_all_sites (self):
 557         print 'auth_root',self.auth_root()
 558         site_ids = [s['site_id'] for s in self.apiserver.GetSites(self.auth_root(), {}, ['site_id'])]
 559         for site_id in site_ids:
 560             print 'Deleting site_id',site_id
 561             self.apiserver.DeleteSite(self.auth_root(),site_id)
 562
 563     def nodes (self):
 564         "create nodes with PLCAPI"
 565         return self.do_nodes()
 566     def clean_nodes (self):
 567         "delete nodes with PLCAPI"
 568         return self.do_nodes(action="delete")
 569
 570     def do_nodes (self,action="add"):
 571         for site_spec in self.plc_spec['sites']:
 572             test_site = TestSite (self,site_spec)
 573             if action != "add":
 574                 utils.header("Deleting nodes in site %s"%test_site.name())
 575                 for node_spec in site_spec['nodes']:
 576                     test_node=TestNode(self,test_site,node_spec)
 577                     utils.header("Deleting %s"%test_node.name())
 578                     test_node.delete_node()
 579             else:
 580                 utils.header("Creating nodes for site %s in %s"%(test_site.name(),self.name()))
 581                 for node_spec in site_spec['nodes']:
 582                     utils.pprint('Creating node %s'%node_spec,node_spec)
 583                     test_node = TestNode (self,test_site,node_spec)
 584                     test_node.create_node ()
 585         return True
 586
 587     def nodegroups (self):
 588         "create nodegroups with PLCAPI"
 589         return self.do_nodegroups("add")
 590     def clean_nodegroups (self):
 591         "delete nodegroups with PLCAPI"
 592         return self.do_nodegroups("delete")
 593
 594     # create nodegroups if needed, and populate
    def do_nodegroups (self, action="add"):
        # with action "add": for each nodegroup named in the node specs, makes sure
        # a tag type and a nodegroup of that name exist, tags the member nodes with
        # value 'yes', and checks the tag afterwards
        # with any other action: deletes each of these nodegroups
        # returns False if a check or a deletion failed, True otherwise
        # 1st pass to scan contents
        # groups_dict maps each nodegroup name to the names (test_node.name()) of its nodes
        groups_dict = {}
        for site_spec in self.plc_spec['sites']:
            test_site = TestSite (self,site_spec)
            for node_spec in site_spec['nodes']:
                test_node=TestNode (self,test_site,node_spec)
                if node_spec.has_key('nodegroups'):
                    nodegroupnames=node_spec['nodegroups']
                    # a single group may be given as a plain string
                    if isinstance(nodegroupnames,StringTypes):
                        nodegroupnames = [ nodegroupnames ]
                    for nodegroupname in nodegroupnames:
                        if not groups_dict.has_key(nodegroupname):
                            groups_dict[nodegroupname]=[]
                        groups_dict[nodegroupname].append(test_node.name())
        auth=self.auth_root()
        overall = True
        # 2nd pass: act on each group
        for (nodegroupname,group_nodes) in groups_dict.iteritems():
            if action == "add":
                print 'nodegroups:','dealing with nodegroup',nodegroupname,'on nodes',group_nodes
                # first, check if the nodetagtype is here
                tag_types = self.apiserver.GetTagTypes(auth,{'tagname':nodegroupname})
                if tag_types:
                    tag_type_id = tag_types[0]['tag_type_id']
                else:
                    # the tag type is named after the nodegroup
                    tag_type_id = self.apiserver.AddTagType(auth,
                                                            {'tagname':nodegroupname,
                                                             'description': 'for nodegroup %s'%nodegroupname,
                                                             'category':'test',
                                                             'min_role_id':10})
                print 'located tag (type)',nodegroupname,'as',tag_type_id
                # create nodegroup
                nodegroups = self.apiserver.GetNodeGroups (auth, {'groupname':nodegroupname})
                if not nodegroups:
                    self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
                    print 'created nodegroup',nodegroupname,'from tagname',nodegroupname,'and value','yes'
                # set node tag on all nodes, value='yes'
                for nodename in group_nodes:
                    try:
                        self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
                    except:
                        # a failure here does not affect the result, the check below decides
                        # NOTE(review): bare except, this also catches KeyboardInterrupt
                        traceback.print_exc()
                        print 'node',nodename,'seems to already have tag',nodegroupname
                    # check anyway
                    try:
                        expect_yes = self.apiserver.GetNodeTags(auth,
                                                                {'hostname':nodename,
                                                                 'tagname':nodegroupname},
                                                                ['value'])[0]['value']
                        if expect_yes != "yes":
                            print 'Mismatch node tag on node',nodename,'got',expect_yes
                            overall=False
                    except:
                        # in dry_run mode a failed lookup is not counted as an error
                        if not self.options.dry_run:
                            print 'Cannot find tag',nodegroupname,'on node',nodename
                            overall = False
            else:
                try:
                    print 'cleaning nodegroup',nodegroupname
                    self.apiserver.DeleteNodeGroup(auth,nodegroupname)
                except:
                    traceback.print_exc()
                    overall=False
        return overall
 659
 660     def all_hostnames (self) :
 661         hostnames = []
 662         for site_spec in self.plc_spec['sites']:
 663             hostnames += [ node_spec['node_fields']['hostname'] \
 664                            for node_spec in site_spec['nodes'] ]
 665         return hostnames
 666
 667     # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
    def nodes_check_boot_state (self, target_boot_state, timeout_minutes, silent_minutes,period=15):
        # polls the boot_state of all nodes through GetNodes every <period> seconds
        # returns True once all nodes are in target_boot_state, and False when
        # timeout_minutes have elapsed with some node still in another state
        # in dry_run mode nothing is checked and True is returned
        if self.options.dry_run:
            print 'dry_run'
            return True
        # compute timeout
        timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
        graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
        # the nodes that haven't checked yet - start with a full list and shrink over time
        tocheck = self.all_hostnames()
        utils.header("checking nodes %r"%tocheck)
        # create a dict hostname -> status
        status = dict ( [ (hostname,'undef') for hostname in tocheck ] )
        while tocheck:
            # get their status
            tocheck_status=self.apiserver.GetNodes(self.auth_root(), tocheck, ['hostname','boot_state' ] )
            # update status
            for array in tocheck_status:
                hostname=array['hostname']
                boot_state=array['boot_state']
                if boot_state == target_boot_state:
                    utils.header ("%s has reached the %s state"%(hostname,target_boot_state))
                else:
                    # if it's a real node, never mind
                    (site_spec,node_spec)=self.locate_hostname(hostname)
                    if TestNode.is_real_model(node_spec['node_fields']['model']):
                        utils.header("WARNING - Real node %s in %s - ignored"%(hostname,boot_state))
                        # let's cheat
                        boot_state = target_boot_state
                    elif datetime.datetime.now() > graceout:
                        utils.header ("%s still in '%s' state"%(hostname,boot_state))
                        # NOTE(review): timedelta(1) is one day, so this message shows up
                        # at most once per day - confirm this is intended (vs one minute)
                        graceout=datetime.datetime.now()+datetime.timedelta(1)
                status[hostname] = boot_state
            # refresh tocheck
            tocheck = [ hostname for (hostname,boot_state) in status.iteritems() if boot_state != target_boot_state ]
            if not tocheck:
                return True
            if datetime.datetime.now() > timeout:
                for hostname in tocheck:
                    utils.header("FAILURE due to %s in '%s' state"%(hostname,status[hostname]))
                return False
            # otherwise, sleep for a while
            time.sleep(period)
        # only useful in empty plcs
        return True
 712
 713     def nodes_booted(self):
 714         return self.nodes_check_boot_state('boot',timeout_minutes=20,silent_minutes=15)
 715
 716     def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period=20):
 717         # compute timeout
 718         timeout = datetime.datetime.now()+datetime.timedelta(minutes=timeout_minutes)
 719         graceout = datetime.datetime.now()+datetime.timedelta(minutes=silent_minutes)
 720         vservername=self.vservername
 721         if debug:
 722             message="debug"
 723             local_key = "keys/%(vservername)s-debug.rsa"%locals()
 724         else:
 725             message="boot"
 726             local_key = "keys/%(vservername)s.rsa"%locals()
 727         tocheck = self.all_hostnames()
 728         utils.header("checking ssh access (expected in %s mode) to nodes %r"%(message,tocheck))
 729         utils.header("max timeout is %d minutes, silent for %d minutes (period is %s)"%\
 730                          (timeout_minutes,silent_minutes,period))
 731         while tocheck:
 732             for hostname in tocheck:
 733                 # try to run 'hostname' in the node
 734                 command = TestSsh (hostname,key=local_key).actual_command("hostname;uname -a")
 735                 # don't spam logs - show the command only after the grace period
 736                 if datetime.datetime.now() > graceout:
 737                     success=utils.system(command)
 738                 else:
 739                     # truly silent, just print out a dot to show we're alive
 740                     print '.',
 741                     sys.stdout.flush()
 742                     command += " 2>/dev/null"
 743                     if self.options.dry_run:
 744                         print 'dry_run',command
 745                         success=0
 746                     else:
 747                         success=os.system(command)
 748                 if success==0:
 749                     utils.header('Successfully entered root@%s (%s)'%(hostname,message))
 750                     # refresh tocheck
 751                     tocheck.remove(hostname)
 752                 else:
 753                     # we will have tried real nodes once, in case they're up - but if not, just skip
 754                     (site_spec,node_spec)=self.locate_hostname(hostname)
 755                     if TestNode.is_real_model(node_spec['node_fields']['model']):
 756                         utils.header ("WARNING : check ssh access into real node %s - skipped"%hostname)
 757                         tocheck.remove(hostname)
 758             if  not tocheck:
 759                 return True
 760             if datetime.datetime.now() > timeout:
 761                 for hostname in tocheck:
 762                     utils.header("FAILURE to ssh into %s"%hostname)
 763                 return False
 764             # otherwise, sleep for a while
 765             time.sleep(period)
 766         # only useful in empty plcs
 767         return True
 768
 769     def nodes_ssh_debug(self):
 770         "Tries to ssh into nodes in debug mode with the debug ssh key"
 771         return self.check_nodes_ssh(debug=True,timeout_minutes=30,silent_minutes=10)
 772
 773     def nodes_ssh_boot(self):
 774         "Tries to ssh into nodes in production mode with the root ssh key"
 775         return self.check_nodes_ssh(debug=False,timeout_minutes=30,silent_minutes=10)
 776
    # placeholder step: node_mapper substitutes a loop that calls TestNode.init_node
    # on every node of every site (False if any call fails) and keeps this docstring
    @node_mapper
    def init_node (self):
        "all nodes : init a clean local directory for holding node-dep stuff like iso image..."
        pass
    # placeholder step: node_mapper substitutes a loop that calls TestNode.bootcd
    # on every node of every site (False if any call fails) and keeps this docstring
    @node_mapper
    def bootcd (self):
        "all nodes: invoke GetBootMedium and store result locally"
        pass
    # placeholder step: node_mapper substitutes a loop that calls TestNode.configure_qemu
    # on every node of every site (False if any call fails) and keeps this docstring
    @node_mapper
    def configure_qemu (self):
        "all nodes: compute qemu config qemu.conf and store it locally"
        pass
    # placeholder step: node_mapper substitutes a loop that calls TestNode.reinstall_node
    # on every node of every site (False if any call fails) and keeps this docstring
    @node_mapper
    def reinstall_node (self):
        "all nodes: mark PLCAPI boot_state as reinstall"
        pass
    # placeholder step: node_mapper substitutes a loop that calls TestNode.export_qemu
    # on every node of every site (False if any call fails) and keeps this docstring
    @node_mapper
    def export_qemu (self):
        "all nodes: push local node-dep directory on the qemu box"
        pass
 797
 798     ### check hooks : invoke scripts from hooks/{node,slice}
 799     def check_hooks_node (self):
 800         return self.locate_first_node().check_hooks()
 801     def check_hooks_sliver (self) :
 802         return self.locate_first_sliver().check_hooks()
 803
 804     def check_hooks (self):
 805         "runs unit tests in the node and slice contexts - see hooks/{node,slice}"
 806         return self.check_hooks_node() and self.check_hooks_sliver()
 807
 808     ### initscripts
 809     def do_check_initscripts(self):
 810         overall = True
 811         for slice_spec in self.plc_spec['slices']:
 812             if not slice_spec.has_key('initscriptname'):
 813                 continue
 814             initscript=slice_spec['initscriptname']
 815             for nodename in slice_spec['nodenames']:
 816                 (site,node) = self.locate_node (nodename)
 817                 # xxx - passing the wrong site - probably harmless
 818                 test_site = TestSite (self,site)
 819                 test_slice = TestSlice (self,test_site,slice_spec)
 820                 test_node = TestNode (self,test_site,node)
 821                 test_sliver = TestSliver (self, test_node, test_slice)
 822                 if not test_sliver.check_initscript(initscript):
 823                     overall = False
 824         return overall
 825
 826     def check_initscripts(self):
 827         "check that the initscripts have triggered"
 828         return self.do_check_initscripts()
 829
 830     def initscripts (self):
 831         "create initscripts with PLCAPI"
 832         for initscript in self.plc_spec['initscripts']:
 833             utils.pprint('Adding Initscript in plc %s'%self.plc_spec['name'],initscript)
 834             self.apiserver.AddInitScript(self.auth_root(),initscript['initscript_fields'])
 835         return True
 836
 837     def clean_initscripts (self):
 838         "delete initscripts with PLCAPI"
 839         for initscript in self.plc_spec['initscripts']:
 840             initscript_name = initscript['initscript_fields']['name']
 841             print('Attempting to delete %s in plc %s'%(initscript_name,self.plc_spec['name']))
 842             try:
 843                 self.apiserver.DeleteInitScript(self.auth_root(),initscript_name)
 844                 print initscript_name,'deleted'
 845             except:
 846                 print 'deletion went wrong - probably did not exist'
 847         return True
 848
 849     ### manage slices
 850     def slices (self):
 851         "create slices with PLCAPI"
 852         return self.do_slices()
 853
 854     def clean_slices (self):
 855         "delete slices with PLCAPI"
 856         return self.do_slices("delete")
 857
 858     def do_slices (self,  action="add"):
 859         for slice in self.plc_spec['slices']:
 860             site_spec = self.locate_site (slice['sitename'])
 861             test_site = TestSite(self,site_spec)
 862             test_slice=TestSlice(self,test_site,slice)
 863             if action != "add":
 864                 utils.header("Deleting slices in site %s"%test_site.name())
 865                 test_slice.delete_slice()
 866             else:
 867                 utils.pprint("Creating slice",slice)
 868                 test_slice.create_slice()
 869                 utils.header('Created Slice %s'%slice['slice_fields']['name'])
 870         return True
 871
    # placeholder step: the slice_mapper_options decorator resolves TestSlice.check_slice
    # by name and works on the slices listed in plc_spec['slices']
    @slice_mapper_options
    def check_slice(self):
        "tries to ssh-enter the slice with the user key, to ensure slice creation"
        pass
 876
    # placeholder step: node_mapper substitutes a loop that calls TestNode.clear_known_hosts
    # on every node of every site (False if any call fails) and keeps this docstring
    @node_mapper
    def clear_known_hosts (self):
        "remove test nodes entries from the local known_hosts file"
        pass
 881
    # placeholder step: node_mapper substitutes a loop that calls TestNode.start_node
    # on every node of every site (False if any call fails) and keeps this docstring
    @node_mapper
    def start_node (self) :
        "all nodes: start the qemu instance (also runs qemu-bridge-init start)"
        pass
 886
 887     def check_tcp (self):
 888         "check TCP connectivity between 2 slices (or in loopback if only one is defined)"
 889         specs = self.plc_spec['tcp_test']
 890         overall=True
 891         for spec in specs:
 892             port = spec['port']
 893             # server side
 894             s_test_sliver = self.locate_sliver_obj (spec['server_node'],spec['server_slice'])
 895             if not s_test_sliver.run_tcp_server(port,timeout=10):
 896                 overall=False
 897                 break
 898
 899             # idem for the client side
 900             c_test_sliver = self.locate_sliver_obj(spec['server_node'],spec['server_slice'])
 901             if not c_test_sliver.run_tcp_client(s_test_sliver.test_node.name(),port):
 902                 overall=False
 903         return overall
 904
 905     def plcsh_stress_test (self):
 906         "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents"
 907         # install the stress-test in the plc image
 908         location = "/usr/share/plc_api/plcsh_stress_test.py"
 909         remote="/vservers/%s/%s"%(self.vservername,location)
 910         self.test_ssh.copy_abs("plcsh_stress_test.py",remote)
 911         command = location
 912         command += " -- --check"
 913         if self.options.size == 1:
 914             command +=  " --tiny"
 915         return ( self.run_in_guest(command) == 0)
 916
    # populate runs the same utility with slightly different options
 918     # in particular runs with --preserve (dont cleanup) and without --check
 919     # also it gets run twice, once with the --foreign option for creating fake foreign entries
 920     def populate (self):
 921         "creates random entries in the PLCAPI"
 922         # install the stress-test in the plc image
 923         location = "/usr/share/plc_api/plcsh_stress_test.py"
 924         remote="/vservers/%s/%s"%(self.vservername,location)
 925         self.test_ssh.copy_abs("plcsh_stress_test.py",remote)
 926         command = location
 927         command += " -- --preserve --short-names"
 928         local = (self.run_in_guest(command) == 0);
 929         # second run with --foreign
 930         command += ' --foreign'
 931         remote = (self.run_in_guest(command) == 0);
 932         return ( local and remote)
 933
 934     def gather_logs (self):
 935         "gets all possible logs from plc's/qemu node's/slice's for future reference"
 936         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
 937         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
 938         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
 939         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
 940         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
 941         # (1.a)
 942         print "-------------------- TestPlc.gather_logs : PLC's /var/log"
 943         self.gather_var_logs ()
 944         # (1.b)
 945         print "-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/"
 946         self.gather_pgsql_logs ()
 947         # (2)
 948         print "-------------------- TestPlc.gather_logs : nodes's QEMU logs"
 949         for site_spec in self.plc_spec['sites']:
 950             test_site = TestSite (self,site_spec)
 951             for node_spec in site_spec['nodes']:
 952                 test_node=TestNode(self,test_site,node_spec)
 953                 test_node.gather_qemu_logs()
 954         # (3)
 955         print "-------------------- TestPlc.gather_logs : nodes's /var/log"
 956         self.gather_nodes_var_logs()
 957         # (4)
 958         print "-------------------- TestPlc.gather_logs : sample sliver's /var/log"
 959         self.gather_slivers_var_logs()
 960         return True
 961
 962     def gather_slivers_var_logs(self):
 963         for test_sliver in self.all_sliver_objs():
 964             remote = test_sliver.tar_var_logs()
 965             utils.system("mkdir -p logs/sliver.var-log.%s"%test_sliver.name())
 966             command = remote + " | tar -C logs/sliver.var-log.%s -xf -"%test_sliver.name()
 967             utils.system(command)
 968         return True
 969
 970     def gather_var_logs (self):
 971         utils.system("mkdir -p logs/myplc.var-log.%s"%self.name())
 972         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
 973         command = to_plc + "| tar -C logs/myplc.var-log.%s -xf -"%self.name()
 974         utils.system(command)
 975         command = "chmod a+r,a+x logs/myplc.var-log.%s/httpd"%self.name()
 976         utils.system(command)
 977
 978     def gather_pgsql_logs (self):
 979         utils.system("mkdir -p logs/myplc.pgsql-log.%s"%self.name())
 980         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
 981         command = to_plc + "| tar -C logs/myplc.pgsql-log.%s -xf -"%self.name()
 982         utils.system(command)
 983
 984     def gather_nodes_var_logs (self):
 985         for site_spec in self.plc_spec['sites']:
 986             test_site = TestSite (self,site_spec)
 987             for node_spec in site_spec['nodes']:
 988                 test_node=TestNode(self,test_site,node_spec)
 989                 test_ssh = TestSsh (test_node.name(),key="/etc/planetlab/root_ssh_key.rsa")
 990                 to_plc = self.actual_command_in_guest ( test_ssh.actual_command("tar -C /var/log -cf - ."))
 991                 command = to_plc + "| tar -C logs/node.var-log.%s -xf -"%test_node.name()
 992                 utils.system("mkdir -p logs/node.var-log.%s"%test_node.name())
 993                 utils.system(command)
 994
 995
 996     # returns the filename to use for sql dump/restore, using options.dbname if set
 997     def dbfile (self, database):
 998         # uses options.dbname if it is found
 999         try:
1000             name=self.options.dbname
1001             if not isinstance(name,StringTypes):
1002                 raise Exception
1003         except:
1004             t=datetime.datetime.now()
1005             d=t.date()
1006             name=str(d)
1007         return "/root/%s-%s.sql"%(database,name)
1008
1009     def db_dump(self):
1010         dump=self.dbfile("planetab4")
1011         self.run_in_guest('pg_dump -U pgsqluser planetlab4 -f '+ dump)
1012         utils.header('Dumped planetlab4 database in %s'%dump)
1013         return True
1014
1015     def db_restore(self):
1016         dump=self.dbfile("planetab4")
1017         ##stop httpd service
1018         self.run_in_guest('service httpd stop')
1019         # xxx - need another wrapper
1020         self.run_in_guest_piped('echo drop database planetlab4','psql --user=pgsqluser template1')
1021         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab4')
1022         self.run_in_guest('psql -U pgsqluser planetlab4 -f '+dump)
1023         ##starting httpd service
1024         self.run_in_guest('service httpd start')
1025
1026         utils.header('Database restored from ' + dump)
1027
    # standby_<N> steps: standby_generic reads <N> from the method name and replaces
    # the method with one that takes self and sleeps for N minutes (or only prints
    # 'dry_run' when options.dry_run is set); the placeholder bodies below never run
    @standby_generic
    def standby_1(): pass
    @standby_generic
    def standby_2(): pass
    @standby_generic
    def standby_3(): pass
    @standby_generic
    def standby_4(): pass
    @standby_generic
    def standby_5(): pass
    @standby_generic
    def standby_6(): pass
    @standby_generic
    def standby_7(): pass
    @standby_generic
    def standby_8(): pass
    @standby_generic
    def standby_9(): pass
    @standby_generic
    def standby_10(): pass
    @standby_generic
    def standby_11(): pass
    @standby_generic
    def standby_12(): pass
    @standby_generic
    def standby_13(): pass
    @standby_generic
    def standby_14(): pass
    @standby_generic
    def standby_15(): pass
    @standby_generic
    def standby_16(): pass
    @standby_generic
    def standby_17(): pass
    @standby_generic
    def standby_18(): pass
    @standby_generic
    def standby_19(): pass
    @standby_generic
    def standby_20(): pass