system/TestPlc.py

   1 # Thierry Parmentelat <thierry.parmentelat@inria.fr>
   2 # Copyright (C) 2010 INRIA
   3 #
   4 import sys
   5 import time
   6 import os, os.path
   7 import traceback
   8 import socket
   9 from datetime import datetime, timedelta
  10
  11 import utils
  12 from Completer import Completer, CompleterTask
  13 from TestSite import TestSite
  14 from TestNode import TestNode, CompleterTaskNodeSsh
  15 from TestUser import TestUser
  16 from TestKey import TestKey
  17 from TestSlice import TestSlice
  18 from TestSliver import TestSliver
  19 from TestBoxQemu import TestBoxQemu
  20 from TestSsh import TestSsh
  21 from TestApiserver import TestApiserver
  22 from TestAuthSfa import TestAuthSfa
  23 from PlcapiUrlScanner import PlcapiUrlScanner
  24
  25 from TestBonding import TestBonding
  26
# name of the file (relative to the current directory) where
# TestPlc._has_sfa_cached stores its 'yes'/'no' answer between runs
has_sfa_cache_filename="sfa-cache"
  28
  29 # step methods must take (self) and return a boolean (options is a member of the class)
  30
  31 def standby(minutes, dry_run):
  32     utils.header('Entering StandBy for {:d} mn'.format(minutes))
  33     if dry_run:
  34         print('dry_run')
  35     else:
  36         time.sleep(60*minutes)
  37     return True
  38
  39 def standby_generic(func):
  40     def actual(self):
  41         minutes = int(func.__name__.split("_")[1])
  42         return standby(minutes, self.options.dry_run)
  43     return actual
  44
  45 def node_mapper(method):
  46     def map_on_nodes(self, *args, **kwds):
  47         overall = True
  48         node_method = TestNode.__dict__[method.__name__]
  49         for test_node in self.all_nodes():
  50             if not node_method(test_node, *args, **kwds):
  51                 overall=False
  52         return overall
  53     # maintain __name__ for ignore_result
  54     map_on_nodes.__name__ = method.__name__
  55     # restore the doc text
  56     map_on_nodes.__doc__ = TestNode.__dict__[method.__name__].__doc__
  57     return map_on_nodes
  58
  59 def slice_mapper(method):
  60     def map_on_slices(self):
  61         overall = True
  62         slice_method = TestSlice.__dict__[method.__name__]
  63         for slice_spec in self.plc_spec['slices']:
  64             site_spec = self.locate_site (slice_spec['sitename'])
  65             test_site = TestSite(self,site_spec)
  66             test_slice = TestSlice(self,test_site,slice_spec)
  67             if not slice_method(test_slice, self.options):
  68                 overall=False
  69         return overall
  70     # maintain __name__ for ignore_result
  71     map_on_slices.__name__ = method.__name__
  72     # restore the doc text
  73     map_on_slices.__doc__ = TestSlice.__dict__[method.__name__].__doc__
  74     return map_on_slices
  75
  76 def bonding_redirector(method):
  77     bonding_name = method.__name__.replace('bonding_', '')
  78     def redirect(self):
  79         bonding_method = TestBonding.__dict__[bonding_name]
  80         return bonding_method(self.test_bonding)
  81     # maintain __name__ for ignore_result
  82     redirect.__name__ = method.__name__
  83     # restore the doc text
  84     redirect.__doc__ = TestBonding.__dict__[bonding_name].__doc__
  85     return redirect
  86
  87 # run a step but return True so that we can go on
  88 def ignore_result(method):
  89     def ignoring(self):
  90         # ssh_slice_ignore->ssh_slice
  91         ref_name = method.__name__.replace('_ignore', '').replace('force_', '')
  92         ref_method = TestPlc.__dict__[ref_name]
  93         result = ref_method(self)
  94         print("Actual (but ignored) result for {ref_name} is {result}".format(**locals()))
  95         return Ignored(result)
  96     name = method.__name__.replace('_ignore', '').replace('force_', '')
  97     ignoring.__name__ = name
  98     ignoring.__doc__ = "ignored version of " + name
  99     return ignoring
 100
 101 # a variant that expects the TestSlice method to return a list of CompleterTasks that
# are then merged into a single Completer run to avoid waiting for all the slices
 103 # esp. useful when a test fails of course
 104 # because we need to pass arguments we use a class instead..
 105 class slice_mapper__tasks(object):
 106     # could not get this to work with named arguments
 107     def __init__(self, timeout_minutes, silent_minutes, period_seconds):
 108         self.timeout = timedelta(minutes = timeout_minutes)
 109         self.silent = timedelta(minutes = silent_minutes)
 110         self.period = timedelta(seconds = period_seconds)
 111     def __call__(self, method):
 112         decorator_self=self
 113         # compute augmented method name
 114         method_name = method.__name__ + "__tasks"
 115         # locate in TestSlice
 116         slice_method = TestSlice.__dict__[ method_name ]
 117         def wrappee(self):
 118             tasks=[]
 119             for slice_spec in self.plc_spec['slices']:
 120                 site_spec = self.locate_site (slice_spec['sitename'])
 121                 test_site = TestSite(self, site_spec)
 122                 test_slice = TestSlice(self, test_site, slice_spec)
 123                 tasks += slice_method (test_slice, self.options)
 124             return Completer (tasks, message=method.__name__).\
 125                 run(decorator_self.timeout, decorator_self.silent, decorator_self.period)
 126         # restore the doc text from the TestSlice method even if a bit odd
 127         wrappee.__name__ = method.__name__
 128         wrappee.__doc__ = slice_method.__doc__
 129         return wrappee
 130
 131 def auth_sfa_mapper(method):
 132     def actual(self):
 133         overall = True
 134         auth_method = TestAuthSfa.__dict__[method.__name__]
 135         for auth_spec in self.plc_spec['sfa']['auth_sfa_specs']:
 136             test_auth = TestAuthSfa(self, auth_spec)
 137             if not auth_method(test_auth, self.options):
 138                 overall=False
 139         return overall
 140     # restore the doc text
 141     actual.__doc__ = TestAuthSfa.__dict__[method.__name__].__doc__
 142     return actual
 143
 144 class Ignored:
 145     def __init__(self, result):
 146         self.result = result
 147
# pseudo-steps inserted in the step lists below: TestPlc.printable_steps
# renders them as line breaks and TestPlc.valid_step rejects them
SEP = '<sep>'
# same, for sfa-related lines; it contains 'sfa', so it is moved along with
# the sfa steps by TestPlc.check_whether_build_has_sfa
SEPSFA = '<sep_sfa>'
 150
 151 class TestPlc:
 152
    # steps in the default sequence, in order; SEP / SEPSFA are
    # layout-only markers (see printable_steps and valid_step)
    default_steps = [
        'show', SEP,
        'plcvm_delete','plcvm_timestamp','plcvm_create', SEP,
        'plc_install', 'plc_configure', 'plc_start', SEP,
        'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP,
        'plcapi_urls','speed_up_slices', SEP,
        'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP,
# slices created under plcsh interactively seem to be fine but these ones don't have the tags
# keep this out of the way for now
        'check_vsys_defaults_ignore', SEP,
# run this first off so it's easier to re-run on another qemu box
        'qemu_kill_mine', 'nodestate_reinstall', 'qemu_local_init','bootcd', 'qemu_local_config', SEP,
        'qemu_clean_mine', 'qemu_export', 'qemu_start', 'qemu_timestamp', 'qemu_nodefamily', SEP,
        'sfa_install_all', 'sfa_configure', 'cross_sfa_configure', 'sfa_start', 'sfa_import', SEPSFA,
        'sfi_configure@1', 'sfa_register_site@1','sfa_register_pi@1', SEPSFA,
        'sfa_register_user@1', 'sfa_update_user@1', 'sfa_register_slice@1', 'sfa_renew_slice@1', SEPSFA,
        'sfa_remove_user_from_slice@1','sfi_show_slice_researchers@1',
        'sfa_insert_user_in_slice@1','sfi_show_slice_researchers@1', SEPSFA,
        'sfa_discover@1', 'sfa_rspec@1', 'sfa_allocate@1', 'sfa_provision@1', SEPSFA,
        'sfa_check_slice_plc@1', 'sfa_update_slice@1', SEPSFA,
        'sfi_list@1', 'sfi_show_site@1', 'sfa_utest@1', SEPSFA,
        # we used to run plcsh_stress_test, and then ssh_node_debug and ssh_node_boot
        # but as the stress test might take a while, we sometimes missed the debug mode..
        'probe_kvm_iptables',
        'ping_node', 'ssh_node_debug', 'plcsh_stress_test@1', SEP,
        'ssh_node_boot', 'node_bmlogs', 'ssh_slice', 'ssh_slice_basics', 'check_initscripts', SEP,
        'ssh_slice_sfa@1', SEPSFA,
        'sfa_rspec_empty@1', 'sfa_allocate_empty@1', 'sfa_provision_empty@1','sfa_check_slice_plc_empty@1', SEPSFA,
        'sfa_delete_slice@1', 'sfa_delete_user@1', SEPSFA,
        'cross_check_tcp@1', 'check_system_slice', SEP,
        # for inspecting the slice while it runs the first time
        #'fail',
        # check slices are turned off properly
        'empty_slices', 'ssh_slice_off', 'slice_fs_deleted_ignore', SEP,
        # check they are properly re-created with the same name
        'fill_slices', 'ssh_slice_again', SEP,
        'gather_logs_force', SEP,
        ]
    # steps that are not part of the default sequence; this list also
    # receives the sfa/sfi steps removed by check_whether_build_has_sfa
    other_steps = [
        'export', 'show_boxes', 'super_speed_up_slices', SEP,
        'check_hooks', 'plc_stop', 'plcvm_start', 'plcvm_stop', SEP,
        'delete_initscripts', 'delete_nodegroups','delete_all_sites', SEP,
        'delete_sites', 'delete_nodes', 'delete_slices', 'keys_clean', SEP,
        'delete_leases', 'list_leases', SEP,
        'populate', SEP,
        'nodestate_show','nodestate_safeboot','nodestate_boot', SEP,
        'qemu_list_all', 'qemu_list_mine', 'qemu_kill_all', SEP,
        'sfa_install_core', 'sfa_install_sfatables', 'sfa_install_plc', 'sfa_install_client', SEPSFA,
        'sfa_plcclean', 'sfa_dbclean', 'sfa_stop','sfa_uninstall', 'sfi_clean', SEPSFA,
        'sfa_get_expires', SEPSFA,
        'plc_db_dump' , 'plc_db_restore', SEP,
        'check_netflow','check_drl', SEP,
        'debug_nodemanager', 'slice_fs_present', SEP,
        'standby_1_through_20','yes','no',SEP,
        'install_syslinux6', 'bonding_builds', 'bonding_nodes', SEP,
        ]
    # default sequence of bonding steps
    default_bonding_steps = [
        'bonding_init_partial',
        'bonding_add_yum',
        'bonding_install_rpms', SEP,
        ]
 214
 215     @staticmethod
 216     def printable_steps(list):
 217         single_line = " ".join(list) + " "
 218         return single_line.replace(" "+SEP+" ", " \\\n").replace(" "+SEPSFA+" ", " \\\n")
 219     @staticmethod
 220     def valid_step(step):
 221         return step != SEP and step != SEPSFA
 222
 223     # turn off the sfa-related steps when build has skipped SFA
 224     # this was originally for centos5 but is still valid
 225     # for up to f12 as recent SFAs with sqlalchemy won't build before f14
 226     @staticmethod
 227     def _has_sfa_cached(rpms_url):
 228         if os.path.isfile(has_sfa_cache_filename):
 229             with open(has_sfa_cache_filename) as cache:
 230                 cached = cache.read() == "yes"
 231             utils.header("build provides SFA (cached):{}".format(cached))
 232             return cached
 233         # warning, we're now building 'sface' so let's be a bit more picky
 234         # full builds are expected to return with 0 here
 235         utils.header("Checking if build provides SFA package...")
 236         retcod = utils.system("curl --silent {}/ | grep -q sfa-".format(rpms_url)) == 0
 237         encoded = 'yes' if retcod else 'no'
 238         with open(has_sfa_cache_filename,'w') as cache:
 239             cache.write(encoded)
 240         return retcod
 241
 242     @staticmethod
 243     def check_whether_build_has_sfa(rpms_url):
 244         has_sfa = TestPlc._has_sfa_cached(rpms_url)
 245         if has_sfa:
 246             utils.header("build does provide SFA")
 247         else:
 248             # move all steps containing 'sfa' from default_steps to other_steps
 249             utils.header("SFA package not found - removing steps with sfa or sfi")
 250             sfa_steps = [ step for step in TestPlc.default_steps
 251                           if step.find('sfa') >= 0 or step.find("sfi") >= 0 ]
 252             TestPlc.other_steps += sfa_steps
 253             for step in sfa_steps:
 254                 TestPlc.default_steps.remove(step)
 255
 256     def __init__(self, plc_spec, options):
 257         self.plc_spec = plc_spec
 258         self.options = options
 259         self.test_ssh = TestSsh(self.plc_spec['host_box'], self.options.buildname)
 260         self.vserverip = plc_spec['vserverip']
 261         self.vservername = plc_spec['vservername']
 262         self.vplchostname = self.vservername.split('-')[-1]
 263         self.url = "https://{}:443/PLCAPI/".format(plc_spec['vserverip'])
 264         self.apiserver = TestApiserver(self.url, options.dry_run)
 265         (self.ssh_node_boot_timeout, self.ssh_node_boot_silent) = plc_spec['ssh_node_boot_timers']
 266         (self.ssh_node_debug_timeout, self.ssh_node_debug_silent) = plc_spec['ssh_node_debug_timers']
 267
 268     def has_addresses_api(self):
 269         return self.apiserver.has_method('AddIpAddress')
 270
 271     def name(self):
 272         name = self.plc_spec['name']
 273         return "{}.{}".format(name,self.vservername)
 274
 275     def hostname(self):
 276         return self.plc_spec['host_box']
 277
 278     def is_local(self):
 279         return self.test_ssh.is_local()
 280
 281     # define the API methods on this object through xmlrpc
 282     # would help, but not strictly necessary
 283     def connect(self):
 284         pass
 285
 286     def actual_command_in_guest(self,command, backslash=False):
 287         raw1 = self.host_to_guest(command)
 288         raw2 = self.test_ssh.actual_command(raw1, dry_run=self.options.dry_run, backslash=backslash)
 289         return raw2
 290
 291     def start_guest(self):
 292       return utils.system(self.test_ssh.actual_command(self.start_guest_in_host(),
 293                                                        dry_run=self.options.dry_run))
 294
 295     def stop_guest(self):
 296       return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host(),
 297                                                        dry_run=self.options.dry_run))
 298
 299     def run_in_guest(self, command, backslash=False):
 300         raw = self.actual_command_in_guest(command, backslash)
 301         return utils.system(raw)
 302
 303     def run_in_host(self,command):
 304         return self.test_ssh.run_in_buildname(command, dry_run=self.options.dry_run)
 305
 306     # backslashing turned out so awful at some point that I've turned off auto-backslashing
 307     # see e.g. plc_start esp. the version for f14
 308     #command gets run in the plc's vm
 309     def host_to_guest(self, command):
 310         ssh_leg = TestSsh(self.vplchostname)
 311         return ssh_leg.actual_command(command, keep_stdin=True)
 312
 313     # this /vservers thing is legacy...
 314     def vm_root_in_host(self):
 315         return "/vservers/{}/".format(self.vservername)
 316
 317     def vm_timestamp_path(self):
 318         return "/vservers/{}/{}.timestamp".format(self.vservername, self.vservername)
 319
 320     #start/stop the vserver
 321     def start_guest_in_host(self):
 322         return "virsh -c lxc:/// start {}".format(self.vservername)
 323
 324     def stop_guest_in_host(self):
 325         return "virsh -c lxc:/// destroy {}".format(self.vservername)
 326
 327     # xxx quick n dirty
 328     def run_in_guest_piped(self,local,remote):
 329         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),
 330                                                                      keep_stdin = True))
 331
 332     def yum_check_installed(self, rpms):
 333         if isinstance(rpms, list):
 334             rpms=" ".join(rpms)
 335         return self.run_in_guest("rpm -q {}".format(rpms)) == 0
 336
 337     # does a yum install in the vs, ignore yum retcod, check with rpm
 338     def yum_install(self, rpms):
 339         if isinstance(rpms, list):
 340             rpms=" ".join(rpms)
 341         self.run_in_guest("yum -y install {}".format(rpms))
 342         # yum-complete-transaction comes with yum-utils, that is in vtest.pkgs
 343         self.run_in_guest("yum-complete-transaction -y")
 344         return self.yum_check_installed(rpms)
 345
 346     def auth_root(self):
 347         return {'Username'   : self.plc_spec['settings']['PLC_ROOT_USER'],
 348                 'AuthMethod' : 'password',
 349                 'AuthString' : self.plc_spec['settings']['PLC_ROOT_PASSWORD'],
 350                 'Role'       : self.plc_spec['role'],
 351                 }
 352
 353     def locate_site(self,sitename):
 354         for site in self.plc_spec['sites']:
 355             if site['site_fields']['name'] == sitename:
 356                 return site
 357             if site['site_fields']['login_base'] == sitename:
 358                 return site
 359         raise Exception("Cannot locate site {}".format(sitename))
 360
 361     def locate_node(self, nodename):
 362         for site in self.plc_spec['sites']:
 363             for node in site['nodes']:
 364                 if node['name'] == nodename:
 365                     return site, node
 366         raise Exception("Cannot locate node {}".format(nodename))
 367
 368     def locate_hostname(self, hostname):
 369         for site in self.plc_spec['sites']:
 370             for node in site['nodes']:
 371                 if node['node_fields']['hostname'] == hostname:
 372                     return(site, node)
 373         raise Exception("Cannot locate hostname {}".format(hostname))
 374
 375     def locate_key(self, key_name):
 376         for key in self.plc_spec['keys']:
 377             if key['key_name'] == key_name:
 378                 return key
 379         raise Exception("Cannot locate key {}".format(key_name))
 380
 381     def locate_private_key_from_key_names(self, key_names):
 382         # locate the first avail. key
 383         found = False
 384         for key_name in key_names:
 385             key_spec = self.locate_key(key_name)
 386             test_key = TestKey(self,key_spec)
 387             publickey = test_key.publicpath()
 388             privatekey = test_key.privatepath()
 389             if os.path.isfile(publickey) and os.path.isfile(privatekey):
 390                 found = True
 391         if found:
 392             return privatekey
 393         else:
 394             return None
 395
 396     def locate_slice(self, slicename):
 397         for slice in self.plc_spec['slices']:
 398             if slice['slice_fields']['name'] == slicename:
 399                 return slice
 400         raise Exception("Cannot locate slice {}".format(slicename))
 401
 402     def all_sliver_objs(self):
 403         result = []
 404         for slice_spec in self.plc_spec['slices']:
 405             slicename = slice_spec['slice_fields']['name']
 406             for nodename in slice_spec['nodenames']:
 407                 result.append(self.locate_sliver_obj(nodename, slicename))
 408         return result
 409
 410     def locate_sliver_obj(self, nodename, slicename):
 411         site,node = self.locate_node(nodename)
 412         slice = self.locate_slice(slicename)
 413         # build objects
 414         test_site = TestSite(self, site)
 415         test_node = TestNode(self, test_site, node)
 416         # xxx the slice site is assumed to be the node site - mhh - probably harmless
 417         test_slice = TestSlice(self, test_site, slice)
 418         return TestSliver(self, test_node, test_slice)
 419
 420     def locate_first_node(self):
 421         nodename = self.plc_spec['slices'][0]['nodenames'][0]
 422         site,node = self.locate_node(nodename)
 423         test_site = TestSite(self, site)
 424         test_node = TestNode(self, test_site, node)
 425         return test_node
 426
 427     def locate_first_sliver(self):
 428         slice_spec = self.plc_spec['slices'][0]
 429         slicename = slice_spec['slice_fields']['name']
 430         nodename = slice_spec['nodenames'][0]
 431         return self.locate_sliver_obj(nodename,slicename)
 432
 433     # all different hostboxes used in this plc
 434     def get_BoxNodes(self):
 435         # maps on sites and nodes, return [ (host_box,test_node) ]
 436         tuples = []
 437         for site_spec in self.plc_spec['sites']:
 438             test_site = TestSite(self,site_spec)
 439             for node_spec in site_spec['nodes']:
 440                 test_node = TestNode(self, test_site, node_spec)
 441                 if not test_node.is_real():
 442                     tuples.append( (test_node.host_box(),test_node) )
 443         # transform into a dict { 'host_box' -> [ test_node .. ] }
 444         result = {}
 445         for (box,node) in tuples:
 446             if box not in result:
 447                 result[box] = [node]
 448             else:
 449                 result[box].append(node)
 450         return result
 451
 452     # a step for checking this stuff
 453     def show_boxes(self):
 454         'print summary of nodes location'
 455         for box,nodes in self.get_BoxNodes().items():
 456             print(box,":"," + ".join( [ node.name() for node in nodes ] ))
 457         return True
 458
 459     # make this a valid step
 460     def qemu_kill_all(self):
 461         'kill all qemu instances on the qemu boxes involved by this setup'
 462         # this is the brute force version, kill all qemus on that host box
 463         for (box,nodes) in self.get_BoxNodes().items():
 464             # pass the first nodename, as we don't push template-qemu on testboxes
 465             nodedir = nodes[0].nodedir()
 466             TestBoxQemu(box, self.options.buildname).qemu_kill_all(nodedir)
 467         return True
 468
 469     # make this a valid step
 470     def qemu_list_all(self):
 471         'list all qemu instances on the qemu boxes involved by this setup'
 472         for box,nodes in self.get_BoxNodes().items():
 473             # this is the brute force version, kill all qemus on that host box
 474             TestBoxQemu(box, self.options.buildname).qemu_list_all()
 475         return True
 476
    # list only the qemus related to this test
 478     def qemu_list_mine(self):
 479         'list qemu instances for our nodes'
 480         for (box,nodes) in self.get_BoxNodes().items():
 481             # the fine-grain version
 482             for node in nodes:
 483                 node.list_qemu()
 484         return True
 485
    # clean up only the qemus related to this test
 487     def qemu_clean_mine(self):
 488         'cleanup (rm -rf) qemu instances for our nodes'
 489         for box,nodes in self.get_BoxNodes().items():
 490             # the fine-grain version
 491             for node in nodes:
 492                 node.qemu_clean()
 493         return True
 494
 495     # kill only the right qemus
 496     def qemu_kill_mine(self):
 497         'kill the qemu instances for our nodes'
 498         for box,nodes in self.get_BoxNodes().items():
 499             # the fine-grain version
 500             for node in nodes:
 501                 node.kill_qemu()
 502         return True
 503
 504     #################### display config
 505     def show(self):
 506         "show test configuration after localization"
 507         self.show_pass(1)
 508         self.show_pass(2)
 509         return True
 510
    # ugly hack to make sure 'run export' only reports about the 1st plc
    # to avoid confusion - also we use 'inri_slice1' in various aliases..
    # class-wide counter: export() prints the full set of variables only
    # while this is still 1, and bumps it when it does
    exported_id = 1
 514     def export(self):
 515         "print cut'n paste-able stuff to export env variables to your shell"
 516         # guess local domain from hostname
 517         if TestPlc.exported_id > 1:
 518             print("export GUESTHOSTNAME{:d}={}".format(TestPlc.exported_id, self.plc_spec['vservername']))
 519             return True
 520         TestPlc.exported_id += 1
 521         domain = socket.gethostname().split('.',1)[1]
 522         fqdn   = "{}.{}".format(self.plc_spec['host_box'], domain)
 523         print("export BUILD={}".format(self.options.buildname))
 524         print("export PLCHOSTLXC={}".format(fqdn))
 525         print("export GUESTNAME={}".format(self.vservername))
 526         print("export GUESTHOSTNAME={}.{}".format(self.vplchostname, domain))
 527         # find hostname of first node
 528         hostname, qemubox = self.all_node_infos()[0]
 529         print("export KVMHOST={}.{}".format(qemubox, domain))
 530         print("export NODE={}".format(hostname))
 531         return True
 532
    # entry point
    # spec keys that show_pass and display_site_spec report even when
    # options.verbose is off
    always_display_keys=['PLC_WWW_HOST', 'nodes', 'sites']
 535     def show_pass(self, passno):
 536         for (key,val) in self.plc_spec.items():
 537             if not self.options.verbose and key not in TestPlc.always_display_keys:
 538                 continue
 539             if passno == 2:
 540                 if key == 'sites':
 541                     for site in val:
 542                         self.display_site_spec(site)
 543                         for node in site['nodes']:
 544                             self.display_node_spec(node)
 545                 elif key == 'initscripts':
 546                     for initscript in val:
 547                         self.display_initscript_spec(initscript)
 548                 elif key == 'slices':
 549                     for slice in val:
 550                         self.display_slice_spec(slice)
 551                 elif key == 'keys':
 552                     for key in val:
 553                         self.display_key_spec(key)
 554             elif passno == 1:
 555                 if key not in ['sites', 'initscripts', 'slices', 'keys']:
 556                     print('+   ', key, ':', val)
 557
 558     def display_site_spec(self, site):
 559         print('+ ======== site', site['site_fields']['name'])
 560         for k,v in site.items():
 561             if not self.options.verbose and k not in TestPlc.always_display_keys:
 562                 continue
 563             if k == 'nodes':
 564                 if v:
 565                     print('+       ','nodes : ', end=' ')
 566                     for node in v:
 567                         print(node['node_fields']['hostname'],'', end=' ')
 568                     print('')
 569             elif k == 'users':
 570                 if v:
 571                     print('+       users : ', end=' ')
 572                     for user in v:
 573                         print(user['name'],'', end=' ')
 574                     print('')
 575             elif k == 'site_fields':
 576                 print('+       login_base', ':', v['login_base'])
 577             elif k == 'address_fields':
 578                 pass
 579             else:
 580                 print('+       ', end=' ')
 581                 utils.pprint(k, v)
 582
 583     def display_initscript_spec(self, initscript):
 584         print('+ ======== initscript', initscript['initscript_fields']['name'])
 585
 586     def display_key_spec(self, key):
 587         print('+ ======== key', key['key_name'])
 588
 589     def display_slice_spec(self, slice):
 590         print('+ ======== slice', slice['slice_fields']['name'])
 591         for k,v in slice.items():
 592             if k == 'nodenames':
 593                 if v:
 594                     print('+       nodes : ', end=' ')
 595                     for nodename in v:
 596                         print(nodename,'', end=' ')
 597                     print('')
 598             elif k == 'usernames':
 599                 if v:
 600                     print('+       users : ', end=' ')
 601                     for username in v:
 602                         print(username,'', end=' ')
 603                     print('')
 604             elif k == 'slice_fields':
 605                 print('+       fields',':', end=' ')
 606                 print('max_nodes=',v['max_nodes'], end=' ')
 607                 print('')
 608             else:
 609                 print('+       ',k,v)
 610
 611     def display_node_spec(self, node):
 612         print("+           node={} host_box={}".format(node['name'], node['host_box']), end=' ')
 613         print("hostname=", node['node_fields']['hostname'], end=' ')
 614         print("ip=", node['interface_fields']['ip'])
 615         if self.options.verbose:
 616             utils.pprint("node details", node, depth=3)
 617
 618     # another entry point for just showing the boxes involved
 619     def display_mapping(self):
 620         TestPlc.display_mapping_plc(self.plc_spec)
 621         return True
 622
 623     @staticmethod
 624     def display_mapping_plc(plc_spec):
 625         print('+ MyPLC',plc_spec['name'])
 626         # WARNING this would not be right for lxc-based PLC's - should be harmless though
 627         print('+\tvserver address = root@{}:/vservers/{}'.format(plc_spec['host_box'], plc_spec['vservername']))
 628         print('+\tIP = {}/{}'.format(plc_spec['settings']['PLC_API_HOST'], plc_spec['vserverip']))
 629         for site_spec in plc_spec['sites']:
 630             for node_spec in site_spec['nodes']:
 631                 TestPlc.display_mapping_node(node_spec)
 632
 633     @staticmethod
 634     def display_mapping_node(node_spec):
 635         print('+   NODE {}'.format(node_spec['name']))
 636         print('+\tqemu box {}'.format(node_spec['host_box']))
 637         print('+\thostname={}'.format(node_spec['node_fields']['hostname']))
 638
 639     # write a timestamp in /vservers/<>.timestamp
 640     # cannot be inside the vserver, that causes vserver .. build to cough
 641     def plcvm_timestamp(self):
 642         "Create a timestamp to remember creation date for this plc"
 643         now = int(time.time())
 644         # TODO-lxc check this one
 645         # a first approx. is to store the timestamp close to the VM root like vs does
 646         stamp_path = self.vm_timestamp_path()
 647         stamp_dir = os.path.dirname(stamp_path)
 648         utils.system(self.test_ssh.actual_command("mkdir -p {}".format(stamp_dir)))
 649         return utils.system(self.test_ssh.actual_command("echo {:d} > {}".format(now, stamp_path))) == 0
 650
    # this is called unconditionally at the beginning of the test sequence
 652     # just in case this is a rerun, so if the vm is not running it's fine
 653     def plcvm_delete(self):
 654         "vserver delete the test myplc"
 655         stamp_path = self.vm_timestamp_path()
 656         self.run_in_host("rm -f {}".format(stamp_path))
 657         self.run_in_host("virsh -c lxc:// destroy {}".format(self.vservername))
 658         self.run_in_host("virsh -c lxc:// undefine {}".format(self.vservername))
 659         self.run_in_host("rm -fr /vservers/{}".format(self.vservername))
 660         return True
 661
 662     ### install
 663     # historically the build was being fetched by the tests
 664     # now the build pushes itself as a subdir of the tests workdir
 665     # so that the tests do not have to worry about extracting the build (svn, git, or whatever)
 666     def plcvm_create(self):
 667         "vserver creation (no install done)"
 668         # push the local build/ dir to the testplc box
 669         if self.is_local():
 670             # a full path for the local calls
 671             build_dir = os.path.dirname(sys.argv[0])
 672             # sometimes this is empty - set to "." in such a case
 673             if not build_dir:
 674                 build_dir="."
 675             build_dir += "/build"
 676         else:
 677             # use a standard name - will be relative to remote buildname
 678             build_dir = "build"
 679             # remove for safety; do *not* mkdir first, otherwise we end up with build/build/
 680             self.test_ssh.rmdir(build_dir)
 681             self.test_ssh.copy(build_dir, recursive=True)
 682         # the repo url is taken from arch-rpms-url
 683         # with the last step (i386) removed
 684         repo_url = self.options.arch_rpms_url
 685         for level in [ 'arch' ]:
 686             repo_url = os.path.dirname(repo_url)
 687
 688         # invoke initvm (drop support for vs)
 689         script = "lbuild-initvm.sh"
 690         script_options = ""
 691         # pass the vbuild-nightly options to [lv]test-initvm
 692         script_options += " -p {}".format(self.options.personality)
 693         script_options += " -d {}".format(self.options.pldistro)
 694         script_options += " -f {}".format(self.options.fcdistro)
 695         script_options += " -r {}".format(repo_url)
 696         vserver_name = self.vservername
 697         try:
 698             vserver_hostname = socket.gethostbyaddr(self.vserverip)[0]
 699             script_options += " -n {}".format(vserver_hostname)
 700         except:
 701             print("Cannot reverse lookup {}".format(self.vserverip))
 702             print("This is considered fatal, as this might pollute the test results")
 703             return False
 704         create_vserver="{build_dir}/{script} {script_options} {vserver_name}".format(**locals())
 705         return self.run_in_host(create_vserver) == 0
 706
 707     ### install_rpm
 708     def plc_install(self):
 709         """
 710         yum install myplc, noderepo
 711         plain bootstrapfs is not installed anymore
 712         """
 713
 714         # compute nodefamily
 715         if self.options.personality == "linux32":
 716             arch = "i386"
 717         elif self.options.personality == "linux64":
 718             arch = "x86_64"
 719         else:
 720             raise Exception("Unsupported personality {}".format(self.options.personality))
 721         nodefamily = "{}-{}-{}".format(self.options.pldistro, self.options.fcdistro, arch)
 722
 723         pkgs_list=[]
 724         pkgs_list.append("slicerepo-{}".format(nodefamily))
 725         pkgs_list.append("myplc")
 726         pkgs_list.append("noderepo-{}".format(nodefamily))
 727         pkgs_list.append("nodeimage-{}-plain".format(nodefamily))
 728         pkgs_string=" ".join(pkgs_list)
 729         return self.yum_install(pkgs_list)
 730
 731     def install_syslinux6(self):
 732         """
 733         install syslinux6 from the fedora21 release
 734         """
 735         key = 'http://mirror.onelab.eu/keys/RPM-GPG-KEY-fedora-21-primary'
 736
 737         rpms = [
 738             'http://mirror.onelab.eu/fedora/releases/21/Everything/x86_64/os/Packages/s/syslinux-6.03-1.fc21.x86_64.rpm',
 739             'http://mirror.onelab.eu/fedora/releases/21/Everything/x86_64/os/Packages/s/syslinux-nonlinux-6.03-1.fc21.noarch.rpm',
 740             'http://mirror.onelab.eu/fedora/releases/21/Everything/x86_64/os/Packages/s/syslinux-perl-6.03-1.fc21.x86_64.rpm',
 741         ]
 742         # this can be done several times
 743         self.run_in_guest("rpm --import {key}".format(**locals()))
 744         return self.run_in_guest("yum -y localinstall {}".format(" ".join(rpms))) == 0
 745
 746     def bonding_builds(self):
 747         """
 748         list /etc/yum.repos.d on the myplc side
 749         """
 750         self.run_in_guest("ls /etc/yum.repos.d/*partial.repo")
 751         return True
 752
 753     def bonding_nodes(self):
 754         """
 755         List nodes known to the myplc together with their nodefamiliy
 756         """
 757         print("---------------------------------------- nodes")
 758         for node in self.apiserver.GetNodes(self.auth_root()):
 759             print("{} -> {}".format(node['hostname'],
 760                                     self.apiserver.GetNodeFlavour(self.auth_root(),node['hostname'])['nodefamily']))
 761         print("---------------------------------------- nodes")
 762
 763
 764     ###
 765     def mod_python(self):
 766         """yum install mod_python, useful on f18 and above so as to avoid broken wsgi"""
 767         return self.yum_install( ['mod_python'] )
 768
 769     ###
 770     def plc_configure(self):
 771         "run plc-config-tty"
 772         tmpname = '{}.plc-config-tty'.format(self.name())
 773         with open(tmpname,'w') as fileconf:
 774             for (var,value) in self.plc_spec['settings'].items():
 775                 fileconf.write('e {}\n{}\n'.format(var, value))
 776             fileconf.write('w\n')
 777             fileconf.write('q\n')
 778         utils.system('cat {}'.format(tmpname))
 779         self.run_in_guest_piped('cat {}'.format(tmpname), 'plc-config-tty')
 780         utils.system('rm {}'.format(tmpname))
 781         return True
 782
 783 # f14 is a bit odd in this respect, although this worked fine in guests up to f18
 784 # however using a vplc guest under f20 requires this trick
 785 # the symptom is this: service plc start
 786 # Starting plc (via systemctl):  Failed to get D-Bus connection: \
 787 #    Failed to connect to socket /org/freedesktop/systemd1/private: Connection refused
 788 # weird thing is the doc says f14 uses upstart by default and not systemd
 789 # so this sounds kind of harmless
 790     def start_service(self, service):
 791         return self.start_stop_service(service, 'start')
 792     def stop_service(self, service):
 793         return self.start_stop_service(service, 'stop')
 794
 795     def start_stop_service(self, service, start_or_stop):
 796         "utility to start/stop a service with the special trick for f14"
 797         if self.options.fcdistro != 'f14':
 798             return self.run_in_guest("service {} {}".format(service, start_or_stop)) == 0
 799         else:
 800             # patch /sbin/service so it does not reset environment
 801             self.run_in_guest('sed -i -e \\"s,env -i,env,\\" /sbin/service')
 802             # this is because our own scripts in turn call service
 803             return self.run_in_guest("SYSTEMCTL_SKIP_REDIRECT=true service {} {}"\
 804                                      .format(service, start_or_stop)) == 0
 805
 806     def plc_start(self):
 807         "service plc start"
 808         return self.start_service('plc')
 809
 810     def plc_stop(self):
 811         "service plc stop"
 812         return self.stop_service('plc')
 813
 814     def plcvm_start(self):
 815         "start the PLC vserver"
 816         self.start_guest()
 817         return True
 818
 819     def plcvm_stop(self):
 820         "stop the PLC vserver"
 821         self.stop_guest()
 822         return True
 823
 824     # stores the keys from the config for further use
 825     def keys_store(self):
 826         "stores test users ssh keys in keys/"
 827         for key_spec in self.plc_spec['keys']:
 828                 TestKey(self,key_spec).store_key()
 829         return True
 830
 831     def keys_clean(self):
 832         "removes keys cached in keys/"
 833         utils.system("rm -rf ./keys")
 834         return True
 835
 836     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 837     # for later direct access to the nodes
 838     def keys_fetch(self):
 839         "gets ssh keys in /etc/planetlab/ and stores them locally in keys/"
 840         dir="./keys"
 841         if not os.path.isdir(dir):
 842             os.mkdir(dir)
 843         vservername = self.vservername
 844         vm_root = self.vm_root_in_host()
 845         overall = True
 846         prefix = 'debug_ssh_key'
 847         for ext in ['pub', 'rsa'] :
 848             src = "{vm_root}/etc/planetlab/{prefix}.{ext}".format(**locals())
 849             dst = "keys/{vservername}-debug.{ext}".format(**locals())
 850             if self.test_ssh.fetch(src, dst) != 0:
 851                 overall=False
 852         return overall
 853
 854     def sites(self):
 855         "create sites with PLCAPI"
 856         return self.do_sites()
 857
 858     def delete_sites(self):
 859         "delete sites with PLCAPI"
 860         return self.do_sites(action="delete")
 861
 862     def do_sites(self, action="add"):
 863         for site_spec in self.plc_spec['sites']:
 864             test_site = TestSite(self,site_spec)
 865             if (action != "add"):
 866                 utils.header("Deleting site {} in {}".format(test_site.name(), self.name()))
 867                 test_site.delete_site()
 868                 # deleted with the site
 869                 #test_site.delete_users()
 870                 continue
 871             else:
 872                 utils.header("Creating site {} & users in {}".format(test_site.name(), self.name()))
 873                 test_site.create_site()
 874                 test_site.create_users()
 875         return True
 876
 877     def delete_all_sites(self):
 878         "Delete all sites in PLC, and related objects"
 879         print('auth_root', self.auth_root())
 880         sites = self.apiserver.GetSites(self.auth_root(), {}, ['site_id','login_base'])
 881         for site in sites:
 882             # keep automatic site - otherwise we shoot in our own foot, root_auth is not valid anymore
 883             if site['login_base'] == self.plc_spec['settings']['PLC_SLICE_PREFIX']:
 884                 continue
 885             site_id = site['site_id']
 886             print('Deleting site_id', site_id)
 887             self.apiserver.DeleteSite(self.auth_root(), site_id)
 888         return True
 889
 890     def nodes(self):
 891         "create nodes with PLCAPI"
 892         return self.do_nodes()
 893     def delete_nodes(self):
 894         "delete nodes with PLCAPI"
 895         return self.do_nodes(action="delete")
 896
 897     def do_nodes(self, action="add"):
 898         for site_spec in self.plc_spec['sites']:
 899             test_site = TestSite(self, site_spec)
 900             if action != "add":
 901                 utils.header("Deleting nodes in site {}".format(test_site.name()))
 902                 for node_spec in site_spec['nodes']:
 903                     test_node = TestNode(self, test_site, node_spec)
 904                     utils.header("Deleting {}".format(test_node.name()))
 905                     test_node.delete_node()
 906             else:
 907                 utils.header("Creating nodes for site {} in {}".format(test_site.name(), self.name()))
 908                 for node_spec in site_spec['nodes']:
 909                     utils.pprint('Creating node {}'.format(node_spec), node_spec)
 910                     test_node = TestNode(self, test_site, node_spec)
 911                     test_node.create_node()
 912         return True
 913
 914     def nodegroups(self):
 915         "create nodegroups with PLCAPI"
 916         return self.do_nodegroups("add")
 917     def delete_nodegroups(self):
 918         "delete nodegroups with PLCAPI"
 919         return self.do_nodegroups("delete")
 920
    # number of seconds in a 365-day year; translate_timestamp considers any
    # timestamp below this value as relative rather than absolute
    YEAR = 365*24*3600
 922     @staticmethod
 923     def translate_timestamp(start, grain, timestamp):
 924         if timestamp < TestPlc.YEAR:
 925             return start + timestamp*grain
 926         else:
 927             return timestamp
 928
 929     @staticmethod
 930     def timestamp_printable(timestamp):
 931         return time.strftime('%m-%d %H:%M:%S UTC', time.gmtime(timestamp))
 932
 933     def leases(self):
 934         "create leases (on reservable nodes only, use e.g. run -c default -c resa)"
 935         now = int(time.time())
 936         grain = self.apiserver.GetLeaseGranularity(self.auth_root())
 937         print('API answered grain=', grain)
 938         start = (now//grain)*grain
 939         start += grain
 940         # find out all nodes that are reservable
 941         nodes = self.all_reservable_nodenames()
 942         if not nodes:
 943             utils.header("No reservable node found - proceeding without leases")
 944             return True
 945         ok = True
 946         # attach them to the leases as specified in plc_specs
 947         # this is where the 'leases' field gets interpreted as relative of absolute
 948         for lease_spec in self.plc_spec['leases']:
 949             # skip the ones that come with a null slice id
 950             if not lease_spec['slice']:
 951                 continue
 952             lease_spec['t_from']  = TestPlc.translate_timestamp(start, grain, lease_spec['t_from'])
 953             lease_spec['t_until'] = TestPlc.translate_timestamp(start, grain, lease_spec['t_until'])
 954             lease_addition = self.apiserver.AddLeases(self.auth_root(), nodes, lease_spec['slice'],
 955                                                       lease_spec['t_from'], lease_spec['t_until'])
 956             if lease_addition['errors']:
 957                 utils.header("Cannot create leases, {}".format(lease_addition['errors']))
 958                 ok = False
 959             else:
 960                 utils.header('Leases on nodes {} for {} from {:d} ({}) until {:d} ({})'\
 961                              .format(nodes, lease_spec['slice'],
 962                                      lease_spec['t_from'],  TestPlc.timestamp_printable(lease_spec['t_from']),
 963                                      lease_spec['t_until'], TestPlc.timestamp_printable(lease_spec['t_until'])))
 964
 965         return ok
 966
 967     def delete_leases(self):
 968         "remove all leases in the myplc side"
 969         lease_ids = [ l['lease_id'] for l in self.apiserver.GetLeases(self.auth_root())]
 970         utils.header("Cleaning leases {}".format(lease_ids))
 971         self.apiserver.DeleteLeases(self.auth_root(), lease_ids)
 972         return True
 973
 974     def list_leases(self):
 975         "list all leases known to the myplc"
 976         leases = self.apiserver.GetLeases(self.auth_root())
 977         now = int(time.time())
 978         for l in leases:
 979             current = l['t_until'] >= now
 980             if self.options.verbose or current:
 981                 utils.header("{} {} from {} until {}"\
 982                              .format(l['hostname'], l['name'],
 983                                      TestPlc.timestamp_printable(l['t_from']),
 984                                      TestPlc.timestamp_printable(l['t_until'])))
 985         return True
 986
 987     # create nodegroups if needed, and populate
 988     def do_nodegroups(self, action="add"):
 989         # 1st pass to scan contents
 990         groups_dict = {}
 991         for site_spec in self.plc_spec['sites']:
 992             test_site = TestSite(self,site_spec)
 993             for node_spec in site_spec['nodes']:
 994                 test_node = TestNode(self, test_site, node_spec)
 995                 if 'nodegroups' in node_spec:
 996                     nodegroupnames = node_spec['nodegroups']
 997                     if isinstance(nodegroupnames, str):
 998                         nodegroupnames = [ nodegroupnames ]
 999                     for nodegroupname in nodegroupnames:
1000                         if nodegroupname not in groups_dict:
1001                             groups_dict[nodegroupname] = []
1002                         groups_dict[nodegroupname].append(test_node.name())
1003         auth = self.auth_root()
1004         overall = True
1005         for (nodegroupname,group_nodes) in groups_dict.items():
1006             if action == "add":
1007                 print('nodegroups:', 'dealing with nodegroup',\
1008                     nodegroupname, 'on nodes', group_nodes)
1009                 # first, check if the nodetagtype is here
1010                 tag_types = self.apiserver.GetTagTypes(auth, {'tagname':nodegroupname})
1011                 if tag_types:
1012                     tag_type_id = tag_types[0]['tag_type_id']
1013                 else:
1014                     tag_type_id = self.apiserver.AddTagType(auth,
1015                                                             {'tagname' : nodegroupname,
1016                                                              'description' : 'for nodegroup {}'.format(nodegroupname),
1017                                                              'category' : 'test'})
1018                 print('located tag (type)', nodegroupname, 'as', tag_type_id)
1019                 # create nodegroup
1020                 nodegroups = self.apiserver.GetNodeGroups(auth, {'groupname' : nodegroupname})
1021                 if not nodegroups:
1022                     self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
1023                     print('created nodegroup', nodegroupname, \
1024                         'from tagname', nodegroupname, 'and value', 'yes')
1025                 # set node tag on all nodes, value='yes'
1026                 for nodename in group_nodes:
1027                     try:
1028                         self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
1029                     except:
1030                         traceback.print_exc()
1031                         print('node', nodename, 'seems to already have tag', nodegroupname)
1032                     # check anyway
1033                     try:
1034                         expect_yes = self.apiserver.GetNodeTags(auth,
1035                                                                 {'hostname' : nodename,
1036                                                                  'tagname'  : nodegroupname},
1037                                                                 ['value'])[0]['value']
1038                         if expect_yes != "yes":
1039                             print('Mismatch node tag on node',nodename,'got',expect_yes)
1040                             overall = False
1041                     except:
1042                         if not self.options.dry_run:
1043                             print('Cannot find tag', nodegroupname, 'on node', nodename)
1044                             overall = False
1045             else:
1046                 try:
1047                     print('cleaning nodegroup', nodegroupname)
1048                     self.apiserver.DeleteNodeGroup(auth, nodegroupname)
1049                 except:
1050                     traceback.print_exc()
1051                     overall = False
1052         return overall
1053
1054     # a list of TestNode objs
1055     def all_nodes(self):
1056         nodes=[]
1057         for site_spec in self.plc_spec['sites']:
1058             test_site = TestSite(self,site_spec)
1059             for node_spec in site_spec['nodes']:
1060                 nodes.append(TestNode(self, test_site, node_spec))
1061         return nodes
1062
1063     # return a list of tuples (nodename,qemuname)
1064     def all_node_infos(self) :
1065         node_infos = []
1066         for site_spec in self.plc_spec['sites']:
1067             node_infos += [ (node_spec['node_fields']['hostname'], node_spec['host_box']) \
1068                                 for node_spec in site_spec['nodes'] ]
1069         return node_infos
1070
1071     def all_nodenames(self):
1072         return [ x[0] for x in self.all_node_infos() ]
1073     def all_reservable_nodenames(self):
1074         res = []
1075         for site_spec in self.plc_spec['sites']:
1076             for node_spec in site_spec['nodes']:
1077                 node_fields = node_spec['node_fields']
1078                 if 'node_type' in node_fields and node_fields['node_type'] == 'reservable':
1079                     res.append(node_fields['hostname'])
1080         return res
1081
1082     # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
1083     def nodes_check_boot_state(self, target_boot_state, timeout_minutes,
1084                                silent_minutes, period_seconds = 15):
1085         if self.options.dry_run:
1086             print('dry_run')
1087             return True
1088
1089         class CompleterTaskBootState(CompleterTask):
1090             def __init__(self, test_plc, hostname):
1091                 self.test_plc = test_plc
1092                 self.hostname = hostname
1093                 self.last_boot_state = 'undef'
1094             def actual_run(self):
1095                 try:
1096                     node = self.test_plc.apiserver.GetNodes(self.test_plc.auth_root(),
1097                                                             [ self.hostname ],
1098                                                             ['boot_state'])[0]
1099                     self.last_boot_state = node['boot_state']
1100                     return self.last_boot_state == target_boot_state
1101                 except:
1102                     return False
1103             def message(self):
1104                 return "CompleterTaskBootState with node {}".format(self.hostname)
1105             def failure_epilogue(self):
1106                 print("node {} in state {} - expected {}"\
1107                     .format(self.hostname, self.last_boot_state, target_boot_state))
1108
1109         timeout = timedelta(minutes=timeout_minutes)
1110         graceout = timedelta(minutes=silent_minutes)
1111         period   = timedelta(seconds=period_seconds)
1112         # the nodes that haven't checked yet - start with a full list and shrink over time
1113         utils.header("checking nodes boot state (expected {})".format(target_boot_state))
1114         tasks = [ CompleterTaskBootState(self,hostname) \
1115                       for (hostname,_) in self.all_node_infos() ]
1116         message = 'check_boot_state={}'.format(target_boot_state)
1117         return Completer(tasks, message=message).run(timeout, graceout, period)
1118
1119     def nodes_booted(self):
1120         return self.nodes_check_boot_state('boot', timeout_minutes=30, silent_minutes=28)
1121
1122     def probe_kvm_iptables(self):
1123         (_,kvmbox) = self.all_node_infos()[0]
1124         TestSsh(kvmbox).run("iptables-save")
1125         return True
1126
1127     # probing nodes
1128     def check_nodes_ping(self, timeout_seconds=30, period_seconds=10):
1129         class CompleterTaskPingNode(CompleterTask):
1130             def __init__(self, hostname):
1131                 self.hostname = hostname
1132             def run(self, silent):
1133                 command="ping -c 1 -w 1 {} >& /dev/null".format(self.hostname)
1134                 return utils.system(command, silent=silent) == 0
1135             def failure_epilogue(self):
1136                 print("Cannot ping node with name {}".format(self.hostname))
1137         timeout = timedelta(seconds = timeout_seconds)
1138         graceout = timeout
1139         period = timedelta(seconds = period_seconds)
1140         node_infos = self.all_node_infos()
1141         tasks = [ CompleterTaskPingNode(h) for (h,_) in node_infos ]
1142         return Completer(tasks, message='ping_node').run(timeout, graceout, period)
1143
1144     # ping node before we try to reach ssh, helpful for troubleshooting failing bootCDs
1145     def ping_node(self):
1146         "Ping nodes"
1147         return self.check_nodes_ping()
1148
1149     def check_nodes_ssh(self, debug, timeout_minutes, silent_minutes, period_seconds=15):
1150         # various delays
1151         timeout  = timedelta(minutes=timeout_minutes)
1152         graceout = timedelta(minutes=silent_minutes)
1153         period   = timedelta(seconds=period_seconds)
1154         vservername = self.vservername
1155         if debug:
1156             message = "debug"
1157             completer_message = 'ssh_node_debug'
1158             local_key = "keys/{vservername}-debug.rsa".format(**locals())
1159         else:
1160             message = "boot"
1161             completer_message = 'ssh_node_boot'
1162             local_key = "keys/key_admin.rsa"
1163         utils.header("checking ssh access to nodes (expected in {} mode)".format(message))
1164         node_infos = self.all_node_infos()
1165         tasks = [ CompleterTaskNodeSsh(nodename, qemuname, local_key,
1166                                         boot_state=message, dry_run=self.options.dry_run) \
1167                       for (nodename, qemuname) in node_infos ]
1168         return Completer(tasks, message=completer_message).run(timeout, graceout, period)
1169
1170     def ssh_node_debug(self):
1171         "Tries to ssh into nodes in debug mode with the debug ssh key"
1172         return self.check_nodes_ssh(debug = True,
1173                                     timeout_minutes = self.ssh_node_debug_timeout,
1174                                     silent_minutes = self.ssh_node_debug_silent)
1175
1176     def ssh_node_boot(self):
1177         "Tries to ssh into nodes in production mode with the root ssh key"
1178         return self.check_nodes_ssh(debug = False,
1179                                     timeout_minutes = self.ssh_node_boot_timeout,
1180                                     silent_minutes = self.ssh_node_boot_silent)
1181
1182     def node_bmlogs(self):
1183         "Checks that there's a non-empty dir. /var/log/bm/raw"
1184         return utils.system(self.actual_command_in_guest("ls /var/log/bm/raw")) == 0
1185
    # the bodies below are empty on purpose: node_mapper replaces each of
    # these steps with a wrapper that calls the TestNode method of the same
    # name on every node returned by all_nodes(), and that returns False as
    # soon as one of these calls returns a false value
    # the docstring of each step is taken from the TestNode method as well
    @node_mapper
    def qemu_local_init(self): pass
    @node_mapper
    def bootcd(self): pass
    @node_mapper
    def qemu_local_config(self): pass
    @node_mapper
    def nodestate_reinstall(self): pass
    @node_mapper
    def nodestate_safeboot(self): pass
    @node_mapper
    def nodestate_boot(self): pass
    @node_mapper
    def nodestate_show(self): pass
    @node_mapper
    def qemu_export(self): pass
1202
1203     ### check hooks : invoke scripts from hooks/{node,slice}
1204     def check_hooks_node(self):
1205         return self.locate_first_node().check_hooks()
1206     def check_hooks_sliver(self) :
1207         return self.locate_first_sliver().check_hooks()
1208
1209     def check_hooks(self):
1210         "runs unit tests in the node and slice contexts - see hooks/{node,slice}"
1211         return self.check_hooks_node() and self.check_hooks_sliver()
1212
1213     ### initscripts
1214     def do_check_initscripts(self):
1215         class CompleterTaskInitscript(CompleterTask):
1216             def __init__(self, test_sliver, stamp):
1217                 self.test_sliver = test_sliver
1218                 self.stamp = stamp
1219             def actual_run(self):
1220                 return self.test_sliver.check_initscript_stamp(self.stamp)
1221             def message(self):
1222                 return "initscript checker for {}".format(self.test_sliver.name())
1223             def failure_epilogue(self):
1224                 print("initscript stamp {} not found in sliver {}"\
1225                     .format(self.stamp, self.test_sliver.name()))
1226
1227         tasks = []
1228         for slice_spec in self.plc_spec['slices']:
1229             if 'initscriptstamp' not in slice_spec:
1230                 continue
1231             stamp = slice_spec['initscriptstamp']
1232             slicename = slice_spec['slice_fields']['name']
1233             for nodename in slice_spec['nodenames']:
1234                 print('nodename', nodename, 'slicename', slicename, 'stamp', stamp)
1235                 site,node = self.locate_node(nodename)
1236                 # xxx - passing the wrong site - probably harmless
1237                 test_site = TestSite(self, site)
1238                 test_slice = TestSlice(self, test_site, slice_spec)
1239                 test_node = TestNode(self, test_site, node)
1240                 test_sliver = TestSliver(self, test_node, test_slice)
1241                 tasks.append(CompleterTaskInitscript(test_sliver, stamp))
1242         return Completer(tasks, message='check_initscripts').\
1243             run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10))
1244
1245     def check_initscripts(self):
1246         "check that the initscripts have triggered"
1247         return self.do_check_initscripts()
1248
1249     def initscripts(self):
1250         "create initscripts with PLCAPI"
1251         for initscript in self.plc_spec['initscripts']:
1252             utils.pprint('Adding Initscript in plc {}'.format(self.plc_spec['name']), initscript)
1253             self.apiserver.AddInitScript(self.auth_root(), initscript['initscript_fields'])
1254         return True
1255
1256     def delete_initscripts(self):
1257         "delete initscripts with PLCAPI"
1258         for initscript in self.plc_spec['initscripts']:
1259             initscript_name = initscript['initscript_fields']['name']
1260             print(('Attempting to delete {} in plc {}'.format(initscript_name, self.plc_spec['name'])))
1261             try:
1262                 self.apiserver.DeleteInitScript(self.auth_root(), initscript_name)
1263                 print(initscript_name, 'deleted')
1264             except:
1265                 print('deletion went wrong - probably did not exist')
1266         return True
1267
1268     ### manage slices
1269     def slices(self):
1270         "create slices with PLCAPI"
1271         return self.do_slices(action="add")
1272
1273     def delete_slices(self):
1274         "delete slices with PLCAPI"
1275         return self.do_slices(action="delete")
1276
1277     def fill_slices(self):
1278         "add nodes in slices with PLCAPI"
1279         return self.do_slices(action="fill")
1280
1281     def empty_slices(self):
1282         "remove nodes from slices with PLCAPI"
1283         return self.do_slices(action="empty")
1284
1285     def do_slices(self,  action="add"):
1286         for slice in self.plc_spec['slices']:
1287             site_spec = self.locate_site(slice['sitename'])
1288             test_site = TestSite(self,site_spec)
1289             test_slice=TestSlice(self,test_site,slice)
1290             if action == "delete":
1291                 test_slice.delete_slice()
1292             elif action == "fill":
1293                 test_slice.add_nodes()
1294             elif action == "empty":
1295                 test_slice.delete_nodes()
1296             else:
1297                 test_slice.create_slice()
1298         return True
1299
    # the bodies below are empty on purpose, the actual work is provided by
    # the slice_mapper__tasks decorator
    # NOTE(review): the 3 numbers are presumably (timeout_minutes,
    # silent_minutes, period_seconds) - confirm against slice_mapper__tasks
    @slice_mapper__tasks(20, 10, 15)
    def ssh_slice(self): pass
    @slice_mapper__tasks(20, 19, 15)
    def ssh_slice_off(self): pass
    @slice_mapper__tasks(1, 1, 15)
    def slice_fs_present(self): pass
    @slice_mapper__tasks(1, 1, 15)
    def slice_fs_deleted(self): pass
1308
1309     # use another name so we can exclude/ignore it from the tests on the nightly command line
1310     def ssh_slice_again(self): return self.ssh_slice()
1311     # note that simply doing ssh_slice_again=ssh_slice would kind of work too
1312     # but for some reason the ignore-wrapping thing would not
1313
    # empty bodies on purpose: the decorators provide the implementation
    # NOTE(review): slice_mapper presumably runs the TestSlice method of the
    # same name on every slice, like node_mapper does with nodes - confirm
    @slice_mapper
    def ssh_slice_basics(self): pass
    @slice_mapper
    def check_vsys_defaults(self): pass

    # node_mapper calls the TestNode method of the same name on all nodes
    @node_mapper
    def keys_clear_known_hosts(self): pass
1321
1322     def plcapi_urls(self):
1323         """
1324         attempts to reach the PLCAPI with various forms for the URL
1325         """
1326         return PlcapiUrlScanner(self.auth_root(), ip=self.vserverip).scan()
1327
1328     def speed_up_slices(self):
1329         "tweak nodemanager cycle (wait time) to 30+/-10 s"
1330         return self._speed_up_slices (30, 10)
1331     def super_speed_up_slices(self):
1332         "dev mode: tweak nodemanager cycle (wait time) to 5+/-1 s"
1333         return self._speed_up_slices(5, 1)
1334
1335     def _speed_up_slices(self, p, r):
1336         # create the template on the server-side
1337         template = "{}.nodemanager".format(self.name())
1338         with open(template,"w") as template_file:
1339             template_file.write('OPTIONS="-p {} -r {} -d"\n'.format(p, r))
1340         in_vm = "/var/www/html/PlanetLabConf/nodemanager"
1341         remote = "{}/{}".format(self.vm_root_in_host(), in_vm)
1342         self.test_ssh.copy_abs(template, remote)
1343         # Add a conf file
1344         if not self.apiserver.GetConfFiles(self.auth_root(),
1345                                            {'dest' : '/etc/sysconfig/nodemanager'}):
1346             self.apiserver.AddConfFile(self.auth_root(),
1347                                         {'dest' : '/etc/sysconfig/nodemanager',
1348                                          'source' : 'PlanetLabConf/nodemanager',
1349                                          'postinstall_cmd' : 'service nm restart',})
1350         return True
1351
1352     def debug_nodemanager(self):
1353         "sets verbose mode for nodemanager, and speeds up cycle even more (needs speed_up_slices first)"
1354         template = "{}.nodemanager".format(self.name())
1355         with open(template,"w") as template_file:
1356             template_file.write('OPTIONS="-p 10 -r 6 -v -d"\n')
1357         in_vm = "/var/www/html/PlanetLabConf/nodemanager"
1358         remote = "{}/{}".format(self.vm_root_in_host(), in_vm)
1359         self.test_ssh.copy_abs(template, remote)
1360         return True
1361
    # node_mapper (top of this file) replaces each stub below with a function that
    # calls the TestNode method of the same name on every node of self.all_nodes(),
    # returns False if any call fails, and takes its docstring from that TestNode method
    @node_mapper
    def qemu_start(self) : pass

    @node_mapper
    def qemu_timestamp(self) : pass

    @node_mapper
    def qemu_nodefamily(self): pass
1370
1371     # when a spec refers to a node possibly on another plc
1372     def locate_sliver_obj_cross(self, nodename, slicename, other_plcs):
1373         for plc in [ self ] + other_plcs:
1374             try:
1375                 return plc.locate_sliver_obj(nodename, slicename)
1376             except:
1377                 pass
1378         raise Exception("Cannot locate sliver {}@{} among all PLCs".format(nodename, slicename))
1379
1380     # implement this one as a cross step so that we can take advantage of different nodes
1381     # in multi-plcs mode
1382     def cross_check_tcp(self, other_plcs):
1383         "check TCP connectivity between 2 slices (or in loopback if only one is defined)"
1384         if 'tcp_specs' not in self.plc_spec or not self.plc_spec['tcp_specs']:
1385             utils.header("check_tcp: no/empty config found")
1386             return True
1387         specs = self.plc_spec['tcp_specs']
1388         overall = True
1389
1390         # first wait for the network to be up and ready from the slices
1391         class CompleterTaskNetworkReadyInSliver(CompleterTask):
1392             def __init__(self, test_sliver):
1393                 self.test_sliver = test_sliver
1394             def actual_run(self):
1395                 return self.test_sliver.check_tcp_ready(port = 9999)
1396             def message(self):
1397                 return "network ready checker for {}".format(self.test_sliver.name())
1398             def failure_epilogue(self):
1399                 print("could not bind port from sliver {}".format(self.test_sliver.name()))
1400
1401         sliver_specs = {}
1402         tasks = []
1403         managed_sliver_names = set()
1404         for spec in specs:
1405             # locate the TestSliver instances involved, and cache them in the spec instance
1406             spec['s_sliver'] = self.locate_sliver_obj_cross(spec['server_node'], spec['server_slice'], other_plcs)
1407             spec['c_sliver'] = self.locate_sliver_obj_cross(spec['client_node'], spec['client_slice'], other_plcs)
1408             message = "Will check TCP between s={} and c={}"\
1409                       .format(spec['s_sliver'].name(), spec['c_sliver'].name())
1410             if 'client_connect' in spec:
1411                 message += " (using {})".format(spec['client_connect'])
1412             utils.header(message)
1413             # we need to check network presence in both slivers, but also
1414             # avoid to insert a sliver several times
1415             for sliver in [ spec['s_sliver'], spec['c_sliver'] ]:
1416                 if sliver.name() not in managed_sliver_names:
1417                     tasks.append(CompleterTaskNetworkReadyInSliver(sliver))
1418                     # add this sliver's name in the set
1419                     managed_sliver_names .update( {sliver.name()} )
1420
1421         # wait for the netork to be OK in all server sides
1422         if not Completer(tasks, message='check for network readiness in slivers').\
1423            run(timedelta(seconds=30), timedelta(seconds=24), period=timedelta(seconds=5)):
1424             return False
1425
1426         # run server and client
1427         for spec in specs:
1428             port = spec['port']
1429             # server side
1430             # the issue here is that we have the server run in background
1431             # and so we have no clue if it took off properly or not
1432             # looks like in some cases it does not
1433             if not spec['s_sliver'].run_tcp_server(port, timeout=20):
1434                 overall = False
1435                 break
1436
1437             # idem for the client side
1438             # use nodename from located sliver, unless 'client_connect' is set
1439             if 'client_connect' in spec:
1440                 destination = spec['client_connect']
1441             else:
1442                 destination = spec['s_sliver'].test_node.name()
1443             if not spec['c_sliver'].run_tcp_client(destination, port):
1444                 overall = False
1445         return overall
1446
1447     # painfully enough, we need to allow for some time as netflow might show up last
1448     def check_system_slice(self):
1449         "all nodes: check that a system slice is alive"
1450         # netflow currently not working in the lxc distro
1451         # drl not built at all in the wtx distro
1452         # if we find either of them we're happy
1453         return self.check_netflow() or self.check_drl()
1454
1455     # expose these
1456     def check_netflow(self): return self._check_system_slice('netflow')
1457     def check_drl(self): return self._check_system_slice('drl')
1458
1459     # we have the slices up already here, so it should not take too long
1460     def _check_system_slice(self, slicename, timeout_minutes=5, period_seconds=15):
1461         class CompleterTaskSystemSlice(CompleterTask):
1462             def __init__(self, test_node, dry_run):
1463                 self.test_node = test_node
1464                 self.dry_run = dry_run
1465             def actual_run(self):
1466                 return self.test_node._check_system_slice(slicename, dry_run=self.dry_run)
1467             def message(self):
1468                 return "System slice {} @ {}".format(slicename, self.test_node.name())
1469             def failure_epilogue(self):
1470                 print("COULD not find system slice {} @ {}".format(slicename, self.test_node.name()))
1471         timeout = timedelta(minutes=timeout_minutes)
1472         silent  = timedelta(0)
1473         period  = timedelta(seconds=period_seconds)
1474         tasks = [ CompleterTaskSystemSlice(test_node, self.options.dry_run) \
1475                       for test_node in self.all_nodes() ]
1476         return Completer(tasks, message='_check_system_slice').run(timeout, silent, period)
1477
1478     def plcsh_stress_test(self):
1479         "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents"
1480         # install the stress-test in the plc image
1481         location = "/usr/share/plc_api/plcsh_stress_test.py"
1482         remote = "{}/{}".format(self.vm_root_in_host(), location)
1483         self.test_ssh.copy_abs("plcsh_stress_test.py", remote)
1484         command = location
1485         command += " -- --check"
1486         if self.options.size == 1:
1487             command +=  " --tiny"
1488         return self.run_in_guest(command) == 0
1489
    # populate runs the same utility with slightly different options
1491     # in particular runs with --preserve (dont cleanup) and without --check
1492     # also it gets run twice, once with the --foreign option for creating fake foreign entries
1493
1494     def sfa_install_all(self):
1495         "yum install sfa sfa-plc sfa-sfatables sfa-client"
1496         return self.yum_install("sfa sfa-plc sfa-sfatables sfa-client")
1497
1498     def sfa_install_core(self):
1499         "yum install sfa"
1500         return self.yum_install("sfa")
1501
1502     def sfa_install_plc(self):
1503         "yum install sfa-plc"
1504         return self.yum_install("sfa-plc")
1505
1506     def sfa_install_sfatables(self):
1507         "yum install sfa-sfatables"
1508         return self.yum_install("sfa-sfatables")
1509
1510     # for some very odd reason, this sometimes fails with the following symptom
1511     # # yum install sfa-client
1512     # Setting up Install Process
1513     # ...
1514     # Downloading Packages:
1515     # Running rpm_check_debug
1516     # Running Transaction Test
1517     # Transaction Test Succeeded
1518     # Running Transaction
1519     # Transaction couldn't start:
1520     # installing package sfa-client-2.1-7.onelab.2012.05.23.i686 needs 68KB on the / filesystem
1521     # [('installing package sfa-client-2.1-7.onelab.2012.05.23.i686 needs 68KB on the / filesystem', (9, '/', 69632L))]
1522     # even though in the same context I have
1523     # [2012.05.23--f14-32-sfastd1-1-vplc07] / # df -h
1524     # Filesystem            Size  Used Avail Use% Mounted on
1525     # /dev/hdv1             806G  264G  501G  35% /
1526     # none                   16M   36K   16M   1% /tmp
1527     #
1528     # so as a workaround, we first try yum install, and then invoke rpm on the cached rpm...
1529     def sfa_install_client(self):
1530         "yum install sfa-client"
1531         first_try = self.yum_install("sfa-client")
1532         if first_try:
1533             return True
1534         utils.header("********** Regular yum failed - special workaround in place, 2nd chance")
1535         code, cached_rpm_path = \
1536                 utils.output_of(self.actual_command_in_guest('find /var/cache/yum -name sfa-client\*.rpm'))
1537         utils.header("rpm_path=<<{}>>".format(rpm_path))
1538         # just for checking
1539         self.run_in_guest("rpm -i {}".format(cached_rpm_path))
1540         return self.yum_check_installed("sfa-client")
1541
1542     def sfa_dbclean(self):
1543         "thoroughly wipes off the SFA database"
1544         return self.run_in_guest("sfaadmin reg nuke") == 0 or \
1545             self.run_in_guest("sfa-nuke.py") == 0 or \
1546             self.run_in_guest("sfa-nuke-plc.py") == 0 or \
1547             self.run_in_guest("sfaadmin registry nuke") == 0
1548
1549     def sfa_fsclean(self):
1550         "cleanup /etc/sfa/trusted_roots and /var/lib/sfa"
1551         self.run_in_guest("rm -rf /etc/sfa/trusted_roots /var/lib/sfa/authorities")
1552         return True
1553
1554     def sfa_plcclean(self):
1555         "cleans the PLC entries that were created as a side effect of running the script"
1556         # ignore result
1557         sfa_spec = self.plc_spec['sfa']
1558
1559         for auth_sfa_spec in sfa_spec['auth_sfa_specs']:
1560             login_base = auth_sfa_spec['login_base']
1561             try:
1562                 self.apiserver.DeleteSite(self.auth_root(),login_base)
1563             except:
1564                 print("Site {} already absent from PLC db".format(login_base))
1565
1566             for spec_name in ['pi_spec','user_spec']:
1567                 user_spec = auth_sfa_spec[spec_name]
1568                 username = user_spec['email']
1569                 try:
1570                     self.apiserver.DeletePerson(self.auth_root(),username)
1571                 except:
1572                     # this in fact is expected as sites delete their members
1573                     #print "User {} already absent from PLC db".format(username)
1574                     pass
1575
1576         print("REMEMBER TO RUN sfa_import AGAIN")
1577         return True
1578
1579     def sfa_uninstall(self):
1580         "uses rpm to uninstall sfa - ignore result"
1581         self.run_in_guest("rpm -e sfa sfa-sfatables sfa-client sfa-plc")
1582         self.run_in_guest("rm -rf /var/lib/sfa")
1583         self.run_in_guest("rm -rf /etc/sfa")
1584         self.run_in_guest("rm -rf /var/log/sfa_access.log /var/log/sfa_import_plc.log /var/log/sfa.daemon")
1585         # xxx tmp
1586         self.run_in_guest("rpm -e --noscripts sfa-plc")
1587         return True
1588
1589     ### run unit tests for SFA
1590     # NOTE: for some reason on f14/i386, yum install sfa-tests fails for no reason
1591     # Running Transaction
1592     # Transaction couldn't start:
1593     # installing package sfa-tests-1.0-21.onelab.i686 needs 204KB on the / filesystem
1594     # [('installing package sfa-tests-1.0-21.onelab.i686 needs 204KB on the / filesystem', (9, '/', 208896L))]
1595     # no matter how many Gbs are available on the testplc
1596     # could not figure out what's wrong, so...
1597     # if the yum install phase fails, consider the test is successful
1598     # other combinations will eventually run it hopefully
1599     def sfa_utest(self):
1600         "yum install sfa-tests and run SFA unittests"
1601         self.run_in_guest("yum -y install sfa-tests")
1602         # failed to install - forget it
1603         if self.run_in_guest("rpm -q sfa-tests") != 0:
1604             utils.header("WARNING: SFA unit tests failed to install, ignoring")
1605             return True
1606         return self.run_in_guest("/usr/share/sfa/tests/testAll.py") == 0
1607
1608     ###
1609     def confdir(self):
1610         dirname = "conf.{}".format(self.plc_spec['name'])
1611         if not os.path.isdir(dirname):
1612             utils.system("mkdir -p {}".format(dirname))
1613         if not os.path.isdir(dirname):
1614             raise Exception("Cannot create config dir for plc {}".format(self.name()))
1615         return dirname
1616
1617     def conffile(self, filename):
1618         return "{}/{}".format(self.confdir(), filename)
1619     def confsubdir(self, dirname, clean, dry_run=False):
1620         subdirname = "{}/{}".format(self.confdir(), dirname)
1621         if clean:
1622             utils.system("rm -rf {}".format(subdirname))
1623         if not os.path.isdir(subdirname):
1624             utils.system("mkdir -p {}".format(subdirname))
1625         if not dry_run and not os.path.isdir(subdirname):
1626             raise "Cannot create config subdir {} for plc {}".format(dirname, self.name())
1627         return subdirname
1628
1629     def conffile_clean(self, filename):
1630         filename=self.conffile(filename)
1631         return utils.system("rm -rf {}".format(filename))==0
1632
1633     ###
1634     def sfa_configure(self):
1635         "run sfa-config-tty"
1636         tmpname = self.conffile("sfa-config-tty")
1637         with open(tmpname,'w') as fileconf:
1638             for (var,value) in self.plc_spec['sfa']['settings'].items():
1639                 fileconf.write('e {}\n{}\n'.format(var, value))
1640             fileconf.write('w\n')
1641             fileconf.write('R\n')
1642             fileconf.write('q\n')
1643         utils.system('cat {}'.format(tmpname))
1644         self.run_in_guest_piped('cat {}'.format(tmpname), 'sfa-config-tty')
1645         return True
1646
1647     def aggregate_xml_line(self):
1648         port = self.plc_spec['sfa']['neighbours-port']
1649         return '<aggregate addr="{}" hrn="{}" port="{}"/>'\
1650             .format(self.vserverip, self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH'], port)
1651
1652     def registry_xml_line(self):
1653         return '<registry addr="{}" hrn="{}" port="12345"/>'\
1654             .format(self.vserverip, self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH'])
1655
1656
1657     # a cross step that takes all other plcs in argument
1658     def cross_sfa_configure(self, other_plcs):
1659         "writes aggregates.xml and registries.xml that point to all other PLCs in the test"
1660         # of course with a single plc, other_plcs is an empty list
1661         if not other_plcs:
1662             return True
1663         agg_fname = self.conffile("agg.xml")
1664         with open(agg_fname,"w") as out:
1665             out.write("<aggregates>{}</aggregates>\n"\
1666                       .format(" ".join([ plc.aggregate_xml_line() for plc in other_plcs ])))
1667         utils.header("(Over)wrote {}".format(agg_fname))
1668         reg_fname=self.conffile("reg.xml")
1669         with open(reg_fname,"w") as out:
1670             out.write("<registries>{}</registries>\n"\
1671                       .format(" ".join([ plc.registry_xml_line() for plc in other_plcs ])))
1672         utils.header("(Over)wrote {}".format(reg_fname))
1673         return self.test_ssh.copy_abs(agg_fname,
1674                                       '/{}/etc/sfa/aggregates.xml'.format(self.vm_root_in_host())) == 0 \
1675            and self.test_ssh.copy_abs(reg_fname,
1676                                       '/{}/etc/sfa/registries.xml'.format(self.vm_root_in_host())) == 0
1677
1678     def sfa_import(self):
1679         "use sfaadmin to import from plc"
1680         auth = self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH']
1681         return self.run_in_guest('sfaadmin reg import_registry') == 0
1682
1683     def sfa_start(self):
1684         "service sfa start"
1685         return self.start_service('sfa')
1686
1687
1688     def sfi_configure(self):
1689         "Create /root/sfi on the plc side for sfi client configuration"
1690         if self.options.dry_run:
1691             utils.header("DRY RUN - skipping step")
1692             return True
1693         sfa_spec = self.plc_spec['sfa']
1694         # cannot use auth_sfa_mapper to pass dir_name
1695         for slice_spec in self.plc_spec['sfa']['auth_sfa_specs']:
1696             test_slice = TestAuthSfa(self, slice_spec)
1697             dir_basename = os.path.basename(test_slice.sfi_path())
1698             dir_name = self.confsubdir("dot-sfi/{}".format(dir_basename),
1699                                        clean=True, dry_run=self.options.dry_run)
1700             test_slice.sfi_configure(dir_name)
1701             # push into the remote /root/sfi area
1702             location = test_slice.sfi_path()
1703             remote = "{}/{}".format(self.vm_root_in_host(), location)
1704             self.test_ssh.mkdir(remote, abs=True)
1705             # need to strip last level or remote otherwise we get an extra dir level
1706             self.test_ssh.copy_abs(dir_name, os.path.dirname(remote), recursive=True)
1707
1708         return True
1709
1710     def sfi_clean(self):
1711         "clean up /root/sfi on the plc side"
1712         self.run_in_guest("rm -rf /root/sfi")
1713         return True
1714
1715     def sfa_rspec_empty(self):
1716         "expose a static empty rspec (ships with the tests module) in the sfi directory"
1717         filename = "empty-rspec.xml"
1718         overall = True
1719         for slice_spec in self.plc_spec['sfa']['auth_sfa_specs']:
1720             test_slice = TestAuthSfa(self, slice_spec)
1721             in_vm = test_slice.sfi_path()
1722             remote = "{}/{}".format(self.vm_root_in_host(), in_vm)
1723             if self.test_ssh.copy_abs(filename, remote) !=0:
1724                 overall = False
1725         return overall
1726
    # SFA steps declared as empty stubs: only the method names carry information here.
    # NOTE(review): auth_sfa_mapper is defined outside this excerpt; by analogy with
    # node_mapper it presumably calls the same-named TestAuthSfa method for every
    # entry of plc_spec['sfa']['auth_sfa_specs'] - TODO confirm.
    @auth_sfa_mapper
    def sfa_register_site(self): pass
    @auth_sfa_mapper
    def sfa_register_pi(self): pass
    @auth_sfa_mapper
    def sfa_register_user(self): pass
    @auth_sfa_mapper
    def sfa_update_user(self): pass
    @auth_sfa_mapper
    def sfa_register_slice(self): pass
    @auth_sfa_mapper
    def sfa_renew_slice(self): pass
    @auth_sfa_mapper
    def sfa_get_expires(self): pass
    @auth_sfa_mapper
    def sfa_discover(self): pass
    @auth_sfa_mapper
    def sfa_rspec(self): pass
    @auth_sfa_mapper
    def sfa_allocate(self): pass
    @auth_sfa_mapper
    def sfa_allocate_empty(self): pass
    @auth_sfa_mapper
    def sfa_provision(self): pass
    @auth_sfa_mapper
    def sfa_provision_empty(self): pass
    @auth_sfa_mapper
    def sfa_check_slice_plc(self): pass
    @auth_sfa_mapper
    def sfa_check_slice_plc_empty(self): pass
    @auth_sfa_mapper
    def sfa_update_slice(self): pass
    @auth_sfa_mapper
    def sfa_remove_user_from_slice(self): pass
    @auth_sfa_mapper
    def sfa_insert_user_in_slice(self): pass
    @auth_sfa_mapper
    def sfi_list(self): pass
    @auth_sfa_mapper
    def sfi_show_site(self): pass
    @auth_sfa_mapper
    def sfi_show_slice(self): pass
    @auth_sfa_mapper
    def sfi_show_slice_researchers(self): pass
    @auth_sfa_mapper
    def ssh_slice_sfa(self): pass
    @auth_sfa_mapper
    def sfa_delete_user(self): pass
    @auth_sfa_mapper
    def sfa_delete_slice(self): pass
1777
1778     def sfa_stop(self):
1779         "service sfa stop"
1780         return self.stop_service('sfa')
1781
1782     def populate(self):
1783         "creates random entries in the PLCAPI"
1784         # install the stress-test in the plc image
1785         location = "/usr/share/plc_api/plcsh_stress_test.py"
1786         remote = "{}/{}".format(self.vm_root_in_host(), location)
1787         self.test_ssh.copy_abs("plcsh_stress_test.py", remote)
1788         command = location
1789         command += " -- --preserve --short-names"
1790         local = (self.run_in_guest(command) == 0);
1791         # second run with --foreign
1792         command += ' --foreign'
1793         remote = (self.run_in_guest(command) == 0);
1794         return local and remote
1795
1796
1797     ####################
    # Bonding steps declared as empty stubs.
    # NOTE(review): bonding_redirector is defined outside this excerpt; presumably it
    # forwards each step to the same-named TestBonding method - TODO confirm.
    @bonding_redirector
    def bonding_init_partial(self): pass

    @bonding_redirector
    def bonding_add_yum(self): pass

    @bonding_redirector
    def bonding_install_rpms(self): pass
1806
1807     ####################
1808
1809     def gather_logs(self):
1810         "gets all possible logs from plc's/qemu node's/slice's for future reference"
1811         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
1812         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
1813         # (1.c) get the plc's /root/sfi -> logs/sfi.<plcname>/
1814         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
1815         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
1816         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
1817         # (1.a)
1818         print("-------------------- TestPlc.gather_logs : PLC's /var/log")
1819         self.gather_var_logs()
1820         # (1.b)
1821         print("-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/")
1822         self.gather_pgsql_logs()
1823         # (1.c)
1824         print("-------------------- TestPlc.gather_logs : PLC's /root/sfi/")
1825         self.gather_root_sfi()
1826         # (2)
1827         print("-------------------- TestPlc.gather_logs : nodes's QEMU logs")
1828         for site_spec in self.plc_spec['sites']:
1829             test_site = TestSite(self,site_spec)
1830             for node_spec in site_spec['nodes']:
1831                 test_node = TestNode(self, test_site, node_spec)
1832                 test_node.gather_qemu_logs()
1833         # (3)
1834         print("-------------------- TestPlc.gather_logs : nodes's /var/log")
1835         self.gather_nodes_var_logs()
1836         # (4)
1837         print("-------------------- TestPlc.gather_logs : sample sliver's /var/log")
1838         self.gather_slivers_var_logs()
1839         return True
1840
1841     def gather_slivers_var_logs(self):
1842         for test_sliver in self.all_sliver_objs():
1843             remote = test_sliver.tar_var_logs()
1844             utils.system("mkdir -p logs/sliver.var-log.{}".format(test_sliver.name()))
1845             command = remote + " | tar -C logs/sliver.var-log.{} -xf -".format(test_sliver.name())
1846             utils.system(command)
1847         return True
1848
1849     def gather_var_logs(self):
1850         utils.system("mkdir -p logs/myplc.var-log.{}".format(self.name()))
1851         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
1852         command = to_plc + "| tar -C logs/myplc.var-log.{} -xf -".format(self.name())
1853         utils.system(command)
1854         command = "chmod a+r,a+x logs/myplc.var-log.{}/httpd".format(self.name())
1855         utils.system(command)
1856
1857     def gather_pgsql_logs(self):
1858         utils.system("mkdir -p logs/myplc.pgsql-log.{}".format(self.name()))
1859         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
1860         command = to_plc + "| tar -C logs/myplc.pgsql-log.{} -xf -".format(self.name())
1861         utils.system(command)
1862
1863     def gather_root_sfi(self):
1864         utils.system("mkdir -p logs/sfi.{}".format(self.name()))
1865         to_plc = self.actual_command_in_guest("tar -C /root/sfi/ -cf - .")
1866         command = to_plc + "| tar -C logs/sfi.{} -xf -".format(self.name())
1867         utils.system(command)
1868
1869     def gather_nodes_var_logs(self):
1870         for site_spec in self.plc_spec['sites']:
1871             test_site = TestSite(self, site_spec)
1872             for node_spec in site_spec['nodes']:
1873                 test_node = TestNode(self, test_site, node_spec)
1874                 test_ssh = TestSsh(test_node.name(), key="keys/key_admin.rsa")
1875                 command = test_ssh.actual_command("tar -C /var/log -cf - .")
1876                 command = command + "| tar -C logs/node.var-log.{} -xf -".format(test_node.name())
1877                 utils.system("mkdir -p logs/node.var-log.{}".format(test_node.name()))
1878                 utils.system(command)
1879
1880
1881     # returns the filename to use for sql dump/restore, using options.dbname if set
1882     def dbfile(self, database):
1883         # uses options.dbname if it is found
1884         try:
1885             name = self.options.dbname
1886             if not isinstance(name, str):
1887                 raise Exception
1888         except:
1889             t = datetime.now()
1890             d = t.date()
1891             name = str(d)
1892         return "/root/{}-{}.sql".format(database, name)
1893
1894     def plc_db_dump(self):
1895         'dump the planetlab5 DB in /root in the PLC - filename has time'
1896         dump=self.dbfile("planetab5")
1897         self.run_in_guest('pg_dump -U pgsqluser planetlab5 -f '+ dump)
1898         utils.header('Dumped planetlab5 database in {}'.format(dump))
1899         return True
1900
1901     def plc_db_restore(self):
1902         'restore the planetlab5 DB - looks broken, but run -n might help'
1903         dump = self.dbfile("planetab5")
1904         ##stop httpd service
1905         self.run_in_guest('service httpd stop')
1906         # xxx - need another wrapper
1907         self.run_in_guest_piped('echo drop database planetlab5', 'psql --user=pgsqluser template1')
1908         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab5')
1909         self.run_in_guest('psql -U pgsqluser planetlab5 -f ' + dump)
1910         ##starting httpd service
1911         self.run_in_guest('service httpd start')
1912
1913         utils.header('Database restored from ' + dump)
1914
1915     @staticmethod
1916     def create_ignore_steps():
1917         for step in TestPlc.default_steps + TestPlc.other_steps:
1918             # default step can have a plc qualifier
1919             if '@' in step:
1920                 step, qualifier = step.split('@')
1921             # or be defined as forced or ignored by default
1922             for keyword in ['_ignore','_force']:
1923                 if step.endswith(keyword):
1924                     step=step.replace(keyword,'')
1925             if step == SEP or step == SEPSFA :
1926                 continue
1927             method = getattr(TestPlc,step)
1928             name = step + '_ignore'
1929             wrapped = ignore_result(method)
1930 #            wrapped.__doc__ = method.__doc__ + " (run in ignore-result mode)"
1931             setattr(TestPlc, name, wrapped)
1932
1933 #    @ignore_result
1934 #    def ssh_slice_again_ignore (self): pass
1935 #    @ignore_result
1936 #    def check_initscripts_ignore (self): pass
1937
1938     def standby_1_through_20(self):
1939         """convenience function to wait for a specified number of minutes"""
1940         pass
    # standby_generic (top of this file) replaces each stub below with a method that
    # takes self, reads the number of minutes from the stub's name (the part after
    # 'standby_'), and calls standby(): sleep for that many minutes, or just print
    # 'dry_run' when self.options.dry_run is set. The stubs themselves are never
    # called, which is why they can do without a self parameter.
    @standby_generic
    def standby_1(): pass
    @standby_generic
    def standby_2(): pass
    @standby_generic
    def standby_3(): pass
    @standby_generic
    def standby_4(): pass
    @standby_generic
    def standby_5(): pass
    @standby_generic
    def standby_6(): pass
    @standby_generic
    def standby_7(): pass
    @standby_generic
    def standby_8(): pass
    @standby_generic
    def standby_9(): pass
    @standby_generic
    def standby_10(): pass
    @standby_generic
    def standby_11(): pass
    @standby_generic
    def standby_12(): pass
    @standby_generic
    def standby_13(): pass
    @standby_generic
    def standby_14(): pass
    @standby_generic
    def standby_15(): pass
    @standby_generic
    def standby_16(): pass
    @standby_generic
    def standby_17(): pass
    @standby_generic
    def standby_18(): pass
    @standby_generic
    def standby_19(): pass
    @standby_generic
    def standby_20(): pass
1981
1982     # convenience for debugging the test logic
1983     def yes(self): return True
1984     def no(self): return False
1985     def fail(self): return False