[tests.git] / system / TestPlc.py
1 # Thierry Parmentelat <thierry.parmentelat@inria.fr>
2 # Copyright (C) 2010 INRIA
3 #
4 import sys
5 import time
6 import os, os.path
7 import traceback
8 import socket
9 from datetime import datetime, timedelta
10
11 import utils
12 from Completer import Completer, CompleterTask
13 from TestSite import TestSite
14 from TestNode import TestNode, CompleterTaskNodeSsh
15 from TestUser import TestUser
16 from TestKey import TestKey
17 from TestSlice import TestSlice
18 from TestSliver import TestSliver
19 from TestBoxQemu import TestBoxQemu
20 from TestSsh import TestSsh
21 from TestApiserver import TestApiserver
22 from TestAuthSfa import TestAuthSfa
23 from PlcapiUrlScanner import PlcapiUrlScanner
24
25 from TestBonding import TestBonding
26
27 from gethostbyaddr import workaround_gethostbyaddr
28
29 has_sfa_cache_filename = "sfa-cache"
30
31 # step methods must take (self) and return a boolean (options is a member of the class)
32
33 def standby(minutes, dry_run):
34     utils.header('Entering StandBy for {:d} mn'.format(minutes))
35     if dry_run:
36         print('dry_run')
37     else:
38         time.sleep(60*minutes)
39     return True
40
41 def standby_generic(func):
42     def actual(self):
43         minutes = int(func.__name__.split("_")[1])
44         return standby(minutes, self.options.dry_run)
45     return actual
46
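# (added note, not in the original file) standby_generic derives the sleep
# duration from the decorated function's name, so a hypothetical step such as
#
#     @standby_generic
#     def standby_5(self): pass
#
# would wait 5 minutes (or just print 'dry_run' under --dry-run); the
# 'standby_1_through_20' entry in other_steps below presumably expands into
# steps generated this way.
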
47 def node_mapper(method):
48     def map_on_nodes(self, *args, **kwds):
49         overall = True
50         node_method = TestNode.__dict__[method.__name__]
51         for test_node in self.all_nodes():
52             if not node_method(test_node, *args, **kwds):
53                 overall = False
54         return overall
55     # maintain __name__ for ignore_result
56     map_on_nodes.__name__ = method.__name__
57     # restore the doc text
58     map_on_nodes.__doc__ = TestNode.__dict__[method.__name__].__doc__
59     return map_on_nodes
60
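# (added note, not in the original file) node_mapper promotes a TestNode method
# into a TestPlc step of the same name that loops over every node; e.g. the
# stubs further down like
#
#     @node_mapper
#     def nodestate_show(self): pass
#
# end up calling TestNode.nodestate_show(test_node) for each node and report
# False if any node fails.
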
61 def slice_mapper(method):
62     def map_on_slices(self):
63         overall = True
64         slice_method = TestSlice.__dict__[method.__name__]
65         for slice_spec in self.plc_spec['slices']:
66             site_spec = self.locate_site (slice_spec['sitename'])
67             test_site = TestSite(self,site_spec)
68             test_slice = TestSlice(self,test_site,slice_spec)
69             if not slice_method(test_slice, self.options):
70                 overall=False
71         return overall
72     # maintain __name__ for ignore_result
73     map_on_slices.__name__ = method.__name__
74     # restore the doc text
75     map_on_slices.__doc__ = TestSlice.__dict__[method.__name__].__doc__
76     return map_on_slices
77
78 def bonding_redirector(method):
79     bonding_name = method.__name__.replace('bonding_', '')
80     def redirect(self):
81         bonding_method = TestBonding.__dict__[bonding_name]
82         return bonding_method(self.test_bonding)
83     # maintain __name__ for ignore_result
84     redirect.__name__ = method.__name__
85     # restore the doc text
86     redirect.__doc__ = TestBonding.__dict__[bonding_name].__doc__
87     return redirect
88
89 # run a step but ignore its result (wrapping it in Ignored) so that the sequence can go on
90 def ignore_result(method):
91     def ignoring(self):
92         # ssh_slice_ignore->ssh_slice
93         ref_name = method.__name__.replace('_ignore', '').replace('force_', '')
94         ref_method = TestPlc.__dict__[ref_name]
95         result = ref_method(self)
96         print("Actual (but ignored) result for {ref_name} is {result}".format(**locals()))
97         return Ignored(result)
98     name = method.__name__.replace('_ignore', '').replace('force_', '')
99     ignoring.__name__ = name
100     ignoring.__doc__ = "ignored version of " + name
101     return ignoring
102
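# (added note, not in the original file) with ignore_result, a step registered
# as e.g. 'ssh_slice_ignore' looks up and runs TestPlc.ssh_slice, prints the
# real outcome, and returns it wrapped in Ignored() so that the sequence can
# go on regardless.
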
103 # a variant that expects the TestSlice method to return a list of CompleterTasks that
104 # are then merged into a single Completer run, to avoid waiting on each slice in turn
105 # (this is especially useful when a test fails, of course)
106 # because we need to pass arguments, we use a class rather than a plain function decorator
107 class slice_mapper__tasks(object):
108     # could not get this to work with named arguments
109     def __init__(self, timeout_minutes, silent_minutes, period_seconds):
110         self.timeout = timedelta(minutes = timeout_minutes)
111         self.silent = timedelta(minutes = silent_minutes)
112         self.period = timedelta(seconds = period_seconds)
113     def __call__(self, method):
114         decorator_self=self
115         # compute augmented method name
116         method_name = method.__name__ + "__tasks"
117         # locate in TestSlice
118         slice_method = TestSlice.__dict__[ method_name ]
119         def wrappee(self):
120             tasks=[]
121             for slice_spec in self.plc_spec['slices']:
122                 site_spec = self.locate_site (slice_spec['sitename'])
123                 test_site = TestSite(self, site_spec)
124                 test_slice = TestSlice(self, test_site, slice_spec)
125                 tasks += slice_method (test_slice, self.options)
126             return Completer (tasks, message=method.__name__).\
127                 run(decorator_self.timeout, decorator_self.silent, decorator_self.period)
128         # restore the doc text from the TestSlice method even if a bit odd
129         wrappee.__name__ = method.__name__
130         wrappee.__doc__ = slice_method.__doc__
131         return wrappee
132
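# (added note, not in the original file) slice_mapper__tasks takes its timers
# positionally, so a decorated step might look like
#
#     @slice_mapper__tasks(20, 19, 15)
#     def ssh_slice(self): pass
#
# assuming TestSlice defines a matching ssh_slice__tasks method; the collected
# CompleterTasks for all slices then share one Completer run with a 20 minute
# timeout, 19 silent minutes and a 15 second period (figures made up here).
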
133 def auth_sfa_mapper(method):
134     def actual(self):
135         overall = True
136         auth_method = TestAuthSfa.__dict__[method.__name__]
137         for auth_spec in self.plc_spec['sfa']['auth_sfa_specs']:
138             test_auth = TestAuthSfa(self, auth_spec)
139             if not auth_method(test_auth, self.options):
140                 overall=False
141         return overall
142     # restore the doc text
143     actual.__doc__ = TestAuthSfa.__dict__[method.__name__].__doc__
144     return actual
145
146 class Ignored:
147     def __init__(self, result):
148         self.result = result
149
150 SEP = '<sep>'
151 SEPSFA = '<sep_sfa>'
152
153 class TestPlc:
154
155     default_steps = [
156         'show', SEP,
157         'plcvm_delete', 'plcvm_timestamp', 'plcvm_create', SEP,
158         'django_install', 'plc_install', 'plc_configure', 'plc_start', SEP,
159         'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP,
160         'plcapi_urls', 'speed_up_slices', SEP,
161         'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP,
162 # ss # slices created under plcsh interactively seem to be fine but these ones don't have the tags
163 # ss # keep this out of the way for now
164 # ss         'check_vsys_defaults_ignore', SEP,
165 # ss # run this first off so it's easier to re-run on another qemu box
166 # ss         'qemu_kill_mine', 'nodestate_reinstall', 'qemu_local_init',
167 # ss         'bootcd', 'qemu_local_config', SEP,
168 # ss         'qemu_clean_mine', 'qemu_export', 'qemu_cleanlog', SEP,
169 # ss         'qemu_start', 'qemu_timestamp', 'qemu_nodefamily', SEP,
170         'sfa_install_all', 'sfa_configure', 'cross_sfa_configure',
171         'sfa_start', 'sfa_import', SEPSFA,
172         'sfi_configure@1', 'sfa_register_site@1', 'sfa_register_pi@1', SEPSFA,
173         'sfa_register_user@1', 'sfa_update_user@1',
174         'sfa_register_slice@1', 'sfa_renew_slice@1', SEPSFA,
175         'sfa_remove_user_from_slice@1', 'sfi_show_slice_researchers@1',
176         'sfa_insert_user_in_slice@1', 'sfi_show_slice_researchers@1', SEPSFA,
177         'sfa_discover@1', 'sfa_rspec@1', SEPSFA,
178         'sfa_allocate@1', 'sfa_provision@1', 'sfa_describe@1', SEPSFA,
179         'sfa_check_slice_plc@1', 'sfa_update_slice@1', SEPSFA,
180         'sfi_list@1', 'sfi_show_site@1', 'sfa_utest@1', SEPSFA,
181         # we used to run plcsh_stress_test, and then ssh_node_debug and ssh_node_boot
182         # but as the stress test might take a while, we sometimes missed the debug mode..
183 # ss        'probe_kvm_iptables',
184 # ss        'ping_node', 'ssh_node_debug', 'plcsh_stress_test@1', SEP,
185 # ss        'ssh_node_boot', 'node_bmlogs', 'ssh_slice', 'ssh_slice_basics', SEP,
186 # ss        'ssh_slice_sfa@1', SEPSFA,
187         'sfa_rspec_empty@1', 'sfa_allocate_empty@1', 'sfa_provision_empty@1',
188         'sfa_check_slice_plc_empty@1', SEPSFA,
189         'sfa_delete_slice@1', 'sfa_delete_user@1', SEPSFA,
190 # ss        'check_system_slice', SEP,
191         # for inspecting the slice while it runs the first time
192         #'fail',
193         # check slices are turned off properly
194 # ss        'debug_nodemanager',
195 # ss        'empty_slices', 'ssh_slice_off', 'slice_fs_deleted_ignore', SEP,
196 # ss        # check they are properly re-created with the same name
197 # ss        'fill_slices', 'ssh_slice_again', SEP,
198         'gather_logs_force', SEP,
199         ]
200     other_steps = [
201         'export', 'show_boxes', 'super_speed_up_slices', SEP,
202         'check_hooks', 'plc_stop', 'plcvm_start', 'plcvm_stop', SEP,
203         'delete_initscripts', 'delete_nodegroups', 'delete_all_sites', SEP,
204         'delete_sites', 'delete_nodes', 'delete_slices', 'keys_clean', SEP,
205         'delete_leases', 'list_leases', SEP,
206         'populate', SEP,
207         'nodestate_show', 'nodestate_safeboot', 'nodestate_boot', 'nodestate_upgrade', SEP,
208         'nodedistro_show', 'nodedistro_f14', 'nodedistro_f18', SEP,
209         'nodedistro_f20', 'nodedistro_f21', 'nodedistro_f22', SEP,
210         'qemu_list_all', 'qemu_list_mine', 'qemu_kill_all', SEP,
211         'sfa_install_core', 'sfa_install_sfatables',
212         'sfa_install_plc', 'sfa_install_client', SEPSFA,
213         'sfa_plcclean', 'sfa_dbclean', 'sfa_stop', 'sfa_uninstall', 'sfi_clean', SEPSFA,
214         'sfa_get_expires', SEPSFA,
215         'plc_db_dump', 'plc_db_restore', SEP,
216         'check_netflow', 'check_drl', SEP,
217         # used to be part of default steps but won't work since f27
218         'cross_check_tcp@1',
219         'slice_fs_present', 'check_initscripts', SEP,
220         'standby_1_through_20', 'yes', 'no', SEP,
221         'install_syslinux6', 'bonding_builds', 'bonding_nodes', SEP,
222         ]
223     default_bonding_steps = [
224         'bonding_init_partial',
225         'bonding_add_yum',
226         'bonding_install_rpms', SEP,
227         ]
228
229     @staticmethod
230     def printable_steps(list):
231         single_line = " ".join(list) + " "
232         return single_line.replace(" "+SEP+" ", " \\\n").replace(" "+SEPSFA+" ", " \\\n")
233     @staticmethod
234     def valid_step(step):
235         return step != SEP and step != SEPSFA
236
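    # (added note, not in the original file) printable_steps merely turns the
    # SEP / SEPSFA markers into line breaks, so a list like
    #     ['show', SEP, 'plc_install', 'plc_start', SEP]
    # prints roughly as
    #     show \
    #     plc_install plc_start \
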
237     # turn off the sfa-related steps when build has skipped SFA
238     # this was originally for centos5 but is still valid
239     # for up to f12 as recent SFAs with sqlalchemy won't build before f14
240     @staticmethod
241     def _has_sfa_cached(rpms_url):
242         if os.path.isfile(has_sfa_cache_filename):
243             with open(has_sfa_cache_filename) as cache:
244                 cached = cache.read() == "yes"
245             utils.header("build provides SFA (cached):{}".format(cached))
246             return cached
247         # warning, we're now building 'sface' so let's be a bit more picky
248         # full builds are expected to return with 0 here
249         utils.header("Checking if build provides SFA package...")
250         retcod = utils.system("curl --silent {}/ | grep -q sfa-4".format(rpms_url)) == 0
251         encoded = 'yes' if retcod else 'no'
252         with open(has_sfa_cache_filename,'w') as cache:
253             cache.write(encoded)
254         return retcod
255
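    # (added note, not in the original file) the sfa-cache file therefore holds
    # a bare 'yes' or 'no'; removing it forces the probe against rpms_url to be
    # redone on the next run, while e.g.
    #
    #     echo yes > sfa-cache
    #
    # would make subsequent runs assume the build does provide SFA.
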
256     @staticmethod
257     def check_whether_build_has_sfa(rpms_url):
258         has_sfa = TestPlc._has_sfa_cached(rpms_url)
259         if has_sfa:
260             utils.header("build does provide SFA")
261         else:
262             # move all steps containing 'sfa' from default_steps to other_steps
263             utils.header("SFA package not found - removing steps with sfa or sfi")
264             sfa_steps = [ step for step in TestPlc.default_steps
265                           if step.find('sfa') >= 0 or step.find("sfi") >= 0 ]
266             TestPlc.other_steps += sfa_steps
267             for step in sfa_steps:
268                 TestPlc.default_steps.remove(step)
269
270     def __init__(self, plc_spec, options):
271         self.plc_spec = plc_spec
272         self.options = options
273         self.test_ssh = TestSsh(self.plc_spec['host_box'], self.options.buildname)
274         self.vserverip = plc_spec['vserverip']
275         self.vservername = plc_spec['vservername']
276         self.vplchostname = self.vservername.split('-')[-1]
277         self.url = "https://{}:443/PLCAPI/".format(plc_spec['vserverip'])
278         self.apiserver = TestApiserver(self.url, options.dry_run)
279         (self.ssh_node_boot_timeout, self.ssh_node_boot_silent) = plc_spec['ssh_node_boot_timers']
280         (self.ssh_node_debug_timeout, self.ssh_node_debug_silent) = plc_spec['ssh_node_debug_timers']
281
282     def has_addresses_api(self):
283         return self.apiserver.has_method('AddIpAddress')
284
285     def name(self):
286         name = self.plc_spec['name']
287         return "{}.{}".format(name,self.vservername)
288
289     def hostname(self):
290         return self.plc_spec['host_box']
291
292     def is_local(self):
293         return self.test_ssh.is_local()
294
295     # define the API methods on this object through xmlrpc
296     # would help, but not strictly necessary
297     def connect(self):
298         pass
299
300     def actual_command_in_guest(self,command, backslash=False):
301         raw1 = self.host_to_guest(command)
302         raw2 = self.test_ssh.actual_command(raw1, dry_run=self.options.dry_run, backslash=backslash)
303         return raw2
304
305     def start_guest(self):
306         return utils.system(self.test_ssh.actual_command(self.start_guest_in_host(),
307                                                          dry_run=self.options.dry_run))
308
309     def stop_guest(self):
310         return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host(),
311                                                          dry_run=self.options.dry_run))
312
313     def run_in_guest(self, command, backslash=False):
314         raw = self.actual_command_in_guest(command, backslash)
315         return utils.system(raw)
316
317     def run_in_host(self,command):
318         return self.test_ssh.run_in_buildname(command, dry_run=self.options.dry_run)
319
320     # backslashing turned out so awful at some point that I've turned off auto-backslashing
321     # see e.g. plc_start esp. the version for f14
322     # the command gets run in the plc's vm
323     def host_to_guest(self, command):
324         ssh_leg = TestSsh(self.vplchostname)
325         return ssh_leg.actual_command(command, keep_stdin=True)
326
327     # this /vservers thing is legacy...
328     def vm_root_in_host(self):
329         return "/vservers/{}/".format(self.vservername)
330
331     def vm_timestamp_path(self):
332         return "/vservers/{}/{}.timestamp".format(self.vservername, self.vservername)
333
334     # start/stop the vserver
335     def start_guest_in_host(self):
336         return "virsh -c lxc:/// start {}".format(self.vservername)
337
338     def stop_guest_in_host(self):
339         return "virsh -c lxc:/// destroy {}".format(self.vservername)
340
341     # xxx quick n dirty
342     def run_in_guest_piped(self,local,remote):
343         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),
344                                                                      keep_stdin = True))
345
346     def dnf_check_installed(self, rpms):
347         if isinstance(rpms, list):
348             rpms=" ".join(rpms)
349         return self.run_in_guest("rpm -q {}".format(rpms)) == 0
350
351     # does a dnf install in the guest, ignores the dnf return code, and checks the result with rpm
352     def dnf_install(self, rpms):
353         if isinstance(rpms, list):
354             rpms=" ".join(rpms)
355         retcod = self.run_in_guest("dnf -y install {}".format(rpms))
356         if retcod != 0:
357             self.run_in_guest("dnf -y install --allowerasing {}".format(rpms))
358         # yum-complete-transaction comes with yum-utils, that is in vtest.pkgs
359         # nothing similar with dnf, forget about this for now
360         # self.run_in_guest("yum-complete-transaction -y")
361         return self.dnf_check_installed(rpms)
362
363     def pip3_install(self, package):
364         return self.run_in_guest(f"pip3 install {package} || pip install {package}") == 0
365
366     def auth_root(self):
367         return {'Username'   : self.plc_spec['settings']['PLC_ROOT_USER'],
368                 'AuthMethod' : 'password',
369                 'AuthString' : self.plc_spec['settings']['PLC_ROOT_PASSWORD'],
370                 'Role'       : self.plc_spec['role'],
371                 }
372
373     def locate_site(self,sitename):
374         for site in self.plc_spec['sites']:
375             if site['site_fields']['name'] == sitename:
376                 return site
377             if site['site_fields']['login_base'] == sitename:
378                 return site
379         raise Exception("Cannot locate site {}".format(sitename))
380
381     def locate_node(self, nodename):
382         for site in self.plc_spec['sites']:
383             for node in site['nodes']:
384                 if node['name'] == nodename:
385                     return site, node
386         raise Exception("Cannot locate node {}".format(nodename))
387
388     def locate_hostname(self, hostname):
389         for site in self.plc_spec['sites']:
390             for node in site['nodes']:
391                 if node['node_fields']['hostname'] == hostname:
392                     return(site, node)
393         raise Exception("Cannot locate hostname {}".format(hostname))
394
395     def locate_key(self, key_name):
396         for key in self.plc_spec['keys']:
397             if key['key_name'] == key_name:
398                 return key
399         raise Exception("Cannot locate key {}".format(key_name))
400
401     def locate_private_key_from_key_names(self, key_names):
402         # locate the first available key, i.e. the first one whose files exist on disk
403         for key_name in key_names:
404             key_spec = self.locate_key(key_name)
405             test_key = TestKey(self, key_spec)
406             publickey = test_key.publicpath()
407             privatekey = test_key.privatepath()
408             if os.path.isfile(publickey) and os.path.isfile(privatekey):
409                 return privatekey
410         return None
415
416     def locate_slice(self, slicename):
417         for slice in self.plc_spec['slices']:
418             if slice['slice_fields']['name'] == slicename:
419                 return slice
420         raise Exception("Cannot locate slice {}".format(slicename))
421
422     def all_sliver_objs(self):
423         result = []
424         for slice_spec in self.plc_spec['slices']:
425             slicename = slice_spec['slice_fields']['name']
426             for nodename in slice_spec['nodenames']:
427                 result.append(self.locate_sliver_obj(nodename, slicename))
428         return result
429
430     def locate_sliver_obj(self, nodename, slicename):
431         site,node = self.locate_node(nodename)
432         slice = self.locate_slice(slicename)
433         # build objects
434         test_site = TestSite(self, site)
435         test_node = TestNode(self, test_site, node)
436         # xxx the slice site is assumed to be the node site - mhh - probably harmless
437         test_slice = TestSlice(self, test_site, slice)
438         return TestSliver(self, test_node, test_slice)
439
440     def locate_first_node(self):
441         nodename = self.plc_spec['slices'][0]['nodenames'][0]
442         site,node = self.locate_node(nodename)
443         test_site = TestSite(self, site)
444         test_node = TestNode(self, test_site, node)
445         return test_node
446
447     def locate_first_sliver(self):
448         slice_spec = self.plc_spec['slices'][0]
449         slicename = slice_spec['slice_fields']['name']
450         nodename = slice_spec['nodenames'][0]
451         return self.locate_sliver_obj(nodename,slicename)
452
453     # all different hostboxes used in this plc
454     def get_BoxNodes(self):
455         # maps on sites and nodes, return [ (host_box,test_node) ]
456         tuples = []
457         for site_spec in self.plc_spec['sites']:
458             test_site = TestSite(self,site_spec)
459             for node_spec in site_spec['nodes']:
460                 test_node = TestNode(self, test_site, node_spec)
461                 if not test_node.is_real():
462                     tuples.append( (test_node.host_box(),test_node) )
463         # transform into a dict { 'host_box' -> [ test_node .. ] }
464         result = {}
465         for (box,node) in tuples:
466             if box not in result:
467                 result[box] = [node]
468             else:
469                 result[box].append(node)
470         return result
471
472     # a step for checking this stuff
473     def show_boxes(self):
474         'print summary of nodes location'
475         for box,nodes in self.get_BoxNodes().items():
476             print(box,":"," + ".join( [ node.name() for node in nodes ] ))
477         return True
478
479     # make this a valid step
480     def qemu_kill_all(self):
481         'kill all qemu instances on the qemu boxes involved in this setup'
482         # this is the brute force version, kill all qemus on that host box
483         for (box,nodes) in self.get_BoxNodes().items():
484             # pass the first nodename, as we don't push template-qemu on testboxes
485             nodedir = nodes[0].nodedir()
486             TestBoxQemu(box, self.options.buildname).qemu_kill_all(nodedir)
487         return True
488
489     # make this a valid step
490     def qemu_list_all(self):
491         'list all qemu instances on the qemu boxes involved in this setup'
492         for box,nodes in self.get_BoxNodes().items():
493             # this is the brute force version, list all qemus on that host box
494             TestBoxQemu(box, self.options.buildname).qemu_list_all()
495         return True
496
497     # list only the qemus related to this test
498     def qemu_list_mine(self):
499         'list qemu instances for our nodes'
500         for (box,nodes) in self.get_BoxNodes().items():
501             # the fine-grain version
502             for node in nodes:
503                 node.list_qemu()
504         return True
505
506     # clean up only the qemus related to this test
507     def qemu_clean_mine(self):
508         'cleanup (rm -rf) qemu instances for our nodes'
509         for box,nodes in self.get_BoxNodes().items():
510             # the fine-grain version
511             for node in nodes:
512                 node.qemu_clean()
513         return True
514
515     # kill only the right qemus
516     def qemu_kill_mine(self):
517         'kill the qemu instances for our nodes'
518         for box,nodes in self.get_BoxNodes().items():
519             # the fine-grain version
520             for node in nodes:
521                 node.kill_qemu()
522         return True
523
524     #################### display config
525     def show(self):
526         "show test configuration after localization"
527         self.show_pass(1)
528         self.show_pass(2)
529         return True
530
531     # ugly hack to make sure 'run export' only reports about the 1st plc
532     # to avoid confusion - also we use 'inri_slice1' in various aliases..
533     exported_id = 1
534     def export(self):
535         "print cut'n paste-able stuff to export env variables to your shell"
536         # guess local domain from hostname
537         if TestPlc.exported_id > 1:
538             print("export GUESTHOSTNAME{:d}={}".format(TestPlc.exported_id, self.plc_spec['vservername']))
539             return True
540         TestPlc.exported_id += 1
541         domain = socket.gethostname().split('.',1)[1]
542         fqdn   = "{}.{}".format(self.plc_spec['host_box'], domain)
543         print("export BUILD={}".format(self.options.buildname))
544         print("export PLCHOSTLXC={}".format(fqdn))
545         print("export GUESTNAME={}".format(self.vservername))
546         print("export GUESTHOSTNAME={}.{}".format(self.vplchostname, domain))
547         # find hostname of first node
548         hostname, qemubox = self.all_node_infos()[0]
549         print("export KVMHOST={}.{}".format(qemubox, domain))
550         print("export NODE={}".format(hostname))
551         return True
552
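    # (added note, not in the original file) for the first plc this prints
    # shell-ready assignments along the lines of (all values made up):
    #
    #     export BUILD=2015.01.15--f21
    #     export PLCHOSTLXC=testbox.example.org
    #     export GUESTNAME=vplc-example
    #     export GUESTHOSTNAME=vplc.example.org
    #     export KVMHOST=kvmbox.example.org
    #     export NODE=node.example.org
    #
    # while subsequent plcs only emit a numbered GUESTHOSTNAME<n> line.
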
553     # entry point
554     always_display_keys=['PLC_WWW_HOST', 'nodes', 'sites']
555     def show_pass(self, passno):
556         for (key,val) in self.plc_spec.items():
557             if not self.options.verbose and key not in TestPlc.always_display_keys:
558                 continue
559             if passno == 2:
560                 if key == 'sites':
561                     for site in val:
562                         self.display_site_spec(site)
563                         for node in site['nodes']:
564                             self.display_node_spec(node)
565                 elif key == 'initscripts':
566                     for initscript in val:
567                         self.display_initscript_spec(initscript)
568                 elif key == 'slices':
569                     for slice in val:
570                         self.display_slice_spec(slice)
571                 elif key == 'keys':
572                     for key in val:
573                         self.display_key_spec(key)
574             elif passno == 1:
575                 if key not in ['sites', 'initscripts', 'slices', 'keys']:
576                     print('+   ', key, ':', val)
577
578     def display_site_spec(self, site):
579         print('+ ======== site', site['site_fields']['name'])
580         for k,v in site.items():
581             if not self.options.verbose and k not in TestPlc.always_display_keys:
582                 continue
583             if k == 'nodes':
584                 if v:
585                     print('+       ', 'nodes : ', end=' ')
586                     for node in v:
587                         print(node['node_fields']['hostname'],'', end=' ')
588                     print('')
589             elif k == 'users':
590                 if v:
591                     print('+       users : ', end=' ')
592                     for user in v:
593                         print(user['name'],'', end=' ')
594                     print('')
595             elif k == 'site_fields':
596                 print('+       login_base', ':', v['login_base'])
597             elif k == 'address_fields':
598                 pass
599             else:
600                 print('+       ', end=' ')
601                 utils.pprint(k, v)
602
603     def display_initscript_spec(self, initscript):
604         print('+ ======== initscript', initscript['initscript_fields']['name'])
605
606     def display_key_spec(self, key):
607         print('+ ======== key', key['key_name'])
608
609     def display_slice_spec(self, slice):
610         print('+ ======== slice', slice['slice_fields']['name'])
611         for k,v in slice.items():
612             if k == 'nodenames':
613                 if v:
614                     print('+       nodes : ', end=' ')
615                     for nodename in v:
616                         print(nodename,'', end=' ')
617                     print('')
618             elif k == 'usernames':
619                 if v:
620                     print('+       users : ', end=' ')
621                     for username in v:
622                         print(username,'', end=' ')
623                     print('')
624             elif k == 'slice_fields':
625                 print('+       fields', ':', end=' ')
626                 print('max_nodes=',v['max_nodes'], end=' ')
627                 print('')
628             else:
629                 print('+       ',k,v)
630
631     def display_node_spec(self, node):
632         print("+           node={} host_box={}".format(node['name'], node['host_box']), end=' ')
633         print("hostname=", node['node_fields']['hostname'], end=' ')
634         print("ip=", node['interface_fields']['ip'])
635         if self.options.verbose:
636             utils.pprint("node details", node, depth=3)
637
638     # another entry point for just showing the boxes involved
639     def display_mapping(self):
640         TestPlc.display_mapping_plc(self.plc_spec)
641         return True
642
643     @staticmethod
644     def display_mapping_plc(plc_spec):
645         print('+ MyPLC',plc_spec['name'])
646         # WARNING this would not be right for lxc-based PLC's - should be harmless though
647         print('+\tvserver address = root@{}:/vservers/{}'.format(plc_spec['host_box'], plc_spec['vservername']))
648         print('+\tIP = {}/{}'.format(plc_spec['settings']['PLC_API_HOST'], plc_spec['vserverip']))
649         for site_spec in plc_spec['sites']:
650             for node_spec in site_spec['nodes']:
651                 TestPlc.display_mapping_node(node_spec)
652
653     @staticmethod
654     def display_mapping_node(node_spec):
655         print('+   NODE {}'.format(node_spec['name']))
656         print('+\tqemu box {}'.format(node_spec['host_box']))
657         print('+\thostname={}'.format(node_spec['node_fields']['hostname']))
658
659     # write a timestamp in /vservers/<>.timestamp
660     # it cannot sit inside the vserver itself, as that makes the vserver build cough
661     def plcvm_timestamp(self):
662         "Create a timestamp to remember creation date for this plc"
663         now = int(time.time())
664         # TODO-lxc check this one
665         # a first approx. is to store the timestamp close to the VM root like vs does
666         stamp_path = self.vm_timestamp_path()
667         stamp_dir = os.path.dirname(stamp_path)
668         utils.system(self.test_ssh.actual_command("mkdir -p {}".format(stamp_dir)))
669         return utils.system(self.test_ssh.actual_command("echo {:d} > {}".format(now, stamp_path))) == 0
670
671     # this is called unconditionally at the beginning of the test sequence
672     # just in case this is a rerun, so if the vm is not running it's fine
673     def plcvm_delete(self):
674         "vserver delete the test myplc"
675         stamp_path = self.vm_timestamp_path()
676         self.run_in_host("rm -f {}".format(stamp_path))
677         self.run_in_host("virsh -c lxc:/// destroy {}".format(self.vservername))
678         self.run_in_host("virsh -c lxc:/// undefine {}".format(self.vservername))
679         self.run_in_host("rm -fr /vservers/{}".format(self.vservername))
680         return True
681
682     ### install
683     # historically the build was being fetched by the tests
684     # now the build pushes itself as a subdir of the tests workdir
685     # so that the tests do not have to worry about extracting the build (svn, git, or whatever)
686     def plcvm_create(self):
687         "vserver creation (no install done)"
688         # push the local build/ dir to the testplc box
689         if self.is_local():
690             # a full path for the local calls
691             build_dir = os.path.dirname(sys.argv[0])
692             # sometimes this is empty - set to "." in such a case
693             if not build_dir:
694                 build_dir="."
695             build_dir += "/build"
696         else:
697             # use a standard name - will be relative to remote buildname
698             build_dir = "build"
699             # remove for safety; do *not* mkdir first, otherwise we end up with build/build/
700             self.test_ssh.rmdir(build_dir)
701             self.test_ssh.copy(build_dir, recursive=True)
702         # the repo url is taken from arch-rpms-url
703         # with the last step (i386) removed
704         repo_url = self.options.arch_rpms_url
705         for level in [ 'arch' ]:
706             repo_url = os.path.dirname(repo_url)
707
708         # invoke initvm (drop support for vs)
709         script = "lbuild-initvm.sh"
710         script_options = ""
711         # pass the vbuild-nightly options to [lv]test-initvm
712         script_options += " -p {}".format(self.options.personality)
713         script_options += " -d {}".format(self.options.pldistro)
714         script_options += " -f {}".format(self.options.fcdistro)
715         script_options += " -r {}".format(repo_url)
716         vserver_name = self.vservername
717         vserver_hostname = workaround_gethostbyaddr(self.vserverip)
718         if not vserver_hostname:
719             print("Cannot reverse lookup {}".format(self.vserverip))
720             print("This is considered fatal, as this might pollute the test results")
721             return False
722         script_options += " -n {}".format(vserver_hostname)
723         create_vserver="{build_dir}/{script} {script_options} {vserver_name}".format(**locals())
724         return self.run_in_host(create_vserver) == 0
725
726     ### install django through pip
727     def django_install(self):
728         # plcapi requires Django, which is no longer provided by Fedora as an rpm
729         # so we use pip instead
730         """
731         pip install Django
732         """
733         return self.pip3_install('Django')
734
735     ### install_rpm
736     def plc_install(self):
737         """
738         yum install myplc, noderepo
739         """
740
741         # compute nodefamily
742         if self.options.personality == "linux32":
743             arch = "i386"
744         elif self.options.personality == "linux64":
745             arch = "x86_64"
746         else:
747             raise Exception("Unsupported personality {}".format(self.options.personality))
748         nodefamily = "{}-{}-{}".format(self.options.pldistro, self.options.fcdistro, arch)
749
750         # check it's possible to install just 'myplc-core' first
751         if not self.dnf_install("myplc-core"):
752             return False
753
754         pkgs_list = []
755         pkgs_list.append("myplc")
756         # pkgs_list.append("slicerepo-{}".format(nodefamily))
757         # pkgs_list.append("noderepo-{}".format(nodefamily))
758         pkgs_string=" ".join(pkgs_list)
759         return self.dnf_install(pkgs_list)
760
761     def install_syslinux6(self):
762         """
763         install syslinux6 from the fedora21 release
764         """
765         key = 'http://mirror.onelab.eu/keys/RPM-GPG-KEY-fedora-21-primary'
766
767         rpms = [
768             'http://mirror.onelab.eu/fedora/releases/21/Everything/x86_64/os/Packages/s/syslinux-6.03-1.fc21.x86_64.rpm',
769             'http://mirror.onelab.eu/fedora/releases/21/Everything/x86_64/os/Packages/s/syslinux-nonlinux-6.03-1.fc21.noarch.rpm',
770             'http://mirror.onelab.eu/fedora/releases/21/Everything/x86_64/os/Packages/s/syslinux-perl-6.03-1.fc21.x86_64.rpm',
771         ]
772         # this can be done several times
773         self.run_in_guest("rpm --import {key}".format(**locals()))
774         return self.run_in_guest("yum -y localinstall {}".format(" ".join(rpms))) == 0
775
776     def bonding_builds(self):
777         """
778         list /etc/yum.repos.d on the myplc side
779         """
780         self.run_in_guest("ls /etc/yum.repos.d/*partial.repo")
781         return True
782
783     def bonding_nodes(self):
784         """
785         List nodes known to the myplc together with their nodefamily
786         """
787         print("---------------------------------------- nodes")
788         for node in self.apiserver.GetNodes(self.auth_root()):
789             print("{} -> {}".format(node['hostname'],
790                                     self.apiserver.GetNodeFlavour(self.auth_root(),node['hostname'])['nodefamily']))
791         print("---------------------------------------- nodes")
792         return True
793
794     ###
795     def mod_python(self):
796         """yum install mod_python, useful on f18 and above so as to avoid broken wsgi"""
797         return self.dnf_install( ['mod_python'] )
798
799     ###
800     def plc_configure(self):
801         "run plc-config-tty"
802         tmpname = '{}.plc-config-tty'.format(self.name())
803         with open(tmpname,'w') as fileconf:
804             for var, value in self.plc_spec['settings'].items():
805                 fileconf.write('e {}\n{}\n'.format(var, value))
806             fileconf.write('w\n')
807             fileconf.write('q\n')
808         utils.system('cat {}'.format(tmpname))
809         self.run_in_guest_piped('cat {}'.format(tmpname), 'plc-config-tty')
810         utils.system('rm {}'.format(tmpname))
811         return True
812
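    # (added note, not in the original file) the temporary file piped into
    # plc-config-tty is just a sequence of 'e <variable>' / '<value>' pairs
    # followed by write and quit commands, e.g. with made-up settings:
    #
    #     e PLC_NAME
    #     TestLab
    #     e PLC_ROOT_USER
    #     root@plc.example.org
    #     w
    #     q
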
813     # care only about f>=27
814     def start_stop_systemd(self, service, start_or_stop):
815         "utility to start/stop a systemd-defined service (sfa)"
816         return self.run_in_guest("systemctl {} {}".format(start_or_stop, service)) == 0
817
818     def plc_start(self):
819         "start plc through systemctl"
820         return self.start_stop_systemd('plc', 'start')
821
822     def plc_stop(self):
823         "stop plc through systemctl"
824         return self.start_stop_systemd('plc', 'stop')
825
826     def plcvm_start(self):
827         "start the PLC vserver"
828         self.start_guest()
829         return True
830
831     def plcvm_stop(self):
832         "stop the PLC vserver"
833         self.stop_guest()
834         return True
835
836     # stores the keys from the config for further use
837     def keys_store(self):
838         "stores test users ssh keys in keys/"
839         for key_spec in self.plc_spec['keys']:
840             TestKey(self, key_spec).store_key()
841         return True
842
843     def keys_clean(self):
844         "removes keys cached in keys/"
845         utils.system("rm -rf ./keys")
846         return True
847
848     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
849     # for later direct access to the nodes
850     def keys_fetch(self):
851         "gets ssh keys in /etc/planetlab/ and stores them locally in keys/"
852         dir="./keys"
853         if not os.path.isdir(dir):
854             os.mkdir(dir)
855         vservername = self.vservername
856         vm_root = self.vm_root_in_host()
857         overall = True
858         prefix = 'debug_ssh_key'
859         for ext in ['pub', 'rsa'] :
860             src = "{vm_root}/etc/planetlab/{prefix}.{ext}".format(**locals())
861             dst = "keys/{vservername}-debug.{ext}".format(**locals())
862             if self.test_ssh.fetch(src, dst) != 0:
863                 overall=False
864         return overall
865
866     def sites(self):
867         "create sites with PLCAPI"
868         return self.do_sites()
869
870     def delete_sites(self):
871         "delete sites with PLCAPI"
872         return self.do_sites(action="delete")
873
874     def do_sites(self, action="add"):
875         for site_spec in self.plc_spec['sites']:
876             test_site = TestSite(self,site_spec)
877             if (action != "add"):
878                 utils.header("Deleting site {} in {}".format(test_site.name(), self.name()))
879                 test_site.delete_site()
880                 # deleted with the site
881                 #test_site.delete_users()
882                 continue
883             else:
884                 utils.header("Creating site {} & users in {}".format(test_site.name(), self.name()))
885                 test_site.create_site()
886                 test_site.create_users()
887         return True
888
889     def delete_all_sites(self):
890         "Delete all sites in PLC, and related objects"
891         print('auth_root', self.auth_root())
892         sites = self.apiserver.GetSites(self.auth_root(), {}, ['site_id', 'login_base'])
893         for site in sites:
894             # keep the automatic site - otherwise we'd shoot ourselves in the foot, as root_auth would no longer be valid
895             if site['login_base'] == self.plc_spec['settings']['PLC_SLICE_PREFIX']:
896                 continue
897             site_id = site['site_id']
898             print('Deleting site_id', site_id)
899             self.apiserver.DeleteSite(self.auth_root(), site_id)
900         return True
901
902     def nodes(self):
903         "create nodes with PLCAPI"
904         return self.do_nodes()
905     def delete_nodes(self):
906         "delete nodes with PLCAPI"
907         return self.do_nodes(action="delete")
908
909     def do_nodes(self, action="add"):
910         for site_spec in self.plc_spec['sites']:
911             test_site = TestSite(self, site_spec)
912             if action != "add":
913                 utils.header("Deleting nodes in site {}".format(test_site.name()))
914                 for node_spec in site_spec['nodes']:
915                     test_node = TestNode(self, test_site, node_spec)
916                     utils.header("Deleting {}".format(test_node.name()))
917                     test_node.delete_node()
918             else:
919                 utils.header("Creating nodes for site {} in {}".format(test_site.name(), self.name()))
920                 for node_spec in site_spec['nodes']:
921                     utils.pprint('Creating node {}'.format(node_spec), node_spec)
922                     test_node = TestNode(self, test_site, node_spec)
923                     test_node.create_node()
924         return True
925
926     def nodegroups(self):
927         "create nodegroups with PLCAPI"
928         return self.do_nodegroups("add")
929     def delete_nodegroups(self):
930         "delete nodegroups with PLCAPI"
931         return self.do_nodegroups("delete")
932
933     YEAR = 365*24*3600
934     @staticmethod
935     def translate_timestamp(start, grain, timestamp):
936         if timestamp < TestPlc.YEAR:
937             return start + timestamp*grain
938         else:
939             return timestamp
940
941     @staticmethod
942     def timestamp_printable(timestamp):
943         return time.strftime('%m-%d %H:%M:%S UTC', time.gmtime(timestamp))
944
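    # (added worked example, not in the original file) with e.g. grain=1800 and
    # start=1700000000, a spec timestamp below YEAR is taken as relative, so
    # translate_timestamp(start, 1800, 2) yields 1700000000 + 2*1800 = 1700003600,
    # whereas an absolute value like 1700007200 is returned unchanged.
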
945     def leases(self):
946         "create leases (on reservable nodes only, use e.g. run -c default -c resa)"
947         now = int(time.time())
948         grain = self.apiserver.GetLeaseGranularity(self.auth_root())
949         print('API answered grain=', grain)
950         start = (now//grain)*grain
951         start += grain
952         # find out all nodes that are reservable
953         nodes = self.all_reservable_nodenames()
954         if not nodes:
955             utils.header("No reservable node found - proceeding without leases")
956             return True
957         ok = True
958         # attach them to the leases as specified in plc_specs
959         # this is where the 'leases' field gets interpreted as relative or absolute
960         for lease_spec in self.plc_spec['leases']:
961             # skip the ones that come with a null slice id
962             if not lease_spec['slice']:
963                 continue
964             lease_spec['t_from']  = TestPlc.translate_timestamp(start, grain, lease_spec['t_from'])
965             lease_spec['t_until'] = TestPlc.translate_timestamp(start, grain, lease_spec['t_until'])
966             lease_addition = self.apiserver.AddLeases(self.auth_root(), nodes, lease_spec['slice'],
967                                                       lease_spec['t_from'], lease_spec['t_until'])
968             if lease_addition['errors']:
969                 utils.header("Cannot create leases, {}".format(lease_addition['errors']))
970                 ok = False
971             else:
972                 utils.header('Leases on nodes {} for {} from {:d} ({}) until {:d} ({})'\
973                              .format(nodes, lease_spec['slice'],
974                                      lease_spec['t_from'],  TestPlc.timestamp_printable(lease_spec['t_from']),
975                                      lease_spec['t_until'], TestPlc.timestamp_printable(lease_spec['t_until'])))
976
977         return ok
978
979     def delete_leases(self):
980         "remove all leases in the myplc side"
981         lease_ids = [ l['lease_id'] for l in self.apiserver.GetLeases(self.auth_root())]
982         utils.header("Cleaning leases {}".format(lease_ids))
983         self.apiserver.DeleteLeases(self.auth_root(), lease_ids)
984         return True
985
986     def list_leases(self):
987         "list all leases known to the myplc"
988         leases = self.apiserver.GetLeases(self.auth_root())
989         now = int(time.time())
990         for l in leases:
991             current = l['t_until'] >= now
992             if self.options.verbose or current:
993                 utils.header("{} {} from {} until {}"\
994                              .format(l['hostname'], l['name'],
995                                      TestPlc.timestamp_printable(l['t_from']),
996                                      TestPlc.timestamp_printable(l['t_until'])))
997         return True
998
999     # create nodegroups if needed, and populate
1000     def do_nodegroups(self, action="add"):
1001         # 1st pass to scan contents
1002         groups_dict = {}
1003         for site_spec in self.plc_spec['sites']:
1004             test_site = TestSite(self,site_spec)
1005             for node_spec in site_spec['nodes']:
1006                 test_node = TestNode(self, test_site, node_spec)
1007                 if 'nodegroups' in node_spec:
1008                     nodegroupnames = node_spec['nodegroups']
1009                     if isinstance(nodegroupnames, str):
1010                         nodegroupnames = [ nodegroupnames ]
1011                     for nodegroupname in nodegroupnames:
1012                         if nodegroupname not in groups_dict:
1013                             groups_dict[nodegroupname] = []
1014                         groups_dict[nodegroupname].append(test_node.name())
1015         auth = self.auth_root()
1016         overall = True
1017         for (nodegroupname,group_nodes) in groups_dict.items():
1018             if action == "add":
1019                 print('nodegroups:', 'dealing with nodegroup',\
1020                     nodegroupname, 'on nodes', group_nodes)
1021                 # first, check if the nodetagtype is here
1022                 tag_types = self.apiserver.GetTagTypes(auth, {'tagname':nodegroupname})
1023                 if tag_types:
1024                     tag_type_id = tag_types[0]['tag_type_id']
1025                 else:
1026                     tag_type_id = self.apiserver.AddTagType(auth,
1027                                                             {'tagname' : nodegroupname,
1028                                                              'description' : 'for nodegroup {}'.format(nodegroupname),
1029                                                              'category' : 'test'})
1030                 print('located tag (type)', nodegroupname, 'as', tag_type_id)
1031                 # create nodegroup
1032                 nodegroups = self.apiserver.GetNodeGroups(auth, {'groupname' : nodegroupname})
1033                 if not nodegroups:
1034                     self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
1035                     print('created nodegroup', nodegroupname, \
1036                         'from tagname', nodegroupname, 'and value', 'yes')
1037                 # set node tag on all nodes, value='yes'
1038                 for nodename in group_nodes:
1039                     try:
1040                         self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
1041                     except:
1042                         traceback.print_exc()
1043                         print('node', nodename, 'seems to already have tag', nodegroupname)
1044                     # check anyway
1045                     try:
1046                         expect_yes = self.apiserver.GetNodeTags(auth,
1047                                                                 {'hostname' : nodename,
1048                                                                  'tagname'  : nodegroupname},
1049                                                                 ['value'])[0]['value']
1050                         if expect_yes != "yes":
1051                             print('Mismatch node tag on node',nodename,'got',expect_yes)
1052                             overall = False
1053                     except:
1054                         if not self.options.dry_run:
1055                             print('Cannot find tag', nodegroupname, 'on node', nodename)
1056                             overall = False
1057             else:
1058                 try:
1059                     print('cleaning nodegroup', nodegroupname)
1060                     self.apiserver.DeleteNodeGroup(auth, nodegroupname)
1061                 except:
1062                     traceback.print_exc()
1063                     overall = False
1064         return overall
1065
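    # (added note, not in the original file) do_nodegroups reads an optional
    # 'nodegroups' entry from each node spec, either a single name or a list,
    # e.g. something along the lines of
    #
    #     {'name': 'node1', ..., 'nodegroups': 'x86_64'}
    #     {'name': 'node2', ..., 'nodegroups': ['x86_64', 'reservable']}
    #
    # each name becomes a tag type plus a nodegroup, and every member node gets
    # that tag set to 'yes' (names here are hypothetical).
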
1066     # a list of TestNode objs
1067     def all_nodes(self):
1068         nodes=[]
1069         for site_spec in self.plc_spec['sites']:
1070             test_site = TestSite(self,site_spec)
1071             for node_spec in site_spec['nodes']:
1072                 nodes.append(TestNode(self, test_site, node_spec))
1073         return nodes
1074
1075     # return a list of tuples (nodename,qemuname)
1076     def all_node_infos(self) :
1077         node_infos = []
1078         for site_spec in self.plc_spec['sites']:
1079             node_infos += [ (node_spec['node_fields']['hostname'], node_spec['host_box']) \
1080                                 for node_spec in site_spec['nodes'] ]
1081         return node_infos
1082
1083     def all_nodenames(self):
1084         return [ x[0] for x in self.all_node_infos() ]
1085     def all_reservable_nodenames(self):
1086         res = []
1087         for site_spec in self.plc_spec['sites']:
1088             for node_spec in site_spec['nodes']:
1089                 node_fields = node_spec['node_fields']
1090                 if 'node_type' in node_fields and node_fields['node_type'] == 'reservable':
1091                     res.append(node_fields['hostname'])
1092         return res
1093
1094     # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
1095     def nodes_check_boot_state(self, target_boot_state, timeout_minutes,
1096                                silent_minutes, period_seconds = 15):
1097         if self.options.dry_run:
1098             print('dry_run')
1099             return True
1100
1101         class CompleterTaskBootState(CompleterTask):
1102             def __init__(self, test_plc, hostname):
1103                 self.test_plc = test_plc
1104                 self.hostname = hostname
1105                 self.last_boot_state = 'undef'
1106             def actual_run(self):
1107                 try:
1108                     node = self.test_plc.apiserver.GetNodes(self.test_plc.auth_root(),
1109                                                             [ self.hostname ],
1110                                                             ['boot_state'])[0]
1111                     self.last_boot_state = node['boot_state']
1112                     return self.last_boot_state == target_boot_state
1113                 except:
1114                     return False
1115             def message(self):
1116                 return "CompleterTaskBootState with node {}".format(self.hostname)
1117             def failure_epilogue(self):
1118                 print("node {} in state {} - expected {}"\
1119                     .format(self.hostname, self.last_boot_state, target_boot_state))
1120
1121         timeout = timedelta(minutes=timeout_minutes)
1122         graceout = timedelta(minutes=silent_minutes)
1123         period   = timedelta(seconds=period_seconds)
1124         # the nodes that haven't checked yet - start with a full list and shrink over time
1125         utils.header("checking nodes boot state (expected {})".format(target_boot_state))
1126         tasks = [ CompleterTaskBootState(self,hostname) \
1127                       for (hostname,_) in self.all_node_infos() ]
1128         message = 'check_boot_state={}'.format(target_boot_state)
1129         return Completer(tasks, message=message).run(timeout, graceout, period)
1130
1131     def nodes_booted(self):
1132         return self.nodes_check_boot_state('boot', timeout_minutes=30, silent_minutes=28)
1133
1134     def probe_kvm_iptables(self):
1135         (_,kvmbox) = self.all_node_infos()[0]
1136         TestSsh(kvmbox).run("iptables-save")
1137         return True
1138
1139     # probing nodes
1140     def check_nodes_ping(self, timeout_seconds=60, period_seconds=10):
1141         class CompleterTaskPingNode(CompleterTask):
1142             def __init__(self, hostname):
1143                 self.hostname = hostname
1144             def run(self, silent):
1145                 command="ping -c 1 -w 1 {} >& /dev/null".format(self.hostname)
1146                 return utils.system(command, silent=silent) == 0
1147             def failure_epilogue(self):
1148                 print("Cannot ping node with name {}".format(self.hostname))
1149         timeout = timedelta(seconds = timeout_seconds)
1150         graceout = timeout
1151         period = timedelta(seconds = period_seconds)
1152         node_infos = self.all_node_infos()
1153         tasks = [ CompleterTaskPingNode(h) for (h,_) in node_infos ]
1154         return Completer(tasks, message='ping_node').run(timeout, graceout, period)
1155
1156     # ping node before we try to reach ssh, helpful for troubleshooting failing bootCDs
1157     def ping_node(self):
1158         "Ping nodes"
1159         return self.check_nodes_ping()
1160
1161     def check_nodes_ssh(self, debug, timeout_minutes, silent_minutes, period_seconds=15):
1162         # various delays
1163         timeout  = timedelta(minutes=timeout_minutes)
1164         graceout = timedelta(minutes=silent_minutes)
1165         period   = timedelta(seconds=period_seconds)
1166         vservername = self.vservername
1167         if debug:
1168             message = "debug"
1169             completer_message = 'ssh_node_debug'
1170             local_key = "keys/{vservername}-debug.rsa".format(**locals())
1171         else:
1172             message = "boot"
1173             completer_message = 'ssh_node_boot'
1174             local_key = "keys/key_admin.rsa"
1175         utils.header("checking ssh access to nodes (expected in {} mode)".format(message))
1176         node_infos = self.all_node_infos()
1177         tasks = [ CompleterTaskNodeSsh(nodename, qemuname, local_key,
1178                                         boot_state=message, dry_run=self.options.dry_run) \
1179                       for (nodename, qemuname) in node_infos ]
1180         return Completer(tasks, message=completer_message).run(timeout, graceout, period)
1181
1182     def ssh_node_debug(self):
1183         "Tries to ssh into nodes in debug mode with the debug ssh key"
1184         return self.check_nodes_ssh(debug = True,
1185                                     timeout_minutes = self.ssh_node_debug_timeout,
1186                                     silent_minutes = self.ssh_node_debug_silent)
1187
1188     def ssh_node_boot(self):
1189         "Tries to ssh into nodes in production mode with the root ssh key"
1190         return self.check_nodes_ssh(debug = False,
1191                                     timeout_minutes = self.ssh_node_boot_timeout,
1192                                     silent_minutes = self.ssh_node_boot_silent)
1193
1194     def node_bmlogs(self):
1195         "Checks that there's a non-empty dir. /var/log/bm/raw"
1196         return utils.system(self.actual_command_in_guest("ls /var/log/bm/raw")) == 0
1197
1198     @node_mapper
1199     def qemu_local_init(self): pass
1200     @node_mapper
1201     def bootcd(self): pass
1202     @node_mapper
1203     def qemu_local_config(self): pass
1204     @node_mapper
1205     def qemu_export(self): pass
1206     @node_mapper
1207     def qemu_cleanlog(self): pass
1208     @node_mapper
1209     def nodestate_reinstall(self): pass
1210     @node_mapper
1211     def nodestate_upgrade(self): pass
1212     @node_mapper
1213     def nodestate_safeboot(self): pass
1214     @node_mapper
1215     def nodestate_boot(self): pass
1216     @node_mapper
1217     def nodestate_show(self): pass
1218     @node_mapper
1219     def nodedistro_f14(self): pass
1220     @node_mapper
1221     def nodedistro_f18(self): pass
1222     @node_mapper
1223     def nodedistro_f20(self): pass
1224     @node_mapper
1225     def nodedistro_f21(self): pass
1226     @node_mapper
1227     def nodedistro_f22(self): pass
1228     @node_mapper
1229     def nodedistro_show(self): pass
1230
1231     ### check hooks : invoke scripts from hooks/{node,slice}
1232     def check_hooks_node(self):
1233         return self.locate_first_node().check_hooks()
1234     def check_hooks_sliver(self) :
1235         return self.locate_first_sliver().check_hooks()
1236
1237     def check_hooks(self):
1238         "runs unit tests in the node and slice contexts - see hooks/{node,slice}"
1239         return self.check_hooks_node() and self.check_hooks_sliver()
1240
1241     ### initscripts
1242     def do_check_initscripts(self):
1243         class CompleterTaskInitscript(CompleterTask):
1244             def __init__(self, test_sliver, stamp):
1245                 self.test_sliver = test_sliver
1246                 self.stamp = stamp
1247             def actual_run(self):
1248                 return self.test_sliver.check_initscript_stamp(self.stamp)
1249             def message(self):
1250                 return "initscript checker for {}".format(self.test_sliver.name())
1251             def failure_epilogue(self):
1252                 print("initscript stamp {} not found in sliver {}"\
1253                     .format(self.stamp, self.test_sliver.name()))
1254
1255         tasks = []
1256         for slice_spec in self.plc_spec['slices']:
1257             if 'initscriptstamp' not in slice_spec:
1258                 continue
1259             stamp = slice_spec['initscriptstamp']
1260             slicename = slice_spec['slice_fields']['name']
1261             for nodename in slice_spec['nodenames']:
1262                 print('nodename', nodename, 'slicename', slicename, 'stamp', stamp)
1263                 site,node = self.locate_node(nodename)
1264                 # xxx - passing the wrong site - probably harmless
1265                 test_site = TestSite(self, site)
1266                 test_slice = TestSlice(self, test_site, slice_spec)
1267                 test_node = TestNode(self, test_site, node)
1268                 test_sliver = TestSliver(self, test_node, test_slice)
1269                 tasks.append(CompleterTaskInitscript(test_sliver, stamp))
1270         return Completer(tasks, message='check_initscripts').\
1271             run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10))
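         # a hypothetical slice_spec fragment as consumed above -- the real ones
         # come from the plc_spec config, so treat this purely as an illustration:
         #   {'slice_fields'    : {'name' : 'inri_slice1'},
         #    'nodenames'       : ['node-1.onelab.eu'],
         #    'initscriptstamp' : 'forever',
         #    'sitename'        : 'main'}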
1272
1273     def check_initscripts(self):
1274         "check that the initscripts have triggered"
1275         return self.do_check_initscripts()
1276
1277     def initscripts(self):
1278         "create initscripts with PLCAPI"
1279         for initscript in self.plc_spec['initscripts']:
1280             utils.pprint('Adding Initscript in plc {}'.format(self.plc_spec['name']), initscript)
1281             self.apiserver.AddInitScript(self.auth_root(), initscript['initscript_fields'])
1282         return True
1283
1284     def delete_initscripts(self):
1285         "delete initscripts with PLCAPI"
1286         for initscript in self.plc_spec['initscripts']:
1287             initscript_name = initscript['initscript_fields']['name']
1288             print('Attempting to delete {} in plc {}'.format(initscript_name, self.plc_spec['name']))
1289             try:
1290                 self.apiserver.DeleteInitScript(self.auth_root(), initscript_name)
1291                 print(initscript_name, 'deleted')
1292             except:
1293                 print('deletion went wrong - probably did not exist')
1294         return True
1295
1296     ### manage slices
1297     def slices(self):
1298         "create slices with PLCAPI"
1299         return self.do_slices(action="add")
1300
1301     def delete_slices(self):
1302         "delete slices with PLCAPI"
1303         return self.do_slices(action="delete")
1304
1305     def fill_slices(self):
1306         "add nodes in slices with PLCAPI"
1307         return self.do_slices(action="fill")
1308
1309     def empty_slices(self):
1310         "remove nodes from slices with PLCAPI"
1311         return self.do_slices(action="empty")
1312
1313     def do_slices(self, action="add"):
1314         for slice_spec in self.plc_spec['slices']:
1315             site_spec = self.locate_site(slice_spec['sitename'])
1316             test_site = TestSite(self, site_spec)
1317             test_slice = TestSlice(self, test_site, slice_spec)
1318             if action == "delete":
1319                 test_slice.delete_slice()
1320             elif action == "fill":
1321                 test_slice.add_nodes()
1322             elif action == "empty":
1323                 test_slice.delete_nodes()
1324             else:
1325                 test_slice.create_slice()
1326         return True
1327
1328     @slice_mapper__tasks(20, 10, 15)
1329     def ssh_slice(self): pass
1330     @slice_mapper__tasks(20, 19, 15)
1331     def ssh_slice_off(self): pass
1332     @slice_mapper__tasks(1, 1, 15)
1333     def slice_fs_present(self): pass
1334     @slice_mapper__tasks(1, 1, 15)
1335     def slice_fs_deleted(self): pass
1336
1337     # use another name so we can exclude/ignore it from the tests on the nightly command line
1338     def ssh_slice_again(self): return self.ssh_slice()
1339     # note that simply doing ssh_slice_again=ssh_slice would kind of work too
1340     # but for some reason the ignore-wrapping thing would not
1341
1342     @slice_mapper
1343     def ssh_slice_basics(self): pass
1344     @slice_mapper
1345     def check_vsys_defaults(self): pass
1346
1347     @node_mapper
1348     def keys_clear_known_hosts(self): pass
1349
1350     def plcapi_urls(self):
1351         """
1352         attempts to reach the PLCAPI with various forms for the URL
1353         """
1354         return PlcapiUrlScanner(self.auth_root(), ip=self.vserverip).scan()
1355
1356     def speed_up_slices(self):
1357         "tweak nodemanager cycle (wait time) to 30+/-10 s"
1358         return self._speed_up_slices (30, 10)
1359     def super_speed_up_slices(self):
1360         "dev mode: tweak nodemanager cycle (wait time) to 5+/-1 s"
1361         return self._speed_up_slices(5, 1)
1362
1363     def _speed_up_slices(self, p, r):
1364         # create the template on the server-side
1365         template = "{}.nodemanager".format(self.name())
1366         with open(template,"w") as template_file:
1367             template_file.write('OPTIONS="-p {} -r {} -d"\n'.format(p, r))
1368         in_vm = "/var/www/html/PlanetLabConf/nodemanager"
1369         remote = "{}/{}".format(self.vm_root_in_host(), in_vm)
1370         self.test_ssh.copy_abs(template, remote)
1371         # Add a conf file
1372         if not self.apiserver.GetConfFiles(self.auth_root(),
1373                                            {'dest' : '/etc/sysconfig/nodemanager'}):
1374             self.apiserver.AddConfFile(self.auth_root(),
1375                                         {'dest' : '/etc/sysconfig/nodemanager',
1376                                          'source' : 'PlanetLabConf/nodemanager',
1377                                          'postinstall_cmd' : 'service nm restart',})
1378         return True
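         # for the record, the template pushed above ends up holding a single line like
         #   OPTIONS="-p 30 -r 10 -d"     (speed_up_slices)
         #   OPTIONS="-p 5 -r 1 -d"       (super_speed_up_slices)
         # while debug_nodemanager below writes OPTIONS="-p 10 -r 6 -v -d"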
1379
1380     def debug_nodemanager(self):
1381         "sets verbose mode for nodemanager, and speeds up cycle even more (needs speed_up_slices first)"
1382         template = "{}.nodemanager".format(self.name())
1383         with open(template,"w") as template_file:
1384             template_file.write('OPTIONS="-p 10 -r 6 -v -d"\n')
1385         in_vm = "/var/www/html/PlanetLabConf/nodemanager"
1386         remote = "{}/{}".format(self.vm_root_in_host(), in_vm)
1387         self.test_ssh.copy_abs(template, remote)
1388         return True
1389
1390     @node_mapper
1391     def qemu_start(self) : pass
1392
1393     @node_mapper
1394     def qemu_timestamp(self) : pass
1395
1396     @node_mapper
1397     def qemu_nodefamily(self): pass
1398
1399     # when a spec refers to a node possibly on another plc
1400     def locate_sliver_obj_cross(self, nodename, slicename, other_plcs):
1401         for plc in [ self ] + other_plcs:
1402             try:
1403                 return plc.locate_sliver_obj(nodename, slicename)
1404             except:
1405                 pass
1406         raise Exception("Cannot locate sliver {}@{} among all PLCs".format(nodename, slicename))
1407
1408     # implement this one as a cross step so that we can take advantage of different nodes
1409     # in multi-plcs mode
1410     def cross_check_tcp(self, other_plcs):
1411         "check TCP connectivity between 2 slices (or in loopback if only one is defined)"
1412         if 'tcp_specs' not in self.plc_spec or not self.plc_spec['tcp_specs']:
1413             utils.header("check_tcp: no/empty config found")
1414             return True
1415         specs = self.plc_spec['tcp_specs']
1416         overall = True
1417
1418         # first wait for the network to be up and ready from the slices
1419         class CompleterTaskNetworkReadyInSliver(CompleterTask):
1420             def __init__(self, test_sliver):
1421                 self.test_sliver = test_sliver
1422             def actual_run(self):
1423                 return self.test_sliver.check_tcp_ready(port = 9999)
1424             def message(self):
1425                 return "network ready checker for {}".format(self.test_sliver.name())
1426             def failure_epilogue(self):
1427                 print("could not bind port from sliver {}".format(self.test_sliver.name()))
1428
1429         sliver_specs = {}
1430         tasks = []
1431         managed_sliver_names = set()
1432         for spec in specs:
1433             # locate the TestSliver instances involved, and cache them in the spec instance
1434             spec['s_sliver'] = self.locate_sliver_obj_cross(spec['server_node'], spec['server_slice'], other_plcs)
1435             spec['c_sliver'] = self.locate_sliver_obj_cross(spec['client_node'], spec['client_slice'], other_plcs)
1436             message = "Will check TCP between s={} and c={}"\
1437                       .format(spec['s_sliver'].name(), spec['c_sliver'].name())
1438             if 'client_connect' in spec:
1439                 message += " (using {})".format(spec['client_connect'])
1440             utils.header(message)
1441             # we need to check network presence in both slivers, but also
1442             # avoid inserting the same sliver several times
1443             for sliver in [ spec['s_sliver'], spec['c_sliver'] ]:
1444                 if sliver.name() not in managed_sliver_names:
1445                     tasks.append(CompleterTaskNetworkReadyInSliver(sliver))
1446                     # add this sliver's name in the set
1447                     managed_sliver_names.add(sliver.name())
1448
1449         # wait for the network to be OK on all server sides
1450         if not Completer(tasks, message='check for network readiness in slivers').\
1451            run(timedelta(seconds=30), timedelta(seconds=24), period=timedelta(seconds=5)):
1452             return False
1453
1454         # run server and client
1455         for spec in specs:
1456             port = spec['port']
1457             # server side
1458             # the issue here is that we have the server run in background
1459             # and so we have no clue if it took off properly or not
1460             # looks like in some cases it does not
1461             address = spec['s_sliver'].test_node.name()
1462             if not spec['s_sliver'].run_tcp_server(address, port, timeout=20):
1463                 overall = False
1464                 break
1465
1466             # idem for the client side
1467             # use nodename from located sliver, unless 'client_connect' is set
1468             if 'client_connect' in spec:
1469                 destination = spec['client_connect']
1470             else:
1471                 destination = spec['s_sliver'].test_node.name()
1472             if not spec['c_sliver'].run_tcp_client(destination, port):
1473                 overall = False
1474         return overall
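         # a hypothetical 'tcp_specs' entry as consumed above -- illustration only,
         # the actual entries come from the plc_spec config:
         #   {'server_node'   : 'node-1.onelab.eu', 'server_slice' : 'inri_slice1',
         #    'client_node'   : 'node-2.onelab.eu', 'client_slice' : 'inri_slice2',
         #    'port'          : 10000,
         #    # optional - address the client connects to instead of the server node's name
         #    'client_connect': '10.0.0.1'}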
1475
1476     # painfully enough, we need to allow for some time as netflow might show up last
1477     def check_system_slice(self):
1478         "all nodes: check that a system slice is alive"
1479         # netflow currently not working in the lxc distro
1480         # drl not built at all in the wtx distro
1481         # if we find either of them we're happy
1482         return self.check_netflow() or self.check_drl()
1483
1484     # expose these
1485     def check_netflow(self): return self._check_system_slice('netflow')
1486     def check_drl(self): return self._check_system_slice('drl')
1487
1488     # we have the slices up already here, so it should not take too long
1489     def _check_system_slice(self, slicename, timeout_minutes=5, period_seconds=15):
1490         class CompleterTaskSystemSlice(CompleterTask):
1491             def __init__(self, test_node, dry_run):
1492                 self.test_node = test_node
1493                 self.dry_run = dry_run
1494             def actual_run(self):
1495                 return self.test_node._check_system_slice(slicename, dry_run=self.dry_run)
1496             def message(self):
1497                 return "System slice {} @ {}".format(slicename, self.test_node.name())
1498             def failure_epilogue(self):
1499                 print("COULD not find system slice {} @ {}".format(slicename, self.test_node.name()))
1500         timeout = timedelta(minutes=timeout_minutes)
1501         silent  = timedelta(0)
1502         period  = timedelta(seconds=period_seconds)
1503         tasks = [ CompleterTaskSystemSlice(test_node, self.options.dry_run) \
1504                       for test_node in self.all_nodes() ]
1505         return Completer(tasks, message='_check_system_slice').run(timeout, silent, period)
1506
1507     def plcsh_stress_test(self):
1508         "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents"
1509         # install the stress-test in the plc image
1510         location = "/usr/share/plc_api/plcsh_stress_test.py"
1511         remote = "{}/{}".format(self.vm_root_in_host(), location)
1512         self.test_ssh.copy_abs("plcsh_stress_test.py", remote)
1513         command = location
1514         command += " -- --check"
1515         if self.options.size == 1:
1516             command +=  " --tiny"
1517         return self.run_in_guest(command) == 0
1518
1519     # populate runs the same utility with slightly different options
1520     # in particular it runs with --preserve (don't cleanup) and without --check
1521     # also it gets run twice, once with the --foreign option for creating fake foreign entries
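         # concretely the two invocations boil down to something like
         #   /usr/share/plc_api/plcsh_stress_test.py -- --check [--tiny]
         # for plcsh_stress_test above, and
         #   /usr/share/plc_api/plcsh_stress_test.py -- --preserve --short-names [--foreign]
         # for populate further down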
1522
1523     def install_pip2(self):
1524
1525         replacements = [
1526             "http://mirror.onelab.eu/third-party/python2-pip-19.1.1-7.fc33.noarch.rpm",
1527         ]
1528
1529         return (
1530                self.run_in_guest("pip2 --version") == 0
1531             or self.run_in_guest("dnf install -y python2-pip") == 0
1532             or self.run_in_guest("dnf localinstall -y " + " ".join(replacements)) == 0)
1533
1534
1535     def install_m2crypto(self):
1536
1537         # installing m2crypto for python2 is increasingly difficult
1538         # f29 and f31: dnf install python2-m2crypto
1539         # f33: no longer available but the f31 repos below do the job just fine
1540         # note that using pip2 does not look like a viable option because it does
1541         # an install from sources and that's quite awkward
1542
1543         replacements = [
1544             # no longer on our mirror
1545             "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/31/Everything/x86_64/os/Packages/p/python2-typing-3.6.2-5.fc31.noarch.rpm",
1546             "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/31/Everything/x86_64/os/Packages/p/python2-m2crypto-0.35.2-2.fc31.x86_64.rpm",
1547         ]
1548
1549         return (
1550                self.run_in_guest('python2 -c "import M2Crypto"', backslash=True) == 0
1551             or self.run_in_guest("pip2 install M2Crypto") == 0
1552             or self.run_in_guest("dnf localinstall -y " + " ".join(replacements)) == 0)
1553
1554         # about install_pip2 above, the logic goes like this:
1555         # check for the pip2 command
1556         # if not found, try dnf install python2-pip
1557         # if still not there, dnf localinstall the pinned rpm listed up there
1558
1559
1560     def sfa_install_all(self):
1561         "yum install sfa sfa-plc sfa-sfatables sfa-client"
1562
1563         # the rpm/dnf packages named in python2-* are getting deprecated
1564         # we use pip2 instead
1565         # but that's not good for m2crypto
1566
1567         pip_dependencies = [
1568             'sqlalchemy-migrate',
1569             'lxml',
1570             'python-dateutil',
1571             'psycopg2-binary',
1572             'pyOpenSSL',
1573         ]
1574
1575         return (
1576                     self.install_pip2()
1577                 and self.install_m2crypto()
1578                 and all((self.run_in_guest(f"pip2 install {dep}") == 0)
1579                         for dep in pip_dependencies)
1580                 and self.dnf_install("sfa sfa-plc sfa-sfatables sfa-client")
1581                 and self.run_in_guest("systemctl enable sfa-registry")==0
1582                 and self.run_in_guest("systemctl enable sfa-aggregate")==0)
1583
1584     def sfa_install_core(self):
1585         "yum install sfa"
1586         return self.dnf_install("sfa")
1587
1588     def sfa_install_plc(self):
1589         "yum install sfa-plc"
1590         return self.dnf_install("sfa-plc")
1591
1592     def sfa_install_sfatables(self):
1593         "yum install sfa-sfatables"
1594         return self.dnf_install("sfa-sfatables")
1595
1596     # for some very odd reason, this sometimes fails with the following symptom
1597     # # yum install sfa-client
1598     # Setting up Install Process
1599     # ...
1600     # Downloading Packages:
1601     # Running rpm_check_debug
1602     # Running Transaction Test
1603     # Transaction Test Succeeded
1604     # Running Transaction
1605     # Transaction couldn't start:
1606     # installing package sfa-client-2.1-7.onelab.2012.05.23.i686 needs 68KB on the / filesystem
1607     # [('installing package sfa-client-2.1-7.onelab.2012.05.23.i686 needs 68KB on the / filesystem', (9, '/', 69632L))]
1608     # even though in the same context I have
1609     # [2012.05.23--f14-32-sfastd1-1-vplc07] / # df -h
1610     # Filesystem            Size  Used Avail Use% Mounted on
1611     # /dev/hdv1             806G  264G  501G  35% /
1612     # none                   16M   36K   16M   1% /tmp
1613     #
1614     # so as a workaround, we first try yum install, and then invoke rpm on the cached rpm...
1615     def sfa_install_client(self):
1616         "yum install sfa-client"
1617         first_try = self.dnf_install("sfa-client")
1618         if first_try:
1619             return True
1620         utils.header("********** Regular yum failed - special workaround in place, 2nd chance")
1621         code, cached_rpm_path = \
1622                 utils.output_of(self.actual_command_in_guest(r'find /var/cache/yum -name sfa-client\*.rpm'))
1623         utils.header("cached_rpm_path=<<{}>>".format(cached_rpm_path))
1624         # just for checking
1625         self.run_in_guest("rpm -i {}".format(cached_rpm_path))
1626         return self.dnf_check_installed("sfa-client")
1627
1628     def sfa_dbclean(self):
1629         "thoroughly wipes off the SFA database"
1630         return self.run_in_guest("sfaadmin reg nuke") == 0 or \
1631             self.run_in_guest("sfa-nuke.py") == 0 or \
1632             self.run_in_guest("sfa-nuke-plc.py") == 0 or \
1633             self.run_in_guest("sfaadmin registry nuke") == 0
1634
1635     def sfa_fsclean(self):
1636         "cleanup /etc/sfa/trusted_roots and /var/lib/sfa"
1637         self.run_in_guest("rm -rf /etc/sfa/trusted_roots /var/lib/sfa/authorities")
1638         return True
1639
1640     def sfa_plcclean(self):
1641         "cleans the PLC entries that were created as a side effect of running the script"
1642         # ignore result
1643         sfa_spec = self.plc_spec['sfa']
1644
1645         for auth_sfa_spec in sfa_spec['auth_sfa_specs']:
1646             login_base = auth_sfa_spec['login_base']
1647             try:
1648                 self.apiserver.DeleteSite(self.auth_root(),login_base)
1649             except:
1650                 print("Site {} already absent from PLC db".format(login_base))
1651
1652             for spec_name in ['pi_spec', 'user_spec']:
1653                 user_spec = auth_sfa_spec[spec_name]
1654                 username = user_spec['email']
1655                 try:
1656                     self.apiserver.DeletePerson(self.auth_root(),username)
1657                 except:
1658                     # this in fact is expected as sites delete their members
1659                     #print "User {} already absent from PLC db".format(username)
1660                     pass
1661
1662         print("REMEMBER TO RUN sfa_import AGAIN")
1663         return True
1664
1665     def sfa_uninstall(self):
1666         "uses rpm to uninstall sfa - ignore result"
1667         self.run_in_guest("rpm -e sfa sfa-sfatables sfa-client sfa-plc")
1668         self.run_in_guest("rm -rf /var/lib/sfa")
1669         self.run_in_guest("rm -rf /etc/sfa")
1670         self.run_in_guest("rm -rf /var/log/sfa_access.log /var/log/sfa_import_plc.log /var/log/sfa.daemon")
1671         # xxx tmp
1672         self.run_in_guest("rpm -e --noscripts sfa-plc")
1673         return True
1674
1675     ### run unit tests for SFA
1676     # NOTE: for some reason on f14/i386, yum install sfa-tests fails for no reason
1677     # Running Transaction
1678     # Transaction couldn't start:
1679     # installing package sfa-tests-1.0-21.onelab.i686 needs 204KB on the / filesystem
1680     # [('installing package sfa-tests-1.0-21.onelab.i686 needs 204KB on the / filesystem', (9, '/', 208896L))]
1681     # no matter how many Gbs are available on the testplc
1682     # could not figure out what's wrong, so...
1683     # if the yum install phase fails, consider the test is successful
1684     # other combinations will eventually run it hopefully
1685     def sfa_utest(self):
1686         "dnf install sfa-tests and run SFA unittests"
1687         self.run_in_guest("dnf -y install sfa-tests")
1688         # failed to install - forget it
1689         if self.run_in_guest("rpm -q sfa-tests") != 0:
1690             utils.header("WARNING: SFA unit tests failed to install, ignoring")
1691             return True
1692         return self.run_in_guest("/usr/share/sfa/tests/testAll.py") == 0
1693
1694     ###
1695     def confdir(self):
1696         dirname = "conf.{}".format(self.plc_spec['name'])
1697         if not os.path.isdir(dirname):
1698             utils.system("mkdir -p {}".format(dirname))
1699         if not os.path.isdir(dirname):
1700             raise Exception("Cannot create config dir for plc {}".format(self.name()))
1701         return dirname
1702
1703     def conffile(self, filename):
1704         return "{}/{}".format(self.confdir(), filename)
1705     def confsubdir(self, dirname, clean, dry_run=False):
1706         subdirname = "{}/{}".format(self.confdir(), dirname)
1707         if clean:
1708             utils.system("rm -rf {}".format(subdirname))
1709         if not os.path.isdir(subdirname):
1710             utils.system("mkdir -p {}".format(subdirname))
1711         if not dry_run and not os.path.isdir(subdirname):
1712             raise Exception("Cannot create config subdir {} for plc {}".format(dirname, self.name()))
1713         return subdirname
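         # for illustration, with a plc named e.g. 'onelab1' (hypothetical) these helpers yield
         #   confdir()                           -> conf.onelab1
         #   conffile('agg.xml')                 -> conf.onelab1/agg.xml
         #   confsubdir('dot-sfi/x', clean=True) -> conf.onelab1/dot-sfi/x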
1714
1715     def conffile_clean(self, filename):
1716         filename=self.conffile(filename)
1717         return utils.system("rm -rf {}".format(filename))==0
1718
1719     ###
1720     def sfa_configure(self):
1721         "run sfa-config-tty"
1722         tmpname = self.conffile("sfa-config-tty")
1723         with open(tmpname,'w') as fileconf:
1724             for var, value in self.plc_spec['sfa']['settings'].items():
1725                 fileconf.write('e {}\n{}\n'.format(var, value))
1726             fileconf.write('w\n')
1727             fileconf.write('R\n')
1728             fileconf.write('q\n')
1729         utils.system('cat {}'.format(tmpname))
1730         self.run_in_guest_piped('cat {}'.format(tmpname), 'sfa-config-tty')
1731         return True
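         # the temporary file piped into sfa-config-tty above looks like this
         # (hypothetical setting/value, the real ones come from plc_spec['sfa']['settings']):
         #   e SFA_REGISTRY_ROOT_AUTH
         #   plt
         #   w
         #   R
         #   q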
1732
1733     def aggregate_xml_line(self):
1734         port = self.plc_spec['sfa']['neighbours-port']
1735         return '<aggregate addr="{}" hrn="{}" port="{}"/>'\
1736             .format(self.vserverip, self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH'], port)
1737
1738     def registry_xml_line(self):
1739         return '<registry addr="{}" hrn="{}" port="12345"/>'\
1740             .format(self.vserverip, self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH'])
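         # with hypothetical values these render as e.g.
         #   <aggregate addr="192.168.122.5" hrn="plt" port="12346"/>
         #   <registry addr="192.168.122.5" hrn="plt" port="12345"/>
         # cross_sfa_configure below then wraps them in <aggregates>/<registries> elements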
1741
1742
1743     # a cross step that takes all other plcs in argument
1744     def cross_sfa_configure(self, other_plcs):
1745         "writes aggregates.xml and registries.xml that point to all other PLCs in the test"
1746         # of course with a single plc, other_plcs is an empty list
1747         if not other_plcs:
1748             return True
1749         agg_fname = self.conffile("agg.xml")
1750         with open(agg_fname,"w") as out:
1751             out.write("<aggregates>{}</aggregates>\n"\
1752                       .format(" ".join([ plc.aggregate_xml_line() for plc in other_plcs ])))
1753         utils.header("(Over)wrote {}".format(agg_fname))
1754         reg_fname=self.conffile("reg.xml")
1755         with open(reg_fname,"w") as out:
1756             out.write("<registries>{}</registries>\n"\
1757                       .format(" ".join([ plc.registry_xml_line() for plc in other_plcs ])))
1758         utils.header("(Over)wrote {}".format(reg_fname))
1759         return self.test_ssh.copy_abs(agg_fname,
1760                                       '/{}/etc/sfa/aggregates.xml'.format(self.vm_root_in_host())) == 0 \
1761            and self.test_ssh.copy_abs(reg_fname,
1762                                       '/{}/etc/sfa/registries.xml'.format(self.vm_root_in_host())) == 0
1763
1764     def sfa_import(self):
1765         "use sfaadmin to import from plc"
1766         auth = self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH']
1767         return self.run_in_guest('sfaadmin reg import_registry') == 0
1768
1769     def sfa_start(self):
1770         "start SFA through systemctl - also install dependencies"
1771
1772         return (self.start_stop_systemd('sfa-registry', 'start')
1773             and self.start_stop_systemd('sfa-aggregate', 'start'))
1774
1775
1776     def sfi_configure(self):
1777         "Create /root/sfi on the plc side for sfi client configuration"
1778         if self.options.dry_run:
1779             utils.header("DRY RUN - skipping step")
1780             return True
1781         sfa_spec = self.plc_spec['sfa']
1782         # cannot use auth_sfa_mapper to pass dir_name
1783         for slice_spec in self.plc_spec['sfa']['auth_sfa_specs']:
1784             test_slice = TestAuthSfa(self, slice_spec)
1785             dir_basename = os.path.basename(test_slice.sfi_path())
1786             dir_name = self.confsubdir("dot-sfi/{}".format(dir_basename),
1787                                        clean=True, dry_run=self.options.dry_run)
1788             test_slice.sfi_configure(dir_name)
1789             # push into the remote /root/sfi area
1790             location = test_slice.sfi_path()
1791             remote = "{}/{}".format(self.vm_root_in_host(), location)
1792             self.test_ssh.mkdir(remote, abs=True)
1793             # need to strip the last level of remote, otherwise we get an extra dir level
1794             self.test_ssh.copy_abs(dir_name, os.path.dirname(remote), recursive=True)
1795
1796         return True
1797
1798     def sfi_clean(self):
1799         "clean up /root/sfi on the plc side"
1800         self.run_in_guest("rm -rf /root/sfi")
1801         return True
1802
1803     def sfa_rspec_empty(self):
1804         "expose a static empty rspec (ships with the tests module) in the sfi directory"
1805         filename = "empty-rspec.xml"
1806         overall = True
1807         for slice_spec in self.plc_spec['sfa']['auth_sfa_specs']:
1808             test_slice = TestAuthSfa(self, slice_spec)
1809             in_vm = test_slice.sfi_path()
1810             remote = "{}/{}".format(self.vm_root_in_host(), in_vm)
1811             if self.test_ssh.copy_abs(filename, remote) !=0:
1812                 overall = False
1813         return overall
1814
1815     @auth_sfa_mapper
1816     def sfa_register_site(self): pass
1817     @auth_sfa_mapper
1818     def sfa_register_pi(self): pass
1819     @auth_sfa_mapper
1820     def sfa_register_user(self): pass
1821     @auth_sfa_mapper
1822     def sfa_update_user(self): pass
1823     @auth_sfa_mapper
1824     def sfa_register_slice(self): pass
1825     @auth_sfa_mapper
1826     def sfa_renew_slice(self): pass
1827     @auth_sfa_mapper
1828     def sfa_get_expires(self): pass
1829     @auth_sfa_mapper
1830     def sfa_discover(self): pass
1831     @auth_sfa_mapper
1832     def sfa_rspec(self): pass
1833     @auth_sfa_mapper
1834     def sfa_allocate(self): pass
1835     @auth_sfa_mapper
1836     def sfa_allocate_empty(self): pass
1837     @auth_sfa_mapper
1838     def sfa_provision(self): pass
1839     @auth_sfa_mapper
1840     def sfa_provision_empty(self): pass
1841     @auth_sfa_mapper
1842     def sfa_describe(self): pass
1843     @auth_sfa_mapper
1844     def sfa_check_slice_plc(self): pass
1845     @auth_sfa_mapper
1846     def sfa_check_slice_plc_empty(self): pass
1847     @auth_sfa_mapper
1848     def sfa_update_slice(self): pass
1849     @auth_sfa_mapper
1850     def sfa_remove_user_from_slice(self): pass
1851     @auth_sfa_mapper
1852     def sfa_insert_user_in_slice(self): pass
1853     @auth_sfa_mapper
1854     def sfi_list(self): pass
1855     @auth_sfa_mapper
1856     def sfi_show_site(self): pass
1857     @auth_sfa_mapper
1858     def sfi_show_slice(self): pass
1859     @auth_sfa_mapper
1860     def sfi_show_slice_researchers(self): pass
1861     @auth_sfa_mapper
1862     def ssh_slice_sfa(self): pass
1863     @auth_sfa_mapper
1864     def sfa_delete_user(self): pass
1865     @auth_sfa_mapper
1866     def sfa_delete_slice(self): pass
1867
1868     def sfa_stop(self):
1869         "stop SFA through systemctl"
1870         return (self.start_stop_systemd('sfa-aggregate', 'stop') and
1871                 self.start_stop_systemd('sfa-registry', 'stop'))
1872
1873     def populate(self):
1874         "creates random entries in the PLCAPI"
1875         # install the stress-test in the plc image
1876         location = "/usr/share/plc_api/plcsh_stress_test.py"
1877         remote = "{}/{}".format(self.vm_root_in_host(), location)
1878         self.test_ssh.copy_abs("plcsh_stress_test.py", remote)
1879         command = location
1880         command += " -- --preserve --short-names"
1881         local = (self.run_in_guest(command) == 0)
1882         # second run with --foreign
1883         command += ' --foreign'
1884         remote = (self.run_in_guest(command) == 0)
1885         return local and remote
1886
1887
1888     ####################
1889     @bonding_redirector
1890     def bonding_init_partial(self): pass
1891
1892     @bonding_redirector
1893     def bonding_add_yum(self): pass
1894
1895     @bonding_redirector
1896     def bonding_install_rpms(self): pass
1897
1898     ####################
1899
1900     def gather_logs(self):
1901         "gets all possible logs from plc's/qemu node's/slice's for future reference"
1902         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
1903         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
1904         # (1.c) get the plc's /root/sfi -> logs/sfi.<plcname>/
1905         # (2) get all the nodes' qemu logs and store them as logs/node.qemu.<node>.log
1906         # (3) get the nodes' /var/log and store it as logs/node.var-log.<node>/*
1907         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
1908         # (1.a)
1909         print("-------------------- TestPlc.gather_logs : PLC's /var/log")
1910         self.gather_var_logs()
1911         # (1.b)
1912         print("-------------------- TestPlc.gather_logs : PLC's /var/lib/pgsql/data/pg_log/")
1913         self.gather_pgsql_logs()
1914         # (1.c)
1915         print("-------------------- TestPlc.gather_logs : PLC's /root/sfi/")
1916         self.gather_root_sfi()
1917         # (2)
1918         print("-------------------- TestPlc.gather_logs : nodes' QEMU logs")
1919         for site_spec in self.plc_spec['sites']:
1920             test_site = TestSite(self,site_spec)
1921             for node_spec in site_spec['nodes']:
1922                 test_node = TestNode(self, test_site, node_spec)
1923                 test_node.gather_qemu_logs()
1924         # (3)
1925         print("-------------------- TestPlc.gather_logs : nodes' /var/log")
1926         self.gather_nodes_var_logs()
1927         # (4)
1928         print("-------------------- TestPlc.gather_logs : sample sliver's /var/log")
1929         self.gather_slivers_var_logs()
1930         return True
1931
1932     def gather_slivers_var_logs(self):
1933         for test_sliver in self.all_sliver_objs():
1934             remote = test_sliver.tar_var_logs()
1935             utils.system("mkdir -p logs/sliver.var-log.{}".format(test_sliver.name()))
1936             command = remote + " | tar -C logs/sliver.var-log.{} -xf -".format(test_sliver.name())
1937             utils.system(command)
1938         return True
1939
1940     def gather_var_logs(self):
1941         utils.system("mkdir -p logs/myplc.var-log.{}".format(self.name()))
1942         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
1943         command = to_plc + "| tar -C logs/myplc.var-log.{} -xf -".format(self.name())
1944         utils.system(command)
1945         command = "chmod a+r,a+x logs/myplc.var-log.{}/httpd".format(self.name())
1946         utils.system(command)
1947
1948     def gather_pgsql_logs(self):
1949         utils.system("mkdir -p logs/myplc.pgsql-log.{}".format(self.name()))
1950         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
1951         command = to_plc + "| tar -C logs/myplc.pgsql-log.{} -xf -".format(self.name())
1952         utils.system(command)
1953
1954     def gather_root_sfi(self):
1955         utils.system("mkdir -p logs/sfi.{}".format(self.name()))
1956         to_plc = self.actual_command_in_guest("tar -C /root/sfi/ -cf - .")
1957         command = to_plc + "| tar -C logs/sfi.{} -xf -".format(self.name())
1958         utils.system(command)
1959
1960     def gather_nodes_var_logs(self):
1961         for site_spec in self.plc_spec['sites']:
1962             test_site = TestSite(self, site_spec)
1963             for node_spec in site_spec['nodes']:
1964                 test_node = TestNode(self, test_site, node_spec)
1965                 test_ssh = TestSsh(test_node.name(), key="keys/key_admin.rsa")
1966                 command = test_ssh.actual_command("tar -C /var/log -cf - .")
1967                 command = command + "| tar -C logs/node.var-log.{} -xf -".format(test_node.name())
1968                 utils.system("mkdir -p logs/node.var-log.{}".format(test_node.name()))
1969                 utils.system(command)
1970
1971
1972     # returns the filename to use for sql dump/restore, using options.dbname if set
1973     def dbfile(self, database):
1974         # uses options.dbname if it is found
1975         try:
1976             name = self.options.dbname
1977             if not isinstance(name, str):
1978                 raise Exception
1979         except:
1980             t = datetime.now()
1981             d = t.date()
1982             name = str(d)
1983         return "/root/{}-{}.sql".format(database, name)
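         # e.g. dbfile("planetlab5") -> "/root/planetlab5-2014-05-23.sql" (hypothetical date)
         # or, when options.dbname is set to say 'nightly', -> "/root/planetlab5-nightly.sql"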
1984
1985     def plc_db_dump(self):
1986         'dump the planetlab5 DB in /root in the PLC - filename carries the date'
1987         dump = self.dbfile("planetlab5")
1988         self.run_in_guest('pg_dump -U pgsqluser planetlab5 -f '+ dump)
1989         utils.header('Dumped planetlab5 database in {}'.format(dump))
1990         return True
1991
1992     def plc_db_restore(self):
1993         'restore the planetlab5 DB - looks broken, but run -n might help'
1994         dump = self.dbfile("planetlab5")
1995         self.run_in_guest('systemctl stop httpd')
1996         # xxx - need another wrapper
1997         self.run_in_guest_piped('echo drop database planetlab5', 'psql --user=pgsqluser template1')
1998         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab5')
1999         self.run_in_guest('psql -U pgsqluser planetlab5 -f ' + dump)
2000         # restart the httpd service
2001         self.run_in_guest('systemctl start httpd')
2002
2003         utils.header('Database restored from ' + dump)
             return True
2004
2005     @staticmethod
2006     def create_ignore_steps():
2007         for step in TestPlc.default_steps + TestPlc.other_steps:
2008             # default step can have a plc qualifier
2009             if '@' in step:
2010                 step, qualifier = step.split('@')
2011             # or be defined as forced or ignored by default
2012             for keyword in ['_ignore', '_force']:
2013                 if step.endswith(keyword):
2014                     step=step.replace(keyword,'')
2015             if step == SEP or step == SEPSFA :
2016                 continue
2017             method = getattr(TestPlc,step)
2018             name = step + '_ignore'
2019             wrapped = ignore_result(method)
2020 #            wrapped.__doc__ = method.__doc__ + " (run in ignore-result mode)"
2021             setattr(TestPlc, name, wrapped)
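         # e.g. assuming a step named 'sfa_import' is listed in default_steps, this
         # loop registers a companion 'sfa_import_ignore' method on TestPlc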
2022
2023 #    @ignore_result
2024 #    def ssh_slice_again_ignore (self): pass
2025 #    @ignore_result
2026 #    def check_initscripts_ignore (self): pass
2027
2028     def standby_1_through_20(self):
2029         """convenience function to wait for a specified number of minutes"""
2030         pass
2031     @standby_generic
2032     def standby_1(): pass
2033     @standby_generic
2034     def standby_2(): pass
2035     @standby_generic
2036     def standby_3(): pass
2037     @standby_generic
2038     def standby_4(): pass
2039     @standby_generic
2040     def standby_5(): pass
2041     @standby_generic
2042     def standby_6(): pass
2043     @standby_generic
2044     def standby_7(): pass
2045     @standby_generic
2046     def standby_8(): pass
2047     @standby_generic
2048     def standby_9(): pass
2049     @standby_generic
2050     def standby_10(): pass
2051     @standby_generic
2052     def standby_11(): pass
2053     @standby_generic
2054     def standby_12(): pass
2055     @standby_generic
2056     def standby_13(): pass
2057     @standby_generic
2058     def standby_14(): pass
2059     @standby_generic
2060     def standby_15(): pass
2061     @standby_generic
2062     def standby_16(): pass
2063     @standby_generic
2064     def standby_17(): pass
2065     @standby_generic
2066     def standby_18(): pass
2067     @standby_generic
2068     def standby_19(): pass
2069     @standby_generic
2070     def standby_20(): pass
2071
2072     # convenience for debugging the test logic
2073     def yes(self): return True
2074     def no(self): return False
2075     def fail(self): return False