system/TestPlc.py

   1 # Thierry Parmentelat <thierry.parmentelat@inria.fr>
   2 # Copyright (C) 2010 INRIA
   3 #
   4 import sys
   5 import time
   6 import os, os.path
   7 import traceback
   8 import socket
   9 from datetime import datetime, timedelta
  10
  11 import utils
  12 from Completer import Completer, CompleterTask
  13 from TestSite import TestSite
  14 from TestNode import TestNode, CompleterTaskNodeSsh
  15 from TestUser import TestUser
  16 from TestKey import TestKey
  17 from TestSlice import TestSlice
  18 from TestSliver import TestSliver
  19 from TestBoxQemu import TestBoxQemu
  20 from TestSsh import TestSsh
  21 from TestApiserver import TestApiserver
  22 from TestAuthSfa import TestAuthSfa
  23 from PlcapiUrlScanner import PlcapiUrlScanner
  24
  25 from TestBonding import TestBonding
  26
  27 has_sfa_cache_filename="sfa-cache"
  28
  29 # step methods must take (self) and return a boolean (options is a member of the class)
  30
  31 def standby(minutes, dry_run):
  32     utils.header('Entering StandBy for {:d} mn'.format(minutes))
  33     if dry_run:
  34         print('dry_run')
  35     else:
  36         time.sleep(60*minutes)
  37     return True
  38
  39 def standby_generic(func):
  40     def actual(self):
  41         minutes = int(func.__name__.split("_")[1])
  42         return standby(minutes, self.options.dry_run)
  43     return actual
  44
  45 def node_mapper(method):
  46     def map_on_nodes(self, *args, **kwds):
  47         overall = True
  48         node_method = TestNode.__dict__[method.__name__]
  49         for test_node in self.all_nodes():
  50             if not node_method(test_node, *args, **kwds):
  51                 overall=False
  52         return overall
  53     # maintain __name__ for ignore_result
  54     map_on_nodes.__name__ = method.__name__
  55     # restore the doc text
  56     map_on_nodes.__doc__ = TestNode.__dict__[method.__name__].__doc__
  57     return map_on_nodes
  58
  59 def slice_mapper(method):
  60     def map_on_slices(self):
  61         overall = True
  62         slice_method = TestSlice.__dict__[method.__name__]
  63         for slice_spec in self.plc_spec['slices']:
  64             site_spec = self.locate_site (slice_spec['sitename'])
  65             test_site = TestSite(self,site_spec)
  66             test_slice = TestSlice(self,test_site,slice_spec)
  67             if not slice_method(test_slice, self.options):
  68                 overall=False
  69         return overall
  70     # maintain __name__ for ignore_result
  71     map_on_slices.__name__ = method.__name__
  72     # restore the doc text
  73     map_on_slices.__doc__ = TestSlice.__dict__[method.__name__].__doc__
  74     return map_on_slices
  75
  76 def bonding_redirector(method):
  77     bonding_name = method.__name__.replace('bonding_', '')
  78     def redirect(self):
  79         bonding_method = TestBonding.__dict__[bonding_name]
  80         return bonding_method(self.test_bonding)
  81     # maintain __name__ for ignore_result
  82     redirect.__name__ = method.__name__
  83     # restore the doc text
  84     redirect.__doc__ = TestBonding.__dict__[bonding_name].__doc__
  85     return redirect
  86
  87 # run a step but return True so that we can go on
  88 def ignore_result(method):
  89     def ignoring(self):
  90         # ssh_slice_ignore->ssh_slice
  91         ref_name = method.__name__.replace('_ignore', '').replace('force_', '')
  92         ref_method = TestPlc.__dict__[ref_name]
  93         result = ref_method(self)
  94         print("Actual (but ignored) result for {ref_name} is {result}".format(**locals()))
  95         return Ignored(result)
  96     name = method.__name__.replace('_ignore', '').replace('force_', '')
  97     ignoring.__name__ = name
  98     ignoring.__doc__ = "ignored version of " + name
  99     return ignoring
 100
 101 # a variant that expects the TestSlice method to return a list of CompleterTasks that
 102 # are then merged into a single Completer run to avoid wating for all the slices
 103 # esp. useful when a test fails of course
 104 # because we need to pass arguments we use a class instead..
 105 class slice_mapper__tasks(object):
 106     # could not get this to work with named arguments
 107     def __init__(self, timeout_minutes, silent_minutes, period_seconds):
 108         self.timeout = timedelta(minutes = timeout_minutes)
 109         self.silent = timedelta(minutes = silent_minutes)
 110         self.period = timedelta(seconds = period_seconds)
 111     def __call__(self, method):
 112         decorator_self=self
 113         # compute augmented method name
 114         method_name = method.__name__ + "__tasks"
 115         # locate in TestSlice
 116         slice_method = TestSlice.__dict__[ method_name ]
 117         def wrappee(self):
 118             tasks=[]
 119             for slice_spec in self.plc_spec['slices']:
 120                 site_spec = self.locate_site (slice_spec['sitename'])
 121                 test_site = TestSite(self, site_spec)
 122                 test_slice = TestSlice(self, test_site, slice_spec)
 123                 tasks += slice_method (test_slice, self.options)
 124             return Completer (tasks, message=method.__name__).\
 125                 run(decorator_self.timeout, decorator_self.silent, decorator_self.period)
 126         # restore the doc text from the TestSlice method even if a bit odd
 127         wrappee.__name__ = method.__name__
 128         wrappee.__doc__ = slice_method.__doc__
 129         return wrappee
 130
 131 def auth_sfa_mapper(method):
 132     def actual(self):
 133         overall = True
 134         auth_method = TestAuthSfa.__dict__[method.__name__]
 135         for auth_spec in self.plc_spec['sfa']['auth_sfa_specs']:
 136             test_auth = TestAuthSfa(self, auth_spec)
 137             if not auth_method(test_auth, self.options):
 138                 overall=False
 139         return overall
 140     # restore the doc text
 141     actual.__doc__ = TestAuthSfa.__dict__[method.__name__].__doc__
 142     return actual
 143
 144 class Ignored:
 145     def __init__(self, result):
 146         self.result = result
 147
 148 SEP = '<sep>'
 149 SEPSFA = '<sep_sfa>'
 150
 151 class TestPlc:
 152
 153     default_steps = [
 154         'show', SEP,
 155         'plcvm_delete','plcvm_timestamp','plcvm_create', SEP,
 156         'plc_install', 'plc_configure', 'plc_start', SEP,
 157         'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP,
 158         'plcapi_urls','speed_up_slices', SEP,
 159         'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP,
 160 # slices created under plcsh interactively seem to be fine but these ones don't have the tags
 161 # keep this our of the way for now
 162         'check_vsys_defaults_ignore', SEP,
 163 # run this first off so it's easier to re-run on another qemu box
 164         'qemu_kill_mine', SEP,
 165         'nodestate_reinstall', 'qemu_local_init','bootcd', 'qemu_local_config', SEP,
 166         'qemu_clean_mine', 'qemu_export', 'qemu_start', 'qemu_timestamp', SEP,
 167         'sfa_install_all', 'sfa_configure', 'cross_sfa_configure', 'sfa_start', 'sfa_import', SEPSFA,
 168         'sfi_configure@1', 'sfa_register_site@1','sfa_register_pi@1', SEPSFA,
 169         'sfa_register_user@1', 'sfa_update_user@1', 'sfa_register_slice@1', 'sfa_renew_slice@1', SEPSFA,
 170         'sfa_remove_user_from_slice@1','sfi_show_slice_researchers@1',
 171         'sfa_insert_user_in_slice@1','sfi_show_slice_researchers@1', SEPSFA,
 172         'sfa_discover@1', 'sfa_rspec@1', 'sfa_allocate@1', 'sfa_provision@1', SEPSFA,
 173         'sfa_check_slice_plc@1', 'sfa_update_slice@1', SEPSFA,
 174         'sfi_list@1', 'sfi_show_site@1', 'sfa_utest@1', SEPSFA,
 175         # we used to run plcsh_stress_test, and then ssh_node_debug and ssh_node_boot
 176         # but as the stress test might take a while, we sometimes missed the debug mode..
 177         'probe_kvm_iptables',
 178         'ping_node', 'ssh_node_debug', 'plcsh_stress_test@1', SEP,
 179         'ssh_node_boot', 'node_bmlogs', 'ssh_slice', 'ssh_slice_basics', 'check_initscripts', SEP,
 180         'ssh_slice_sfa@1', SEPSFA,
 181         'sfa_rspec_empty@1', 'sfa_allocate_empty@1', 'sfa_provision_empty@1','sfa_check_slice_plc_empty@1', SEPSFA,
 182         'sfa_delete_slice@1', 'sfa_delete_user@1', SEPSFA,
 183         'cross_check_tcp@1', 'check_system_slice', SEP,
 184         # for inspecting the slice while it runs the first time
 185         #'fail',
 186         # check slices are turned off properly
 187         'empty_slices', 'ssh_slice_off', 'slice_fs_deleted_ignore', SEP,
 188         # check they are properly re-created with the same name
 189         'fill_slices', 'ssh_slice_again', SEP,
 190         'gather_logs_force', SEP,
 191         ]
 192     other_steps = [
 193         'export', 'show_boxes', 'super_speed_up_slices', SEP,
 194         'check_hooks', 'plc_stop', 'plcvm_start', 'plcvm_stop', SEP,
 195         'delete_initscripts', 'delete_nodegroups','delete_all_sites', SEP,
 196         'delete_sites', 'delete_nodes', 'delete_slices', 'keys_clean', SEP,
 197         'delete_leases', 'list_leases', SEP,
 198         'populate', SEP,
 199         'nodestate_show','nodestate_safeboot','nodestate_boot', SEP,
 200         'qemu_list_all', 'qemu_list_mine', 'qemu_kill_all', SEP,
 201         'sfa_install_core', 'sfa_install_sfatables', 'sfa_install_plc', 'sfa_install_client', SEPSFA,
 202         'sfa_plcclean', 'sfa_dbclean', 'sfa_stop','sfa_uninstall', 'sfi_clean', SEPSFA,
 203         'sfa_get_expires', SEPSFA,
 204         'plc_db_dump' , 'plc_db_restore', SEP,
 205         'check_netflow','check_drl', SEP,
 206         'debug_nodemanager', 'slice_fs_present', SEP,
 207         'standby_1_through_20','yes','no',SEP,
 208         ]
 209     bonding_steps = [
 210         'bonding_init_partial',
 211         'bonding_add_yum',
 212         'bonding_install_rpms', SEP,
 213         ]
 214
 215     @staticmethod
 216     def printable_steps(list):
 217         single_line = " ".join(list) + " "
 218         return single_line.replace(" "+SEP+" ", " \\\n").replace(" "+SEPSFA+" ", " \\\n")
 219     @staticmethod
 220     def valid_step(step):
 221         return step != SEP and step != SEPSFA
 222
 223     # turn off the sfa-related steps when build has skipped SFA
 224     # this was originally for centos5 but is still valid
 225     # for up to f12 as recent SFAs with sqlalchemy won't build before f14
 226     @staticmethod
 227     def _has_sfa_cached(rpms_url):
 228         if os.path.isfile(has_sfa_cache_filename):
 229             with open(has_sfa_cache_filename) as cache:
 230                 cached = cache.read() == "yes"
 231             utils.header("build provides SFA (cached):{}".format(cached))
 232             return cached
 233         # warning, we're now building 'sface' so let's be a bit more picky
 234         # full builds are expected to return with 0 here
 235         utils.header("Checking if build provides SFA package...")
 236         retcod = os.system("curl --silent {}/ | grep -q sfa-".format(rpms_url)) == 0
 237         encoded = 'yes' if retcod else 'no'
 238         with open(has_sfa_cache_filename,'w') as cache:
 239             cache.write(encoded)
 240         return retcod
 241
 242     @staticmethod
 243     def check_whether_build_has_sfa(rpms_url):
 244         has_sfa = TestPlc._has_sfa_cached(rpms_url)
 245         if has_sfa:
 246             utils.header("build does provide SFA")
 247         else:
 248             # move all steps containing 'sfa' from default_steps to other_steps
 249             utils.header("SFA package not found - removing steps with sfa or sfi")
 250             sfa_steps = [ step for step in TestPlc.default_steps
 251                           if step.find('sfa') >= 0 or step.find("sfi") >= 0 ]
 252             TestPlc.other_steps += sfa_steps
 253             for step in sfa_steps:
 254                 TestPlc.default_steps.remove(step)
 255
 256     def __init__(self, plc_spec, options):
 257         self.plc_spec = plc_spec
 258         self.options = options
 259         self.test_ssh = TestSsh(self.plc_spec['host_box'], self.options.buildname)
 260         self.vserverip = plc_spec['vserverip']
 261         self.vservername = plc_spec['vservername']
 262         self.url = "https://{}:443/PLCAPI/".format(plc_spec['vserverip'])
 263         self.apiserver = TestApiserver(self.url, options.dry_run)
 264         (self.ssh_node_boot_timeout, self.ssh_node_boot_silent) = plc_spec['ssh_node_boot_timers']
 265         (self.ssh_node_debug_timeout, self.ssh_node_debug_silent) = plc_spec['ssh_node_debug_timers']
 266
 267     def has_addresses_api(self):
 268         return self.apiserver.has_method('AddIpAddress')
 269
 270     def name(self):
 271         name = self.plc_spec['name']
 272         return "{}.{}".format(name,self.vservername)
 273
 274     def hostname(self):
 275         return self.plc_spec['host_box']
 276
 277     def is_local(self):
 278         return self.test_ssh.is_local()
 279
 280     # define the API methods on this object through xmlrpc
 281     # would help, but not strictly necessary
 282     def connect(self):
 283         pass
 284
 285     def actual_command_in_guest(self,command, backslash=False):
 286         raw1 = self.host_to_guest(command)
 287         raw2 = self.test_ssh.actual_command(raw1, dry_run=self.options.dry_run, backslash=backslash)
 288         return raw2
 289
 290     def start_guest(self):
 291       return utils.system(self.test_ssh.actual_command(self.start_guest_in_host(),
 292                                                        dry_run=self.options.dry_run))
 293
 294     def stop_guest(self):
 295       return utils.system(self.test_ssh.actual_command(self.stop_guest_in_host(),
 296                                                        dry_run=self.options.dry_run))
 297
 298     def run_in_guest(self, command, backslash=False):
 299         raw = self.actual_command_in_guest(command, backslash)
 300         return utils.system(raw)
 301
 302     def run_in_host(self,command):
 303         return self.test_ssh.run_in_buildname(command, dry_run=self.options.dry_run)
 304
 305     # backslashing turned out so awful at some point that I've turned off auto-backslashing
 306     # see e.g. plc_start esp. the version for f14
 307     #command gets run in the plc's vm
 308     def host_to_guest(self, command):
 309         vservername = self.vservername
 310         personality = self.options.personality
 311         raw = "{personality} virsh -c lxc:/// lxc-enter-namespace {vservername}".format(**locals())
 312         # f14 still needs some extra help
 313         if self.options.fcdistro == 'f14':
 314             raw +=" -- /usr/bin/env PATH=/bin:/sbin:/usr/bin:/usr/sbin {command}".format(**locals())
 315         else:
 316             raw +=" -- /usr/bin/env {command}".format(**locals())
 317         return raw
 318
 319     # this /vservers thing is legacy...
 320     def vm_root_in_host(self):
 321         return "/vservers/{}/".format(self.vservername)
 322
 323     def vm_timestamp_path(self):
 324         return "/vservers/{}/{}.timestamp".format(self.vservername, self.vservername)
 325
 326     #start/stop the vserver
 327     def start_guest_in_host(self):
 328         return "virsh -c lxc:/// start {}".format(self.vservername)
 329
 330     def stop_guest_in_host(self):
 331         return "virsh -c lxc:/// destroy {}".format(self.vservername)
 332
 333     # xxx quick n dirty
 334     def run_in_guest_piped(self,local,remote):
 335         return utils.system(local+" | "+self.test_ssh.actual_command(self.host_to_guest(remote),
 336                                                                      keep_stdin = True))
 337
 338     def yum_check_installed(self, rpms):
 339         if isinstance(rpms, list):
 340             rpms=" ".join(rpms)
 341         return self.run_in_guest("rpm -q {}".format(rpms)) == 0
 342
 343     # does a yum install in the vs, ignore yum retcod, check with rpm
 344     def yum_install(self, rpms):
 345         if isinstance(rpms, list):
 346             rpms=" ".join(rpms)
 347         self.run_in_guest("yum -y install {}".format(rpms))
 348         # yum-complete-transaction comes with yum-utils, that is in vtest.pkgs
 349         self.run_in_guest("yum-complete-transaction -y")
 350         return self.yum_check_installed(rpms)
 351
 352     def auth_root(self):
 353         return {'Username'   : self.plc_spec['settings']['PLC_ROOT_USER'],
 354                 'AuthMethod' : 'password',
 355                 'AuthString' : self.plc_spec['settings']['PLC_ROOT_PASSWORD'],
 356                 'Role'       : self.plc_spec['role'],
 357                 }
 358
 359     def locate_site(self,sitename):
 360         for site in self.plc_spec['sites']:
 361             if site['site_fields']['name'] == sitename:
 362                 return site
 363             if site['site_fields']['login_base'] == sitename:
 364                 return site
 365         raise Exception("Cannot locate site {}".format(sitename))
 366
 367     def locate_node(self, nodename):
 368         for site in self.plc_spec['sites']:
 369             for node in site['nodes']:
 370                 if node['name'] == nodename:
 371                     return site, node
 372         raise Exception("Cannot locate node {}".format(nodename))
 373
 374     def locate_hostname(self, hostname):
 375         for site in self.plc_spec['sites']:
 376             for node in site['nodes']:
 377                 if node['node_fields']['hostname'] == hostname:
 378                     return(site, node)
 379         raise Exception("Cannot locate hostname {}".format(hostname))
 380
 381     def locate_key(self, key_name):
 382         for key in self.plc_spec['keys']:
 383             if key['key_name'] == key_name:
 384                 return key
 385         raise Exception("Cannot locate key {}".format(key_name))
 386
 387     def locate_private_key_from_key_names(self, key_names):
 388         # locate the first avail. key
 389         found = False
 390         for key_name in key_names:
 391             key_spec = self.locate_key(key_name)
 392             test_key = TestKey(self,key_spec)
 393             publickey = test_key.publicpath()
 394             privatekey = test_key.privatepath()
 395             if os.path.isfile(publickey) and os.path.isfile(privatekey):
 396                 found = True
 397         if found:
 398             return privatekey
 399         else:
 400             return None
 401
 402     def locate_slice(self, slicename):
 403         for slice in self.plc_spec['slices']:
 404             if slice['slice_fields']['name'] == slicename:
 405                 return slice
 406         raise Exception("Cannot locate slice {}".format(slicename))
 407
 408     def all_sliver_objs(self):
 409         result = []
 410         for slice_spec in self.plc_spec['slices']:
 411             slicename = slice_spec['slice_fields']['name']
 412             for nodename in slice_spec['nodenames']:
 413                 result.append(self.locate_sliver_obj(nodename, slicename))
 414         return result
 415
 416     def locate_sliver_obj(self, nodename, slicename):
 417         site,node = self.locate_node(nodename)
 418         slice = self.locate_slice(slicename)
 419         # build objects
 420         test_site = TestSite(self, site)
 421         test_node = TestNode(self, test_site, node)
 422         # xxx the slice site is assumed to be the node site - mhh - probably harmless
 423         test_slice = TestSlice(self, test_site, slice)
 424         return TestSliver(self, test_node, test_slice)
 425
 426     def locate_first_node(self):
 427         nodename = self.plc_spec['slices'][0]['nodenames'][0]
 428         site,node = self.locate_node(nodename)
 429         test_site = TestSite(self, site)
 430         test_node = TestNode(self, test_site, node)
 431         return test_node
 432
 433     def locate_first_sliver(self):
 434         slice_spec = self.plc_spec['slices'][0]
 435         slicename = slice_spec['slice_fields']['name']
 436         nodename = slice_spec['nodenames'][0]
 437         return self.locate_sliver_obj(nodename,slicename)
 438
 439     # all different hostboxes used in this plc
 440     def get_BoxNodes(self):
 441         # maps on sites and nodes, return [ (host_box,test_node) ]
 442         tuples = []
 443         for site_spec in self.plc_spec['sites']:
 444             test_site = TestSite(self,site_spec)
 445             for node_spec in site_spec['nodes']:
 446                 test_node = TestNode(self, test_site, node_spec)
 447                 if not test_node.is_real():
 448                     tuples.append( (test_node.host_box(),test_node) )
 449         # transform into a dict { 'host_box' -> [ test_node .. ] }
 450         result = {}
 451         for (box,node) in tuples:
 452             if box not in result:
 453                 result[box] = [node]
 454             else:
 455                 result[box].append(node)
 456         return result
 457
 458     # a step for checking this stuff
 459     def show_boxes(self):
 460         'print summary of nodes location'
 461         for box,nodes in self.get_BoxNodes().items():
 462             print(box,":"," + ".join( [ node.name() for node in nodes ] ))
 463         return True
 464
 465     # make this a valid step
 466     def qemu_kill_all(self):
 467         'kill all qemu instances on the qemu boxes involved by this setup'
 468         # this is the brute force version, kill all qemus on that host box
 469         for (box,nodes) in self.get_BoxNodes().items():
 470             # pass the first nodename, as we don't push template-qemu on testboxes
 471             nodedir = nodes[0].nodedir()
 472             TestBoxQemu(box, self.options.buildname).qemu_kill_all(nodedir)
 473         return True
 474
 475     # make this a valid step
 476     def qemu_list_all(self):
 477         'list all qemu instances on the qemu boxes involved by this setup'
 478         for box,nodes in self.get_BoxNodes().items():
 479             # this is the brute force version, kill all qemus on that host box
 480             TestBoxQemu(box, self.options.buildname).qemu_list_all()
 481         return True
 482
 483     # kill only the qemus related to this test
 484     def qemu_list_mine(self):
 485         'list qemu instances for our nodes'
 486         for (box,nodes) in self.get_BoxNodes().items():
 487             # the fine-grain version
 488             for node in nodes:
 489                 node.list_qemu()
 490         return True
 491
 492     # kill only the qemus related to this test
 493     def qemu_clean_mine(self):
 494         'cleanup (rm -rf) qemu instances for our nodes'
 495         for box,nodes in self.get_BoxNodes().items():
 496             # the fine-grain version
 497             for node in nodes:
 498                 node.qemu_clean()
 499         return True
 500
 501     # kill only the right qemus
 502     def qemu_kill_mine(self):
 503         'kill the qemu instances for our nodes'
 504         for box,nodes in self.get_BoxNodes().items():
 505             # the fine-grain version
 506             for node in nodes:
 507                 node.kill_qemu()
 508         return True
 509
 510     #################### display config
 511     def show(self):
 512         "show test configuration after localization"
 513         self.show_pass(1)
 514         self.show_pass(2)
 515         return True
 516
 517     # uggly hack to make sure 'run export' only reports about the 1st plc
 518     # to avoid confusion - also we use 'inri_slice1' in various aliases..
 519     exported_id = 1
 520     def export(self):
 521         "print cut'n paste-able stuff to export env variables to your shell"
 522         # guess local domain from hostname
 523         if TestPlc.exported_id > 1:
 524             print("export GUESTHOSTNAME{:d}={}".format(TestPlc.exported_id, self.plc_spec['vservername']))
 525             return True
 526         TestPlc.exported_id += 1
 527         domain = socket.gethostname().split('.',1)[1]
 528         fqdn   = "{}.{}".format(self.plc_spec['host_box'], domain)
 529         print("export BUILD={}".format(self.options.buildname))
 530         print("export PLCHOSTLXC={}".format(fqdn))
 531         print("export GUESTNAME={}".format(self.plc_spec['vservername']))
 532         vplcname = self.plc_spec['vservername'].split('-')[-1]
 533         print("export GUESTHOSTNAME={}.{}".format(vplcname, domain))
 534         # find hostname of first node
 535         hostname, qemubox = self.all_node_infos()[0]
 536         print("export KVMHOST={}.{}".format(qemubox, domain))
 537         print("export NODE={}".format(hostname))
 538         return True
 539
 540     # entry point
 541     always_display_keys=['PLC_WWW_HOST', 'nodes', 'sites']
 542     def show_pass(self, passno):
 543         for (key,val) in self.plc_spec.items():
 544             if not self.options.verbose and key not in TestPlc.always_display_keys:
 545                 continue
 546             if passno == 2:
 547                 if key == 'sites':
 548                     for site in val:
 549                         self.display_site_spec(site)
 550                         for node in site['nodes']:
 551                             self.display_node_spec(node)
 552                 elif key == 'initscripts':
 553                     for initscript in val:
 554                         self.display_initscript_spec(initscript)
 555                 elif key == 'slices':
 556                     for slice in val:
 557                         self.display_slice_spec(slice)
 558                 elif key == 'keys':
 559                     for key in val:
 560                         self.display_key_spec(key)
 561             elif passno == 1:
 562                 if key not in ['sites', 'initscripts', 'slices', 'keys']:
 563                     print('+   ', key, ':', val)
 564
 565     def display_site_spec(self, site):
 566         print('+ ======== site', site['site_fields']['name'])
 567         for k,v in site.items():
 568             if not self.options.verbose and k not in TestPlc.always_display_keys:
 569                 continue
 570             if k == 'nodes':
 571                 if v:
 572                     print('+       ','nodes : ', end=' ')
 573                     for node in v:
 574                         print(node['node_fields']['hostname'],'', end=' ')
 575                     print('')
 576             elif k == 'users':
 577                 if v:
 578                     print('+       users : ', end=' ')
 579                     for user in v:
 580                         print(user['name'],'', end=' ')
 581                     print('')
 582             elif k == 'site_fields':
 583                 print('+       login_base', ':', v['login_base'])
 584             elif k == 'address_fields':
 585                 pass
 586             else:
 587                 print('+       ', end=' ')
 588                 utils.pprint(k, v)
 589
 590     def display_initscript_spec(self, initscript):
 591         print('+ ======== initscript', initscript['initscript_fields']['name'])
 592
 593     def display_key_spec(self, key):
 594         print('+ ======== key', key['key_name'])
 595
 596     def display_slice_spec(self, slice):
 597         print('+ ======== slice', slice['slice_fields']['name'])
 598         for k,v in slice.items():
 599             if k == 'nodenames':
 600                 if v:
 601                     print('+       nodes : ', end=' ')
 602                     for nodename in v:
 603                         print(nodename,'', end=' ')
 604                     print('')
 605             elif k == 'usernames':
 606                 if v:
 607                     print('+       users : ', end=' ')
 608                     for username in v:
 609                         print(username,'', end=' ')
 610                     print('')
 611             elif k == 'slice_fields':
 612                 print('+       fields',':', end=' ')
 613                 print('max_nodes=',v['max_nodes'], end=' ')
 614                 print('')
 615             else:
 616                 print('+       ',k,v)
 617
 618     def display_node_spec(self, node):
 619         print("+           node={} host_box={}".format(node['name'], node['host_box']), end=' ')
 620         print("hostname=", node['node_fields']['hostname'], end=' ')
 621         print("ip=", node['interface_fields']['ip'])
 622         if self.options.verbose:
 623             utils.pprint("node details", node, depth=3)
 624
 625     # another entry point for just showing the boxes involved
 626     def display_mapping(self):
 627         TestPlc.display_mapping_plc(self.plc_spec)
 628         return True
 629
 630     @staticmethod
 631     def display_mapping_plc(plc_spec):
 632         print('+ MyPLC',plc_spec['name'])
 633         # WARNING this would not be right for lxc-based PLC's - should be harmless though
 634         print('+\tvserver address = root@{}:/vservers/{}'.format(plc_spec['host_box'], plc_spec['vservername']))
 635         print('+\tIP = {}/{}'.format(plc_spec['settings']['PLC_API_HOST'], plc_spec['vserverip']))
 636         for site_spec in plc_spec['sites']:
 637             for node_spec in site_spec['nodes']:
 638                 TestPlc.display_mapping_node(node_spec)
 639
 640     @staticmethod
 641     def display_mapping_node(node_spec):
 642         print('+   NODE {}'.format(node_spec['name']))
 643         print('+\tqemu box {}'.format(node_spec['host_box']))
 644         print('+\thostname={}'.format(node_spec['node_fields']['hostname']))
 645
 646     # write a timestamp in /vservers/<>.timestamp
 647     # cannot be inside the vserver, that causes vserver .. build to cough
 648     def plcvm_timestamp(self):
 649         "Create a timestamp to remember creation date for this plc"
 650         now = int(time.time())
 651         # TODO-lxc check this one
 652         # a first approx. is to store the timestamp close to the VM root like vs does
 653         stamp_path = self.vm_timestamp_path()
 654         stamp_dir = os.path.dirname(stamp_path)
 655         utils.system(self.test_ssh.actual_command("mkdir -p {}".format(stamp_dir)))
 656         return utils.system(self.test_ssh.actual_command("echo {:d} > {}".format(now, stamp_path))) == 0
 657
 658     # this is called inconditionnally at the beginning of the test sequence
 659     # just in case this is a rerun, so if the vm is not running it's fine
 660     def plcvm_delete(self):
 661         "vserver delete the test myplc"
 662         stamp_path = self.vm_timestamp_path()
 663         self.run_in_host("rm -f {}".format(stamp_path))
 664         self.run_in_host("virsh -c lxc:// destroy {}".format(self.vservername))
 665         self.run_in_host("virsh -c lxc:// undefine {}".format(self.vservername))
 666         self.run_in_host("rm -fr /vservers/{}".format(self.vservername))
 667         return True
 668
 669     ### install
 670     # historically the build was being fetched by the tests
 671     # now the build pushes itself as a subdir of the tests workdir
 672     # so that the tests do not have to worry about extracting the build (svn, git, or whatever)
 673     def plcvm_create(self):
 674         "vserver creation (no install done)"
 675         # push the local build/ dir to the testplc box
 676         if self.is_local():
 677             # a full path for the local calls
 678             build_dir = os.path.dirname(sys.argv[0])
 679             # sometimes this is empty - set to "." in such a case
 680             if not build_dir:
 681                 build_dir="."
 682             build_dir += "/build"
 683         else:
 684             # use a standard name - will be relative to remote buildname
 685             build_dir = "build"
 686             # remove for safety; do *not* mkdir first, otherwise we end up with build/build/
 687             self.test_ssh.rmdir(build_dir)
 688             self.test_ssh.copy(build_dir, recursive=True)
 689         # the repo url is taken from arch-rpms-url
 690         # with the last step (i386) removed
 691         repo_url = self.options.arch_rpms_url
 692         for level in [ 'arch' ]:
 693             repo_url = os.path.dirname(repo_url)
 694
 695         # invoke initvm (drop support for vs)
 696         script = "lbuild-initvm.sh"
 697         script_options = ""
 698         # pass the vbuild-nightly options to [lv]test-initvm
 699         script_options += " -p {}".format(self.options.personality)
 700         script_options += " -d {}".format(self.options.pldistro)
 701         script_options += " -f {}".format(self.options.fcdistro)
 702         script_options += " -r {}".format(repo_url)
 703         vserver_name = self.vservername
 704         try:
 705             vserver_hostname = socket.gethostbyaddr(self.vserverip)[0]
 706             script_options += " -n {}".format(vserver_hostname)
 707         except:
 708             print("Cannot reverse lookup {}".format(self.vserverip))
 709             print("This is considered fatal, as this might pollute the test results")
 710             return False
 711         create_vserver="{build_dir}/{script} {script_options} {vserver_name}".format(**locals())
 712         return self.run_in_host(create_vserver) == 0
 713
 714     ### install_rpm
 715     def plc_install(self):
 716         "yum install myplc, noderepo, and the plain bootstrapfs"
 717
 718         # workaround for getting pgsql8.2 on centos5
 719         if self.options.fcdistro == "centos5":
 720             self.run_in_guest("rpm -Uvh http://download.fedora.redhat.com/pub/epel/5/i386/epel-release-5-3.noarch.rpm")
 721
 722         # compute nodefamily
 723         if self.options.personality == "linux32":
 724             arch = "i386"
 725         elif self.options.personality == "linux64":
 726             arch = "x86_64"
 727         else:
 728             raise Exception("Unsupported personality {}".format(self.options.personality))
 729         nodefamily = "{}-{}-{}".format(self.options.pldistro, self.options.fcdistro, arch)
 730
 731         pkgs_list=[]
 732         pkgs_list.append("slicerepo-{}".format(nodefamily))
 733         pkgs_list.append("myplc")
 734         pkgs_list.append("noderepo-{}".format(nodefamily))
 735         pkgs_list.append("nodeimage-{}-plain".format(nodefamily))
 736         pkgs_string=" ".join(pkgs_list)
 737         return self.yum_install(pkgs_list)
 738
 739     ###
 740     def mod_python(self):
 741         """yum install mod_python, useful on f18 and above so as to avoid broken wsgi"""
 742         return self.yum_install( ['mod_python'] )
 743
 744     ###
 745     def plc_configure(self):
 746         "run plc-config-tty"
 747         tmpname = '{}.plc-config-tty'.format(self.name())
 748         with open(tmpname,'w') as fileconf:
 749             for (var,value) in self.plc_spec['settings'].items():
 750                 fileconf.write('e {}\n{}\n'.format(var, value))
 751             fileconf.write('w\n')
 752             fileconf.write('q\n')
 753         utils.system('cat {}'.format(tmpname))
 754         self.run_in_guest_piped('cat {}'.format(tmpname), 'plc-config-tty')
 755         utils.system('rm {}'.format(tmpname))
 756         return True
 757
 758 # f14 is a bit odd in this respect, although this worked fine in guests up to f18
 759 # however using a vplc guest under f20 requires this trick
 760 # the symptom is this: service plc start
 761 # Starting plc (via systemctl):  Failed to get D-Bus connection: \
 762 #    Failed to connect to socket /org/freedesktop/systemd1/private: Connection refused
 763 # weird thing is the doc says f14 uses upstart by default and not systemd
 764 # so this sounds kind of harmless
 765     def start_service(self, service):
 766         return self.start_stop_service(service, 'start')
 767     def stop_service(self, service):
 768         return self.start_stop_service(service, 'stop')
 769
 770     def start_stop_service(self, service, start_or_stop):
 771         "utility to start/stop a service with the special trick for f14"
 772         if self.options.fcdistro != 'f14':
 773             return self.run_in_guest("service {} {}".format(service, start_or_stop)) == 0
 774         else:
 775             # patch /sbin/service so it does not reset environment
 776             self.run_in_guest('sed -i -e \\"s,env -i,env,\\" /sbin/service')
 777             # this is because our own scripts in turn call service
 778             return self.run_in_guest("SYSTEMCTL_SKIP_REDIRECT=true service {} {}"\
 779                                      .format(service, start_or_stop)) == 0
 780
 781     def plc_start(self):
 782         "service plc start"
 783         return self.start_service('plc')
 784
 785     def plc_stop(self):
 786         "service plc stop"
 787         return self.stop_service('plc')
 788
 789     def plcvm_start(self):
 790         "start the PLC vserver"
 791         self.start_guest()
 792         return True
 793
 794     def plcvm_stop(self):
 795         "stop the PLC vserver"
 796         self.stop_guest()
 797         return True
 798
 799     # stores the keys from the config for further use
 800     def keys_store(self):
 801         "stores test users ssh keys in keys/"
 802         for key_spec in self.plc_spec['keys']:
 803                 TestKey(self,key_spec).store_key()
 804         return True
 805
 806     def keys_clean(self):
 807         "removes keys cached in keys/"
 808         utils.system("rm -rf ./keys")
 809         return True
 810
 811     # fetches the ssh keys in the plc's /etc/planetlab and stores them in keys/
 812     # for later direct access to the nodes
 813     def keys_fetch(self):
 814         "gets ssh keys in /etc/planetlab/ and stores them locally in keys/"
 815         dir="./keys"
 816         if not os.path.isdir(dir):
 817             os.mkdir(dir)
 818         vservername = self.vservername
 819         vm_root = self.vm_root_in_host()
 820         overall = True
 821         prefix = 'debug_ssh_key'
 822         for ext in ['pub', 'rsa'] :
 823             src = "{vm_root}/etc/planetlab/{prefix}.{ext}".format(**locals())
 824             dst = "keys/{vservername}-debug.{ext}".format(**locals())
 825             if self.test_ssh.fetch(src, dst) != 0:
 826                 overall=False
 827         return overall
 828
 829     def sites(self):
 830         "create sites with PLCAPI"
 831         return self.do_sites()
 832
 833     def delete_sites(self):
 834         "delete sites with PLCAPI"
 835         return self.do_sites(action="delete")
 836
 837     def do_sites(self, action="add"):
 838         for site_spec in self.plc_spec['sites']:
 839             test_site = TestSite(self,site_spec)
 840             if (action != "add"):
 841                 utils.header("Deleting site {} in {}".format(test_site.name(), self.name()))
 842                 test_site.delete_site()
 843                 # deleted with the site
 844                 #test_site.delete_users()
 845                 continue
 846             else:
 847                 utils.header("Creating site {} & users in {}".format(test_site.name(), self.name()))
 848                 test_site.create_site()
 849                 test_site.create_users()
 850         return True
 851
 852     def delete_all_sites(self):
 853         "Delete all sites in PLC, and related objects"
 854         print('auth_root', self.auth_root())
 855         sites = self.apiserver.GetSites(self.auth_root(), {}, ['site_id','login_base'])
 856         for site in sites:
 857             # keep automatic site - otherwise we shoot in our own foot, root_auth is not valid anymore
 858             if site['login_base'] == self.plc_spec['settings']['PLC_SLICE_PREFIX']:
 859                 continue
 860             site_id = site['site_id']
 861             print('Deleting site_id', site_id)
 862             self.apiserver.DeleteSite(self.auth_root(), site_id)
 863         return True
 864
 865     def nodes(self):
 866         "create nodes with PLCAPI"
 867         return self.do_nodes()
 868     def delete_nodes(self):
 869         "delete nodes with PLCAPI"
 870         return self.do_nodes(action="delete")
 871
 872     def do_nodes(self, action="add"):
 873         for site_spec in self.plc_spec['sites']:
 874             test_site = TestSite(self, site_spec)
 875             if action != "add":
 876                 utils.header("Deleting nodes in site {}".format(test_site.name()))
 877                 for node_spec in site_spec['nodes']:
 878                     test_node = TestNode(self, test_site, node_spec)
 879                     utils.header("Deleting {}".format(test_node.name()))
 880                     test_node.delete_node()
 881             else:
 882                 utils.header("Creating nodes for site {} in {}".format(test_site.name(), self.name()))
 883                 for node_spec in site_spec['nodes']:
 884                     utils.pprint('Creating node {}'.format(node_spec), node_spec)
 885                     test_node = TestNode(self, test_site, node_spec)
 886                     test_node.create_node()
 887         return True
 888
 889     def nodegroups(self):
 890         "create nodegroups with PLCAPI"
 891         return self.do_nodegroups("add")
 892     def delete_nodegroups(self):
 893         "delete nodegroups with PLCAPI"
 894         return self.do_nodegroups("delete")
 895
 896     YEAR = 365*24*3600
 897     @staticmethod
 898     def translate_timestamp(start, grain, timestamp):
 899         if timestamp < TestPlc.YEAR:
 900             return start+timestamp*grain
 901         else:
 902             return timestamp
 903
 904     @staticmethod
 905     def timestamp_printable(timestamp):
 906         return time.strftime('%m-%d %H:%M:%S UTC', time.gmtime(timestamp))
 907
 908     def leases(self):
 909         "create leases (on reservable nodes only, use e.g. run -c default -c resa)"
 910         now = int(time.time())
 911         grain = self.apiserver.GetLeaseGranularity(self.auth_root())
 912         print('API answered grain=', grain)
 913         start = (now/grain)*grain
 914         start += grain
 915         # find out all nodes that are reservable
 916         nodes = self.all_reservable_nodenames()
 917         if not nodes:
 918             utils.header("No reservable node found - proceeding without leases")
 919             return True
 920         ok = True
 921         # attach them to the leases as specified in plc_specs
 922         # this is where the 'leases' field gets interpreted as relative of absolute
 923         for lease_spec in self.plc_spec['leases']:
 924             # skip the ones that come with a null slice id
 925             if not lease_spec['slice']:
 926                 continue
 927             lease_spec['t_from']  = TestPlc.translate_timestamp(start, grain, lease_spec['t_from'])
 928             lease_spec['t_until'] = TestPlc.translate_timestamp(start, grain, lease_spec['t_until'])
 929             lease_addition = self.apiserver.AddLeases(self.auth_root(), nodes, lease_spec['slice'],
 930                                                       lease_spec['t_from'],lease_spec['t_until'])
 931             if lease_addition['errors']:
 932                 utils.header("Cannot create leases, {}".format(lease_addition['errors']))
 933                 ok = False
 934             else:
 935                 utils.header('Leases on nodes {} for {} from {:d} ({}) until {:d} ({})'\
 936                              .format(nodes, lease_spec['slice'],
 937                                      lease_spec['t_from'],  TestPlc.timestamp_printable(lease_spec['t_from']),
 938                                      lease_spec['t_until'], TestPlc.timestamp_printable(lease_spec['t_until'])))
 939
 940         return ok
 941
 942     def delete_leases(self):
 943         "remove all leases in the myplc side"
 944         lease_ids = [ l['lease_id'] for l in self.apiserver.GetLeases(self.auth_root())]
 945         utils.header("Cleaning leases {}".format(lease_ids))
 946         self.apiserver.DeleteLeases(self.auth_root(), lease_ids)
 947         return True
 948
 949     def list_leases(self):
 950         "list all leases known to the myplc"
 951         leases = self.apiserver.GetLeases(self.auth_root())
 952         now = int(time.time())
 953         for l in leases:
 954             current = l['t_until'] >= now
 955             if self.options.verbose or current:
 956                 utils.header("{} {} from {} until {}"\
 957                              .format(l['hostname'], l['name'],
 958                                      TestPlc.timestamp_printable(l['t_from']),
 959                                      TestPlc.timestamp_printable(l['t_until'])))
 960         return True
 961
 962     # create nodegroups if needed, and populate
 963     def do_nodegroups(self, action="add"):
 964         # 1st pass to scan contents
 965         groups_dict = {}
 966         for site_spec in self.plc_spec['sites']:
 967             test_site = TestSite(self,site_spec)
 968             for node_spec in site_spec['nodes']:
 969                 test_node = TestNode(self, test_site, node_spec)
 970                 if 'nodegroups' in node_spec:
 971                     nodegroupnames = node_spec['nodegroups']
 972                     if isinstance(nodegroupnames, str):
 973                         nodegroupnames = [ nodegroupnames ]
 974                     for nodegroupname in nodegroupnames:
 975                         if nodegroupname not in groups_dict:
 976                             groups_dict[nodegroupname] = []
 977                         groups_dict[nodegroupname].append(test_node.name())
 978         auth = self.auth_root()
 979         overall = True
 980         for (nodegroupname,group_nodes) in groups_dict.items():
 981             if action == "add":
 982                 print('nodegroups:', 'dealing with nodegroup',\
 983                     nodegroupname, 'on nodes', group_nodes)
 984                 # first, check if the nodetagtype is here
 985                 tag_types = self.apiserver.GetTagTypes(auth, {'tagname':nodegroupname})
 986                 if tag_types:
 987                     tag_type_id = tag_types[0]['tag_type_id']
 988                 else:
 989                     tag_type_id = self.apiserver.AddTagType(auth,
 990                                                             {'tagname' : nodegroupname,
 991                                                              'description' : 'for nodegroup {}'.format(nodegroupname),
 992                                                              'category' : 'test'})
 993                 print('located tag (type)', nodegroupname, 'as', tag_type_id)
 994                 # create nodegroup
 995                 nodegroups = self.apiserver.GetNodeGroups(auth, {'groupname' : nodegroupname})
 996                 if not nodegroups:
 997                     self.apiserver.AddNodeGroup(auth, nodegroupname, tag_type_id, 'yes')
 998                     print('created nodegroup', nodegroupname, \
 999                         'from tagname', nodegroupname, 'and value', 'yes')
1000                 # set node tag on all nodes, value='yes'
1001                 for nodename in group_nodes:
1002                     try:
1003                         self.apiserver.AddNodeTag(auth, nodename, nodegroupname, "yes")
1004                     except:
1005                         traceback.print_exc()
1006                         print('node', nodename, 'seems to already have tag', nodegroupname)
1007                     # check anyway
1008                     try:
1009                         expect_yes = self.apiserver.GetNodeTags(auth,
1010                                                                 {'hostname' : nodename,
1011                                                                  'tagname'  : nodegroupname},
1012                                                                 ['value'])[0]['value']
1013                         if expect_yes != "yes":
1014                             print('Mismatch node tag on node',nodename,'got',expect_yes)
1015                             overall = False
1016                     except:
1017                         if not self.options.dry_run:
1018                             print('Cannot find tag', nodegroupname, 'on node', nodename)
1019                             overall = False
1020             else:
1021                 try:
1022                     print('cleaning nodegroup', nodegroupname)
1023                     self.apiserver.DeleteNodeGroup(auth, nodegroupname)
1024                 except:
1025                     traceback.print_exc()
1026                     overall = False
1027         return overall
1028
1029     # a list of TestNode objs
1030     def all_nodes(self):
1031         nodes=[]
1032         for site_spec in self.plc_spec['sites']:
1033             test_site = TestSite(self,site_spec)
1034             for node_spec in site_spec['nodes']:
1035                 nodes.append(TestNode(self, test_site, node_spec))
1036         return nodes
1037
1038     # return a list of tuples (nodename,qemuname)
1039     def all_node_infos(self) :
1040         node_infos = []
1041         for site_spec in self.plc_spec['sites']:
1042             node_infos += [ (node_spec['node_fields']['hostname'], node_spec['host_box']) \
1043                                 for node_spec in site_spec['nodes'] ]
1044         return node_infos
1045
1046     def all_nodenames(self):
1047         return [ x[0] for x in self.all_node_infos() ]
1048     def all_reservable_nodenames(self):
1049         res = []
1050         for site_spec in self.plc_spec['sites']:
1051             for node_spec in site_spec['nodes']:
1052                 node_fields = node_spec['node_fields']
1053                 if 'node_type' in node_fields and node_fields['node_type'] == 'reservable':
1054                     res.append(node_fields['hostname'])
1055         return res
1056
1057     # silent_minutes : during the first <silent_minutes> minutes nothing gets printed
1058     def nodes_check_boot_state(self, target_boot_state, timeout_minutes,
1059                                silent_minutes, period_seconds = 15):
1060         if self.options.dry_run:
1061             print('dry_run')
1062             return True
1063
1064         class CompleterTaskBootState(CompleterTask):
1065             def __init__(self, test_plc, hostname):
1066                 self.test_plc = test_plc
1067                 self.hostname = hostname
1068                 self.last_boot_state = 'undef'
1069             def actual_run(self):
1070                 try:
1071                     node = self.test_plc.apiserver.GetNodes(self.test_plc.auth_root(),
1072                                                             [ self.hostname ],
1073                                                             ['boot_state'])[0]
1074                     self.last_boot_state = node['boot_state']
1075                     return self.last_boot_state == target_boot_state
1076                 except:
1077                     return False
1078             def message(self):
1079                 return "CompleterTaskBootState with node {}".format(self.hostname)
1080             def failure_epilogue(self):
1081                 print("node {} in state {} - expected {}"\
1082                     .format(self.hostname, self.last_boot_state, target_boot_state))
1083
1084         timeout = timedelta(minutes=timeout_minutes)
1085         graceout = timedelta(minutes=silent_minutes)
1086         period   = timedelta(seconds=period_seconds)
1087         # the nodes that haven't checked yet - start with a full list and shrink over time
1088         utils.header("checking nodes boot state (expected {})".format(target_boot_state))
1089         tasks = [ CompleterTaskBootState(self,hostname) \
1090                       for (hostname,_) in self.all_node_infos() ]
1091         message = 'check_boot_state={}'.format(target_boot_state)
1092         return Completer(tasks, message=message).run(timeout, graceout, period)
1093
1094     def nodes_booted(self):
1095         return self.nodes_check_boot_state('boot', timeout_minutes=30, silent_minutes=28)
1096
1097     def probe_kvm_iptables(self):
1098         (_,kvmbox) = self.all_node_infos()[0]
1099         TestSsh(kvmbox).run("iptables-save")
1100         return True
1101
1102     # probing nodes
1103     def check_nodes_ping(self, timeout_seconds=30, period_seconds=10):
1104         class CompleterTaskPingNode(CompleterTask):
1105             def __init__(self, hostname):
1106                 self.hostname = hostname
1107             def run(self, silent):
1108                 command="ping -c 1 -w 1 {} >& /dev/null".format(self.hostname)
1109                 return utils.system(command, silent=silent) == 0
1110             def failure_epilogue(self):
1111                 print("Cannot ping node with name {}".format(self.hostname))
1112         timeout = timedelta(seconds = timeout_seconds)
1113         graceout = timeout
1114         period = timedelta(seconds = period_seconds)
1115         node_infos = self.all_node_infos()
1116         tasks = [ CompleterTaskPingNode(h) for (h,_) in node_infos ]
1117         return Completer(tasks, message='ping_node').run(timeout, graceout, period)
1118
1119     # ping node before we try to reach ssh, helpful for troubleshooting failing bootCDs
1120     def ping_node(self):
1121         "Ping nodes"
1122         return self.check_nodes_ping()
1123
1124     def check_nodes_ssh(self, debug, timeout_minutes, silent_minutes, period_seconds=15):
1125         # various delays
1126         timeout  = timedelta(minutes=timeout_minutes)
1127         graceout = timedelta(minutes=silent_minutes)
1128         period   = timedelta(seconds=period_seconds)
1129         vservername = self.vservername
1130         if debug:
1131             message = "debug"
1132             completer_message = 'ssh_node_debug'
1133             local_key = "keys/{vservername}-debug.rsa".format(**locals())
1134         else:
1135             message = "boot"
1136             completer_message = 'ssh_node_boot'
1137             local_key = "keys/key_admin.rsa"
1138         utils.header("checking ssh access to nodes (expected in {} mode)".format(message))
1139         node_infos = self.all_node_infos()
1140         tasks = [ CompleterTaskNodeSsh(nodename, qemuname, local_key,
1141                                         boot_state=message, dry_run=self.options.dry_run) \
1142                       for (nodename, qemuname) in node_infos ]
1143         return Completer(tasks, message=completer_message).run(timeout, graceout, period)
1144
1145     def ssh_node_debug(self):
1146         "Tries to ssh into nodes in debug mode with the debug ssh key"
1147         return self.check_nodes_ssh(debug = True,
1148                                     timeout_minutes = self.ssh_node_debug_timeout,
1149                                     silent_minutes = self.ssh_node_debug_silent)
1150
1151     def ssh_node_boot(self):
1152         "Tries to ssh into nodes in production mode with the root ssh key"
1153         return self.check_nodes_ssh(debug = False,
1154                                     timeout_minutes = self.ssh_node_boot_timeout,
1155                                     silent_minutes = self.ssh_node_boot_silent)
1156
1157     def node_bmlogs(self):
1158         "Checks that there's a non-empty dir. /var/log/bm/raw"
1159         return utils.system(self.actual_command_in_guest("ls /var/log/bm/raw")) == 0
1160
1161     @node_mapper
1162     def qemu_local_init(self): pass
1163     @node_mapper
1164     def bootcd(self): pass
1165     @node_mapper
1166     def qemu_local_config(self): pass
1167     @node_mapper
1168     def nodestate_reinstall(self): pass
1169     @node_mapper
1170     def nodestate_safeboot(self): pass
1171     @node_mapper
1172     def nodestate_boot(self): pass
1173     @node_mapper
1174     def nodestate_show(self): pass
1175     @node_mapper
1176     def qemu_export(self): pass
1177
1178     ### check hooks : invoke scripts from hooks/{node,slice}
1179     def check_hooks_node(self):
1180         return self.locate_first_node().check_hooks()
1181     def check_hooks_sliver(self) :
1182         return self.locate_first_sliver().check_hooks()
1183
1184     def check_hooks(self):
1185         "runs unit tests in the node and slice contexts - see hooks/{node,slice}"
1186         return self.check_hooks_node() and self.check_hooks_sliver()
1187
1188     ### initscripts
1189     def do_check_initscripts(self):
1190         class CompleterTaskInitscript(CompleterTask):
1191             def __init__(self, test_sliver, stamp):
1192                 self.test_sliver = test_sliver
1193                 self.stamp = stamp
1194             def actual_run(self):
1195                 return self.test_sliver.check_initscript_stamp(self.stamp)
1196             def message(self):
1197                 return "initscript checker for {}".format(self.test_sliver.name())
1198             def failure_epilogue(self):
1199                 print("initscript stamp {} not found in sliver {}"\
1200                     .format(self.stamp, self.test_sliver.name()))
1201
1202         tasks = []
1203         for slice_spec in self.plc_spec['slices']:
1204             if 'initscriptstamp' not in slice_spec:
1205                 continue
1206             stamp = slice_spec['initscriptstamp']
1207             slicename = slice_spec['slice_fields']['name']
1208             for nodename in slice_spec['nodenames']:
1209                 print('nodename', nodename, 'slicename', slicename, 'stamp', stamp)
1210                 site,node = self.locate_node(nodename)
1211                 # xxx - passing the wrong site - probably harmless
1212                 test_site = TestSite(self, site)
1213                 test_slice = TestSlice(self, test_site, slice_spec)
1214                 test_node = TestNode(self, test_site, node)
1215                 test_sliver = TestSliver(self, test_node, test_slice)
1216                 tasks.append(CompleterTaskInitscript(test_sliver, stamp))
1217         return Completer(tasks, message='check_initscripts').\
1218             run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10))
1219
1220     def check_initscripts(self):
1221         "check that the initscripts have triggered"
1222         return self.do_check_initscripts()
1223
1224     def initscripts(self):
1225         "create initscripts with PLCAPI"
1226         for initscript in self.plc_spec['initscripts']:
1227             utils.pprint('Adding Initscript in plc {}'.format(self.plc_spec['name']), initscript)
1228             self.apiserver.AddInitScript(self.auth_root(), initscript['initscript_fields'])
1229         return True
1230
1231     def delete_initscripts(self):
1232         "delete initscripts with PLCAPI"
1233         for initscript in self.plc_spec['initscripts']:
1234             initscript_name = initscript['initscript_fields']['name']
1235             print(('Attempting to delete {} in plc {}'.format(initscript_name, self.plc_spec['name'])))
1236             try:
1237                 self.apiserver.DeleteInitScript(self.auth_root(), initscript_name)
1238                 print(initscript_name, 'deleted')
1239             except:
1240                 print('deletion went wrong - probably did not exist')
1241         return True
1242
1243     ### manage slices
1244     def slices(self):
1245         "create slices with PLCAPI"
1246         return self.do_slices(action="add")
1247
1248     def delete_slices(self):
1249         "delete slices with PLCAPI"
1250         return self.do_slices(action="delete")
1251
1252     def fill_slices(self):
1253         "add nodes in slices with PLCAPI"
1254         return self.do_slices(action="fill")
1255
1256     def empty_slices(self):
1257         "remove nodes from slices with PLCAPI"
1258         return self.do_slices(action="empty")
1259
1260     def do_slices(self,  action="add"):
1261         for slice in self.plc_spec['slices']:
1262             site_spec = self.locate_site(slice['sitename'])
1263             test_site = TestSite(self,site_spec)
1264             test_slice=TestSlice(self,test_site,slice)
1265             if action == "delete":
1266                 test_slice.delete_slice()
1267             elif action == "fill":
1268                 test_slice.add_nodes()
1269             elif action == "empty":
1270                 test_slice.delete_nodes()
1271             else:
1272                 test_slice.create_slice()
1273         return True
1274
1275     @slice_mapper__tasks(20, 10, 15)
1276     def ssh_slice(self): pass
1277     @slice_mapper__tasks(20, 19, 15)
1278     def ssh_slice_off(self): pass
1279     @slice_mapper__tasks(1, 1, 15)
1280     def slice_fs_present(self): pass
1281     @slice_mapper__tasks(1, 1, 15)
1282     def slice_fs_deleted(self): pass
1283
1284     # use another name so we can exclude/ignore it from the tests on the nightly command line
1285     def ssh_slice_again(self): return self.ssh_slice()
1286     # note that simply doing ssh_slice_again=ssh_slice would kind of work too
1287     # but for some reason the ignore-wrapping thing would not
1288
1289     @slice_mapper
1290     def ssh_slice_basics(self): pass
1291     @slice_mapper
1292     def check_vsys_defaults(self): pass
1293
1294     @node_mapper
1295     def keys_clear_known_hosts(self): pass
1296
1297     def plcapi_urls(self):
1298         return PlcapiUrlScanner(self.auth_root(), ip=self.vserverip).scan()
1299
1300     def speed_up_slices(self):
1301         "tweak nodemanager cycle (wait time) to 30+/-10 s"
1302         return self._speed_up_slices (30, 10)
1303     def super_speed_up_slices(self):
1304         "dev mode: tweak nodemanager cycle (wait time) to 5+/-1 s"
1305         return self._speed_up_slices(5, 1)
1306
1307     def _speed_up_slices(self, p, r):
1308         # create the template on the server-side
1309         template = "{}.nodemanager".format(self.name())
1310         with open(template,"w") as template_file:
1311             template_file.write('OPTIONS="-p {} -r {} -d"\n'.format(p, r))
1312         in_vm = "/var/www/html/PlanetLabConf/nodemanager"
1313         remote = "{}/{}".format(self.vm_root_in_host(), in_vm)
1314         self.test_ssh.copy_abs(template, remote)
1315         # Add a conf file
1316         if not self.apiserver.GetConfFiles(self.auth_root(),
1317                                            {'dest' : '/etc/sysconfig/nodemanager'}):
1318             self.apiserver.AddConfFile(self.auth_root(),
1319                                         {'dest' : '/etc/sysconfig/nodemanager',
1320                                          'source' : 'PlanetLabConf/nodemanager',
1321                                          'postinstall_cmd' : 'service nm restart',})
1322         return True
1323
1324     def debug_nodemanager(self):
1325         "sets verbose mode for nodemanager, and speeds up cycle even more (needs speed_up_slices first)"
1326         template = "{}.nodemanager".format(self.name())
1327         with open(template,"w") as template_file:
1328             template_file.write('OPTIONS="-p 10 -r 6 -v -d"\n')
1329         in_vm = "/var/www/html/PlanetLabConf/nodemanager"
1330         remote = "{}/{}".format(self.vm_root_in_host(), in_vm)
1331         self.test_ssh.copy_abs(template, remote)
1332         return True
1333
1334     @node_mapper
1335     def qemu_start(self) : pass
1336
1337     @node_mapper
1338     def qemu_timestamp(self) : pass
1339
1340     # when a spec refers to a node possibly on another plc
1341     def locate_sliver_obj_cross(self, nodename, slicename, other_plcs):
1342         for plc in [ self ] + other_plcs:
1343             try:
1344                 return plc.locate_sliver_obj(nodename, slicename)
1345             except:
1346                 pass
1347         raise Exception("Cannot locate sliver {}@{} among all PLCs".format(nodename, slicename))
1348
1349     # implement this one as a cross step so that we can take advantage of different nodes
1350     # in multi-plcs mode
1351     def cross_check_tcp(self, other_plcs):
1352         "check TCP connectivity between 2 slices (or in loopback if only one is defined)"
1353         if 'tcp_specs' not in self.plc_spec or not self.plc_spec['tcp_specs']:
1354             utils.header("check_tcp: no/empty config found")
1355             return True
1356         specs = self.plc_spec['tcp_specs']
1357         overall = True
1358
1359         # first wait for the network to be up and ready from the slices
1360         class CompleterTaskNetworkReadyInSliver(CompleterTask):
1361             def __init__(self, test_sliver):
1362                 self.test_sliver = test_sliver
1363             def actual_run(self):
1364                 return self.test_sliver.check_tcp_ready(port = 9999)
1365             def message(self):
1366                 return "network ready checker for {}".format(self.test_sliver.name())
1367             def failure_epilogue(self):
1368                 print("could not bind port from sliver {}".format(self.test_sliver.name()))
1369
1370         sliver_specs = {}
1371         tasks = []
1372         managed_sliver_names = set()
1373         for spec in specs:
1374             # locate the TestSliver instances involved, and cache them in the spec instance
1375             spec['s_sliver'] = self.locate_sliver_obj_cross(spec['server_node'], spec['server_slice'], other_plcs)
1376             spec['c_sliver'] = self.locate_sliver_obj_cross(spec['client_node'], spec['client_slice'], other_plcs)
1377             message = "Will check TCP between s={} and c={}"\
1378                       .format(spec['s_sliver'].name(), spec['c_sliver'].name())
1379             if 'client_connect' in spec:
1380                 message += " (using {})".format(spec['client_connect'])
1381             utils.header(message)
1382             # we need to check network presence in both slivers, but also
1383             # avoid to insert a sliver several times
1384             for sliver in [ spec['s_sliver'], spec['c_sliver'] ]:
1385                 if sliver.name() not in managed_sliver_names:
1386                     tasks.append(CompleterTaskNetworkReadyInSliver(sliver))
1387                     # add this sliver's name in the set
1388                     managed_sliver_names .update( {sliver.name()} )
1389
1390         # wait for the netork to be OK in all server sides
1391         if not Completer(tasks, message='check for network readiness in slivers').\
1392            run(timedelta(seconds=30), timedelta(seconds=24), period=timedelta(seconds=5)):
1393             return False
1394
1395         # run server and client
1396         for spec in specs:
1397             port = spec['port']
1398             # server side
1399             # the issue here is that we have the server run in background
1400             # and so we have no clue if it took off properly or not
1401             # looks like in some cases it does not
1402             if not spec['s_sliver'].run_tcp_server(port, timeout=20):
1403                 overall = False
1404                 break
1405
1406             # idem for the client side
1407             # use nodename from located sliver, unless 'client_connect' is set
1408             if 'client_connect' in spec:
1409                 destination = spec['client_connect']
1410             else:
1411                 destination = spec['s_sliver'].test_node.name()
1412             if not spec['c_sliver'].run_tcp_client(destination, port):
1413                 overall = False
1414         return overall
1415
1416     # painfully enough, we need to allow for some time as netflow might show up last
1417     def check_system_slice(self):
1418         "all nodes: check that a system slice is alive"
1419         # netflow currently not working in the lxc distro
1420         # drl not built at all in the wtx distro
1421         # if we find either of them we're happy
1422         return self.check_netflow() or self.check_drl()
1423
1424     # expose these
1425     def check_netflow(self): return self._check_system_slice('netflow')
1426     def check_drl(self): return self._check_system_slice('drl')
1427
1428     # we have the slices up already here, so it should not take too long
1429     def _check_system_slice(self, slicename, timeout_minutes=5, period_seconds=15):
1430         class CompleterTaskSystemSlice(CompleterTask):
1431             def __init__(self, test_node, dry_run):
1432                 self.test_node = test_node
1433                 self.dry_run = dry_run
1434             def actual_run(self):
1435                 return self.test_node._check_system_slice(slicename, dry_run=self.dry_run)
1436             def message(self):
1437                 return "System slice {} @ {}".format(slicename, self.test_node.name())
1438             def failure_epilogue(self):
1439                 print("COULD not find system slice {} @ {}".format(slicename, self.test_node.name()))
1440         timeout = timedelta(minutes=timeout_minutes)
1441         silent  = timedelta(0)
1442         period  = timedelta(seconds=period_seconds)
1443         tasks = [ CompleterTaskSystemSlice(test_node, self.options.dry_run) \
1444                       for test_node in self.all_nodes() ]
1445         return Completer(tasks, message='_check_system_slice').run(timeout, silent, period)
1446
1447     def plcsh_stress_test(self):
1448         "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents"
1449         # install the stress-test in the plc image
1450         location = "/usr/share/plc_api/plcsh_stress_test.py"
1451         remote = "{}/{}".format(self.vm_root_in_host(), location)
1452         self.test_ssh.copy_abs("plcsh_stress_test.py", remote)
1453         command = location
1454         command += " -- --check"
1455         if self.options.size == 1:
1456             command +=  " --tiny"
1457         return self.run_in_guest(command) == 0
1458
1459     # populate runs the same utility without slightly different options
1460     # in particular runs with --preserve (dont cleanup) and without --check
1461     # also it gets run twice, once with the --foreign option for creating fake foreign entries
1462
1463     def sfa_install_all(self):
1464         "yum install sfa sfa-plc sfa-sfatables sfa-client"
1465         return self.yum_install("sfa sfa-plc sfa-sfatables sfa-client")
1466
1467     def sfa_install_core(self):
1468         "yum install sfa"
1469         return self.yum_install("sfa")
1470
1471     def sfa_install_plc(self):
1472         "yum install sfa-plc"
1473         return self.yum_install("sfa-plc")
1474
1475     def sfa_install_sfatables(self):
1476         "yum install sfa-sfatables"
1477         return self.yum_install("sfa-sfatables")
1478
1479     # for some very odd reason, this sometimes fails with the following symptom
1480     # # yum install sfa-client
1481     # Setting up Install Process
1482     # ...
1483     # Downloading Packages:
1484     # Running rpm_check_debug
1485     # Running Transaction Test
1486     # Transaction Test Succeeded
1487     # Running Transaction
1488     # Transaction couldn't start:
1489     # installing package sfa-client-2.1-7.onelab.2012.05.23.i686 needs 68KB on the / filesystem
1490     # [('installing package sfa-client-2.1-7.onelab.2012.05.23.i686 needs 68KB on the / filesystem', (9, '/', 69632L))]
1491     # even though in the same context I have
1492     # [2012.05.23--f14-32-sfastd1-1-vplc07] / # df -h
1493     # Filesystem            Size  Used Avail Use% Mounted on
1494     # /dev/hdv1             806G  264G  501G  35% /
1495     # none                   16M   36K   16M   1% /tmp
1496     #
1497     # so as a workaround, we first try yum install, and then invoke rpm on the cached rpm...
1498     def sfa_install_client(self):
1499         "yum install sfa-client"
1500         first_try = self.yum_install("sfa-client")
1501         if first_try:
1502             return True
1503         utils.header("********** Regular yum failed - special workaround in place, 2nd chance")
1504         code, cached_rpm_path = \
1505                 utils.output_of(self.actual_command_in_guest('find /var/cache/yum -name sfa-client\*.rpm'))
1506         utils.header("rpm_path=<<{}>>".format(rpm_path))
1507         # just for checking
1508         self.run_in_guest("rpm -i {}".format(cached_rpm_path))
1509         return self.yum_check_installed("sfa-client")
1510
1511     def sfa_dbclean(self):
1512         "thoroughly wipes off the SFA database"
1513         return self.run_in_guest("sfaadmin reg nuke") == 0 or \
1514             self.run_in_guest("sfa-nuke.py") == 0 or \
1515             self.run_in_guest("sfa-nuke-plc.py") == 0 or \
1516             self.run_in_guest("sfaadmin registry nuke") == 0
1517
1518     def sfa_fsclean(self):
1519         "cleanup /etc/sfa/trusted_roots and /var/lib/sfa"
1520         self.run_in_guest("rm -rf /etc/sfa/trusted_roots /var/lib/sfa/authorities")
1521         return True
1522
1523     def sfa_plcclean(self):
1524         "cleans the PLC entries that were created as a side effect of running the script"
1525         # ignore result
1526         sfa_spec = self.plc_spec['sfa']
1527
1528         for auth_sfa_spec in sfa_spec['auth_sfa_specs']:
1529             login_base = auth_sfa_spec['login_base']
1530             try:
1531                 self.apiserver.DeleteSite(self.auth_root(),login_base)
1532             except:
1533                 print("Site {} already absent from PLC db".format(login_base))
1534
1535             for spec_name in ['pi_spec','user_spec']:
1536                 user_spec = auth_sfa_spec[spec_name]
1537                 username = user_spec['email']
1538                 try:
1539                     self.apiserver.DeletePerson(self.auth_root(),username)
1540                 except:
1541                     # this in fact is expected as sites delete their members
1542                     #print "User {} already absent from PLC db".format(username)
1543                     pass
1544
1545         print("REMEMBER TO RUN sfa_import AGAIN")
1546         return True
1547
1548     def sfa_uninstall(self):
1549         "uses rpm to uninstall sfa - ignore result"
1550         self.run_in_guest("rpm -e sfa sfa-sfatables sfa-client sfa-plc")
1551         self.run_in_guest("rm -rf /var/lib/sfa")
1552         self.run_in_guest("rm -rf /etc/sfa")
1553         self.run_in_guest("rm -rf /var/log/sfa_access.log /var/log/sfa_import_plc.log /var/log/sfa.daemon")
1554         # xxx tmp
1555         self.run_in_guest("rpm -e --noscripts sfa-plc")
1556         return True
1557
1558     ### run unit tests for SFA
1559     # NOTE: for some reason on f14/i386, yum install sfa-tests fails for no reason
1560     # Running Transaction
1561     # Transaction couldn't start:
1562     # installing package sfa-tests-1.0-21.onelab.i686 needs 204KB on the / filesystem
1563     # [('installing package sfa-tests-1.0-21.onelab.i686 needs 204KB on the / filesystem', (9, '/', 208896L))]
1564     # no matter how many Gbs are available on the testplc
1565     # could not figure out what's wrong, so...
1566     # if the yum install phase fails, consider the test is successful
1567     # other combinations will eventually run it hopefully
1568     def sfa_utest(self):
1569         "yum install sfa-tests and run SFA unittests"
1570         self.run_in_guest("yum -y install sfa-tests")
1571         # failed to install - forget it
1572         if self.run_in_guest("rpm -q sfa-tests") != 0:
1573             utils.header("WARNING: SFA unit tests failed to install, ignoring")
1574             return True
1575         return self.run_in_guest("/usr/share/sfa/tests/testAll.py") == 0
1576
1577     ###
1578     def confdir(self):
1579         dirname = "conf.{}".format(self.plc_spec['name'])
1580         if not os.path.isdir(dirname):
1581             utils.system("mkdir -p {}".format(dirname))
1582         if not os.path.isdir(dirname):
1583             raise Exception("Cannot create config dir for plc {}".format(self.name()))
1584         return dirname
1585
1586     def conffile(self, filename):
1587         return "{}/{}".format(self.confdir(), filename)
1588     def confsubdir(self, dirname, clean, dry_run=False):
1589         subdirname = "{}/{}".format(self.confdir(), dirname)
1590         if clean:
1591             utils.system("rm -rf {}".format(subdirname))
1592         if not os.path.isdir(subdirname):
1593             utils.system("mkdir -p {}".format(subdirname))
1594         if not dry_run and not os.path.isdir(subdirname):
1595             raise "Cannot create config subdir {} for plc {}".format(dirname, self.name())
1596         return subdirname
1597
1598     def conffile_clean(self, filename):
1599         filename=self.conffile(filename)
1600         return utils.system("rm -rf {}".format(filename))==0
1601
1602     ###
1603     def sfa_configure(self):
1604         "run sfa-config-tty"
1605         tmpname = self.conffile("sfa-config-tty")
1606         with open(tmpname,'w') as fileconf:
1607             for (var,value) in self.plc_spec['sfa']['settings'].items():
1608                 fileconf.write('e {}\n{}\n'.format(var, value))
1609             fileconf.write('w\n')
1610             fileconf.write('R\n')
1611             fileconf.write('q\n')
1612         utils.system('cat {}'.format(tmpname))
1613         self.run_in_guest_piped('cat {}'.format(tmpname), 'sfa-config-tty')
1614         return True
1615
1616     def aggregate_xml_line(self):
1617         port = self.plc_spec['sfa']['neighbours-port']
1618         return '<aggregate addr="{}" hrn="{}" port="{}"/>'\
1619             .format(self.vserverip, self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH'], port)
1620
1621     def registry_xml_line(self):
1622         return '<registry addr="{}" hrn="{}" port="12345"/>'\
1623             .format(self.vserverip, self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH'])
1624
1625
1626     # a cross step that takes all other plcs in argument
1627     def cross_sfa_configure(self, other_plcs):
1628         "writes aggregates.xml and registries.xml that point to all other PLCs in the test"
1629         # of course with a single plc, other_plcs is an empty list
1630         if not other_plcs:
1631             return True
1632         agg_fname = self.conffile("agg.xml")
1633         with open(agg_fname,"w") as out:
1634             out.write("<aggregates>{}</aggregates>\n"\
1635                       .format(" ".join([ plc.aggregate_xml_line() for plc in other_plcs ])))
1636         utils.header("(Over)wrote {}".format(agg_fname))
1637         reg_fname=self.conffile("reg.xml")
1638         with open(reg_fname,"w") as out:
1639             out.write("<registries>{}</registries>\n"\
1640                       .format(" ".join([ plc.registry_xml_line() for plc in other_plcs ])))
1641         utils.header("(Over)wrote {}".format(reg_fname))
1642         return self.test_ssh.copy_abs(agg_fname,
1643                                       '/{}/etc/sfa/aggregates.xml'.format(self.vm_root_in_host())) == 0 \
1644            and self.test_ssh.copy_abs(reg_fname,
1645                                       '/{}/etc/sfa/registries.xml'.format(self.vm_root_in_host())) == 0
1646
1647     def sfa_import(self):
1648         "use sfaadmin to import from plc"
1649         auth = self.plc_spec['sfa']['settings']['SFA_REGISTRY_ROOT_AUTH']
1650         return self.run_in_guest('sfaadmin reg import_registry') == 0
1651
1652     def sfa_start(self):
1653         "service sfa start"
1654         return self.start_service('sfa')
1655
1656
1657     def sfi_configure(self):
1658         "Create /root/sfi on the plc side for sfi client configuration"
1659         if self.options.dry_run:
1660             utils.header("DRY RUN - skipping step")
1661             return True
1662         sfa_spec = self.plc_spec['sfa']
1663         # cannot use auth_sfa_mapper to pass dir_name
1664         for slice_spec in self.plc_spec['sfa']['auth_sfa_specs']:
1665             test_slice = TestAuthSfa(self, slice_spec)
1666             dir_basename = os.path.basename(test_slice.sfi_path())
1667             dir_name = self.confsubdir("dot-sfi/{}".format(dir_basename),
1668                                        clean=True, dry_run=self.options.dry_run)
1669             test_slice.sfi_configure(dir_name)
1670             # push into the remote /root/sfi area
1671             location = test_slice.sfi_path()
1672             remote = "{}/{}".format(self.vm_root_in_host(), location)
1673             self.test_ssh.mkdir(remote, abs=True)
1674             # need to strip last level or remote otherwise we get an extra dir level
1675             self.test_ssh.copy_abs(dir_name, os.path.dirname(remote), recursive=True)
1676
1677         return True
1678
1679     def sfi_clean(self):
1680         "clean up /root/sfi on the plc side"
1681         self.run_in_guest("rm -rf /root/sfi")
1682         return True
1683
1684     def sfa_rspec_empty(self):
1685         "expose a static empty rspec (ships with the tests module) in the sfi directory"
1686         filename = "empty-rspec.xml"
1687         overall = True
1688         for slice_spec in self.plc_spec['sfa']['auth_sfa_specs']:
1689             test_slice = TestAuthSfa(self, slice_spec)
1690             in_vm = test_slice.sfi_path()
1691             remote = "{}/{}".format(self.vm_root_in_host(), in_vm)
1692             if self.test_ssh.copy_abs(filename, remote) !=0:
1693                 overall = False
1694         return overall
1695
1696     @auth_sfa_mapper
1697     def sfa_register_site(self): pass
1698     @auth_sfa_mapper
1699     def sfa_register_pi(self): pass
1700     @auth_sfa_mapper
1701     def sfa_register_user(self): pass
1702     @auth_sfa_mapper
1703     def sfa_update_user(self): pass
1704     @auth_sfa_mapper
1705     def sfa_register_slice(self): pass
1706     @auth_sfa_mapper
1707     def sfa_renew_slice(self): pass
1708     @auth_sfa_mapper
1709     def sfa_get_expires(self): pass
1710     @auth_sfa_mapper
1711     def sfa_discover(self): pass
1712     @auth_sfa_mapper
1713     def sfa_rspec(self): pass
1714     @auth_sfa_mapper
1715     def sfa_allocate(self): pass
1716     @auth_sfa_mapper
1717     def sfa_allocate_empty(self): pass
1718     @auth_sfa_mapper
1719     def sfa_provision(self): pass
1720     @auth_sfa_mapper
1721     def sfa_provision_empty(self): pass
1722     @auth_sfa_mapper
1723     def sfa_check_slice_plc(self): pass
1724     @auth_sfa_mapper
1725     def sfa_check_slice_plc_empty(self): pass
1726     @auth_sfa_mapper
1727     def sfa_update_slice(self): pass
1728     @auth_sfa_mapper
1729     def sfa_remove_user_from_slice(self): pass
1730     @auth_sfa_mapper
1731     def sfa_insert_user_in_slice(self): pass
1732     @auth_sfa_mapper
1733     def sfi_list(self): pass
1734     @auth_sfa_mapper
1735     def sfi_show_site(self): pass
1736     @auth_sfa_mapper
1737     def sfi_show_slice(self): pass
1738     @auth_sfa_mapper
1739     def sfi_show_slice_researchers(self): pass
1740     @auth_sfa_mapper
1741     def ssh_slice_sfa(self): pass
1742     @auth_sfa_mapper
1743     def sfa_delete_user(self): pass
1744     @auth_sfa_mapper
1745     def sfa_delete_slice(self): pass
1746
1747     def sfa_stop(self):
1748         "service sfa stop"
1749         return self.stop_service('sfa')
1750
1751     def populate(self):
1752         "creates random entries in the PLCAPI"
1753         # install the stress-test in the plc image
1754         location = "/usr/share/plc_api/plcsh_stress_test.py"
1755         remote = "{}/{}".format(self.vm_root_in_host(), location)
1756         self.test_ssh.copy_abs("plcsh_stress_test.py", remote)
1757         command = location
1758         command += " -- --preserve --short-names"
1759         local = (self.run_in_guest(command) == 0);
1760         # second run with --foreign
1761         command += ' --foreign'
1762         remote = (self.run_in_guest(command) == 0);
1763         return local and remote
1764
1765
1766     ####################
1767     @bonding_redirector
1768     def bonding_init_partial(self): pass
1769
1770     @bonding_redirector
1771     def bonding_add_yum(self): pass
1772
1773     @bonding_redirector
1774     def bonding_install_rpms(self): pass
1775
1776     ####################
1777
1778     def gather_logs(self):
1779         "gets all possible logs from plc's/qemu node's/slice's for future reference"
1780         # (1.a) get the plc's /var/log/ and store it locally in logs/myplc.var-log.<plcname>/*
1781         # (1.b) get the plc's  /var/lib/pgsql/data/pg_log/ -> logs/myplc.pgsql-log.<plcname>/*
1782         # (1.c) get the plc's /root/sfi -> logs/sfi.<plcname>/
1783         # (2) get all the nodes qemu log and store it as logs/node.qemu.<node>.log
1784         # (3) get the nodes /var/log and store is as logs/node.var-log.<node>/*
1785         # (4) as far as possible get the slice's /var/log as logs/sliver.var-log.<sliver>/*
1786         # (1.a)
1787         print("-------------------- TestPlc.gather_logs : PLC's /var/log")
1788         self.gather_var_logs()
1789         # (1.b)
1790         print("-------------------- TestPlc.gather_logs : PLC's /var/lib/psql/data/pg_log/")
1791         self.gather_pgsql_logs()
1792         # (1.c)
1793         print("-------------------- TestPlc.gather_logs : PLC's /root/sfi/")
1794         self.gather_root_sfi()
1795         # (2)
1796         print("-------------------- TestPlc.gather_logs : nodes's QEMU logs")
1797         for site_spec in self.plc_spec['sites']:
1798             test_site = TestSite(self,site_spec)
1799             for node_spec in site_spec['nodes']:
1800                 test_node = TestNode(self, test_site, node_spec)
1801                 test_node.gather_qemu_logs()
1802         # (3)
1803         print("-------------------- TestPlc.gather_logs : nodes's /var/log")
1804         self.gather_nodes_var_logs()
1805         # (4)
1806         print("-------------------- TestPlc.gather_logs : sample sliver's /var/log")
1807         self.gather_slivers_var_logs()
1808         return True
1809
1810     def gather_slivers_var_logs(self):
1811         for test_sliver in self.all_sliver_objs():
1812             remote = test_sliver.tar_var_logs()
1813             utils.system("mkdir -p logs/sliver.var-log.{}".format(test_sliver.name()))
1814             command = remote + " | tar -C logs/sliver.var-log.{} -xf -".format(test_sliver.name())
1815             utils.system(command)
1816         return True
1817
1818     def gather_var_logs(self):
1819         utils.system("mkdir -p logs/myplc.var-log.{}".format(self.name()))
1820         to_plc = self.actual_command_in_guest("tar -C /var/log/ -cf - .")
1821         command = to_plc + "| tar -C logs/myplc.var-log.{} -xf -".format(self.name())
1822         utils.system(command)
1823         command = "chmod a+r,a+x logs/myplc.var-log.{}/httpd".format(self.name())
1824         utils.system(command)
1825
1826     def gather_pgsql_logs(self):
1827         utils.system("mkdir -p logs/myplc.pgsql-log.{}".format(self.name()))
1828         to_plc = self.actual_command_in_guest("tar -C /var/lib/pgsql/data/pg_log/ -cf - .")
1829         command = to_plc + "| tar -C logs/myplc.pgsql-log.{} -xf -".format(self.name())
1830         utils.system(command)
1831
1832     def gather_root_sfi(self):
1833         utils.system("mkdir -p logs/sfi.{}".format(self.name()))
1834         to_plc = self.actual_command_in_guest("tar -C /root/sfi/ -cf - .")
1835         command = to_plc + "| tar -C logs/sfi.{} -xf -".format(self.name())
1836         utils.system(command)
1837
1838     def gather_nodes_var_logs(self):
1839         for site_spec in self.plc_spec['sites']:
1840             test_site = TestSite(self, site_spec)
1841             for node_spec in site_spec['nodes']:
1842                 test_node = TestNode(self, test_site, node_spec)
1843                 test_ssh = TestSsh(test_node.name(), key="keys/key_admin.rsa")
1844                 command = test_ssh.actual_command("tar -C /var/log -cf - .")
1845                 command = command + "| tar -C logs/node.var-log.{} -xf -".format(test_node.name())
1846                 utils.system("mkdir -p logs/node.var-log.{}".format(test_node.name()))
1847                 utils.system(command)
1848
1849
1850     # returns the filename to use for sql dump/restore, using options.dbname if set
1851     def dbfile(self, database):
1852         # uses options.dbname if it is found
1853         try:
1854             name = self.options.dbname
1855             if not isinstance(name, str):
1856                 raise Exception
1857         except:
1858             t = datetime.now()
1859             d = t.date()
1860             name = str(d)
1861         return "/root/{}-{}.sql".format(database, name)
1862
1863     def plc_db_dump(self):
1864         'dump the planetlab5 DB in /root in the PLC - filename has time'
1865         dump=self.dbfile("planetab5")
1866         self.run_in_guest('pg_dump -U pgsqluser planetlab5 -f '+ dump)
1867         utils.header('Dumped planetlab5 database in {}'.format(dump))
1868         return True
1869
1870     def plc_db_restore(self):
1871         'restore the planetlab5 DB - looks broken, but run -n might help'
1872         dump = self.dbfile("planetab5")
1873         ##stop httpd service
1874         self.run_in_guest('service httpd stop')
1875         # xxx - need another wrapper
1876         self.run_in_guest_piped('echo drop database planetlab5', 'psql --user=pgsqluser template1')
1877         self.run_in_guest('createdb -U postgres --encoding=UNICODE --owner=pgsqluser planetlab5')
1878         self.run_in_guest('psql -U pgsqluser planetlab5 -f ' + dump)
1879         ##starting httpd service
1880         self.run_in_guest('service httpd start')
1881
1882         utils.header('Database restored from ' + dump)
1883
1884     @staticmethod
1885     def create_ignore_steps():
1886         for step in TestPlc.default_steps + TestPlc.other_steps:
1887             # default step can have a plc qualifier
1888             if '@' in step:
1889                 step, qualifier = step.split('@')
1890             # or be defined as forced or ignored by default
1891             for keyword in ['_ignore','_force']:
1892                 if step.endswith(keyword):
1893                     step=step.replace(keyword,'')
1894             if step == SEP or step == SEPSFA :
1895                 continue
1896             method = getattr(TestPlc,step)
1897             name = step + '_ignore'
1898             wrapped = ignore_result(method)
1899 #            wrapped.__doc__ = method.__doc__ + " (run in ignore-result mode)"
1900             setattr(TestPlc, name, wrapped)
1901
1902 #    @ignore_result
1903 #    def ssh_slice_again_ignore (self): pass
1904 #    @ignore_result
1905 #    def check_initscripts_ignore (self): pass
1906
1907     def standby_1_through_20(self):
1908         """convenience function to wait for a specified number of minutes"""
1909         pass
1910     @standby_generic
1911     def standby_1(): pass
1912     @standby_generic
1913     def standby_2(): pass
1914     @standby_generic
1915     def standby_3(): pass
1916     @standby_generic
1917     def standby_4(): pass
1918     @standby_generic
1919     def standby_5(): pass
1920     @standby_generic
1921     def standby_6(): pass
1922     @standby_generic
1923     def standby_7(): pass
1924     @standby_generic
1925     def standby_8(): pass
1926     @standby_generic
1927     def standby_9(): pass
1928     @standby_generic
1929     def standby_10(): pass
1930     @standby_generic
1931     def standby_11(): pass
1932     @standby_generic
1933     def standby_12(): pass
1934     @standby_generic
1935     def standby_13(): pass
1936     @standby_generic
1937     def standby_14(): pass
1938     @standby_generic
1939     def standby_15(): pass
1940     @standby_generic
1941     def standby_16(): pass
1942     @standby_generic
1943     def standby_17(): pass
1944     @standby_generic
1945     def standby_18(): pass
1946     @standby_generic
1947     def standby_19(): pass
1948     @standby_generic
1949     def standby_20(): pass
1950
1951     # convenience for debugging the test logic
1952     def yes(self): return True
1953     def no(self): return False
1954     def fail(self): return False