the kernel flag systemd.log_level=debug is no longer set by default; we add the...

[tests.git] / system / TestPlc.py
diff --git a/system/TestPlc.py b/system/TestPlc.py

index 0fc2c59..6b910a7 100644 (file)
--- a/system/TestPlc.py
+++ b/system/TestPlc.py
@@ -107,7 +107,7 @@ class slice_mapper__tasks (object):
                  test_site = TestSite(self,site_spec)
                  test_slice=TestSlice(self,test_site,slice_spec)
                  tasks += slice_method (test_slice, self.options)
-            return Completer (tasks).run (decorator_self.timeout, decorator_self.silent, decorator_self.period)
+            return Completer (tasks, message=method.__name__).run (decorator_self.timeout, decorator_self.silent, decorator_self.period)
          # restore the doc text from the TestSlice method even if a bit odd
          wrappee.__name__ = method.__name__
          wrappee.__doc__ = slice_method.__doc__
@@ -165,10 +165,10 @@ class TestPlc:
          'sfa_rspec_empty@1', 'sfa_allocate_empty@1', 'sfa_provision_empty@1','sfa_check_slice_plc_empty@1', SEPSFA,
          'sfa_delete_slice@1', 'sfa_delete_user@1', SEPSFA,
          'cross_check_tcp@1', 'check_system_slice', SEP,
+        # for inspecting the slice while it runs the first time
+        #'fail',
          # check slices are turned off properly
          'empty_slices', 'ssh_slice_off', 'slice_fs_deleted_ignore', SEP,
-        # xxx for Thomas
-        'fail',
          # check they are properly re-created with the same name
          'fill_slices', 'ssh_slice_again', SEP,
          'gather_logs_force', SEP,
@@ -1040,7 +1040,8 @@ class TestPlc:
          utils.header("checking nodes boot state (expected %s)"%target_boot_state)
          tasks = [ CompleterTaskBootState (self,hostname) \
                        for (hostname,_) in self.all_node_infos() ]
-        return Completer (tasks).run (timeout, graceout, period)
+        message = 'check_boot_state={}'.format(target_boot_state)
+        return Completer (tasks, message=message).run (timeout, graceout, period)
  
      def nodes_booted(self):
          return self.nodes_check_boot_state('boot',timeout_minutes=30,silent_minutes=28)
@@ -1051,11 +1052,11 @@ class TestPlc:
          return True
  
      # probing nodes
-    def check_nodes_ping(self,timeout_seconds=120,period_seconds=10):
-        class CompleterTaskPingNode (CompleterTask):
+    def check_nodes_ping(self, timeout_seconds=30, period_seconds=10):
+        class CompleterTaskPingNode(CompleterTask):
              def __init__ (self, hostname):
                  self.hostname=hostname
-            def run(self,silent):
+            def run(self, silent):
                  command="ping -c 1 -w 1 %s >& /dev/null"%self.hostname
                  return utils.system (command, silent=silent)==0
              def failure_epilogue (self):
@@ -1065,14 +1066,14 @@ class TestPlc:
          period=timedelta (seconds=period_seconds)
          node_infos = self.all_node_infos()
          tasks = [ CompleterTaskPingNode (h) for (h,_) in node_infos ]
-        return Completer (tasks).run (timeout, graceout, period)
+        return Completer (tasks, message='ping_node').run (timeout, graceout, period)
  
      # ping node before we try to reach ssh, helpful for troubleshooting failing bootCDs
      def ping_node (self):
          "Ping nodes"
          return self.check_nodes_ping ()
  
-    def check_nodes_ssh(self,debug,timeout_minutes,silent_minutes,period_seconds=15):
+    def check_nodes_ssh(self, debug, timeout_minutes, silent_minutes, period_seconds=15):
          # various delays 
          timeout  = timedelta(minutes=timeout_minutes)
          graceout = timedelta(minutes=silent_minutes)
@@ -1080,15 +1081,18 @@ class TestPlc:
          vservername=self.vservername
          if debug: 
              message="debug"
+            completer_message = 'ssh_node_debug'
              local_key = "keys/%(vservername)s-debug.rsa"%locals()
          else: 
              message="boot"
+            completer_message = 'ssh_node_boot'
             local_key = "keys/key_admin.rsa"
          utils.header("checking ssh access to nodes (expected in %s mode)"%message)
          node_infos = self.all_node_infos()
-        tasks = [ CompleterTaskNodeSsh (nodename, qemuname, local_key, boot_state=message) \
+        tasks = [ CompleterTaskNodeSsh (nodename, qemuname, local_key,
+                                        boot_state=message, dry_run=self.options.dry_run) \
                        for (nodename,qemuname) in node_infos ]
-        return Completer (tasks).run (timeout, graceout, period)
+        return Completer (tasks, message=completer_message).run (timeout, graceout, period)
          
      def ssh_node_debug(self):
          "Tries to ssh into nodes in debug mode with the debug ssh key"
@@ -1161,7 +1165,7 @@ class TestPlc:
                  test_node = TestNode (self,test_site,node)
                  test_sliver = TestSliver (self, test_node, test_slice)
                  tasks.append ( CompleterTaskInitscript (test_sliver, stamp))
-        return Completer (tasks).run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10))
+        return Completer (tasks, message='check_initscripts').run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10))
             
      def check_initscripts(self):
          "check that the initscripts have triggered"
@@ -1306,20 +1310,23 @@ class TestPlc:
          for spec in specs:
              port = spec['port']
              # server side
-            s_test_sliver = self.locate_sliver_obj_cross (spec['server_node'],spec['server_slice'],other_plcs)
-            if not s_test_sliver.run_tcp_server(port,timeout=20):
-                overall=False
+            # the issue here is that the server runs in the background,
+            # so we have no way of knowing whether it started properly;
+            # it looks like in some cases it does not
+            s_test_sliver = self.locate_sliver_obj_cross (spec['server_node'], spec['server_slice'], other_plcs)
+            if not s_test_sliver.run_tcp_server(port, timeout=20):
+                overall = False
                  break
  
              # idem for the client side
-            c_test_sliver = self.locate_sliver_obj_cross (spec['client_node'],spec['client_slice'],other_plcs)
+            c_test_sliver = self.locate_sliver_obj_cross (spec['client_node'], spec['client_slice'], other_plcs)
              # use nodename from locatesd sliver, unless 'client_connect' is set
              if 'client_connect' in spec:
                  destination = spec['client_connect']
              else:
-                destination=s_test_sliver.test_node.name()
-            if not c_test_sliver.run_tcp_client(destination,port):
-                overall=False
+                destination = s_test_sliver.test_node.name()
+            if not c_test_sliver.run_tcp_client(destination, port):
+                overall = False
          return overall
  
      # painfully enough, we need to allow for some time as netflow might show up last
@@ -1351,7 +1358,7 @@ class TestPlc:
          period  = timedelta (seconds=period_seconds)
          tasks = [ CompleterTaskSystemSlice (test_node, self.options.dry_run) \
                        for test_node in self.all_nodes() ]
-        return Completer (tasks) . run (timeout, silent, period)
+        return Completer (tasks, message='_check_system_slice') . run (timeout, silent, period)
  
      def plcsh_stress_test (self):
          "runs PLCAPI stress test, that checks Add/Update/Delete on all types - preserves contents"