From 9d9c35913d1312a42ef63fc665f44aabc80cf001 Mon Sep 17 00:00:00 2001 From: Thierry Parmentelat Date: Wed, 4 Mar 2015 00:36:21 +0100 Subject: [PATCH] rework check-tcp so that we first wait for the network to be ready in the sliver --- system/Completer.py | 2 +- system/TestNode.py | 6 ++-- system/TestPlc.py | 57 ++++++++++++++++++++++++--------- system/TestSliver.py | 28 +++++++++++------ system/config_default.py | 3 ++ system/tcptest.py | 68 ++++++++++++++++++++++++++++------------ 6 files changed, 116 insertions(+), 48 deletions(-) diff --git a/system/Completer.py b/system/Completer.py index 5cf3c9b..3d49dac 100755 --- a/system/Completer.py +++ b/system/Completer.py @@ -13,7 +13,7 @@ class Completer: self.tasks=tasks self.verbose=verbose self.message="({})".format(message) if message else "" - def run (self, timeout_timedelta, silent_timedelta, period=None): + def run (self, timeout_timedelta, silent_timedelta, period): begin = datetime.now() timeout = begin+timeout_timedelta timeout_minutes = timeout_timedelta.total_seconds()/60 diff --git a/system/TestNode.py b/system/TestNode.py index 2b24ba8..6bf40ad 100644 --- a/system/TestNode.py +++ b/system/TestNode.py @@ -21,9 +21,9 @@ class CompleterTaskNodeSsh (CompleterTask): self.test_ssh = TestSsh (self.hostname,key=self.local_key) def run (self, silent): command = self.test_ssh.actual_command(self.command) - retcod=utils.system (command, silent=silent, dry_run=self.dry_run) - if self.expected: return retcod==0 - else: return retcod !=0 + retcod = utils.system (command, silent=silent, dry_run=self.dry_run) + if self.expected: return retcod == 0 + else: return retcod != 0 def failure_epilogue (self): print "Cannot reach %s in %s mode"%(self.hostname, self.boot_state) diff --git a/system/TestPlc.py b/system/TestPlc.py index 6b910a7..9504b70 100644 --- a/system/TestPlc.py +++ b/system/TestPlc.py @@ -1140,14 +1140,14 @@ class TestPlc: ### initscripts def do_check_initscripts(self): class CompleterTaskInitscript 
(CompleterTask): - def __init__ (self, test_sliver, stamp): + def __init__(self, test_sliver, stamp): self.test_sliver=test_sliver self.stamp=stamp - def actual_run (self): - return self.test_sliver.check_initscript_stamp (self.stamp) - def message (self): + def actual_run(self): + return self.test_sliver.check_initscript_stamp(self.stamp) + def message(self): return "initscript checker for %s"%self.test_sliver.name() - def failure_epilogue (self): + def failure_epilogue(self): print "initscript stamp %s not found in sliver %s"%(self.stamp,self.test_sliver.name()) tasks=[] @@ -1164,8 +1164,9 @@ class TestPlc: test_slice = TestSlice (self,test_site,slice_spec) test_node = TestNode (self,test_site,node) test_sliver = TestSliver (self, test_node, test_slice) - tasks.append ( CompleterTaskInitscript (test_sliver, stamp)) - return Completer (tasks, message='check_initscripts').run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10)) + tasks.append(CompleterTaskInitscript(test_sliver, stamp)) + return Completer(tasks, message='check_initscripts').\ + run (timedelta(minutes=5), timedelta(minutes=4), timedelta(seconds=10)) def check_initscripts(self): "check that the initscripts have triggered" @@ -1306,26 +1307,54 @@ class TestPlc: utils.header ("check_tcp: no/empty config found") return True specs = self.plc_spec['tcp_specs'] - overall=True + overall = True + + # first wait for the network to be up and ready from the slices + class CompleterTaskNetworkReadyInSliver(CompleterTask): + def __init__(self, test_sliver): + self.test_sliver = test_sliver + def actual_run(self): + return self.test_sliver.check_tcp_ready(port=9999) + def message(self): + return "network ready checker for %s" % self.test_sliver.name() + def failure_epilogue(self): + print "could not bind port from sliver %s" % self.test_sliver.name() + + tasks = [] + for spec in specs: + # locate the TestSliver instances involved, and cache them in the spec instance + spec['s_sliver'] = 
self.locate_sliver_obj_cross (spec['server_node'], spec['server_slice'], other_plcs) + spec['c_sliver'] = self.locate_sliver_obj_cross (spec['client_node'], spec['client_slice'], other_plcs) + message = "Will check TCP between s=%s and c=%s" % \ + (spec['s_sliver'].name(), spec['c_sliver'].name()) + if 'client_connect' in spec: + message += " (using %s)" % spec['client_connect'] + utils.header(message) + tasks.append(CompleterTaskNetworkReadyInSliver (spec['s_sliver'])) + + # wait for the network to be OK in all server sides + if not Completer(tasks, message='check for network readiness in slivers').\ + run(timedelta(seconds=30), timedelta(seconds=24), period=timedelta(seconds=5)): + return False + + # run server and client for spec in specs: port = spec['port'] # server side # the issue here is that we have the server run in background # and so we have no clue if it took off properly or not # looks like in some cases it does not - s_test_sliver = self.locate_sliver_obj_cross (spec['server_node'], spec['server_slice'], other_plcs) - if not s_test_sliver.run_tcp_server(port, timeout=20): + if not spec['s_sliver'].run_tcp_server(port, timeout=20): overall = False break # idem for the client side - c_test_sliver = self.locate_sliver_obj_cross (spec['client_node'], spec['client_slice'], other_plcs) - # use nodename from locatesd sliver, unless 'client_connect' is set + # use nodename from located sliver, unless 'client_connect' is set if 'client_connect' in spec: destination = spec['client_connect'] else: - destination = s_test_sliver.test_node.name() - if not c_test_sliver.run_tcp_client(destination, port): + destination = spec['s_sliver'].test_node.name() + if not spec['c_sliver'].run_tcp_client(destination, port): overall = False return overall diff --git a/system/TestSliver.py b/system/TestSliver.py index a82b27b..f69e4c5 100644 --- a/system/TestSliver.py +++ b/system/TestSliver.py @@ -31,7 +31,7 @@ class TestSliver: # so that copies end up in the home dir 
buildname=".") - def name (self): + def name(self): return "%s@%s"%(self.test_slice.name(),self.test_node.name()) def check_initscript_stamp(self, stamp): @@ -39,19 +39,27 @@ class TestSliver: return self.test_ssh.run("ls -l /var/tmp/%s.stamp"%stamp)==0 def run_tcp_server (self, port, timeout=10): - server_command = "./tcptest.py server -p %d -t %d"%(port,timeout) - return self.test_ssh.copy("tcptest.py")==0 and \ + server_command = "./tcptest.py server -p %d -t %d"%(port, timeout) + return self.test_ssh.copy("tcptest.py") == 0 and \ self.test_ssh.run(server_command, background=True)==0 + def check_tcp_ready (self, port): + server_command = "./tcptest.py ready -p %d"%(port) + return self.test_ssh.copy("tcptest.py") == 0 and \ + self.test_ssh.run(server_command) == 0 + def run_tcp_client (self, servername, port, retry=5): client_command="./tcptest.py client -a %s -p %d"%(servername, port) - if self.test_ssh.copy("tcptest.py")!=0: return False - utils.header ("tcp client - first attempt") - if self.test_ssh.run(client_command, background=False)==0: return True - # if first try has failed, wait for s an try again - time.sleep(retry) - utils.header ("tcp client - second attempt") - if self.test_ssh.run(client_command, background=False)==0: return True + if self.test_ssh.copy("tcptest.py") != 0: + return False + # allow for 2 attempts + attempts = 2 + for attempt in range (attempts): + if attempt != 0: + time.sleep(retry) + utils.header ("tcp client - attempt # %s" % (attempt+1)) + if self.test_ssh.run(client_command) == 0: + return True return False # use the node's main ssh root entrance, as the slice entrance might be down diff --git a/system/config_default.py b/system/config_default.py index c2c1651..00ababf 100644 --- a/system/config_default.py +++ b/system/config_default.py @@ -434,6 +434,9 @@ def tcp_specs (options,index): # with the addition of omf-friendly slices.. 
slice3='%s_sl4'%login_base(2) slice4='%s_sl5'%login_base(2) + +# NOTE: port 9999 is hard-wired in the code to be used for checking network readiness +# so it is not to be used here # bind on 0.0.0.0 and try to reach this on localhost # not expected to work same_node_same_slice_lo = { 'server_node': 'node1', 'server_slice': slice1, diff --git a/system/tcptest.py b/system/tcptest.py index dd2bc25..bff3c66 100755 --- a/system/tcptest.py +++ b/system/tcptest.py @@ -8,19 +8,19 @@ import time import subprocess import socket import SocketServer +import threading from optparse import OptionParser -def myprint(message, is_client=True): +def myprint(message, id='client'): now=time.strftime("%H:%M:%S", time.localtime()) - id = 'tcpclient' if is_client else 'tcpserver' print "*",now,'(%s)' % id, '--',message sys.stdout.flush() -def show_network_status(is_client): - myprint("ip address show", is_client=is_client) - subprocess.call(['ip','address','show']) - myprint("ip route show", is_client=is_client) - subprocess.call(['ip','route','show']) +def show_network_status(id): + myprint("ip address show", id=id) + subprocess.call(['ip', 'address', 'show']) + myprint("ip route show", id=id) + subprocess.call(['ip', 'route', 'show']) class EchoRequestHandler(SocketServer.StreamRequestHandler): def handle(self): @@ -33,10 +33,10 @@ class UppercaseRequestHandler(SocketServer.StreamRequestHandler): self.wfile.write(line.upper()) class Server: - + """ + A TCP server, running for some finite amount of time + """ def main(self): - import threading - parser = OptionParser() parser.add_option("-p", "--port", action="store", dest="port", type="int", default=10000, help="port number") @@ -44,17 +44,15 @@ class Server: default=socket.gethostname(), help="address") parser.add_option("-t", "--timeout", action="store", dest="timeout", type="int", default="0") - (options, args) = parser.parse_args() + if len(args) != 0: parser.print_help() sys.exit(1) - show_network_status(is_client=False) - + 
show_network_status(id='server') server = SocketServer.TCPServer((options.address, options.port), UppercaseRequestHandler) - try: if options.timeout: t = threading.Thread(target=server.serve_forever) @@ -68,7 +66,34 @@ class Server: print 'Bailing out on keyboard interrupt' sys.exit(1) +class Ready: + """ + A utility that does exit(0) iff network as perceived + from the sliver is ready. Designed to be run before Server, + so one can wait for the right conditions. + """ + def main(self): + parser = OptionParser() + # by default use another port so we don't run into + # the SO_LINGER kind of trouble + parser.add_option("-p", "--port", action="store", dest="port", type="int", + default=9999, help="port number") + parser.add_option("-a", "--address", action="store", dest="address", + default=socket.gethostname(), help="address") + (options, args) = parser.parse_args() + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + s.bind((options.address, options.port)) + sys.exit(0) + except Exception as e: + print e + sys.exit(1) + class Client: + """ + Runs a client against a Server instance + """ def main(self): parser = OptionParser() parser.add_option("-p","--port", action="store", dest="port", type="int", @@ -88,7 +113,7 @@ class Client: result=True for i in range(1,options.loops+1): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.connect((options.address , options.port)) + s.connect((options.address, options.port)) mout=i*'ping ' + '\n' min=mout.upper() if s.send(mout) != len(mout): @@ -113,12 +138,15 @@ class Client: sys.exit(exit_return) if __name__ == '__main__': - for argv in sys.argv[1:]: - if argv.find("client") >= 0: - sys.argv.remove(argv) + for arg in sys.argv[1:]: + if arg.find("client") >= 0: + sys.argv.remove(arg) Client().main() - elif argv.find("server") >= 0: - sys.argv.remove(argv) + elif arg.find("server") >= 0: + sys.argv.remove(arg) Server().main() + elif arg.find("ready") >= 0: + sys.argv.remove(arg) + Ready().main() print 'you 
must specify either --client or --server' sys.exit(1) -- 2.43.0