X-Git-Url: http://git.onelab.eu/?p=monitor.git;a=blobdiff_plain;f=RunlevelAgent.py;h=646b0a7a35b34de8333d53ede072f8ded1c25b71;hp=218a87d117f3dc65445168801cb30c661204137f;hb=HEAD;hpb=5772ce036b96297a23f834ea34ce4466ef4d522c diff --git a/RunlevelAgent.py b/RunlevelAgent.py index 218a87d..646b0a7 100644 --- a/RunlevelAgent.py +++ b/RunlevelAgent.py @@ -1,4 +1,11 @@ #!/usr/bin/python +# +# RunlevelAgent - acts as a heartbeat back to myplc reporting that the node is +# online and whether it is in boot or pre-boot run-level. +# This is useful to identify nodes that are behind a firewall, as well as to +# have the machine report run-time status both in safeboot and boot modes, +# so that it is immediately visible at myplc (gui or api). +# import xml, xmlrpclib import logging @@ -89,12 +96,21 @@ def check_running(commandname): def main(): - f=open(SESSION_FILE,'r') - session_str=f.read().strip() - api = PLC(Auth(session=session_str), api_server_url) - # NOTE: What should we do if this call fails? - # TODO: handle dns failure here. - api.AuthCheck() + # Keep trying to authenticate session, waiting for NM to re-write the + # session file, or DNS to succeed, until AuthCheck succeeds. + while True: + try: + f=open(SESSION_FILE,'r') + session_str=f.read().strip() + api = PLC(Auth(session=session_str), api_server_url) + # NOTE: What should we do if this call fails? + # TODO: handle dns failure here. + api.AuthCheck() + break + except: + print "Retry in 30 seconds: ", os.popen("uptime").read().strip() + traceback.print_exc() + time.sleep(30) try: env = 'production' @@ -108,13 +124,13 @@ def main(): # NOTE: here we are inferring the runlevel by environmental # observations. We know how this process was started by the # given command line argument. Then in bootmanager - # runlevle, the bm.log gives information about the current + # runlevel, the bm.log gives information about the current # activity. # other options: # call plc for current boot state? # how long have we been running? if env == "bootmanager": - bs_val = extract_from('/tmp/bm.log', 'Current boot state:') + bs_val = extract_from('/tmp/bm.log', "'Current boot state:'") if len(bs_val) > 0: bs_val = bs_val.split()[-1] ex_val = extract_from('/tmp/bm.log', 'Exception') fs_val = extract_from('/tmp/bm.log', 'mke2fs')