X-Git-Url: http://git.onelab.eu/?p=monitor.git;a=blobdiff_plain;f=RunlevelAgent.py;h=646b0a7a35b34de8333d53ede072f8ded1c25b71;hp=04dcfeef447c74019c7637a15d01faeb2de71e31;hb=HEAD;hpb=40588e1f900ba82db3ca69c5cc375805028f2430 diff --git a/RunlevelAgent.py b/RunlevelAgent.py index 04dcfee..646b0a7 100644 --- a/RunlevelAgent.py +++ b/RunlevelAgent.py @@ -1,7 +1,7 @@ #!/usr/bin/python # # RunlevelAgent - acts as a heartbeat back to myplc reporting that the node is -# online and whether it is in boot or pre-boot run-level. +# online and whether it is in boot or pre-boot run-level. # This is useful to identify nodes that are behind a firewall, as well as to # have the machine report run-time status both in safeboot and boot modes, # so that it is immediately visible at myplc (gui or api). @@ -96,12 +96,21 @@ def check_running(commandname): def main(): - f=open(SESSION_FILE,'r') - session_str=f.read().strip() - api = PLC(Auth(session=session_str), api_server_url) - # NOTE: What should we do if this call fails? - # TODO: handle dns failure here. - api.AuthCheck() + # Keep trying to authenticate session, waiting for NM to re-write the + # session file, or DNS to succeed, until AuthCheck succeeds. + while True: + try: + f=open(SESSION_FILE,'r') + session_str=f.read().strip() + api = PLC(Auth(session=session_str), api_server_url) + # NOTE: What should we do if this call fails? + # TODO: handle dns failure here. + api.AuthCheck() + break + except: + print "Retry in 30 seconds: ", os.popen("uptime").read().strip() + traceback.print_exc() + time.sleep(30) try: env = 'production' @@ -115,13 +124,13 @@ def main(): # NOTE: here we are inferring the runlevel by environmental # observations. We know how this process was started by the # given command line argument. Then in bootmanager - # runlevle, the bm.log gives information about the current + # runlevel, the bm.log gives information about the current # activity. # other options: # call plc for current boot state? # how long have we been running? if env == "bootmanager": - bs_val = extract_from('/tmp/bm.log', 'Current boot state:') + bs_val = extract_from('/tmp/bm.log', "'Current boot state:'") if len(bs_val) > 0: bs_val = bs_val.split()[-1] ex_val = extract_from('/tmp/bm.log', 'Exception') fs_val = extract_from('/tmp/bm.log', 'mke2fs')