RunlevelAgent.py

   1 #!/usr/bin/python
   2 #
   3 # RunlevelAgent - acts as a heartbeat back to myplc reporting that the node is
   4 #     online and whether it is in boot or pre-boot run-level.
   5 #   This is useful to identify nodes that are behind a firewall, as well as to
   6 #   have the machine report run-time status both in safeboot and boot modes,
   7 #   so that it is immediately visible at myplc (gui or api).
   8 #
   9
  10 import xml, xmlrpclib
  11 import logging
  12 import time
  13 import traceback
  14 import sys
  15 import os
  16 import string
  17
  18 CONFIG_FILE="/tmp/source/configuration"
  19 SESSION_FILE="/etc/planetlab/session"
  20
  21 def read_config_file(filename):
  22     ## NOTE: text copied from BootManager.py
  23     # TODO: unify this code to make it common. i.e. use ConfigParser module
  24     vars = {}
  25     vars_file= file(filename,'r')
  26     validConfFile = True
  27     for line in vars_file:
  28         # if its a comment or a whitespace line, ignore
  29         if line[:1] == "#" or string.strip(line) == "":
  30             continue
  31
  32         parts= string.split(line,"=")
  33         if len(parts) != 2:
  34             print "Invalid line in vars file: %s" % line
  35             validConfFile = False
  36             break
  37
  38         name= string.strip(parts[0])
  39         value= string.strip(parts[1])
  40         vars[name]= value
  41
  42     vars_file.close()
  43     if not validConfFile:
  44         print "Unable to read configuration vars."
  45
  46     return vars
  47
  48 try:
  49     sys.path = ['/etc/planetlab'] + sys.path
  50     import plc_config
  51     api_server_url = "https://" + plc_config.PLC_API_HOST + plc_config.PLC_API_PATH
  52 except:
  53     filename=CONFIG_FILE
  54     vars = read_config_file(filename)
  55     api_server_url = vars['BOOT_API_SERVER']
  56
  57
  58 class Auth:
  59     def __init__(self, username=None, password=None, **kwargs):
  60         if 'session' in kwargs:
  61             self.auth= { 'AuthMethod' : 'session',
  62                     'session' : kwargs['session'] }
  63         else:
  64             if username==None and password==None:
  65                 self.auth = {'AuthMethod': "anonymous"}
  66             else:
  67                 self.auth = {'Username' : username,
  68                             'AuthMethod' : 'password',
  69                             'AuthString' : password}
  70 class PLC:
  71     def __init__(self, auth, url):
  72         self.auth = auth
  73         self.url = url
  74         self.api = xmlrpclib.Server(self.url, verbose=False, allow_none=True)
  75
  76     def __getattr__(self, name):
  77         method = getattr(self.api, name)
  78         if method is None:
  79             raise AssertionError("method does not exist")
  80
  81         return lambda *params : method(self.auth.auth, *params)
  82
  83     def __repr__(self):
  84         return self.api.__repr__()
  85
  86 def extract_from(filename, pattern):
  87     f = os.popen("grep -E %s %s" % (pattern, filename))
  88     val = f.read().strip()
  89     return val
  90
  91 def check_running(commandname):
  92     f = os.popen("ps ax | grep -E %s | grep -v grep" % (commandname))
  93     val = f.read().strip()
  94     return val
  95
  96
  97 def main():
  98
  99     # Keep trying to authenticate session, waiting for NM to re-write the
 100     # session file, or DNS to succeed, until AuthCheck succeeds.
 101     while True:
 102         try:
 103             f=open(SESSION_FILE,'r')
 104             session_str=f.read().strip()
 105             api = PLC(Auth(session=session_str), api_server_url)
 106             # NOTE: What should we do if this call fails?
 107             # TODO: handle dns failure here.
 108             api.AuthCheck()
 109             break
 110         except:
 111             print "Retry in 30 seconds: ", os.popen("uptime").read().strip()
 112             traceback.print_exc()
 113             time.sleep(30)
 114
 115     try:
 116         env = 'production'
 117         if len(sys.argv) > 1:
 118             env = sys.argv[1]
 119     except:
 120         traceback.print_exc()
 121
 122     while True:
 123         try:
 124             # NOTE: here we are inferring the runlevel by environmental
 125             #         observations.  We know how this process was started by the
 126             #         given command line argument.  Then in bootmanager
 127             #         runlevel, the bm.log gives information about the current
 128             #         activity.
 129             # other options:
 130             #   call plc for current boot state?
 131             #   how long have we been running?
 132             if env == "bootmanager":
 133                 bs_val = extract_from('/tmp/bm.log', "'Current boot state:'")
 134                 if len(bs_val) > 0: bs_val = bs_val.split()[-1]
 135                 ex_val = extract_from('/tmp/bm.log', 'Exception')
 136                 fs_val = extract_from('/tmp/bm.log', 'mke2fs')
 137                 bm_val = check_running("BootManager.py")
 138
 139                 if bs_val in ['diag', 'diagnose', 'safeboot', 'disabled', 'disable']:
 140                     api.ReportRunlevel({'run_level' : 'safeboot'})
 141
 142                 elif len(ex_val) > len("Exception"):
 143                     api.ReportRunlevel({'run_level' : 'failboot'})
 144
 145                 elif len(fs_val) > 0 and len(bm_val) > 0:
 146                     api.ReportRunlevel({'run_level' : 'reinstall'})
 147
 148                 else:
 149                     api.ReportRunlevel({'run_level' : 'failboot'})
 150
 151             elif env == "production":
 152                 api.ReportRunlevel({'run_level' : 'boot'})
 153             else:
 154                 api.ReportRunlevel({'run_level' : 'failboot'})
 155
 156         except:
 157             print "reporting error: ", os.popen("uptime").read().strip()
 158             traceback.print_exc()
 159
 160         sys.stdout.flush()
 161         # TODO: change to a configurable value
 162         time.sleep(60*15)
 163
# Start the agent only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()