From 64d4adb26fce692ae87047632689abbf2ff9e128 Mon Sep 17 00:00:00 2001 From: Thierry Parmentelat Date: Mon, 10 Dec 2007 17:25:43 +0000 Subject: [PATCH] (*) basically no operational change (*) fix initscript - was using only first item in OPTIONS so only -d was used despite do_start ${OPTIONS-"-d -s"} - idem & worse when OPTIONS were set in /etc/sysconfig/NodeManager btw. it is confusing that the chkconfig name is nm while /etc/sysconfig/NodeManager is sourced (*) add -v option - allows to log stuff that uses logger.verbose - see below - stores a pretty printed version of the latest GetSlivers in /var/log/get_slivers.txt (*) actual period was period + random[1..301] with 301 hardwired fixed --period (never used because of the above) and added --random (*) logger now supports logger.verbose and logger.log_slivers (*) so bottom line is, use e.g. following settings for debugging # cat /etc/sysconfig/NodeManager OPTIONS="-d -s --period 60 --random 15 -v" --- accounts.py | 8 +++++++- database.py | 2 ++ logger.py | 34 ++++++++++++++++++++++++++++++++-- nm.init | 3 +-- nm.py | 18 ++++++++++++++++-- sliver_vs.py | 2 ++ sm.py | 6 ++++++ 7 files changed, 66 insertions(+), 7 deletions(-) diff --git a/accounts.py b/accounts.py index 47e7af5..3074e41 100644 --- a/accounts.py +++ b/accounts.py @@ -68,6 +68,7 @@ def get(name): class Account: def __init__(self, rec): + logger.verbose('Initing account %s'%rec['name']) self.name = rec['name'] self.keys = '' self.initscriptchanged = False @@ -80,6 +81,7 @@ class Account: def configure(self, rec): """Write to my authorized_keys file.""" + logger.verbose('in accounts:configure') new_keys = rec['keys'] if new_keys != self.keys: self.keys = new_keys @@ -88,7 +90,7 @@ class Account: if not os.access(dot_ssh, os.F_OK): os.mkdir(dot_ssh) os.chmod(dot_ssh, 0700) tools.write_file(dot_ssh + '/authorized_keys', lambda f: f.write(new_keys)) - logger.log('%s: installing ssh keys' % self.name) + logger.verbose('%s: installing ssh keys' % self.name) tools.fork_as(self.name, do_installation) def start(self, delay=0): pass @@ -111,7 +113,10 @@ class Worker: def ensure_created(self, rec): """Cause the account specified by to exist if it doesn't already.""" + if rec.has_key('name'): + logger.verbose('Worker.ensure_created with name=%s'%rec['name']) self._q.put((self._ensure_created, rec.copy(), Startingup)) + logger.verbose('Worker queue has %d item(s)'%self._q.qsize()) def _ensure_created(self, rec, startingup): curr_class = self._get_class() @@ -158,6 +163,7 @@ class Worker: """Repeatedly pull commands off the queue and execute. If memory usage becomes an issue, it might be wise to terminate after a while.""" while True: try: + logger.verbose('Worker:_run : getting - size is %d'%self._q.qsize()) cmd = self._q.get() cmd[0](*cmd[1:]) except: diff --git a/database.py b/database.py index 46a4e47..a834fd8 100644 --- a/database.py +++ b/database.py @@ -97,8 +97,10 @@ class Database(dict): self._compute_effective_rspecs() # create and destroy accounts as needed + logger.verbose("database:sync : fetching accounts") existing_acct_names = accounts.all() for name in existing_acct_names: + logger.verbose("database:sync : loop on %s"%name) if name not in self: accounts.get(name).ensure_destroyed() for name, rec in self.iteritems(): if rec['instantiation'] == 'plc-instantiated': accounts.get(name).ensure_created(rec) diff --git a/logger.py b/logger.py index 470a98f..1c51a40 100644 --- a/logger.py +++ b/logger.py @@ -10,9 +10,27 @@ import traceback LOG_FILE = '/var/log/nm' +LOG_SLIVERS = '/var/log/getslivers.txt' -def log(msg): - """Write to the log file.""" +# Thierry - trying to debug this for 4.2 +# basically define 3 levels +LOG_NONE=0 +LOG_NODE=1 +LOG_VERBOSE=2 +# default is to log a reasonable amount of stuff for when running on operational nodes +LOG_LEVEL=1 + +def set_level(level): + assert level in [LOG_NONE,LOG_NODE,LOG_VERBOSE] + LOG_LEVEL=level + +def verbose(msg): + log(msg,LOG_VERBOSE) + +def log(msg,level=LOG_NODE): + """Write to the log file if level >= current log level (default LOG_NODE).""" + if (level > LOG_LEVEL): + return try: fd = os.open(LOG_FILE, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600) if not msg.endswith('\n'): msg += '\n' @@ -33,3 +51,15 @@ def log_exc(name = None): log("operation on %s failed. \n %s" %(self.name, traceback.format_exc())) else: log(traceback.format_exc()) + +def log_slivers (data): + import pprint, time + try: + f=open(LOG_SLIVERS,'w') + now=time.strftime("GetSlivers stored at %H:%M:%S", time.localtime()) + f.write(now+'\n') + pp=pprint.PrettyPrinter(stream=f,indent=2) + pp.pprint(data) + f.close() + except: + log_verbose('Cannot save GetSlivers in %s'%LOG_SLIVERS) diff --git a/nm.init b/nm.init index 5ba44e8..7fb3154 100755 --- a/nm.init +++ b/nm.init @@ -22,9 +22,8 @@ RETVAL=0 do_start() { - options=$1 echo -n $"Starting $prog: " - daemon --check=nm $nm $options + daemon --check=nm $nm "$@" RETVAL=$? echo [ $RETVAL -eq 0 ] && touch ${lockfile} diff --git a/nm.py b/nm.py index 370c8df..3eaec87 100644 --- a/nm.py +++ b/nm.py @@ -26,6 +26,8 @@ parser.add_option('-s', '--startup', action='store_true', dest='startup', defaul parser.add_option('-f', '--config', action='store', dest='config', default='/etc/planetlab/plc_config', help='PLC configuration file') parser.add_option('-k', '--session', action='store', dest='session', default='/etc/planetlab/session', help='API session key (or file)') parser.add_option('-p', '--period', action='store', dest='period', default=600, help='Polling interval (sec)') +parser.add_option('-r', '--random', action='store', dest='random', default=301, help='Range for additional random polling interval (sec)') +parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help='more verbose log') (options, args) = parser.parse_args() modules = [] @@ -33,6 +35,8 @@ modules = [] def GetSlivers(plc): try: data = plc.GetSlivers() except: logger.log_exc() + if (options.verbose): + logger.log_slivers(data) # Set i2 ip list for nodes in I2 nodegroup. try: net.GetSlivers(plc, data) except: logger.log_exc() @@ -47,6 +51,10 @@ def run(): try: if options.daemon: tools.daemon() + # set log level + if (options.verbose): + logger.set_level(logger.LOG_VERBOSE) + # Load /etc/planetlab/plc_config config = Config(options.config) @@ -74,16 +82,22 @@ def run(): session = options.session # Initialize XML-RPC client - plc = PLCAPI(config.plc_api_uri, config.cacert, session, timeout=options.period/2) + iperiod=int(options.period) + irandom=int(options.random) + plc = PLCAPI(config.plc_api_uri, config.cacert, session, timeout=iperiod/2) while True: # Main NM Loop + logger.verbose('mainloop - nm:getSlivers - period=%d random=%d'%(iperiod,irandom)) GetSlivers(plc) - time.sleep(options.period + random.randrange(0,301)) + delay=iperiod + random.randrange(0,irandom) + logger.verbose('mainloop - sleeping for %d s'%delay) + time.sleep(delay) except: logger.log_exc() if __name__ == '__main__': + logger.verbose("Entering nm.py") stacklim = 512*1024 # 0.5 MiB curlim = resource.getrlimit(resource.RLIMIT_STACK)[0] # soft limit if curlim > stacklim: diff --git a/sliver_vs.py b/sliver_vs.py index 49887e6..dbe568f 100644 --- a/sliver_vs.py +++ b/sliver_vs.py @@ -45,6 +45,7 @@ class Sliver_VS(accounts.Account, vserver.VServer): _init_disk_info_sem = tools.NMLock("/var/run/nm-disk-info.lock") def __init__(self, rec): + logger.verbose ('initing Sliver_VS with name=%s'%rec['name']) try: vserver.VServer.__init__(self, rec['name']) except Exception, err: @@ -65,6 +66,7 @@ class Sliver_VS(accounts.Account, vserver.VServer): @staticmethod def create(name, vref = None): + logger.verbose('Sliver_VS:create - name=%s'%name) if vref is not None: logger.log_call('/usr/sbin/vuseradd', '-t', vref, name) else: diff --git a/sm.py b/sm.py index df79c52..3147a09 100644 --- a/sm.py +++ b/sm.py @@ -59,6 +59,10 @@ def GetSlivers(data, fullupdate=True): in, use the GetSlivers() heartbeat as a cue to scan for expired slivers.""" + logger.verbose("Entering sm:GetSlivers with fullupdate=%r"%fullupdate) + for key in data.keys(): + logger.verbose('GetSlivers key : ' + key) + node_id = None try: f = open('/etc/planetlab/node_id') @@ -86,11 +90,13 @@ def GetSlivers(data, fullupdate=True): ### Emulab-specific hack ends here + logger.verbose ('dealing with initscripts') initscripts_by_id = {} for is_rec in data['initscripts']: initscripts_by_id[str(is_rec['initscript_id'])] = is_rec['script'] for sliver in data['slivers']: + logger.verbose("sm:GetSlivers in slivers loop") rec = sliver.copy() rec.setdefault('timestamp', data['timestamp']) -- 2.43.0