KEYURL:=root@$(TESTMASTER):$(BUILD)/keys/key1.rsa
endif
+key: $(NODE).key.rsa
+
$(NODE).key.rsa:
ifeq (,$(KEYURL))
@echo "sync: fetching $@ - You must define TESTMASTER, BUILD and NODE on the command line"
@echo " note that for now all test builds use the same key, so any BUILD would do"
@exit 1
else
- scp $(KEYURL) $@
+ @echo "FETCHING key"
+ +scp $(KEYURL) $@
endif
### utility - find out the node name for a given BUILD
# class show to check if net:InitNodeLimit:bwlimit.init has run.
sync(nmdbcopy)
else: logger.log("bwmon: BW limits DISABLED.")
- except: logger.log_exc()
+ except: logger.log_exc("bwmon failed")
lock.clear()
def start(*args):
tools.as_daemon_thread(run)
def GetSlivers(*args):
+ logger.verbose ("triggering dummy bwmon.GetSlivers")
pass
if data.has_key("conf_files"):
for f in data['conf_files']:
try: self.update_conf_file(f)
- except: logger.log_exc()
- else: logger.log("conf_files: No conf_files found or API failure. Skipping")
+ except: logger.log_exc("failed to update conf_file")
+ else: logger.log("conf_files: No conf_files found or API failure. Skipping")
def start(options, config): pass
logger.verbose("database:sync : fetching accounts")
existing_acct_names = accounts.all()
for name in existing_acct_names:
- logger.verbose("database:sync : loop on %s"%name)
- if name not in self: accounts.get(name).ensure_destroyed()
+ if name not in self:
+ logger.verbose("database:sync : ensure_destroy'ing %s"%name)
+ accounts.get(name).ensure_destroyed()
for name, rec in self.iteritems():
- slivr = accounts.get(name)
- logger.verbose("database:sync : %s is %s" %(name,slivr._get_class()))
- # Make sure we refresh accounts that are running
- if rec['instantiation'] == 'plc-instantiated': slivr.ensure_created(rec)
- elif rec['instantiation'] == 'nm-controller': slivr.ensure_created(rec)
- # Back door to ensure PLC overrides Ticket in delegation.
- elif rec['instantiation'] == 'delegated' and slivr._get_class() != None:
- # if the ticket has been delivered and the nm-contoroller started the slice
- # update rspecs and keep them up to date.
- if slivr.is_running(): slivr.ensure_created(rec)
+ # protect this; if anything fails for a given sliver
+ # we still need the other ones to be handled
+ try:
+ sliver = accounts.get(name)
+ logger.verbose("database:sync : looping on %s (shell account class from pwd %s)" %(name,sliver._get_class()))
+ # Make sure we refresh accounts that are running
+ if rec['instantiation'] == 'plc-instantiated':
+ logger.verbose ("database.sync : ensure_create'ing 'instantiation' sliver %s"%name)
+ sliver.ensure_created(rec)
+ elif rec['instantiation'] == 'nm-controller':
+ logger.verbose ("database.sync : ensure_create'ing 'nm-controller' sliver %s"%name)
+ sliver.ensure_created(rec)
+ # Back door to ensure PLC overrides Ticket in delegation.
+ elif rec['instantiation'] == 'delegated' and sliver._get_class() != None:
+ # if the ticket has been delivered and the nm-contoroller started the slice
+ # update rspecs and keep them up to date.
+ if sliver.is_running():
+ logger.verbose ("database.sync : ensure_create'ing 'delegated' sliver %s"%name)
+ sliver.ensure_created(rec)
+ except:
+ logger.log_exc("database.sync failed to handle sliver",name=name)
# Wake up bwmom to update limits.
bwmon.lock.set()
dump_requested = False
db_lock.release()
try: tools.write_file(DB_FILE, lambda f: f.write(db_pickle))
- except: logger.log_exc()
+ except: logger.log_exc("failed in database.start.run")
global db
try:
f = open(DB_FILE)
try: db = cPickle.load(f)
finally: f.close()
except:
- logger.log_exc()
+ logger.log_exc("failed in database.start")
db = Database()
tools.as_daemon_thread(run)
options=${OPTIONS-"-d -s"}
restartoptions=${RESTARTOPTIONS-"-d"}
# debug mode is interactive, and has faster period
-debugoptions=${DEBUGOPTIONS-"-p 60 -r 31"}
+debugoptions=${DEBUGOPTIONS-"-v -p 60 -r 31"}
pidfile=${PIDFILE-/var/run/nm.pid}
lockfile=${LOCKFILE-/var/lock/subsys/nm}
RETVAL=0
do_start $restartoptions
;;
restartdebug)
+ shift
stop
- echo "Running interactively .."
- $nm $debugoptions
+ [[ -z "$@" ]] && echo "Running interactively .."
+ $nm $debugoptions "$@"
;;
condrestart)
if [ -f ${pidfile} ] ; then
fi
;;
*)
- echo $"Usage: $0 {start|stop|restart|condrestart|status|restartdebug}"
+ echo $"Usage: $0 {start|stop|restart|condrestart|status|restartdebug [-d]}"
exit 1
esac
LOG_LEVEL=level
def verbose(msg):
- log(msg,LOG_VERBOSE)
+ log('(v) '+msg,LOG_VERBOSE)
def log(msg,level=LOG_NODE):
"""Write <msg> to the log file if level >= current log level (default LOG_NODE)."""
if child.returncode:
raise Exception("command failed:\n stdout - %s\n stderr - %s" % \
(child.stdout.readlines(), child.stderr.readlines()))
- except: log_exc()
+ except: log_exc('failed to run command %s' % ' '.join(args))
-def log_exc(name = None):
+def log_exc(msg="",name=None):
"""Log the traceback resulting from an exception."""
- if name:
- log("operation on %s failed. \n %s" %(name, traceback.format_exc()))
+ if name:
+ log("%s: EXCEPTION caught <%s> \n %s" %(name, msg, traceback.format_exc()))
else:
- log(traceback.format_exc())
+ log("EXCEPTION caught <%s> \n %s" %(msg, traceback.format_exc()))
def log_data_in_file (data, file, message=""):
import pprint, time
try:
f=open(file,'w')
- now=time.strftime("Last update: %Y.%m.%d at %H:%M:%S", time.localtime())
+ now=time.strftime("Last update: %Y.%m.%d at %H:%M:%S %Z", time.localtime())
f.write(now+'\n')
if message: f.write('Message:'+message+'\n')
pp=pprint.PrettyPrinter(stream=f,indent=2)
'''Run call backs defined in modules'''
try:
logger.log("Syncing w/ PLC")
+ # retrieve GetSlivers from PLC
data = plc.GetSlivers()
+ # use the magic 'default' slice to retrieve system-wide defaults
+ getPLCDefaults(data, config)
+ # tweak the 'vref' attribute from GetSliceFamily
+ setSliversVref (data)
+ # always dump it for debug purposes
# used to be done only in verbose; very helpful though, and tedious to obtain,
# so let's dump this unconditionnally
logger.log_slivers(data)
- getPLCDefaults(data, config)
+ logger.verbose("Sync w/ PLC done (3)")
except:
- logger.log_exc()
+ logger.log_exc("failed in nm.GetSlivers")
# XXX So some modules can at least boostrap.
logger.log("nm: Can't contact PLC to GetSlivers(). Continuing.")
data = {}
try:
callback = getattr(module, 'GetSlivers')
callback(data, config, plc)
- except: logger.log_exc()
+ except: logger.log_exc("nm.GetSlivers failed to run callback for module %r"%module)
def getPLCDefaults(data, config):
if slice['name'] == config.PLC_SLICE_PREFIX+"_default":
attr_dict = {}
for attr in slice.get('attributes'): attr_dict[attr['tagname']] = attr['value']
- # GetSlivers exposes the result of GetSliceFamily() as an separate key in data
- # It is safe to override the attributes with this, as this method has the right logic
- try:
- attr_dict['vref']=slice.get('GetSliceFamily')
- except:
- pass
if len(attr_dict):
logger.verbose("Found default slice overrides.\n %s" % attr_dict)
config.OVERRIDES = attr_dict
if 'OVERRIDES' in dir(config): del config.OVERRIDES
+def setSliversVref (data):
+ '''
+ Tweak the 'vref' attribute in all slivers based on the 'GetSliceFamily' key
+ '''
+ # GetSlivers exposes the result of GetSliceFamily() as an separate key in data
+ # It is safe to override the attributes with this, as this method has the right logic
+ for sliver in data.get('slivers'):
+ try:
+ slicefamily=sliver.get('GetSliceFamily')
+ for att in sliver['attributes']:
+ if att['tagname']=='vref':
+ att['value']=slicefamily
+ continue
+ sliver['attributes'].append({ 'tagname':'vref','value':slicefamily})
+ except:
+ logger.log_exc("Could not overwrite 'vref' attribute from 'GetSliceFamily'",name=sliver['name'])
+
+
def run():
try:
if options.daemon: tools.daemon()
while plc.check_authentication() != True:
try:
plc.update_session()
- logger.log("Authentication Failure. Retrying")
+ logger.log("Authentication Failure. Retrying")
except:
- logger.log("Retry Failed. Waiting")
+ logger.log("Retry Failed. Waiting")
time.sleep(iperiod)
logger.log("Authentication Succeeded!")
delay=iperiod + random.randrange(0,irandom)
logger.verbose('mainloop - sleeping for %d s'%delay)
time.sleep(delay)
- except: logger.log_exc()
+ except: logger.log_exc("failed in nm.run")
if __name__ == '__main__':
- logger.log("Entering nm.py "+id)
+ logger.log("======================================== Entering nm.py "+id)
run()
else:
# This is for debugging purposes. Open a copy of Python and import nm
try:
authstatus = self.AuthCheck()
except:
- logger.log_exc()
+ logger.log_exc("failed in plcapi.check_authentication")
return authstatus
f.truncate()
f.close()
try: restartService()
- except: logger.log_exc()
+ except: logger.log_exc("codemux.writeConf failed to restart service")
def sortDomains(slivers):
slicesinconf.setdefault(slice, [])
slicesinconf[slice].append({"host": host, "port": port})
f.close()
- except IOError: logger.log_exc()
+ except IOError: logger.log_exc("codemux.parseConf got IOError")
return slicesinconf
(path, slice) = line.split()
slicesinconf.append(slice)
f.close()
- except: logger.log_exc()
+ except: logger.log_exc("failed vsys.parseConf")
return slicesinconf
_init_disk_info_sem = globalsem
def __init__(self, rec):
- logger.verbose ('initing Sliver_VS with name=%s'%rec['name'])
+ name=rec['name']
+ logger.verbose ('%s: initing Sliver_VS'%name)
try:
- vserver.VServer.__init__(self, rec['name'],logfile='/var/log/nm')
+ logger.log("%s: first chance..."%name)
+ vserver.VServer.__init__(self, name,logfile='/var/log/nm')
except Exception, err:
if not isinstance(err, vserver.NoSuchVServer):
# Probably a bad vserver or vserver configuration file
- logger.log_exc(self.name)
- logger.log('%s: recreating bad vserver' % rec['name'])
- self.destroy(rec['name'])
- self.create(rec['name'], rec['vref'])
- vserver.VServer.__init__(self, rec['name'],logfile='/var/log/nm')
+ logger.log_exc("sliver_vs.__init__ (1) %s",name=name)
+ logger.log('%s: recreating bad vserver' % name)
+ self.destroy(name)
+ self.create(name, rec['vref'])
+ logger.log("%s: second chance..."%name)
+ vserver.VServer.__init__(self, name,logfile='/var/log/nm')
self.keys = ''
self.rspec = {}
def create(name, vref = None):
logger.verbose('Sliver_VS:create - name=%s'%name)
if vref is None:
- logger.log("creating %s : no vref attached, this is unexpected"%name)
- vref='default'
+ logger.log("%s: ERROR - no vref attached, this is unexpected"%name)
+ return
# used to look in /etc/planetlab/family,
# now relies on the 'GetSliceFamily' extra attribute in GetSlivers()
# which for legacy is still exposed here as the 'vref' key
# guess arch
try:
(x,y,arch)=vref.split('-')
+ # mh, this of course applies when 'vref' is e.g. 'netflow'
+ # and that's not quite right
except:
arch='i386'
os.close(fd)
try:
self.chroot_call(install_initscript)
- except: logger.log_exc(self.name)
+ except: logger.log_exc("sliver_vs.start",name=self.name)
tools.close_nonstandard_fds()
vserver.VServer.start(self)
os._exit(0)
else:
os.waitpid(child_pid, 0)
self.initscriptchanged = False
- else: logger.log('%s: not starting, is not enabled' % self.name)
+ else: logger.log('not starting, is not enabled', name=self.name)
def stop(self):
logger.log('%s: stopping' % self.name)
self.disk_usage_initialized = True
vserver.VServer.set_disklimit(self, max(disk_max, self.disk_blocks))
except:
- logger.log('%s: failed to set max disk usage' % self.name)
- logger.log_exc(self.name)
+ logger.log_exc('failed to set max disk usage',name=self.name)
# get/set the min/soft/hard values for all of the vserver
# related RLIMITS. Note that vserver currently only
f = open('/etc/planetlab/node_id')
try: node_id = int(f.read())
finally: f.close()
- except: logger.log_exc()
+ except: logger.log_exc("sm.GetSlivers failed to read /etc/planetlab/node_id")
if data.has_key('node_id') and data['node_id'] != node_id: return
initscripts[str(is_rec['name'])] = is_rec['script']
for sliver in data['slivers']:
- logger.verbose("sm:GetSlivers in slivers loop")
+ logger.verbose("%s: sm:GetSlivers in slivers loop"%sliver['name'])
rec = sliver.copy()
rec.setdefault('timestamp', data['timestamp'])
except:
os.seteuid(os.getuid()) # undo su so we can write the log file
os.setegid(os.getgid())
- logger.log_exc()
+ logger.log_exc("tools.fork_as")
os._exit(0)
else: os.waitpid(child_pid, 0)