3 # Useful information can be found at https://svn.planet-lab.org/wiki/NodeManager
6 # Faiyaz Ahmed <faiyaza at cs dot princeton dot edu>
7 # Copyright (C) 2008 The Trustees of Princeton University
26 from config import Config
27 from plcapi import PLCAPI
# default plugins directory; used as the default of the -P/--path option
32 PLUGIN_PATH = "/usr/share/NodeManager/plugins"
# pickle file in which dumpSlivers() stores, and loadSlivers() reads back,
# the last fetched GetSlivers result
34 DB_FILE = "/var/lib/nodemanager/getslivers.pickle"
36 # the modules in this directory that need to be run
37 # NOTE: modules listed here will also be loaded in this order
38 # once loaded, they get re-ordered by their priority (lower comes first)
39 # for determining the runtime order
40 core_modules = ['net', 'conf_files', 'slivermanager', 'bwmon']
# priority assumed for a module that does not define a 'priority' attribute
44 default_priority = 100
# Command-line options, one (flags, settings) entry per option, listed in
# the order they should appear in the --help output.
option_specs = (
    (('-d', '--daemon'),
     dict(action='store_true', dest='daemon', default=False,
          help='run daemonized')),
    (('-f', '--config'),
     dict(action='store', dest='config', default='/etc/planetlab/plc_config',
          help='PLC configuration file')),
    (('-k', '--session'),
     dict(action='store', dest='session', default='/etc/planetlab/session',
          help='API session key (or file)')),
    (('-p', '--period'),
     dict(action='store', dest='period', default=NodeManager.default_period,
          help='Polling interval (sec) - default {}'
          .format(NodeManager.default_period))),
    (('-r', '--random'),
     dict(action='store', dest='random', default=NodeManager.default_random,
          help='Range for additional random polling interval (sec) -- default {}'
          .format(NodeManager.default_random))),
    (('-v', '--verbose'),
     dict(action='store_true', dest='verbose', default=False,
          help='more verbose log')),
    (('-P', '--path'),
     dict(action='store', dest='path', default=NodeManager.PLUGIN_PATH,
          help='Path to plugins directory')),
    # NOTE: BUG the 'help' for this option won't list plugins from the --path argument
    (('-m', '--module'),
     dict(action='store', dest='user_module', default='', help='run a single module')),
)

# Register every option on a fresh parser, then parse the process arguments.
parser = optparse.OptionParser()
for flags, settings in option_specs:
    parser.add_option(*flags, **settings)
(self.options, args) = parser.parse_args()
# determine the modules to be run
# Work on a copy: the in-place '+=' below would otherwise append the plugin
# names to the class-level NodeManager.core_modules list itself, and the
# "is this a core module?" test done at load time would then be true for
# every plugin as well.
self.modules = list(NodeManager.core_modules)
# Deal with plugins directory: every *.py file except __init__.py is a
# plugin, named after the file's basename without extension.
if os.path.exists(self.options.path):
    sys.path.append(self.options.path)
    plugins = [os.path.split(os.path.splitext(x)[0])[1]
               for x in glob.glob(os.path.join(self.options.path, '*.py'))
               if not x.endswith("/__init__.py")]
    self.modules += plugins
# -m/--module restricts the run to one module, which must be a known one
if self.options.user_module:
    assert self.options.user_module in self.modules
    self.modules = [self.options.user_module]
    logger.verbose('nodemanager: Running single module {}'.format(self.options.user_module))
def GetSlivers(self, config, plc):
    """Retrieves GetSlivers at PLC and triggers callbacks defined in modules/plugins

    The freshly fetched data is completed with the system-wide defaults and
    the 'vref' tweak, saved to disk and logged.  If any of that fails, the
    failure is logged and an empty dict is used instead, so that modules can
    at least bootstrap.

    Each loaded module's GetSlivers(data, config, plc) hook is then called.
    A module with a true 'persistent_data' attribute receives the data
    restored by loadSlivers() instead of the (possibly empty) fresh data.
    A failure in one module's hook is logged and does not prevent the other
    modules from running.
    """
    try:
        logger.log("nodemanager: Syncing w/ PLC")
        # retrieve GetSlivers from PLC
        data = plc.GetSlivers()
        # use the magic 'default' slice to retrieve system-wide defaults
        self.getPLCDefaults(data, config)
        # tweak the 'vref' attribute from GetSliceFamily
        self.setSliversVref(data)
        # dump it too, so it can be retrieved later in case of comm. failure
        self.dumpSlivers(data)
        # log it for debug purposes, no matter what verbose is
        logger.log_slivers(data)
        logger.verbose("nodemanager: Sync w/ PLC done")
    except Exception:
        logger.log_exc("nodemanager: failed in GetSlivers")
        # XXX So some modules can at least bootstrap.
        logger.log("nodemanager: Can't contact PLC to GetSlivers(). Continuing.")
        data = {}
    # for modules that request it through the 'persistent_data' property
    last_data = self.loadSlivers()
    # Invoke GetSlivers() functions from the callback modules
    for module in self.loaded_modules:
        logger.verbose('nodemanager: triggering {}.GetSlivers'.format(module.__name__))
        try:
            callback = getattr(module, 'GetSlivers')
            if getattr(module, 'persistent_data', False):
                module_data = last_data
            else:
                module_data = data
            # hand over the data selected above, not unconditionally 'data';
            # otherwise 'persistent_data' would have no effect
            callback(module_data, config, plc)
        except SystemExit:
            # a module asking for termination must not be swallowed below
            raise
        except Exception:
            logger.log_exc("nodemanager: GetSlivers failed to run callback for module {}"
                           .format(module.__name__))
def getPLCDefaults(self, data, config):
    """
    Get PLC wide defaults from _default system slice. Adds them to config class.

    data   -- GetSlivers result; its 'slivers' list is searched for the slice
              named config.PLC_SLICE_PREFIX + "_default"
    config -- object on which the OVERRIDES attribute is set to a
              {tagname: value} dict built from that slice's attributes, or
              removed when no overrides are found

    A missing or None 'slivers' / 'attributes' entry is treated as empty.
    """
    for sliver in data.get('slivers') or []:
        if sliver['name'] != config.PLC_SLICE_PREFIX + "_default":
            continue
        attr_dict = {}
        for attr in sliver.get('attributes') or []:
            attr_dict[attr['tagname']] = attr['value']
        if attr_dict:
            logger.verbose("nodemanager: Found default slice overrides.\n {}".format(attr_dict))
            config.OVERRIDES = attr_dict
            return
    # NOTE: if an _default slice existed, it would have been found above and
    # the routine would return. Thus, if we've gotten here, then no default
    # slice is bound to this node.
    if hasattr(config, 'OVERRIDES'):
        del config.OVERRIDES
def setSliversVref (self, data):
    """
    Tweak the 'vref' attribute in all slivers based on the 'GetSliceFamily' key

    Every existing 'vref' attribute of a sliver is overwritten in place; a
    new one is appended only when the sliver has none, so that no duplicate
    'vref' entry is created.  A sliver that cannot be processed is logged
    and skipped.
    """
    # GetSlivers exposes the result of GetSliceFamily() as a separate key in data
    # It is safe to override the attributes with this, as this method has the right logic
    for sliver in data.get('slivers') or []:
        try:
            slicefamily = sliver.get('GetSliceFamily')
            found = False
            for att in sliver['attributes']:
                if att['tagname'] == 'vref':
                    att['value'] = slicefamily
                    found = True
            if not found:
                sliver['attributes'].append({'tagname': 'vref', 'value': slicefamily})
        except Exception:
            logger.log_exc("nodemanager: Could not overwrite 'vref' attribute from 'GetSliceFamily'",
                           name=sliver['name'])
def dumpSlivers (self, slivers):
    """Save the fetched GetSlivers data as a pickle in NodeManager.DB_FILE.

    The file is opened in binary mode, as recommended for pickle files, and
    is closed even when pickling fails.
    """
    with open(NodeManager.DB_FILE, "wb") as f:
        logger.log ("nodemanager: saving successfully fetched GetSlivers in {}".format(NodeManager.DB_FILE))
        pickle.dump(slivers, f)
def loadSlivers (self):
    """Return the GetSlivers data last saved in NodeManager.DB_FILE.

    Any failure to open or unpickle the file is logged and an empty dict is
    returned instead.  The file is only read, so it is opened read-only (in
    binary mode) and is closed even when unpickling fails.
    """
    try:
        # NOTE: pickle must only be fed trusted data; this file is the one
        # written by dumpSlivers()
        with open(NodeManager.DB_FILE, "rb") as f:
            logger.log("nodemanager: restoring latest known GetSlivers from {}".format(NodeManager.DB_FILE))
            slivers = pickle.load(f)
        return slivers
    except Exception:
        logger.log("Could not restore GetSlivers from {}".format(NodeManager.DB_FILE))
        return {}
# Start-up sequence followed by the main loop: node virt file, log level,
# PLC configuration, PID file check, module loading and ordering, session
# key, PLC authentication, then repeated GetSlivers rounds.
183 # make sure to create /etc/planetlab/virt so others can read that
184 # used e.g. in vsys-scripts's sliceip
185 tools.get_node_virt()
187 if self.options.daemon:
191 if (self.options.verbose):
192 logger.set_level(logger.LOG_VERBOSE)
195 # Load /etc/planetlab/plc_config
196 config = Config(self.options.config)
# a non-None result from tools.pid_file() is reported as the pid of another
# running instance
199 other_pid = tools.pid_file()
# NOTE(review): 'is not None' is the idiomatic identity test here
200 if other_pid != None:
201 print """There might be another instance of the node manager running as pid {}.
202 If this is not the case, please remove the pid file {}. -- exiting""".format(other_pid, tools.PID_FILE)
205 print "Warning while writing PID file:", err
# import each selected module by name; imported modules are collected in
# self.loaded_modules
208 self.loaded_modules = []
209 for module in self.modules:
211 m = __import__(module)
212 logger.verbose("nodemanager: triggering {}.start".format(m.__name__))
214 except: logger.log("WARNING: module {} did not start".format(m.__name__))
215 self.loaded_modules.append(m)
# a module outside NodeManager.core_modules is logged as skipped, a core
# module is logged as a fatal failure
217 if module not in NodeManager.core_modules:
218 logger.log_exc ("ERROR while loading module {} - skipped".format(module))
220 logger.log("FATAL : failed to start core module {}".format(module))
223 # sort on priority (lower first)
# modules without a 'priority' attribute use NodeManager.default_priority
224 def module_priority (m):
225 return getattr(m, 'priority', NodeManager.default_priority)
226 self.loaded_modules.sort(key=module_priority)
228 logger.log('ordered modules:')
229 for module in self.loaded_modules:
230 logger.log ('{}: {}'.format(getattr(module, 'priority', NodeManager.default_priority),
233 # Load /etc/planetlab/session
# the session key is the stripped content of the file named by the
# -k/--session option, when that file exists
234 if os.path.exists(self.options.session):
235 with open(self.options.session) as f:
236 session = f.read().strip()
# the option values are converted to int once here and reused below
242 iperiod = int(self.options.period)
243 irandom = int(self.options.random)
245 # Initialize XML-RPC client
# NOTE(review): iperiod/2 is an integer division under Python 2 only; it
# would yield a float under Python 3 -- confirm what PLCAPI expects
246 plc = PLCAPI(config.plc_api_uri, config.cacert, session, timeout=iperiod/2)
249 logger.log("nodemanager: Checking Auth.")
# loops until plc.check_authentication() returns a value equal to True
250 while plc.check_authentication() != True:
253 logger.log("nodemanager: Authentication Failure. Retrying")
254 except Exception as e:
255 logger.log("nodemanager: Retry Failed. ({}); Waiting..".format(e))
257 logger.log("nodemanager: Authentication Succeeded!")
261 # Main nodemanager Loop
262 work_beg = time.time()
263 logger.log('nodemanager: mainloop - calling GetSlivers - period={} random={}'
264 .format(iperiod, irandom))
265 self.GetSlivers(config, plc)
# delay is the fixed period plus a random extra in [0, irandom)
# NOTE(review): random.randrange(0, irandom) raises ValueError when irandom
# is 0 (empty range) -- confirm that '-r 0' is never used
266 delay = iperiod + random.randrange(0, irandom)
267 work_end = time.time()
# whole seconds spent in this round; only used in the log line below
268 work_duration = int(work_end-work_beg)
269 logger.log('nodemanager: mainloop has worked for {} s - sleeping for {} s'
270 .format(work_duration, delay))
275 logger.log_exc("nodemanager: failed in run")
278 logger.log("======================================== Entering nodemanager.py")
281 if __name__ == '__main__':
284 # This is for debugging purposes. Open a copy of Python and import nodemanager
285 tools.as_daemon_thread(run)