3 # Average bandwidth monitoring script. Run periodically via cron(8) to
4 # enforce a soft limit on daily bandwidth usage for each slice. If a
5 # slice is found to have exceeded its daily bandwidth usage when the
6 # script is run, its instantaneous rate will be capped at the desired
7 # average rate. Thus, in the worst case, a slice will only be able to
8 # send a little more than twice its average daily limit.
10 # Two separate limits are enforced, one for destinations exempt from
11 # the node bandwidth cap, and the other for all other destinations.
13 # Mark Huang <mlhuang@cs.princeton.edu>
14 # Andy Bavier <acb@cs.princeton.edu>
15 # Copyright (C) 2004-2006 The Trustees of Princeton University
# /etc/planetlab/plc_config.py is a Python fragment maintained by
# PlanetLabConf that contains PLC configuration variables.
try:
    sys.path.append("/etc/planetlab")
    from plc_config import *
except ImportError:
    # The fallback values must only be applied when the import fails;
    # applying them unconditionally would override the node's real
    # configuration.
    print("Warning: Configuration file /etc/planetlab/plc_config.py not found")
    PLC_NAME = "PlanetLab"
    PLC_MAIL_SUPPORT_ADDRESS = "support@planet-lab.org"
    PLC_MAIL_SLICE_ADDRESS = "SLICE@slices.planet-lab.org"
# Constants
seconds_per_day = 24 * 60 * 60
# Used to convert average rates (bits/s) into byte limits.
bits_per_byte = 8

# Defaults
# NOTE(review): debug, verbose and nm are referenced by usage(), main()
# and Slice but their assignments are not visible in this copy of the
# file; the values below are the conventional "off" defaults -- confirm.
debug = False
verbose = 0
datafile = "/var/lib/misc/BandwidthMonitor.dat"
# Connection to the Node Manager, opened in main()
nm = None

default_maxrate = bwlimit.get_bwcap()

default_maxexemptrate = bwlimit.bwmax

# 500 Kbit or 5.4 GB per day
default_avgrate = 500000

# 1.5 Mbit or 16.2 GB per day
default_avgexemptrate = 1500000

# Average over 1 day
period = 1 * seconds_per_day
# Body of the notification sent to a slice; rendered once per traffic
# class ("low bandwidth" / "high bandwidth") that was capped.
# NOTE(review): the "since"/"until" lines are reconstructed from the
# 'since' and 'until' parameters that Slice.update() prepares and from
# the two sentences that otherwise end without a period -- confirm the
# exact wording.
template = \
"""
The slice %(slice)s has transmitted more than %(bytes)s from
%(hostname)s to %(class)s destinations
since %(since)s.
Its maximum %(class)s burst rate will be capped at %(avgrate)s
until %(until)s.
Please reduce the average %(class)s transmission rate
of the slice to %(avgrate)s, or %(limit)s per %(period)s.

""".lstrip()

# Trailer appended once to every notification.
footer = \
"""
%(date)s %(hostname)s bwcap %(slice)s
""".lstrip()
def format_bytes(bytes):
    """
    Formats bytes into a string

    Uses decimal (power of 1000) units: values of at least 1000 are
    shown with one decimal place in KB, MB or GB; smaller values are
    shown as a whole number of bytes.
    """

    if bytes >= 1000000000:
        return "%.1f GB" % (bytes / 1000000000.)
    elif bytes >= 1000000:
        return "%.1f MB" % (bytes / 1000000.)
    elif bytes >= 1000:
        return "%.1f KB" % (bytes / 1000.)
    else:
        return "%.0f bytes" % bytes
def format_period(seconds):
    """
    Formats a period in seconds into a string.

    Exactly one day or one hour is returned as a bare unit name so the
    result reads naturally after "per" (the notification template says
    "... per %(period)s"). Anything else is returned as a number with a
    unit.
    """

    # NOTE(review): the two bare-unit strings are reconstructed from the
    # "per %(period)s" wording of the notification template -- confirm.
    if seconds == (24 * 60 * 60):
        return "day"
    elif seconds == (60 * 60):
        return "hour"
    elif seconds > (24 * 60 * 60):
        return "%.1f days" % (seconds / 24. / 60. / 60.)
    elif seconds > (60 * 60):
        return "%.1f hours" % (seconds / 60. / 60.)
    elif seconds > 60:
        return "%.1f minutes" % (seconds / 60.)
    else:
        return "%.0f seconds" % seconds
class Slice:
    """
    Stores the last recorded bandwidth parameters of a slice.

    xid - slice context/VServer ID
    name - slice name
    time - beginning of recording period in UNIX seconds
    bytes - low bandwidth bytes transmitted at the beginning of the recording period
    exemptbytes - high bandwidth bytes transmitted at the beginning of the recording period
    avgrate - average low bandwidth rate to enforce over the recording period
    avgexemptrate - average high bandwidth rate to enforce over the recording period
    """

    def __init__(self, xid, name, maxrate, maxexemptrate, bytes, exemptbytes):
        """
        Record the slice identity and begin its first recording period.

        maxrate and maxexemptrate are the limits currently installed
        for the slice; bytes and exemptbytes are its current counters.
        """
        # reset() reads self.name and self.xid, so set them first.
        self.xid = xid
        self.name = name
        self.reset(maxrate, maxexemptrate, bytes, exemptbytes)

    def query(self, attributes):
        """
        Get values of various slice attributes from the Node Manager

        Returns a list parallel to attributes. An entry is None when
        there is no Node Manager connection, when the query fails, or
        when the attribute is unset (the Node Manager returned the
        default). Failures are logged, never raised.
        """
        values = [None] * len(attributes)

        # Without a connection every attribute is reported as unset.
        if nm is None:
            return values

        try:
            # Read rspec (the NM hash code for the slice)
            rcap = open("/var/run/pl_nm/%s.vm_rcap" % self.name, "r")
            try:
                rspec = rcap.readline().strip()
            finally:
                rcap.close()

            for i, attribute in enumerate(attributes):
                # NM interface allows you to pass in a tuple
                # (attribute, default) instead of just an
                # attribute name. default is returned if the
                # attribute is not set.
                (rc, (value,)) = nm.nm_inspect(rspec, attribute)
                if isinstance(attribute, tuple):
                    default = attribute[1]
                else:
                    # NOTE(review): implicit default assumed to be 0;
                    # confirm against the Node Manager interface.
                    default = 0
                if rc == 0 and value != default:
                    values[i] = value
        except Exception:
            # sys.exc_info() instead of "except Exception, err" so the
            # block parses on both Python 2 and Python 3.
            err = sys.exc_info()[1]
            print("Warning: Exception received while querying Node Manager: %s" % (err,))

        return values

    def reset(self, maxrate, maxexemptrate, bytes, exemptbytes):
        """
        Begin a new recording period. Remove caps by restoring limits
        to their default values.

        maxrate and maxexemptrate are the limits currently installed;
        bwlimit.set() is only called when the restored limits differ
        from them. bytes and exemptbytes become the new baselines.
        """

        # Reset baseline time
        self.time = time.time()

        # Reset baseline byte counts
        self.bytes = bytes
        self.exemptbytes = exemptbytes

        # Query Node Manager for max rate overrides
        (new_maxrate, new_maxexemptrate) = self.query(['nm_net_max_rate', 'nm_net_max_exempt_rate'])
        if new_maxrate is not None:
            new_maxrate *= 1000
        else:
            new_maxrate = default_maxrate
        if new_maxexemptrate is not None:
            new_maxexemptrate *= 1000
        else:
            new_maxexemptrate = default_maxexemptrate

        if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate:
            print("%s reset to %s/%s" %
                  (self.name,
                   bwlimit.format_tc_rate(new_maxrate),
                   bwlimit.format_tc_rate(new_maxexemptrate)))
            bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate)

    def update(self, maxrate, maxexemptrate, bytes, exemptbytes):
        """
        Update byte counts and check if average rates have been
        exceeded. In the worst case (instantaneous usage of the entire
        average daily byte limit at the beginning of the recording
        period), the slice will be immediately capped and will get to
        send twice the average daily byte limit. In the common case,
        it will get to send slightly more than the average daily byte
        limit.

        maxrate and maxexemptrate are the limits currently installed;
        bytes and exemptbytes are the current counters. When a class is
        newly capped the slice is notified by mail.
        """

        # Query Node Manager for max average rate overrides
        # NOTE(review): these are the same attribute names reset() uses
        # for the *maximum* rates, and unlike reset() the values are
        # not multiplied by 1000. Left as found; confirm whether
        # dedicated average-rate attributes were intended.
        (self.avgrate, self.avgexemptrate) = self.query(['nm_net_max_rate', 'nm_net_max_exempt_rate'])
        if self.avgrate is None:
            self.avgrate = default_avgrate
        if self.avgexemptrate is None:
            self.avgexemptrate = default_avgexemptrate

        # Prepare message parameters from the template
        message = ""
        params = {'slice': self.name, 'hostname': socket.gethostname(),
                  'since': time.asctime(time.gmtime(self.time)) + " GMT",
                  'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
                  'date': time.asctime(time.gmtime()) + " GMT",
                  'period': format_period(period)}

        bytelimit = self.avgrate * period / bits_per_byte
        if bytes >= (self.bytes + bytelimit) and \
           maxrate > self.avgrate:
            new_maxrate = self.avgrate
        else:
            new_maxrate = maxrate

        # Format template parameters for low bandwidth message
        params['class'] = "low bandwidth"
        params['bytes'] = format_bytes(bytes - self.bytes)
        params['maxrate'] = bwlimit.format_tc_rate(maxrate)
        params['limit'] = format_bytes(bytelimit)
        params['avgrate'] = bwlimit.format_tc_rate(self.avgrate)

        if verbose:
            print("%(slice)s %(class)s "
                  "%(bytes)s/%(limit)s (%(maxrate)s/%(avgrate)s)" %
                  params)

        # Cap low bandwidth burst rate
        if new_maxrate != maxrate:
            message += template % params
            print("%(slice)s %(class)s capped at %(avgrate)s (%(bytes)s/%(limit)s)" % params)

        exemptbytelimit = self.avgexemptrate * period / bits_per_byte
        if exemptbytes >= (self.exemptbytes + exemptbytelimit) and \
           maxexemptrate > self.avgexemptrate:
            new_maxexemptrate = self.avgexemptrate
        else:
            new_maxexemptrate = maxexemptrate

        # Format template parameters for high bandwidth message
        params['class'] = "high bandwidth"
        params['bytes'] = format_bytes(exemptbytes - self.exemptbytes)
        params['maxrate'] = bwlimit.format_tc_rate(maxexemptrate)
        params['limit'] = format_bytes(exemptbytelimit)
        params['avgrate'] = bwlimit.format_tc_rate(self.avgexemptrate)

        if verbose:
            print("%(slice)s %(class)s "
                  "%(bytes)s/%(limit)s (%(maxrate)s/%(avgrate)s)" %
                  params)

        # Cap high bandwidth burst rate
        if new_maxexemptrate != maxexemptrate:
            message += template % params
            print("%(slice)s %(class)s capped at %(avgrate)s (%(bytes)s/%(limit)s)" % params)

        # Apply parameters
        if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate:
            bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate)

        # Notify slice
        if message:
            self._notify(message, params)

    def _notify(self, message, params):
        """
        Mail message (one rendered template per capped class) to the
        slice. In debug mode the mail is written to stdout instead of
        being piped to sendmail. params is extended in place with the
        mail header fields.
        """
        params['from'] = "%s Support <%s>" % (PLC_NAME, PLC_MAIL_SUPPORT_ADDRESS)
        params['to'] = PLC_MAIL_SLICE_ADDRESS.replace("SLICE", self.name)
        # PLC has a separate list for pl_mom messages
        if PLC_MAIL_SUPPORT_ADDRESS == "support@planet-lab.org":
            params['cc'] = "pl-mom@planet-lab.org"
        else:
            params['cc'] = PLC_MAIL_SUPPORT_ADDRESS
        params['version'] = sys.version.split(" ")[0]

        if debug:
            sendmail = sys.stdout
        else:
            sendmail = os.popen("/usr/sbin/sendmail -t -f%s" % PLC_MAIL_SUPPORT_ADDRESS, "w")

        try:
            # Write headers
            # NOTE(review): the From/Reply-To/To/Cc lines are
            # reconstructed from the 'from', 'to' and 'cc' parameters
            # prepared above -- confirm the exact header set.
            sendmail.write(
"""
Content-type: text/plain
From: %(from)s
Reply-To: %(from)s
To: %(to)s
Cc: %(cc)s
X-Mailer: Python/%(version)s
Subject: pl_mom capped bandwidth of slice %(slice)s on %(hostname)s

""".lstrip() % params)

            # Write message
            sendmail.write(message)

            # Write footer
            sendmail.write(footer % params)
        finally:
            # Always release the sendmail pipe, even if a write fails.
            if sendmail != sys.stdout:
                sendmail.close()
class Logger:
    """
    Simple file-like class for redirecting stdout and stderr to /var/log/messages
    """

    def write(self, text):
        """
        Send text to syslog. Whitespace-only writes are dropped, so a
        newline written separately from the message text does not
        produce an empty log entry.
        """
        # NOTE(review): body reconstructed from the class description
        # and the syslog.openlog() call made before this class is
        # installed as sys.stdout -- confirm.
        text = text.strip()
        if text:
            syslog.syslog(text)
def usage():
    """
    Print the command line summary, showing the current values of the
    debug, verbose, datafile and period settings as the defaults.
    """
    print("""
Usage: %s [OPTIONS]...

Options:
        -d, --debug             Enable debugging (default: %s)
        -v, --verbose           Increase verbosity level (default: %d)
        -f, --file=FILE         Data file (default: %s)
        -s, --slice=SLICE       Constrain monitoring to these slices (default: all)
        -p, --period=SECONDS    Interval in seconds over which to enforce average byte limits (default: %s)
        -h, --help              This message
""".lstrip() % (sys.argv[0], debug, verbose, datafile, format_period(period)))
def main():
    """
    Parse the command line, load the saved per-slice state, reset or
    update every slice reported by bwlimit.get(), drop state for
    slices that no longer exist, and save the state back to the data
    file.
    """
    # Defaults
    global debug, verbose, datafile, period, nm

    # All slices
    names = []

    try:
        longopts = ["debug", "verbose", "file=", "slice=", "period=", "help"]
        (opts, argv) = getopt.getopt(sys.argv[1:], "dvf:s:p:h", longopts)
    except getopt.GetoptError:
        err = sys.exc_info()[1]
        print("Error: " + err.msg)
        usage()
        sys.exit(1)

    for (opt, optval) in opts:
        if opt in ("-d", "--debug"):
            debug = True
        elif opt in ("-v", "--verbose"):
            verbose += 1
            bwlimit.verbose = verbose - 1
        elif opt in ("-f", "--file"):
            datafile = optval
        elif opt in ("-s", "--slice"):
            names.append(optval)
        elif opt in ("-p", "--period"):
            try:
                period = int(optval)
            except ValueError:
                # Report a bad value like any other usage error
                # instead of dying with a traceback.
                print("Error: period must be an integer number of seconds")
                usage()
                sys.exit(1)
        else:
            usage()
            sys.exit(0)

    # Redirect stdout and stderr to syslog
    if not debug:
        syslog.openlog("pl_mom")
        sys.stdout = sys.stderr = Logger()

    try:
        if debug:
            print("Loading %s" % datafile)
        # NOTE: pickle can execute arbitrary code while loading; the
        # data file must only be writable by trusted users.
        f = open(datafile, "r+")
        try:
            (version, slices) = pickle.load(f)
        finally:
            f.close()

        # Check version of data file
        if version != "$Id$":
            print("Not using old version '%s' data file %s" % (version, datafile))
            raise Exception
    except Exception:
        # Missing, unreadable or stale data file: start from scratch.
        version = "$Id$"
        slices = {}

    # Get special slice IDs
    root_xid = bwlimit.get_xid("root")
    default_xid = bwlimit.get_xid("default")

    # Open connection to Node Manager
    socket.setdefaulttimeout(10)
    try:
        nm = xmlrpclib.ServerProxy("http://localhost:812/")
    except Exception:
        err = sys.exc_info()[1]
        print("Warning: Exception received while opening connection to Node Manager: %s" % (err,))
        nm = None

    live = []
    for params in bwlimit.get():
        # NOTE(review): the first two lines of this unpacking are
        # reconstructed (only xid and maxrate are used below); confirm
        # the field order against bwlimit.get().
        (xid, share,
         minrate, maxrate,
         minexemptrate, maxexemptrate,
         bytes, exemptbytes) = params
        live.append(xid)

        # Ignore root and default buckets
        if xid == root_xid or xid == default_xid:
            continue

        name = bwlimit.get_slice(xid)
        if name is None:
            # Orphaned (not associated with a slice) class
            # NOTE(review): placeholder name reconstructed -- confirm.
            name = "%d?" % xid

        # Monitor only the specified slices
        if names and name not in names:
            continue

        if xid in slices:
            state = slices[xid]
            if time.time() >= (state.time + period) or \
               bytes < state.bytes or exemptbytes < state.exemptbytes:
                # Reset to defaults every 24 hours or if it appears
                # that the byte counters have overflowed (or, more
                # likely, the node was restarted or the HTB buckets
                # were re-initialized).
                state.reset(maxrate, maxexemptrate, bytes, exemptbytes)
            else:
                # Update byte counts
                state.update(maxrate, maxexemptrate, bytes, exemptbytes)
        else:
            # New slice, initialize state
            state = slices[xid] = Slice(xid, name, maxrate, maxexemptrate, bytes, exemptbytes)

    # Delete dead slices
    dead = Set(slices.keys()) - Set(live)
    for xid in dead:
        del slices[xid]

    # Close connection to Node Manager
    nm = None

    if debug:
        print("Saving %s" % datafile)
    f = open(datafile, "w")
    try:
        pickle.dump((version, slices), f)
    finally:
        f.close()
# Run the monitor only when executed as a script (e.g. from cron), not
# when the module is imported.
if __name__ == '__main__':
    main()