3 # Average bandwidth monitoring script. Run periodically via cron(8) to
4 # enforce a soft limit on daily bandwidth usage for each slice. If a
5 # slice is found to have exceeded its daily bandwidth usage when the
6 # script is run, its instantaneous rate will be capped at the desired
7 # average rate. Thus, in the worst case, a slice will only be able to
8 # send a little more than twice its average daily limit.
10 # Two separate limits are enforced, one for destinations exempt from
11 # the node bandwidth cap, and the other for all other destinations.
13 # Mark Huang <mlhuang@cs.princeton.edu>
14 # Andy Bavier <acb@cs.princeton.edu>
15 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
16 # Copyright (C) 2004-2006 The Trustees of Princeton University
18 # $Id: bwmon.py,v 1.5.2.2 2006/08/21 21:27:35 mlhuang Exp $
# Number of seconds in one day; the default recording period below is one day.
38 seconds_per_day = 24 * 60 * 60
# File holding the pickled (version, slices) state that main loads at startup
# and rewrites before exiting.
44 datafile = "/var/lib/misc/bwmon.dat"
47 # Burst to line rate (or node cap). Set by NM.
48 default_maxrate = bwlimit.get_bwcap()
49 default_maxexemptrate = bwlimit.bwmax
51 # What we cap to when slices break the rules.
52 # 500 Kbit or 5.4 GB per day
53 #default_avgrate = 500000
54 # 1.5 Mbit or 16.4 GB per day
55 #default_avgexemptrate = 1500000
57 # 5.4 Gbyte per day, i.e. 5.4 * 1024 * 1024 * 1024 bytes (rounded).
58 # Maximum number of bytes a slice may transfer per recording period.
59 default_ByteMax = 5798205850
# Usage (80% of the maximum) at which Slice.update starts capping the rate.
60 default_ByteThresh = int(.8 * default_ByteMax)
61 # 16.4 Gbyte per day: maximum bytes transferred per recording period to I2
# (exempt) destinations, i.e. 16.4 * 1024 * 1024 * 1024 bytes (rounded).
62 default_ExemptByteMax = 17609365914
# Same 80% threshold for the exempt class.
63 default_ExemptByteThresh = int(.8 * default_ExemptByteMax)
# Length of the recording period in seconds. The usage text documents
# -p/--period as the way to choose a different interval.
67 period = 1 * seconds_per_day
72 The slice %(slice)s has transmitted more than %(bytes)s from
73 %(hostname)s to %(class)s destinations
76 Its maximum %(class)s burst rate will be capped at %(new_maxrate)s
79 Please reduce the average %(class)s transmission rate
80 of the slice %(limit)s per %(period)s.
86 %(date)s %(hostname)s bwcap %(slice)s
91 Stores the last recorded bandwidth parameters of a slice.
93 xid - slice context/VServer ID
95 time - beginning of recording period in UNIX seconds
96 bytes - low bandwidth bytes transmitted at the beginning of the recording period
97 exemptbytes - high bandwidth bytes transmitted at the beginning of the recording period (for I2 -F)
98 ByteMax - total volume of data allowed
99 ByteThresh - After thresh, cap node to (maxbyte - bytes)/(time left in period)
100 ExemptByteMax - Same as above, but for i2.
101 ExemptByteThresh - i2 ByteThresh
102 maxrate - max_rate slice attribute.
103 maxexemptrate - max_exempt_rate slice attribute.
# __init__(): create the saved state for one slice.
#   xid, name                - identify the slice (reset() and
#                              updateSliceAttributes() read self.xid / self.name).
#   maxrate, maxexemptrate   - passed straight on to reset().
#   bytes, exemptbytes       - passed straight on to reset().
# main builds a Slice from the values reported by bwlimit.get() for a class
# that has no saved state yet.
# NOTE(review): the lines between the signature and the first assignment below
# are not part of this excerpt; presumably they store xid and name - confirm.
107 def __init__(self, xid, name, maxrate, maxexemptrate, bytes, exemptbytes):
# Start from the module-level defaults; reset() below refreshes them through
# updateSliceAttributes().
113 self.ByteMax = default_ByteMax
114 self.ByteThresh = default_ByteThresh
115 self.ExemptByteMax = default_ExemptByteMax
116 self.ExemptByteThresh = default_ExemptByteThresh
117 self.maxrate = default_maxrate
118 self.maxexemptrate = default_maxexemptrate
120 # Get real values where applicable
121 self.reset(maxrate, maxexemptrate, bytes, exemptbytes)
def updateSliceAttributes(self):
    """
    Refresh this slice's rate and byte limits from Node Manager.

    Issues one nm.query() for the slice's nm_net_* attributes and stores
    the reply in maxrate, maxexemptrate, ByteMax, ByteThresh,
    ExemptByteMax and ExemptByteThresh.  Byte limits are exchanged
    divided by 1024 and multiplied back here.  A non-zero average rate
    in the reply replaces the matching byte limit with rate * period and
    its threshold with 80% of that.

    Returns nothing and never raises: if the query or the conversion of
    the reply fails, a warning is printed and every stored limit is left
    exactly as it was.
    """
    try:
        # Each entry pairs an attribute name with the value sent with it.
        # NOTE(review): the second element looks like the value NM hands
        # back when the slice does not set the attribute - confirm
        # against the NM API.
        reply = nm.query(self.name,
                         [('nm_net_max_rate', self.maxrate),
                          ('nm_net_max_exempt_rate', self.maxexemptrate),
                          ("nm_net_max_byte", int(self.ByteMax / 1024)),
                          ("nm_net_max_exempt_byte", int(self.ExemptByteMax / 1024)),
                          ("nm_net_max_thresh_byte", int(.8 * self.ByteMax / 1024)),
                          ("nm_net_max_thresh_exempt_byte", int(.8 * self.ExemptByteMax / 1024)),
                          ("nm_net_avg_rate", 0),
                          ("nm_net_avg_exempt_rate", 0)])

        # NOTE(review): the reply is assumed to come back in request
        # order - confirm.
        (maxrate,
         maxexemptrate,
         ByteMax,
         ExemptByteMax,
         ByteThresh,
         ExemptByteThresh,
         avgrate,
         avgexemptrate) = reply

        # The limits were biased by 1024 so as not to overflow xmlrpc;
        # scale them back to bytes.  Each threshold must come from its
        # own reply value, not from the maximum, or capping would only
        # start once the whole allowance is already spent.
        ByteMax = ByteMax * 1024
        ByteThresh = ByteThresh * 1024
        ExemptByteMax = ExemptByteMax * 1024
        ExemptByteThresh = ExemptByteThresh * 1024

        # 0 is sent for the average rates.  If one comes back non-zero,
        # a rate limit is configured for the slice: ignore the byte
        # limits and derive them from the rate over one period.  If/when
        # PLC switches to byte limits, the average rates will come back
        # as 0 and this branch is skipped.
        if avgrate != 0:
            ByteMax = avgrate * period
            ByteThresh = int(ByteMax * .8)

        if avgexemptrate != 0:
            ExemptByteMax = avgexemptrate * period
            ExemptByteThresh = int(ExemptByteMax * .8)

        # Commit only once everything above has succeeded, so a bad
        # reply cannot leave the slice with half-updated limits.
        self.maxrate = maxrate
        self.maxexemptrate = maxexemptrate
        self.ByteMax = ByteMax
        self.ByteThresh = ByteThresh
        self.ExemptByteMax = ExemptByteMax
        self.ExemptByteThresh = ExemptByteThresh

    except Exception:
        # sys.exc_info() keeps this handler valid on every Python 2
        # release as well as on Python 3.
        err = sys.exc_info()[1]
        print("Warning: Exception received while querying NM: %s" % (err,))
# reset(): begin a new recording period for this slice.
# Called from __init__, and from main when the period has elapsed or one of
# the byte counters is lower than the saved baseline.
#   maxrate, maxexemptrate - compared with the limits stored on the slice after
#                            the Node Manager refresh.
#   bytes, exemptbytes     - current counters; exemptbytes becomes the new
#                            baseline below (the matching assignment for bytes
#                            is presumably on a line missing from this excerpt).
170 def reset(self, maxrate, maxexemptrate, bytes, exemptbytes):
172 Begin a new recording period. Remove caps by restoring limits
173 to their default values.
176 # Query Node Manager for max rate overrides
177 self.updateSliceAttributes()
179 # Reset baseline time
180 self.time = time.time()
182 # Reset baseline byte counts
184 self.exemptbytes = exemptbytes
# When the stored limits differ from the rates passed in, log the change; the
# stored limits are handed to bwlimit.set for this slice's xid.
# NOTE(review): indentation is lost in this excerpt, so whether the
# bwlimit.set call is nested inside this `if` cannot be confirmed here. The
# tuple that feeds the format string is also only partly visible.
186 if (self.maxrate != maxrate) or (self.maxexemptrate != maxexemptrate):
187 print "%s reset to %s/%s" % \
189 #bwlimit.format_tc_rate(self.maxrate),
191 #bwlimit.format_tc_rate(self.maxexemptrate))
193 bwlimit.set(xid = self.xid, maxrate = self.maxrate, maxexemptrate = self.maxexemptrate)
def update(self, maxrate, maxexemptrate, bytes, exemptbytes):
    """
    Check this slice's usage in the current recording period against
    its byte limits and cap its burst rates where a threshold has been
    crossed.

    maxrate, maxexemptrate - rates currently in force for the slice
    bytes, exemptbytes     - current low/high bandwidth byte counters

    Usage is the counter minus the baseline saved by reset().  Once
    usage reaches the class threshold, the new rate for that class is
    (limit - usage) / (time left in period); otherwise the rate passed
    in is kept.  bwlimit.set() is called only when at least one of the
    two rates changes.  Returns nothing.
    """

    # Query Node Manager for max rate overrides
    self.updateSliceAttributes()

    # Seconds left in this recording period.  The floor of one second
    # keeps the division below safe when the period is just expiring.
    time_left = max(1, period - (time.time() - self.time))

    def allowed_rate(limit, used):
        """Return the rate that spreads what is left of limit over time_left."""
        # NOTE(review): this is 0 once the whole allowance is spent;
        # confirm bwlimit.set accepts that or add a floor.
        return int(max(0, limit - used) / time_left)

    # Prepare message parameters from the template
    message = ""
    params = {'slice': self.name, 'hostname': socket.gethostname(),
              'since': time.asctime(time.gmtime(self.time)) + " GMT",
              'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
              'date': time.asctime(time.gmtime()) + " GMT",
              'period': format_period(period)}

    used = bytes - self.bytes
    if used >= self.ByteThresh:
        new_maxrate = allowed_rate(self.ByteMax, used)
    else:
        new_maxrate = maxrate

    # Format template parameters for low bandwidth message
    params['class'] = "low bandwidth"
    params['bytes'] = format_bytes(used)
    params['maxrate'] = bwlimit.format_tc_rate(maxrate)
    params['limit'] = format_bytes(self.ByteMax)
    params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate)

    # NOTE(review): the guard line is not visible in the excerpt this
    # was rebuilt from; gating on the module-level verbose flag is
    # inferred - confirm.
    if verbose:
        print("%(slice)s %(class)s "
              "%(bytes)s of %(limit)s (%(new_maxrate)s maxrate)" % params)

    # Cap low bandwidth burst rate
    if new_maxrate != maxrate:
        message += template % params
        print("%(slice)s %(class)s capped at %(new_maxrate)s " % params)

    # The exempt class is measured with its own counters and limits.
    exempt_used = exemptbytes - self.exemptbytes
    if exempt_used >= self.ExemptByteThresh:
        new_maxexemptrate = allowed_rate(self.ExemptByteMax, exempt_used)
    else:
        new_maxexemptrate = maxexemptrate

    # Format template parameters for high bandwidth message
    params['class'] = "high bandwidth"
    params['bytes'] = format_bytes(exempt_used)
    params['maxrate'] = bwlimit.format_tc_rate(maxexemptrate)
    params['limit'] = format_bytes(self.ExemptByteMax)
    params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxexemptrate)
    # The mail template and the verbose line both read %(new_maxrate)s,
    # so it has to carry the exempt rate while this class is reported.
    params['new_maxrate'] = params['new_maxexemptrate']

    if verbose:
        print("%(slice)s %(class)s "
              "%(bytes)s of %(limit)s (%(new_maxrate)s maxrate)" % params)

    # Cap high bandwidth burst rate
    if new_maxexemptrate != maxexemptrate:
        message += template % params
        print("%(slice)s %(class)s capped at %(new_maxexemptrate)s" % params)

    # Apply the new rates only when something actually changed
    if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate:
        bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate)

    # Mailing the slice is disabled; message is still assembled above so
    # the notification can be switched back on:
    # subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params
    # print message + (footer % params)
    # slicemail(self.name, subject, message + (footer % params))
274 Usage: %s [OPTIONS]...
277 -d, --debug Enable debugging (default: %s)
278 -v, --verbose Increase verbosity level (default: %d)
279 -f, --file=FILE Data file (default: %s)
280 -s, --slice=SLICE Constrain monitoring to these slices (default: all)
281 -p, --period=SECONDS Interval in seconds over which to enforce average byte limits (default: %s)
282 -h, --help This message
283 """.lstrip() % (sys.argv[0], debug, verbose, datafile, format_period(period))
# main(): parse the command line, load the saved slice state from datafile,
# reset or update the state of every class reported by bwlimit.get(), and
# write the state back.
# NOTE(review): the `def` line and a number of short lines (try:, else:,
# continue, simple assignments) are not part of this excerpt, so the nesting
# of the statements below cannot be confirmed here.
287 global debug, verbose, datafile, period, nm
# Parse command line options; a getopt error is reported with its message.
292 longopts = ["debug", "verbose", "file=", "slice=", "period=", "help"]
293 (opts, argv) = getopt.getopt(sys.argv[1:], "dvf:s:p:h", longopts)
294 except getopt.GetoptError, err:
295 print "Error: " + err.msg
299 for (opt, optval) in opts:
300 if opt == "-d" or opt == "--debug":
302 elif opt == "-v" or opt == "--verbose":
304 bwlimit.verbose = verbose - 1
305 elif opt == "-f" or opt == "--file":
307 elif opt == "-s" or opt == "--slice":
309 elif opt == "-p" or opt == "--period":
315 # Check if we are already running
319 # Redirect stdout and stderr to syslog
320 syslog.openlog("bwmon")
321 sys.stdout = sys.stderr = Logger()
# Load the (version, slices) state saved by the previous run.
# NOTE(review): pickle.load can execute arbitrary code from a crafted file, so
# datafile must only be writable by trusted users.
324 f = open(datafile, "r+")
326 print "Loading %s" % datafile
327 (version, slices) = pickle.load(f)
329 # Check version of data file
330 if version != "$Id: bwmon.py,v 1.5.2.2 2006/08/21 21:27:35 mlhuang Exp $":
331 print "Not using old version '%s' data file %s" % (version, datafile)
334 version = "$Id: bwmon.py,v 1.5.2.2 2006/08/21 21:27:35 mlhuang Exp $"
337 # Get special slice IDs
338 root_xid = bwlimit.get_xid("root")
339 default_xid = bwlimit.get_xid("default")
341 #Open connection to Node Manager. Global.
345 # Get actual running values from tc.
346 for params in bwlimit.get():
349 minexemptrate, maxexemptrate,
350 bytes, exemptbytes) = params
354 print("\nRunning stats for %s from tc."\
355 "\nminrate %s, maxrate %s, minexemptrate %s,"\
356 " maxexemptrate %s, bytes %s, exemptbytes %s" % \
357 (bwlimit.get_slice(xid),
358 bwlimit.format_tc_rate(minrate),
359 bwlimit.format_tc_rate(maxrate),
360 bwlimit.format_tc_rate(minexemptrate),
361 bwlimit.format_tc_rate(maxexemptrate),
365 # Ignore root and default buckets
366 if xid == root_xid or xid == default_xid:
369 name = bwlimit.get_slice(xid)
371 # Orphaned (not associated with a slice) class
374 # Monitor only the specified slices
375 if names and name not in names:
377 #slices is populated from the pickle file
378 #xid is populated from bwlimit (read from /etc/passwd)
379 if slices.has_key(xid):
381 if time.time() >= (slice.time + period) or \
382 bytes < slice.bytes or exemptbytes < slice.exemptbytes:
383 # Reset to defaults every 24 hours or if it appears
384 # that the byte counters have overflowed (or, more
385 # likely, the node was restarted or the HTB buckets
386 # were re-initialized).
387 slice.reset(maxrate, maxexemptrate, bytes, exemptbytes)
390 slice.update(maxrate, maxexemptrate, bytes, exemptbytes)
392 # New slice, initialize state
393 slice = slices[xid] = Slice(xid, name, maxrate, maxexemptrate, bytes, exemptbytes)
# Saved slices that are not in `live`.
# NOTE(review): the code that fills `live` and the code that consumes `dead`
# are not visible in this excerpt.
396 dead = Set(slices.keys()) - Set(live)
# Write the version tag and the slice table back for the next run.
401 print "Saving %s" % datafile
402 f = open(datafile, "w")
403 pickle.dump((version, slices), f)
408 if __name__ == '__main__':